I am extracting email data from a website,
but the output I get looks like this; I want to get only the email address.
this is link
This is my code
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Crawl the wlw.at search results and collect contact data per company page."""

    name = 'pushpa'
    start_urls = ['https://www.wlw.at/de/suche?q=hallenbau']

    def parse(self, response):
        """Schedule one detail-page request for each company link on the results page."""
        company_links = response.xpath(
            "//div[@class='company-title-link-wrap']/a/@href"
        ).getall()
        # hrefs may be relative, so resolve each against the current page URL.
        yield from (
            Request(response.urljoin(link), callback=self.parse_book)
            for link in company_links
        )

    def parse_book(self, response):
        """Yield the website link and the email element found on a company page."""
        website_href = response.xpath(
            "//a[@id='location-and-contact__website']//@href"
        ).get()
        # NOTE: this XPath selects the <span> element itself, so .get() returns
        # the serialized element (markup included) rather than only its text.
        email_span = response.xpath(
            "//a[@id='location-and-contact__email']//span"
        ).get()
        yield {
            'website': website_href,
            'email': email_span,
        }
CodePudding user response:
Just add `text()` to the end of
the selector's XPath expression so that only the text node is returned.
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Crawl the wlw.at search results and collect contact data per company page."""

    name = 'pushpa'
    start_urls = ['https://www.wlw.at/de/suche?q=hallenbau']

    def parse(self, response):
        """Schedule one detail-page request for each company link on the results page."""
        detail_hrefs = response.xpath(
            "//div[@class='company-title-link-wrap']/a/@href"
        ).getall()
        # hrefs may be relative, so resolve each against the current page URL.
        yield from (
            Request(response.urljoin(href), callback=self.parse_book)
            for href in detail_hrefs
        )

    def parse_book(self, response):
        """Yield the website link and the email text found on a company page."""
        contact = {
            'website': response.xpath(
                "//a[@id='location-and-contact__website']//@href"
            ).get(),
            # The trailing //text() selects the text node inside the <span>,
            # so .get() returns the first text string instead of the element.
            'email': response.xpath(
                "//a[@id='location-and-contact__email']//span//text()"
            ).get(),
        }
        yield contact