Home > front end >  Extract data from email
Extract data from email

Time:02-15

I am extracting data from an email link, but the output I get looks like this. I want to get only the email address. This is the link: enter image description here

This is my code

from scrapy import Spider
from scrapy.http import Request




class AuthorSpider(Spider):
    """Spider that starts from a wlw.at search page and visits each linked company page.

    ``parse`` collects the hrefs found under ``company-title-link-wrap`` and
    requests each one; ``parse_book`` yields the website and email values
    read from the requested page.
    """
    name = 'pushpa'
    start_urls = ['https://www.wlw.at/de/suche?q=hallenbau']
    def parse(self, response):
        """Yield one request per company link found on the search results page."""
        books = response.xpath("//div[@class='company-title-link-wrap']/a/@href").extract()
        for book in books:
            # Resolve the href against the current page URL before requesting it.
            url = response.urljoin(book)
            yield Request(url, callback=self.parse_book)





    def parse_book(self, response):
        """Yield a dict with the 'website' and 'email' values from a company page."""
        website=response.xpath("//a[@id='location-and-contact__website']//@href").get()
        # NOTE: this XPath selects the <span> element itself (there is no
        # text() step), so .get() returns the element's markup rather than
        # only the text inside it.
        mail = response.xpath("//a[@id='location-and-contact__email']//span").get()

        yield{
            'website':website,
            'email':mail
        }

CodePudding user response:

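Just add text() to the end of the selector's XPath. Without it, the expression selects the whole <span> element, so .get() returns its markup; with text() it returns only the text inside the span.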

from scrapy import Spider
from scrapy.http import Request


class AuthorSpider(Spider):
    """Crawl a wlw.at search results page and yield each company's contact details.

    ``parse`` collects the company links from the search page and schedules
    one request per link; ``parse_book`` extracts the website URL and the
    email text from each requested page.
    """

    name = 'pushpa'
    start_urls = ['https://www.wlw.at/de/suche?q=hallenbau']

    def parse(self, response):
        """Yield a request for every company link on the search results page."""
        profile_links = response.xpath(
            "//div[@class='company-title-link-wrap']/a/@href"
        ).getall()
        for link in profile_links:
            # urljoin resolves the href against the URL of the current response.
            yield Request(response.urljoin(link), callback=self.parse_book)

    def parse_book(self, response):
        """Yield one item holding the page's website URL and email text.

        Either value is ``None`` when the matching element is not found.
        """
        # Read the anchor's own href attribute (child axis, not descendant).
        website = response.xpath(
            "//a[@id='location-and-contact__website']/@href"
        ).get()
        # text() selects the text node inside the span; without it the
        # selector would return the span's markup instead of the address.
        mail = response.xpath(
            "//a[@id='location-and-contact__email']//span//text()"
        ).get()
        if mail is not None:
            # Drop whitespace/newlines surrounding the address in the markup.
            mail = mail.strip()

        yield {
            'website': website,
            'email': mail,
        }
  • Related