Home > Software engineering > Trying to scrape data returns an empty result
Trying to scrape data returns an empty result

Time:02-17

I am trying to scrape data from this page (image: "enter image description here"), but the spider below gives an empty result:

from scrapy import Spider
from scrapy.http import Request




class AuthorSpider(Spider):
    """Spider that starts at the cuma.fr 'annuaires' page and visits each linked entry."""

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Yield one request to ``parse_book`` per ``<h2><a href=...>`` link on the page."""
        for href in response.xpath("//h2/a/@href").extract():
            # Resolve the href against the current page URL before requesting it.
            yield Request(response.urljoin(href), callback=self.parse_book)

    def parse_book(self, response):
        """Yield the text located directly inside ``<div class="adr">`` elements."""
        # NOTE: '/text()' selects only text nodes that are direct children of
        # the div; text wrapped in nested elements is not matched by it.
        address_parts = response.xpath("//div[@class='adr']/text()").getall()
        yield {'coordoness': address_parts}
       

    

CodePudding user response:

Read the comments in the code below: the XPath in `parse_book` needs `//text()` instead of `/text()`.

from scrapy import Spider
from scrapy.http import Request


class AuthorSpider(Spider):
    """Spider that starts at the cuma.fr 'annuaires' page and scrapes each linked entry."""

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Yield one request to ``parse_book`` per ``<h2><a href=...>`` link on the page."""
        for href in response.xpath("//h2/a/@href").extract():
            # Resolve the href against the current page URL before requesting it.
            yield Request(response.urljoin(href), callback=self.parse_book)

    def parse_book(self, response):
        """Yield the non-blank, stripped text pieces found inside ``<div class="adr">``."""
        # The question's query ended in '/text()', which only matches text
        # nodes that are direct children of the div. '//text()' matches every
        # text node anywhere inside the div, including nested elements.
        raw_texts = response.xpath("//div[@class='adr']//text()").getall()
        # Strip surrounding whitespace from each piece, then drop the pieces
        # that end up empty.
        cleaned = [text for text in map(str.strip, raw_texts) if text]
        yield {'coordoness': cleaned}
  • Related