I am trying to scrape the table, but the spider gives me empty output. This is the page link: https://www.sidmartinbio.org/why-is-the-jugular-vein-so-important/
from scrapy.http import Request
import scrapy
class PushpaSpider(scrapy.Spider):
    """Spider that emits one key/value pair taken from table cells.

    The pair comes from the first text node matched in the first column and
    the first text node matched in the second column of any table body row
    on the start page.
    """

    name = 'pushpa'
    page_number = 1
    start_urls = ['https://www.sidmartinbio.org/why-is-the-jugular-vein-so-important/']
    custom_settings = {
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'DOWNLOAD_DELAY': 1,
        'USER_AGENT': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    }

    def parse(self, response):
        """Yield a single-entry dict built from the first matching cells.

        Each XPath is evaluated against the whole document and only its
        first match is used (``.get()`` gives ``None`` when nothing
        matches), so at most one pair is produced per response.
        """
        first_column_text = response.xpath("//table//tbody/tr/td[1]/text()").get()
        second_column_text = response.xpath("//table//tbody/tr/td[2]/text()").get()
        yield {first_column_text: second_column_text}
CodePudding user response:
It was a bit hard to get the XPath selection right. Now it's working.
from scrapy.http import Request
import scrapy
class PushpaSpider(scrapy.Spider):
    """Spider that extracts the 'Source' row from the page's table.

    It locates the ``<td>`` whose content contains the text ``Source`` and
    pairs that cell's text with the text of the ``<td>`` that follows it in
    the same row.
    """

    name = 'pushpa'
    page_number = 1
    start_urls = [
        'https://www.sidmartinbio.org/why-is-the-jugular-vein-so-important',
    ]

    def parse(self, response):
        """Yield ``{label: value}`` for the 'Source' row, if it is present.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            A single-entry dict mapping the label cell's text to the text of
            its following sibling cell. Nothing is yielded when either cell
            cannot be found.
        """
        key = response.xpath("//td[contains(.,'Source')]/text()").get()
        value = response.xpath(
            "//td[contains(.,'Source')]/following-sibling::td/text()"
        ).get()

        # ``.get()`` returns None when the XPath matches nothing; without this
        # guard the spider would emit a meaningless ``{None: None}`` item.
        if key is None or value is None:
            self.logger.warning(
                "Incomplete 'Source' row on %s (key=%r, value=%r)",
                response.url, key, value,
            )
            return

        yield {key: value}
Output:
{'Source': 'Sigmoid sinus and Inferior petrosal sinus'}