I've been trying to use async to get rid of the additional callback within the parse method. I know there is a library, inline_requests, which can do it.
However, I wish to stick with async. What I can't understand is how I can issue a POST request within the parse method.
When I issue a POST request using inline_requests, it succeeds:
import scrapy
from inline_requests import inline_requests
class HkexNewsSpider(scrapy.Spider):
    """Spider that re-posts the search form on the start page and yields one total.

    The ``inline_requests`` decorator lets ``parse_item`` issue the form POST
    and consume its response inside the same callback.
    """

    name = "hkexnews"
    start_url = "http://www.hkexnews.hk/sdw/search/searchsdw.aspx"

    def start_requests(self):
        """Request the search page and route its response to ``parse_item``."""
        yield scrapy.Request(self.start_url, callback=self.parse_item)

    @inline_requests
    def parse_item(self, response):
        """Submit the page's form with fixed search values and yield the total.

        Yields:
            A dict with the single key ``"Total Value"``; the value is the
            text matched by the selector, or ``None`` when nothing matches.
        """
        # Start from every named <input> on the page with its current value,
        # so whatever fields the page pre-populated are sent back unchanged.
        payload = {
            item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
            for item in response.css("input[name]")
        }
        # Override the fields that drive the search.
        payload['__EVENTTARGET'] = 'btnSearch'
        payload['txtStockCode'] = '00001'
        payload['txtParticipantID'] = 'A00001'

        # With @inline_requests, yielding a request hands back its response.
        # dont_filter=True because the POST goes to the URL already requested.
        resp = yield scrapy.FormRequest(self.start_url, formdata=payload, dont_filter=True)
        total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
        yield {"Total Value": total_value}
When I try to issue a POST request using async, I get None as the result:
async def parse(self, response):
    """Attempt the form POST from inside an async callback and yield the total.

    Yields:
        A dict with the single key ``"Total Value"``; the value is the text
        matched by the selector, or ``None`` when nothing matches.
    """
    # Start from every named <input> on the page with its current value.
    payload = {
        item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
        for item in response.css("input[name]")
    }
    # Override the fields that drive the search.
    payload['__EVENTTARGET'] = 'btnSearch'
    payload['txtStockCode'] = '00001'
    payload['txtParticipantID'] = 'A00001'

    # NOTE(review): the payload dict is passed as ``body=`` on a plain request
    # here, whereas the inline_requests version sends it via
    # ``FormRequest(formdata=...)``. This is presumably why the selector below
    # finds nothing -- confirm against the Scrapy Request/FormRequest docs.
    request = response.follow(self.start_url, method='POST', body=payload, dont_filter=True)
    # Download the request directly through the engine instead of a callback.
    resp = await self.crawler.engine.download(request, self)
    total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
    yield {"Total Value": total_value}
How can I fetch the result using the latter approach?
CodePudding user response:
import scrapy
class HkexNewsSpider(scrapy.Spider):
    """Spider that re-posts the search form from an async ``parse`` callback.

    The form POST is built as a ``scrapy.FormRequest`` and downloaded directly
    through the crawler engine, so no second callback is needed.
    """

    name = "hkexnews"
    start_urls = ['http://www.hkexnews.hk/sdw/search/searchsdw.aspx']

    async def parse(self, response):
        """Submit the page's form with fixed search values and yield the total.

        Yields:
            A dict with the single key ``"Total Value"``; the value is the
            text matched by the selector, or ``None`` when nothing matches.
        """
        # Start from every named <input> on the page with its current value.
        payload = {
            item.css('::attr(name)').get(default=''): item.css('::attr(value)').get(default='')
            for item in response.css("input[name]")
        }
        # Override the fields that drive the search.
        payload['__EVENTTARGET'] = 'btnSearch'
        payload['txtStockCode'] = '00001'
        payload['txtParticipantID'] = 'A00001'

        # formdata= makes FormRequest build the POST from the dict.
        # dont_filter=True because the POST goes to the URL already requested.
        request = scrapy.FormRequest(self.start_urls[0], formdata=payload, dont_filter=True)
        # NOTE(review): newer Scrapy releases appear to deprecate the ``spider``
        # argument of engine.download() -- confirm against the installed version.
        resp = await self.crawler.engine.download(request, self)
        total_value = resp.css(".ccass-search-total > .shareholding > .value::text").get()
        yield {"Total Value": total_value}
Output:
{'Total Value': '2,546,531,648'}