I am trying to crawl an API using Scrapy from this link.
The API request I am trying to make would solve all my issues, but I am not able to load the response as JSON, so I cannot proceed further.
The code looks long, but only because of the headers and cookies. Please suggest how I can improve it and find a solution.
Here is my scrapy code I did
from datetime import datetime
import json
from urllib.parse import urlencode
import scrapy
from bs4 import BeautifulSoup
from liveshare.items import AGMSpiderItems
class SubIndexSpider(scrapy.Spider):
    """Spider that requests the ``market_summary`` data from merolagani.com.

    A single GET request is sent to the site's ``webrequesthandler.ashx``
    handler and the response body is decoded as JSON in :meth:`parse`.
    """

    name = "subindexes"

    def start_requests(self):
        """Yield the one request for the market-summary handler endpoint.

        The request is sent with browser-like headers and the cookies listed
        below (passed through the ``cookies=`` argument rather than a raw
        ``cookie`` header).
        """
        headers = {
            'authority': 'merolagani.com',
            'accept': 'application/json, text/javascript, */*; q=0.01',
            'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8,ne;q=0.7,ru;q=0.6',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'referer': 'https://merolagani.com/MarketSummary.aspx',
            'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Linux"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
            'x-requested-with': 'XMLHttpRequest',
        }
        params = {
            'type': 'market_summary',
        }
        cookies = {
            'ASP.NET_SessionId': 'bbjd1loebaad4ha2qwwxdcfp',
            '_ga': 'GA1.2.810096005.1667463342',
            '_gid': 'GA1.2.1263273763.1673850832',
            '_gat': '1',
            '__atuvc': '4|3',
            '__atuvs': '63c4efd0a14c6c9b003',
        }
        # The "?" separator is required. Without it the encoded parameters are
        # appended directly to the path ("...ashxtype=market_summary"), which
        # is a different URL from the handler endpoint with a query string.
        api_url = f'https://merolagani.com/handlers/webrequesthandler.ashx?{urlencode(params)}'
        yield scrapy.Request(
            url=api_url,
            method='GET',
            headers=headers,
            cookies=cookies,
            callback=self.parse,
            dont_filter=True,
        )

    def parse(self, response):
        """Print the response headers and body, then the decoded JSON payload.

        Raises:
            json.JSONDecodeError: if the body is not valid JSON. The status
                code and URL are logged first to make the cause easier to see.
        """
        print(response.headers)
        print(response.body)
        try:
            json_response = json.loads(response.body)
        except json.JSONDecodeError:
            self.logger.error(
                "Response is not valid JSON (status %s, url %s)",
                response.status,
                response.url,
            )
            raise
        print(json_response)
But I am getting a JSON decode error and I can't figure out the issue.
error traceback
File "C:\Users\Navar\AppData\Local\Programs\Python\Python39\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 3 column 1 (char 4)
CodePudding user response:
I've used the code - that I simplified - and I got no errors, the JSON data is returned successfully.
Code:
import json

import requests

# Handler endpoint that returns the market summary; note the "?" that
# separates the path from the query string.
url_api = "https://merolagani.com/handlers/webrequesthandler.ashx?type=market_summary"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    "Referer": "https://merolagani.com"
}
# A timeout keeps the script from waiting forever if the server never answers.
page = requests.get(url_api, headers=headers, timeout=30)
# Stop on an HTTP error status instead of trying to decode an error page as JSON.
page.raise_for_status()
js_data = json.loads(page.text)
print(js_data)
Check the JSON result here at anotepad.com
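The error most likely comes from the response your code receives: the server is returning something other than JSON (for example an HTML page), so compare the exact URL your spider requests with the one above and inspect the raw response body.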