This code keeps throwing an error, and I cannot get it to save its output to a text file. It keeps failing with:
Traceback (most recent call last):
File "c:\Python39\scrape2.py", line 32, in <module>
response = requests.get(url % page, headers=headers).json()
line 918, in json
raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
requests.exceptions.JSONDecodeError: [Errno Expecting value] : 0
import requests
import json
# Question's original script, kept verbatim: it requests pages 1-999 of the
# search endpoint and writes one tab-separated line per contact to list.txt.
# NOTE(review): this initial value is overwritten by the for-loop variable below.
page = 1
# The f-prefix has no effect here (there are no {} placeholders); the trailing
# %d is filled in later by `url % page`.
url = f"https://api-prod.grip.events/1/container/4368/search?search=&sort=name&order=asc&type_id=4907,4906,5265,4964,4904,1026,4908&page=%d"
headers = {
'authority': 'api-prod.grip.events',
'accept': 'application/json',
'accept-language': 'en-gb',
'content-type': 'application/json',
# NOTE(review): per the first answer below, this conditional-request header is
# what makes the server reply 304 (Not Modified), which leads to the
# JSONDecodeError in the traceback above.
'if-none-match': 'W/"7132-A/vrxQVW3GqTDiJFLQqx9lN Y0s"',
'login-source': 'web',
'origin': 'https://connect.money2020.com',
'referer': 'https://connect.money2020.com/money2020europe/app/home/network/list/34589',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'cross-site',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36',
'x-authorization': 'a422cc2a-31fb-4b4e-a1bd-a34b561adc6c',
'x-grip-version': 'Web/8.3.11',
}
# NOTE(review): this session object is never used; every request below goes
# through the module-level requests.post / requests.get functions.
s = requests.Session()
# NOTE(review): this POST is sent to the URL with the literal "%d" still in it,
# and its result is overwritten by the GET inside the loop before being read.
response = requests.post(url, headers=headers)
with open("list.txt", "w") as f:
for page in range(1, 1000):
# This is the call that raises in the traceback above: .json() cannot decode
# the response body.
response = requests.get(url % page, headers=headers).json()
# NOTE(review): this calls `response` instead of indexing it; the first answer
# below corrects it to response["data"].
contacts = response("data")
for contact in contacts:
target = "%s\t%s\t%s\t%s" % (contact["company_name"], contact["job_title"], contact["name"], contact["job_industry"])
# NOTE(review): as shown, `target "\n"` is not valid Python; presumably a `+`
# between the two was lost when the snippet was pasted - confirm.
f.write(target "\n")
print(target)
CodePudding user response:
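The server is returning HTTP status 304 (Not Modified) because the `if-none-match` header already matches an ETag server-side (this header was presumably copied from browser devtools). A 304 response has no body, so `.json()` has nothing to decode and raises the JSONDecodeError you are seeing.
Just get rid of this header (and some other unnecessary ones), and fix the typo: `contacts = response("data")` should be `contacts = response["data"]`.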
import requests
import json
# Paginated search endpoint; the trailing %d is filled with the page number
# inside the loop below.
url = "https://api-prod.grip.events/1/container/4368/search?search=&sort=name&order=asc&type_id=4907,4906,5265,4964,4904,1026,4908&page=%d"
# Only the authorization header is sent. The browser's if-none-match header is
# deliberately omitted: with it the server answers 304 (Not Modified) with no
# body, and .json() then fails.
headers = {
    'x-authorization': 'a422cc2a-31fb-4b4e-a1bd-a34b561adc6c'
}

# Write one tab-separated line per contact to list.txt, echoing each line to
# stdout. An explicit encoding keeps non-ASCII names from failing under the
# platform default (e.g. cp1252 on Windows).
with open("list.txt", "w", encoding="utf-8") as f:
    for page in range(1, 1000):
        response = requests.get(url % page, headers=headers).json()
        contacts = response["data"]
        if not contacts:
            # Assumes the API returns an empty "data" list once the pages run
            # out - TODO confirm. Stopping here avoids hundreds of pointless
            # requests.
            break
        for contact in contacts:
            target = "%s\t%s\t%s\t%s" % (contact["company_name"], contact["job_title"], contact["name"], contact["job_industry"])
            f.write(target + "\n")
            print(target)
You may also want to check out the `csv` module for writing TSV files.
CodePudding user response:
Are you sure the response is valid JSON? The server could be returning an error, and you haven't handled that scenario.
Try changing the loop to the following; it should print any errors.
# Diagnostic version of the scraping loop: each page is fetched separately and
# any failure is printed instead of aborting the run.
# Relies on `url` (containing a %d page placeholder) and `headers` being
# defined as in the earlier snippets.
with open("list.txt", "w", encoding="utf-8") as f:
    for page in range(1, 1000):
        try:
            response = requests.get(url % page, headers=headers)
            if response.status_code == 200:
                # Separate name for the decoded body so `response` stays the
                # Response object.
                payload = response.json()
                # Index the decoded dict; calling it (payload("data")) raises
                # TypeError.
                contacts = payload["data"]
                if not contacts:
                    # Assumes an empty "data" list means there are no more
                    # pages - TODO confirm.
                    break
                for contact in contacts:
                    target = "%s\t%s\t%s\t%s" % (contact["company_name"], contact["job_title"], contact["name"], contact["job_industry"])
                    f.write(target + "\n")
                    print(target)
            else:
                # Printing the Response object shows its status code,
                # e.g. <Response [304]>.
                print(f"Unsuccessful request: {response}")
        except Exception as e:
            # Deliberate catch-all: the point of this snippet is to surface
            # whatever goes wrong (network error, invalid JSON, missing key)
            # and move on to the next page.
            print(f"Error: {e}")