I am using the following python code to scrape a website. The issue which I can't resolve is it scrapes multiple events successfully but only sends the result of the first event to excel. All help greatly appreciated
import requests
import json
import pandas as pd
import xlsxwriter
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0',
'Accept-Language' : 'en-US,en;q=0.5'}
for race in range(1, 3):
url = f"https://s3-ap-southeast-2.amazonaws.com/racevic.static/2019-01-01/flemington/sectionaltimes/race-{race}.json?callback=sectionaltimes_callback"
r = requests.get(url, headers=headers)
json_obj = json.loads(r.text.split('sectionaltimes_callback(')[1].rsplit(')', 1)[0])
print(json_obj)
#df = pd.DataFrame(json_obj['Horses'])
#writer = pd.ExcelWriter('horses.xlsx', engine='xlsxwriter')
#df.to_excel(writer, sheet_name='Sheet1')
#writer.save()
#print(df)
CodePudding user response:
First Create an empty df
and use concat
method to append DataFrame from loop df
will have concated data from the looped and convert to excel
df=pd.DataFrame()
for race in range(1, 3):
url = f"https://s3-ap-southeast-2.amazonaws.com/racevic.static/2019-01-01/flemington/sectionaltimes/race-{race}.json?callback=sectionaltimes_callback"
r = requests.get(url, headers=headers)
json_obj = json.loads(r.text.split('sectionaltimes_callback(')[1].rsplit(')', 1)[0])
main_df=pd.DataFrame(json_obj['Horses'])
df=pd.concat([df,main_df])
df.reset_index(drop=True, inplace=True)
df.to_excel("horses.xlsx")