I'm experimenting with web scrapers for fun. I have a simple scraper that extracts some data from https://www.adamchoi.co.uk/overs/detailed. When I save this data to a JSON file, it looks like this:
{
"date": {
"0": "13-08-2021",
"1": "22-08-2021",
"2": "28-08-2021",
"3": "11-09-2021"
},
"team_1": {
"0": "Brentford",
"1": "Arsenal",
"2": "Man City",
"3": "Arsenal"
},
"results": {
"0": "2 - 0",
"1": "0 - 2",
"2": "5 - 0",
"3": "1 - 0"
},
"team_2": {
"0": "Arsenal",
"1": "Chelsea",
"2": "Arsenal",
"3": "Norwich"
}
}
I want to know if there is a way to merge these per-column objects, or how I can change the code so that each match becomes its own object, like this:
{
"date": "13-08-2021",
"Team_1": "Brentford",
"results": "2 - 0",
"Team_2": "Arsenal",
}
{
"date": "22-08-2021",
"Team_1": "Arsenal",
"results": "0 - 2",
"Team_2": "Chelsea",
}
Here's the Python code:
import pandas
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Scrape the match table from the page and dump the collected columns to JSON.
website = 'https://www.adamchoi.co.uk/overs/detailed'
s = Service('C:...\\chromedriver.exe')
driver = webdriver.Chrome(service=s)

# One list per output column; entry i of each list belongs to the same row.
date = []
team_1 = []
results = []
team_2 = []

try:
    driver.get(website)

    # Click the "All matches" label before reading the table rows.
    all_matches_button = driver.find_element(
        By.XPATH, '//label[@analytics-event="All matches"]'
    )
    all_matches_button.click()

    # Every <tr> is treated as one match; its first four cells are read in order.
    matches = driver.find_elements(By.TAG_NAME, 'tr')
    for match in matches:
        date.append(match.find_element(By.XPATH, './td[1]').text)
        team_1.append(match.find_element(By.XPATH, './td[2]').text)
        results.append(match.find_element(By.XPATH, './td[3]').text)
        team_2.append(match.find_element(By.XPATH, './td[4]').text)
    print(team_1)
finally:
    # Always close the browser, even if an element lookup above raised.
    driver.quit()

data_frame = pd.DataFrame({'date': date, 'team_1': team_1, 'results': results, 'team_2': team_2})
# With the default orient, the file holds one object per column keyed by row index.
data_frame.to_json('results.json')
CodePudding user response:
You can use the pandas library for an easy transformation:
import pandas as pd
# Column-oriented data as saved by the scraper: each column maps a row label
# to that row's value.
matches = {
    "date": {
        "0": "13-08-2021",
        "1": "22-08-2021",
        "2": "28-08-2021",
        "3": "11-09-2021",
    },
    "team_1": {
        "0": "Brentford",
        "1": "Arsenal",
        "2": "Man City",
        "3": "Arsenal",
    },
    "results": {
        "0": "2 - 0",
        "1": "0 - 2",
        "2": "5 - 0",
        "3": "1 - 0",
    },
    "team_2": {
        "0": "Arsenal",
        "1": "Chelsea",
        "2": "Arsenal",
        "3": "Norwich",
    },
}
df = pd.DataFrame(matches)
# orient="index" yields {row_label: {column: value}} directly, so no transpose
# of the frame is needed to get one dict per match.
match_results = df.to_dict(orient="index")
# Original (misspelled) name kept as an alias so existing references still work.
macth_results = match_results
""" Result
{'0': {'date': '13-08-2021',
       'team_1': 'Brentford',
       'results': '2 - 0',
       'team_2': 'Arsenal'},
 '1': {'date': '22-08-2021',
       'team_1': 'Arsenal',
       'results': '0 - 2',
       'team_2': 'Chelsea'},
 '2': {'date': '28-08-2021',
       'team_1': 'Man City',
       'results': '5 - 0',
       'team_2': 'Arsenal'},
 '3': {'date': '11-09-2021',
       'team_1': 'Arsenal',
       'results': '1 - 0',
       'team_2': 'Norwich'}}
"""
CodePudding user response:
You need to pass the orient='records' option to to_json:
# Same column-oriented data as in the question: each column maps a row label
# to that row's value.
columns = {
    'date': {'0': '13-08-2021', '1': '22-08-2021', '2': '28-08-2021', '3': '11-09-2021'},
    'team_1': {'0': 'Brentford', '1': 'Arsenal', '2': 'Man City', '3': 'Arsenal'},
    'results': {'0': '2 - 0', '1': '0 - 2', '2': '5 - 0', '3': '1 - 0'},
    'team_2': {'0': 'Arsenal', '1': 'Chelsea', '2': 'Arsenal', '3': 'Norwich'},
}
df = pd.DataFrame(columns)
# orient='records' serialises the frame as a JSON array with one object per row.
df.to_json(orient='records')
Output:
'[
{"date":"13-08-2021","team_1":"Brentford","results":"2 - 0","team_2":"Arsenal"},
{"date":"22-08-2021","team_1":"Arsenal","results":"0 - 2","team_2":"Chelsea"},
{"date":"28-08-2021","team_1":"Man City","results":"5 - 0","team_2":"Arsenal"},
{"date":"11-09-2021","team_1":"Arsenal","results":"1 - 0","team_2":"Norwich"}
]'