I have this kind of json I would transform it into a pandas dataframe, with specific columns names.
{
"data": [
{
"id": 1,
"name": "3Way Result",
"suspended": false,
"bookmaker": {
"data": [
{
"id": 27802,
"name": "Ladbrokes",
"odds": {
"data": [
{
"label": "1",
"value": "1.61",
"probability": "62.11%",
"dp3": "1.610",
"american": -164,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "X",
"value": "3.90",
"probability": "25.64%",
"dp3": "3.900",
"american": 290,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "2",
"value": "5.20",
"probability": "19.23%",
"dp3": "5.200",
"american": 420,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
}
]
}
},
{
"id": 70,
"name": "Pncl",
"odds": {
"data": [
{
"label": "1",
"value": "1.65",
"probability": "60.61%",
"dp3": "1.645",
"american": -154,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "X",
"value": "4.20",
"probability": "23.81%",
"dp3": "4.200",
"american": 320,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "2",
"value": "5.43",
"probability": "18.42%",
"dp3": "5.430",
"american": 443,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
}
]
}
}
]
}
}
],
"meta": {
"plans": [
{
"name": "Football Free Plan",
"features": "Standard",
"request_limit": "180,60",
"sport": "Soccer"
}
],
"sports": [
{
"id": 1,
"name": "Soccer",
"current": true
}
]
}
}
All columns name contains the name of the bookmaker plus the label value.
I would take the value in label and use it as column name with the name of the bookmaker in name
. Then the float
in value
use it as row of the dataframe
Here the Expected Output
1_LadBrokes X_LadBrokes 2_LadBrokes last_update_LadBrokes 1_Pncl X_Pncl 2_Pncl last_update_Pncl
0 1.61 3.9 5.2 2021-10-01 16:41:27.000000 1.65 4.2 5.43 2021-10-01 16:59:18.000000
CodePudding user response:
You can achieve it like so using json_normalize
apply
.
def set_values(x):
data = x["odds.data"]
label = data.get("label")
value = data.get("value")
last_update_date = data.get("last_update").get("date")
name = x["name"]
x[f"{label}_{name}"] = value
x[f"last_update_{name}"] = last_update_date
return x
df = (
pd.json_normalize(data["data"], record_path=["bookmaker", "data"])
.explode("odds.data")
.apply(lambda x: set_values(x), axis=1)
.drop(["odds.data", "id", "name"], axis=1)
.ffill()
.bfill()
.head(1)
)
In [39]: df
Out[39]:
1_Ladbrokes 1_Pncl 2_Ladbrokes 2_Pncl X_Ladbrokes X_Pncl last_update_Ladbrokes last_update_Pncl
0 1.61 1.65 5.20 5.43 3.90 4.20 2021-10-01 16:41:27.000000 2021-10-01 16:59:18.000000
CodePudding user response:
Use pd.json_normalize
and create two subdataframes for value
and last_update
them join them.
out = pd.json_normalize(
data=data['data'],
record_path=['bookmaker', 'data', 'odds', 'data'],
meta=[['bookmaker', 'data', 'name']]
)[['label', 'value', 'last_update.date', 'bookmaker.data.name']]
df1 = out.set_index(out['label'] '_' out['bookmaker.data.name'])['value']
df2 = out.set_index('bookmaker.data.name')['last_update.date'] \
.add_prefix('last_update_').drop_duplicates()
df = pd.concat([df1, df2]).to_frame().T
Output:
>>> df
1_Ladbrokes_Ladbrokes X_Ladbrokes_Ladbrokes 2_Ladbrokes_Ladbrokes 1_Pncl_Pncl X_Pncl_Pncl 2_Pncl_Pncl last_update_Ladbrokes last_update_Pncl
0 1.61 3.90 5.20 1.65 4.20 5.43 2021-10-01 16:41:27.000000 2021-10-01 16:59:18.000000