Sample of my data:
[
{
"id": "0001",
"sport_key": "americanfootball_nfl",
"sport_title": "NFL",
"commence_time": "2022-10-28T00:15:00Z",
"home_team": "Tampa Bay Buccaneers",
"away_team": "Baltimore Ravens",
"bookmakers": [
{
"key": "betonlineag",
"title": "BetOnline.ag",
"last_update": "2022-10-26T00:34:17Z",
"markets": [
{
"key": "h2h",
"outcomes": [
{
"name": "Baltimore Ravens",
"price": 1.8
},
{
"name": "Tampa Bay Buccaneers",
"price": 2.04
}
]
}
]
},
{
"key": "fanduel",
"title": "FanDuel",
"last_update": "2022-10-26T00:34:30Z",
"markets": [
{
"key": "h2h",
"outcomes": [
{
"name": "Baltimore Ravens",
"price": 1.85
},
{
"name": "Tampa Bay Buccaneers",
"price": 2.0
}
]
}
]
}
]
},
{
"id": "0002",
"sport_key": "americanfootball_nfl",
"sport_title": "NFL",
"commence_time": "2022-10-30T13:30:00Z",
"home_team": "Jacksonville Jaguars",
"away_team": "Denver Broncos",
"bookmakers": [
{
"key": "betonlineag",
"title": "BetOnline.ag",
"last_update": "2022-10-26T00:34:17Z",
"markets": [
{
"key": "h2h",
"outcomes": [
{
"name": "Denver Broncos",
"price": 2.2
},
{
"name": "Jacksonville Jaguars",
"price": 1.71
}
]
}
]
},
{
"key": "betrivers",
"title": "BetRivers",
"last_update": "2022-10-26T00:34:31Z",
"markets": [
{
"key": "h2h",
"outcomes": [
{
"name": "Denver Broncos",
"price": 2.26
},
{
"name": "Jacksonville Jaguars",
"price": 1.7
}
]
}
]
}
]
}
]
I would like for my data to look like this:
id | sport_title | home_team | away_team | bookmaker_name | market_type | home_team_odds | away_team_odds |
---|---|---|---|---|---|---|---|
0001 | NFL | Tampa Bay Buccaneers | Baltimore Ravens | belonlineag | h2h | 2.04 | 1.8 |
0001 | NFL | Tampa Bay Buccaneers | Baltimore Ravens | fanduel | h2h | 2.0 | 1.85 |
0002 | NFL | Jacksonville Jaguars | Denver Broncos | betonlineag | h2h | 1.71 | 2.2 |
0002 | NFL | Jacksonville Jaguars | Denver Broncos | betrivers | h2h | 1.7 | 2.26 |
What's tripping me up is that I don't know how to efficiently unpack the list of dictionaries and keep the data I want in a row.
CodePudding user response:
Let's use pd.json_normalize
df = pd.json_normalize(data, record_path =['bookmakers', 'markets'],
meta=['id', 'sport_title', 'home_team', 'away_team', ['bookmakers', 'key']])
df = (df.assign(home_team_odds=df['outcomes'].str[1].str['price'],
away_team_odds=df['outcomes'].str[0].str['price'])
.rename(columns={'bookmakers.key': 'bookmaker_name', 'key': 'market_type'})
.drop(columns='outcomes'))
print(df)
market_type id sport_title home_team away_team bookmaker_name home_team_odds away_team_odds
0 h2h 0001 NFL Tampa Bay Buccaneers Baltimore Ravens betonlineag 2.04 1.80
1 h2h 0001 NFL Tampa Bay Buccaneers Baltimore Ravens fanduel 2.00 1.85
2 h2h 0002 NFL Jacksonville Jaguars Denver Broncos betonlineag 1.71 2.20
3 h2h 0002 NFL Jacksonville Jaguars Denver Broncos betrivers 1.70 2.26
CodePudding user response:
Slightly different way.
df = pd.json_normalize(
data=data,
record_path=["bookmakers", "markets", "outcomes"],
meta=["id", "sport_title", "home_team", "away_team", ["markets", "key"], ["markets", "outcomes", "key"]],
).rename(columns={"markets.key": "bookmaker_name", "markets.outcomes.key": "market_type"})
df.columns = df.columns.str.split(".").str[-1]
df = df.assign(
home_team_odds=np.where(df["home_team"].eq(df["name"]), df["price"], ""),
away_team_odds=np.where(df["away_team"].eq(df["name"]), df["price"], "")
).drop(columns=["name", "price"])
df = df.groupby(
["id", "sport_title", "home_team", "away_team", "bookmaker_name", "market_type"]
).agg({"home_team_odds": "last", "away_team_odds": "first"}).reset_index()
print(df)
id sport_title home_team away_team bookmaker_name market_type home_team_odds away_team_odds
0 0001 NFL Tampa Bay Buccaneers Baltimore Ravens betonlineag h2h 2.04 1.8
1 0001 NFL Tampa Bay Buccaneers Baltimore Ravens fanduel h2h 2.0 1.85
2 0002 NFL Jacksonville Jaguars Denver Broncos betonlineag h2h 1.71 2.2
3 0002 NFL Jacksonville Jaguars Denver Broncos betrivers h2h 1.7 2.26