Home > OS >  How do I get API data into a Pandas DataFrame?
How do I get API data into a Pandas DataFrame?

Time:10-13

I am pulling in betting data from an API and I would like to get it into a DataFrame. I would like the DataFrame to have the following columns: [away_team, home_team, spread, overUnder]. I am using the following code:

# Fetch college-football betting lines from the cfbd API and pretty-print them.
#
# A `from __future__` import has to come before every other statement in the
# module; placed after other code it is a SyntaxError.
from __future__ import print_function

import time
from pprint import pprint

import cfbd
from cfbd.rest import ApiException

# Send the API key as a bearer token in the Authorization header.
configuration = cfbd.Configuration()
configuration.api_key['Authorization'] = 'XXX'
configuration.api_key_prefix['Authorization'] = 'Bearer'

# create an instance of the API class
api_instance = cfbd.BettingApi(cfbd.ApiClient(configuration))

# Placeholder filter values; none of these variables is passed to get_lines()
# below, which uses literal arguments instead.
game_id = 56 # int | Game id filter (optional)
year = 56 # int | Year/season filter for games (optional)
week = 56 # int | Week filter (optional)
season_type = 'regular' # str | Season type filter (regular or postseason) (optional) (default to regular)
team = 'team_example' # str | Team (optional)
home = 'home_example' # str | Home team filter (optional)
away = 'away_example' # str | Away team filter (optional)
conference = 'conference_example' # str | Conference abbreviation filter (optional)

try:
    # Betting lines
    api_response = api_instance.get_lines(year=2021, week=7, season_type='regular', conference='SEC')
    pprint(api_response)
except ApiException as e:
    # On failure api_response stays undefined; only the error is reported.
    print("Exception when calling BettingApi->get_lines: %s\n" % e)

API Response:

[{'away_conference': 'SEC',
 'away_score': None,
 'away_team': 'Auburn',
 'home_conference': 'SEC',
 'home_score': None,
 'home_team': 'Arkansas',
 'id': 401282104,
 'lines': [{'awayMoneyline': 155,
            'formattedSpread': 'Arkansas -3.5',
            'homeMoneyline': -180,
            'overUnder': '53.5',
            'overUnderOpen': '53.0',
            'provider': 'Bovada',
            'spread': '-3.5',
            'spreadOpen': '-3.5'}],
 'season': 2021,
 'season_type': 'regular',
 'start_date': None,
 'week': 7},
 {'away_conference': 'SEC',
 'away_score': None,
 'away_team': 'Kentucky',
 'home_conference': 'SEC',
 'home_score': None,
 'home_team': 'Georgia',
 'id': 401282105,
 'lines': [{'awayMoneyline': 1000,
            'formattedSpread': 'Georgia -23.5',
            'homeMoneyline': -2200,
            'overUnder': '44.5',
            'overUnderOpen': '44.5',
            'provider': 'Bovada',
            'spread': '-23.5',
            'spreadOpen': '-23.5'}],
 'season': 2021,
 'season_type': 'regular',
 'start_date': None,
 'week': 7},
 {'away_conference': 'SEC',
 'away_score': None,
 'away_team': 'Florida',
 'home_conference': 'SEC',
 'home_score': None,
 'home_team': 'LSU',
 'id': 401282106,
 'lines': [{'awayMoneyline': -370,
            'formattedSpread': 'Florida -10.0',
            'homeMoneyline': 285,
            'overUnder': '58.5',
            'overUnderOpen': '58.0',
            'provider': 'Bovada',
            'spread': '10.0',
            'spreadOpen': '10.0'}],
 'season': 2021,
 'season_type': 'regular',
 'start_date': None,
 'week': 7},
 {'away_conference': 'SEC',
 'away_score': None,
 'away_team': 'Alabama',
 'home_conference': 'SEC',
 'home_score': None,
 'home_team': 'Mississippi State',
 'id': 401282107,
 'lines': [{'awayMoneyline': -950,
            'formattedSpread': 'Alabama -17.5',
            'homeMoneyline': 600,
            'overUnder': '57.5',
            'overUnderOpen': '59.0',
            'provider': 'Bovada',
            'spread': '17.5',
            'spreadOpen': '17.0'}],
 'season': 2021,
 'season_type': 'regular',
 'start_date': None,
 'week': 7},
 {'away_conference': 'SEC',
 'away_score': None,
 'away_team': 'Texas A&M',
 'home_conference': 'SEC',
 'home_score': None,
 'home_team': 'Missouri',
 'id': 401282108,
 'lines': [{'awayMoneyline': -310,
            'formattedSpread': 'Texas A&M -9.0',
            'homeMoneyline': 255,
            'overUnder': '60.5',
            'overUnderOpen': '61.0',
            'provider': 'Bovada',
            'spread': '9.0',
            'spreadOpen': '9.0'}],
 'season': 2021,
 'season_type': 'regular',
 'start_date': None,
 'week': 7},
 {'away_conference': 'SEC',
 'away_score': None,
 'away_team': 'Vanderbilt',
 'home_conference': 'SEC',
 'home_score': None,
 'home_team': 'South Carolina',
 'id': 401282109,
 'lines': [{'awayMoneyline': 750,
            'formattedSpread': 'South Carolina -18.5',
            'homeMoneyline': -1400,
            'overUnder': '51.0',
            'overUnderOpen': '51.0',
            'provider': 'Bovada',
            'spread': '-18.5',
            'spreadOpen': '-20.0'}],
 'season': 2021,
 'season_type': 'regular',
 'start_date': None,
 'week': 7},
 {'away_conference': 'SEC',
 'away_score': None,
 'away_team': 'Ole Miss',
 'home_conference': 'SEC',
 'home_score': None,
 'home_team': 'Tennessee',
 'id': 401282110,
 'lines': [{'awayMoneyline': -150,
            'formattedSpread': 'Ole Miss -3.0',
            'homeMoneyline': 130,
            'overUnder': '80.5',
            'overUnderOpen': '78.0',
            'provider': 'Bovada',
            'spread': '3.0',
            'spreadOpen': '3.0'}],
 'season': 2021,
 'season_type': 'regular',
 'start_date': None,
 'week': 7}]

I need help getting this output into a DataFrame. Thank you in advance.

CodePudding user response:

You could iterate over the JSON data, extract the information you need, and collect it in a new structure. Once you have iterated over all of the data, you can create a DataFrame from what you extracted. Here is an example that uses a dataclass to hold the fields you need (it reads the API response from a file named `sample_data.json`):

import json

import pandas as pd

from dataclasses import dataclass


@dataclass
class BettingData:
    """One betting line of one game, flattened into a single record.

    A list of these records is handed to ``pd.DataFrame`` further down; the
    printed frame shows one column per field, in the order declared here.
    """

    # Taken from the game entry itself.
    away_team: str
    home_team: str
    # Taken from one element of the entry's 'lines' list; stored as given,
    # without numeric conversion (the sample response holds strings like '-3.5').
    spread: str
    overUnder: str


# Load the saved API response. The context manager closes the file handle as
# soon as parsing finishes, and the explicit encoding keeps non-ASCII text
# from depending on the platform default.
with open('sample_data.json', 'r', encoding='utf-8') as source:
    json_data = json.load(source)

# Build one record per (game, betting line) pair: the team names come from the
# game entry, the spread and over/under from each element of its 'lines' list.
content = []
for entry in json_data:
    for line in entry['lines']:
        data = BettingData(away_team=entry['away_team'],
                           home_team=entry['home_team'],
                           spread=line['spread'],
                           overUnder=line['overUnder'])

        content.append(data)


# pandas turns the list of dataclass instances into one column per field.
df = pd.DataFrame(content)

print(df)

And the output is:

    away_team          home_team spread overUnder
0      Auburn           Arkansas   -3.5      53.5
1    Kentucky            Georgia  -23.5      44.5
2     Florida                LSU   10.0      58.5
3     Alabama  Mississippi State   17.5      57.5
4   Texas A&M           Missouri    9.0      60.5
5  Vanderbilt     South Carolina  -18.5      51.0
6    Ole Miss          Tennessee    3.0      80.5
  • Related