The following code gets player data but each dataset is different. The first data it sees is the quarterback data, so it uses these columns for all the data going forward. How can I change the header so that for every different dataset it encounters, the correct headers are used with the correct data?
import pandas as pd
import csv
from pprint import pprint
from bs4 import BeautifulSoup
import requests
url = 'https://www.espn.com/nfl/boxscore/_/gameId/401326313'  # ESPN box score page to scrape
soup = BeautifulSoup(requests.get(url).content, "html.parser")
rows = soup.select("table.mod-data tr")

# NOTE: the column names are read from the first row only and are then reused
# for every following row, whichever table that row belongs to. This is the
# limitation the question is asking about.
# Text is kept as str (no .encode) so headers and cells are not rendered as b'...'.
headers = [header.get_text(strip=True) for header in rows[0].find_all("th")]
data = [
    dict(zip(headers, [cell.get_text(strip=True) for cell in row.find_all("td")]))
    for row in rows[1:]
]
df = pd.DataFrame(data)

# pd.datetime is no longer available in current pandas; pd.Timestamp.now()
# yields the same timestamp-based filename.
df.to_csv('_Data_{}.csv'.format(pd.Timestamp.now().strftime("%Y-%m-%d %H%M%S")), index=False)

# see what the data looks like at this point
pprint(data)
CodePudding user response:
Here is my attempt. A few things to note. I am not printing to CSV but just showing you the dataframes with the correct header information, you can handle the CSV output later.
You press enter after running the program to see the next tables with different headers.
import pandas as pd
import csv
from pprint import pprint
from bs4 import BeautifulSoup
import requests
#Made this function because the rows we want for different headers are different.
def get_individual_table(headers, rows, start, end):
    """Build, print and return the DataFrame for one table section.

    The rows belonging to different headers sit at different positions in
    ``rows``, so the caller passes the slice boundaries explicitly.

    Args:
        headers: Column names for this section, in cell order.
        rows: Sequence of row elements; each must support ``find_all("td")``
            returning cells that support ``get_text(strip=True)``.
        start: Index of the section's header row; data begins at ``start + 1``.
        end: Index one past the section's last data row (exclusive).

    Returns:
        The DataFrame built from the section's data rows. It is also printed.
    """
    # zip() pairs headers with cells positionally and stops at the shorter of
    # the two, so a row with fewer cells than headers just yields fewer keys.
    data = [
        dict(zip(headers, [cell.get_text(strip=True) for cell in row.find_all("td")]))
        for row in rows[start + 1:end]
    ]
    df = pd.DataFrame(data)
    print(df)
    return df
url = 'https://www.espn.com/nfl/boxscore/_/gameId/401326313'  # ESPN box score page to scrape
soup = BeautifulSoup(requests.get(url).content, "html.parser")
rows = soup.select("table.mod-data tr")

# Only some of the rows carry header information (they contain <th> cells);
# record their indices so each section can use its own headers.
correct_row_indices = [idx for idx, row in enumerate(rows) if row.find_all("th")]
print(correct_row_indices)

# Each section runs from one header row up to the next header row. len(rows)
# is appended as the end of the final section so the last table is not dropped.
section_ends = correct_row_indices[1:] + [len(rows)]
for start, end in zip(correct_row_indices, section_ends):
    # Text is kept as str (no .encode) so headers are not rendered as b'...'.
    headers = [header.get_text(strip=True) for header in rows[start].find_all("th")]
    # Use the function to get the tables individually - you can transform to CSV later.
    get_individual_table(headers, rows, start, end)
    # Press enter to see each individual table.
    input()
As you can see, we get each distinct DataFrame with the different headers as necessary. Observe how each DataFrame matches the table on the ESPN site:
You can choose how to process them individually later in the loop logic. Not all tables are shown. Please keep pressing enter after initially running the program and all the table information on that page will eventually display.
CodePudding user response:
As mentioned, the expected result is not entirely clear, but if you just want to read the tables, use pandas.read_html to achieve your goal. Passing index_col=0 prevents the first column, which has no header, from being named "Unnamed: 0".
# Read the tables from the page; index_col=0 makes the first (header-less) column the index.
pd.read_html('https://www.espn.com/nfl/boxscore/_/gameId/401326313',index_col=0)
Example
import pandas as pd
# pd.read_html returns a list of DataFrames. Each one is written to its own
# numbered file so that later tables do not overwrite earlier ones.
for idx, table in enumerate(pd.read_html('https://www.espn.com/nfl/boxscore/_/gameId/401326313', index_col=0)):
    table.to_csv(f'FILENAME_{idx}.csv')
As an alternative, you can call reset_index() and then use to_csv(index=False):
# Give the index an empty name, turn it back into a regular column, and write the CSV without an index column.
pd.DataFrame(table).rename_axis('').reset_index().to_csv('FILENAME',index=False)
Output of your csv files
,1,2,3,4,T
MIA,7,3,7,0,17
NE,0,10,3,3,16
,C/ATT,YDS,AVG,TD,INT,SACKS,QBR,RTG
Tua TagovailoaT. Tagovailoa,16/27,202,7.5,1,1,2-17,47.5,79.6
TEAM,16/27,185,7.5,1,1,2-17,--,79.6
,C/ATT,YDS,AVG,TD,INT,SACKS,QBR,RTG
Mac JonesM. Jones,29/39,281,7.2,1,0,1-13,76.9,102.6
TEAM,29/39,268,7.2,1,0,1-13,--,102.6
,CAR,YDS,AVG,TD,LONG
Myles GaskinM. Gaskin,9,49,5.4,0,15
Malcolm BrownM. Brown,5,16,3.2,0,5
Jacoby BrissettJ. Brissett,2,4,2.0,0,2
Salvon AhmedS. Ahmed,3,4,1.3,0,8
Tua TagovailoaT. Tagovailoa,4,1,0.3,1,3
TEAM,23,74,3.2,1,15
,CAR,YDS,AVG,TD,LONG
Damien HarrisD. Harris,23,100,4.3,0,35
James WhiteJ. White,4,12,3.0,0,10
Jonnu SmithJ. Smith,1,6,6.0,0,6
Brandon BoldenB. Bolden,1,5,5.0,0,5
Rhamondre StevensonR. Stevenson,1,2,2.0,0,2
TEAM,30,125,4.2,0,35
,REC,YDS,AVG,TD,LONG,TGTS
DeVante ParkerD. Parker,4,81,20.3,0,30,7
Jaylen WaddleJ. Waddle,4,61,15.3,1,36,5
Myles GaskinM. Gaskin,5,27,5.4,0,12,5
Salvon AhmedS. Ahmed,2,24,12.0,0,18,3
Durham SmytheD. Smythe,1,9,9.0,0,9,2
Albert WilsonA. Wilson,0,0,0.0,0,0,2
Mike GesickiM. Gesicki,0,0,0.0,0,0,3
TEAM,16,202,12.6,1,36,27
,REC,YDS,AVG,TD,LONG,TGTS
Nelson AgholorN. Agholor,5,72,14.4,1,25,7
James WhiteJ. White,6,49,8.2,0,26,7
Jakobi MeyersJ. Meyers,6,44,7.3,0,22,9
Jonnu SmithJ. Smith,5,42,8.4,0,11,5
Hunter HenryH. Henry,3,31,10.3,0,16,3
Kendrick BourneK. Bourne,1,17,17.0,0,17,3
Damien HarrisD. Harris,2,17,8.5,0,9,3
Rhamondre StevensonR. Stevenson,1,9,9.0,0,9,1
TEAM,29,281,9.7,1,26,38
,FUM,LOST,REC
Xavien HowardX. Howard,0,0,1
Zach SielerZ. Sieler,0,0,1
TEAM,0,0,2
,FUM,LOST,REC
David AndrewsD. Andrews,0,0,1
Damien HarrisD. Harris,1,1,0
Rhamondre StevensonR. Stevenson,1,1,0
Jonnu SmithJ. Smith,1,0,1
Mac JonesM. Jones,1,0,0
TEAM,4,2,2
,tackles,tackles,tackles,tackles,misc,misc,misc,misc
,TOT,SOLO,SACKS,TFL,PD,QB HTS,TD,Unnamed: 8_level_1
Jerome BakerJ. Baker,12,9,0,0,0,0,0,
Eric RoweE. Rowe,9,6,0,0,0,0,0,
Byron JonesB. Jones,6,5,0,0,1,0,0,
Nik NeedhamN. Needham,6,5,0,0,0,0,0,
Sam EguavoenS. Eguavoen,6,2,0,0,0,3,0,
Xavien HowardX. Howard,5,4,0,0,0,0,0,
Jason McCourtyJ. McCourty,5,3,0,0,1,0,0,
Brennan ScarlettB. Scarlett,5,2,0,0,1,1,0,
Andrew Van GinkelA. Van Ginkel,5,2,0,0,0,1,0,
John JenkinsJ. Jenkins,4,4,0,0,0,0,0,
Emmanuel OgbahE. Ogbah,3,3,0,1,1,1,0,
Zach SielerZ. Sieler,3,2,0,1,0,0,0,
Christian WilkinsC. Wilkins,3,2,0,0,0,1,0,
Elandon RobertsE. Roberts,2,2,0,0,1,1,0,
Jamal PerryJ. Perry,2,2,0,0,0,0,0,
Brandon JonesB. Jones,2,2,0,0,0,0,0,
Jevon HollandJ. Holland,2,2,0,0,0,0,0,
Adam ButlerA. Butler,2,1,0,0,0,0,0,
Mack HollinsM. Hollins,2,0,0,0,0,0,0,
TeamTeam,1,1,1,0,0,0,0,
Mike GesickiM. Gesicki,1,1,0,0,0,0,0,
Durham SmytheD. Smythe,1,0,0,0,0,0,0,
Jaelan PhillipsJ. Phillips,0,0,0,0,0,1,0,
TEAM,87,60,1,2,5,9,0,
,tackles,tackles,tackles,tackles,misc,misc,misc,misc
,TOT,SOLO,SACKS,TFL,PD,QB HTS,TD,Unnamed: 8_level_1
Kyle DuggerK. Dugger,7,6,0,1,0,0,0,
Devin McCourtyD. McCourty,7,4,0,0,0,0,0,
Ja'Whaun BentleyJ. Bentley,4,4,0,1,0,0,0,
Matthew JudonM. Judon,4,3,0,1,0,1,0,
Lawrence GuyL. Guy,4,2,0,0,0,1,0,
Dont'a HightowerD. Hightower,4,2,0,0,0,0,0,
J.C. JacksonJ.C. Jackson,3,3,0,0,1,0,0,
Kyle Van NoyK. Van Noy,3,2,1,1,1,1,0,
Adrian PhillipsA. Phillips,3,2,0,2,0,0,0,
Davon GodchauxD. Godchaux,3,2,0,0,0,0,0,
Jalen MillsJ. Mills,2,2,0,0,1,0,0,
Josh UcheJ. Uche,1,1,1,1,0,1,0,
Carl DavisC. Davis,1,1,0,0,0,0,0,
Chase WinovichC. Winovich,1,1,0,0,0,0,0,
Joejuan WilliamsJ. Williams,1,1,0,0,0,0,0,
Christian BarmoreC. Barmore,1,0,0,0,0,0,0,
Jonathan JonesJ. Jones,0,0,0,0,1,0,0,
TEAM,49,36,2,7,4,4,0,
,INT,YDS,TD
No Miami Interceptions,,,
,INT,YDS,TD
Jonathan JonesJ. Jones,1,0,0
TEAM,1,0,0
,NO,YDS,AVG,LONG,TD
No Miami Kick Returns,,,,,
,NO,YDS,AVG,LONG,TD
Brandon BoldenB. Bolden,1,23,23.0,23,0
Gunner OlszewskiG. Olszewski,1,17,17.0,17,0
TEAM,2,40,20.0,23,0
,NO,YDS,AVG,LONG,TD
Jakeem Grant Sr.J. Grant Sr.,1,18,18.0,18,0
TEAM,1,18,18.0,18,0
,NO,YDS,AVG,LONG,TD
Gunner OlszewskiG. Olszewski,3,20,6.7,14,0
TEAM,3,20,6.7,14,0
,FG,PCT,LONG,XP,PTS
Jason SandersJ. Sanders,1/1,100.0,48,2/2,5
TEAM,1/1,100.0,48,2/2,5
,FG,PCT,LONG,XP,PTS
Nick FolkN. Folk,3/3,100.0,42,1/1,10
TEAM,3/3,100.0,42,1/1,10
,NO,YDS,AVG,TB,In 20,LONG
Michael PalardyM. Palardy,4,180,45.0,1,0,52
TEAM,4,180,45.0,1,0,52
,NO,YDS,AVG,TB,In 20,LONG
Jake BaileyJ. Bailey,2,99,49.5,1,0,62
TEAM,2,99,49.5,1,0,62
TEAM,W,L,T,PCT,PF,PA
Buffalo,9,6,0,0.6,427,264
New England,9,6,0,0.6,388,260
Miami,8,7,0,0.533,305,315
New York,4,11,0,0.267,276,449