I'm trying to write the output of this script to a new CSV file. I'm new to this and still trying to understand the different pieces. I've tried two different ways of writing the file, and each of them works on its own when I don't include the rest of my script, but neither seems to run here.
Is something off elsewhere in the script, or am I doing something that isn't possible here? I don't understand why both approaches work elsewhere but not in this script.
import requests
import pandas as pd
import csv
# Browser-like User-Agent that the session below sends with every request.
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
}
# NCAA play-by-play page URLs to download.
urls = [
'https://stats.ncaa.org/game/play_by_play/12465',
'https://stats.ncaa.org/game/play_by_play/12755',
'https://stats.ncaa.org/game/play_by_play/12640',
'https://stats.ncaa.org/game/play_by_play/12290',
]
# One shared session so the custom headers apply to every GET.
s = requests.Session()
s.headers.update(headers)
for url in urls:
r = s.get(url)
# NOTE(review): pd.read_html returns a *list* of DataFrames, and a list has
# no .to_csv method, so this line raises AttributeError and nothing after it
# runs. Even on a single DataFrame, to_csv(path) returns None, so `dfs`
# would not hold any tables for the loop below.
dfs = pd.read_html(r.text).to_csv('out.csv', index=False)
# Bare expression: in a script its value is computed and discarded.
len(dfs)
# Intended to print each table followed by a separator line.
for df in dfs:
print(df)
print('___________')
Also trying:
import requests
import pandas as pd
import csv
# Browser-like User-Agent that the session below sends with every request.
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
}
# NCAA play-by-play page URLs to download.
urls = [
'https://stats.ncaa.org/game/play_by_play/12465',
'https://stats.ncaa.org/game/play_by_play/12755',
'https://stats.ncaa.org/game/play_by_play/12640',
'https://stats.ncaa.org/game/play_by_play/12290',
]
# One shared session so the custom headers apply to every GET.
s = requests.Session()
s.headers.update(headers)
for url in urls:
r = s.get(url)
# read_html parses every <table> in the page into a list of DataFrames.
dfs = pd.read_html(r.text)
# Bare expression: in a script its value is computed and discarded.
len(dfs)
for df in dfs:
# NOTE(review): mode 'w' truncates pbptest.csv each time it is opened, so
# (assuming this sits inside the loops -- the paste lost its indentation)
# every table overwrites whatever the previous one wrote.
with open('pbptest.csv', 'w') as f:
writer = csv.writer(f)
# NOTE(review): iterating a DataFrame yields its column labels, so
# writerow(df) writes one row of column names and none of the table's data.
writer.writerow(df)
print(df)
print('___________')
CodePudding user response:
Run the code below and check whether the output meets your expectations.
import requests
import pandas as pd
from io import StringIO

# Browser-like User-Agent sent with every request made through the session.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
}

# NCAA play-by-play pages whose tables are combined into one CSV.
urls = [
    'https://stats.ncaa.org/game/play_by_play/12465',
    'https://stats.ncaa.org/game/play_by_play/12755',
    'https://stats.ncaa.org/game/play_by_play/12640',
    'https://stats.ncaa.org/game/play_by_play/12290',
]

s = requests.Session()
s.headers.update(headers)

# Collect every table from every page before writing anything.
d = []
for url in urls:
    r = s.get(url)
    # read_html returns a list of DataFrames (one per <table>). The HTML is
    # wrapped in StringIO because passing literal HTML text directly is
    # deprecated in recent pandas releases.
    d.extend(pd.read_html(StringIO(r.text)))

# Build the combined frame first and write it in a separate step:
# to_csv(path) returns None, so chaining it onto the assignment would leave
# new_df bound to None instead of the data.
new_df = pd.concat(d).reset_index(drop=True)
new_df.to_csv('out.csv', index=False)
CodePudding user response:
Presumably you want a separate CSV file for each table. If so, give each output file its own name.
import requests
import pandas as pd
import csv
from io import StringIO

# Browser-like User-Agent sent with every request made through the session.
# Written as adjacent literals so the value stays one string without an
# unterminated line break inside the quotes.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/103.0.0.0 Safari/537.36"
    )
}

# NCAA play-by-play pages; every table found is written to its own CSV.
urls = [
    'https://stats.ncaa.org/game/play_by_play/12465',
    'https://stats.ncaa.org/game/play_by_play/12755',
    'https://stats.ncaa.org/game/play_by_play/12640',
    'https://stats.ncaa.org/game/play_by_play/12290',
]

s = requests.Session()
s.headers.update(headers)

# A single counter shared by all pages: starting it once, outside the URL
# loop, keeps tables from later pages from overwriting files written for
# earlier pages.
i = 1
for url in urls:
    r = s.get(url)
    # read_html returns a list of DataFrames (one per <table>). The HTML is
    # wrapped in StringIO because passing literal HTML text directly is
    # deprecated in recent pandas releases.
    dfs = pd.read_html(StringIO(r.text))
    for df in dfs:
        df.to_csv(f'out_{i}.csv', index=False)
        # Advance the counter so the next table gets a new file name.
        i += 1
If you want all the data in a single CSV file instead, use:
import requests
import pandas as pd
import csv
from io import StringIO

# Browser-like User-Agent sent with every request made through the session.
# Written as adjacent literals so the value stays one string without an
# unterminated line break inside the quotes.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/103.0.0.0 Safari/537.36"
    )
}

# NCAA play-by-play pages whose tables all go into one CSV.
urls = [
    'https://stats.ncaa.org/game/play_by_play/12465',
    'https://stats.ncaa.org/game/play_by_play/12755',
    'https://stats.ncaa.org/game/play_by_play/12640',
    'https://stats.ncaa.org/game/play_by_play/12290',
]

s = requests.Session()
s.headers.update(headers)

# Gather every table first, then write the file once. Appending each table
# with mode='a' would repeat the header row before every table and would
# keep adding to an out.csv left over from an earlier run.
frames = []
for url in urls:
    r = s.get(url)
    # read_html returns a list of DataFrames (one per <table>). The HTML is
    # wrapped in StringIO because passing literal HTML text directly is
    # deprecated in recent pandas releases.
    frames.extend(pd.read_html(StringIO(r.text)))

# ignore_index renumbers the rows; index=False keeps that index out of the
# file. The default write mode replaces any existing out.csv.
pd.concat(frames, ignore_index=True).to_csv('out.csv', index=False)