I came with a question regarding this project earlier today that was answered super quickly, so here I am again. The code below scrapes the provided website, pulls the data, and adds a column indicating which instance of the table it is scraping. The next battle I am facing is getting all of the Game Recency instances loaded into big_df, with a column that records which option the Game Recency drop-down is currently set to. If anyone could help me with this last piece of my puzzle I would greatly appreciate it.
https://www.fantasypros.com/daily-fantasy/nba/fanduel-defense-vs-position.php
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time as t
import pandas as pd
# Scrape the defense-vs-position table once per position tab and combine the
# results into a single DataFrame (`big_df`), tagging each row with the tab
# it came from, then print the frame and write it to fanduel.csv.

# Show every column, untruncated, when the combined frame is printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# NOTE(review): the attribute predicate inside "[@]" is empty, which is not a
# valid XPath expression -- it looks like it was stripped when this snippet
# was copied. Restore the original predicate (e.g. the <ul>'s class or id)
# before running.
POSITION_TABS_XPATH = '//ul[@]/li'
DATA_TABLE_XPATH = '//table[@id="data-table"]'

chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service(r'chromedriver\chromedriver')  ## path to where you saved chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(driver, 20)
url = "https://www.fantasypros.com/daily-fantasy/nba/fanduel-defense-vs-position.php"

# Collect one frame per tab and concatenate once at the end; concatenating
# inside the loop re-copies the growing frame on every iteration.
frames = []
try:
    driver.get(url)
    # `time` is imported as `t`, so a bare `sleep(60)` raises NameError.
    # NOTE(review): presumably this long pause lets the page (ads, consent
    # dialogs) settle -- confirm whether a full minute is still needed.
    t.sleep(60)
    tables_list = wait.until(
        EC.presence_of_all_elements_located((By.XPATH, POSITION_TABS_XPATH))
    )
    for x in tables_list:
        x.click()
        category = x.text
        print('selected', category)
        # Fixed pause after the click before the table is read.
        t.sleep(2)
        table = wait.until(
            EC.element_to_be_clickable((By.XPATH, DATA_TABLE_XPATH))
        )
        df = pd.read_html(table.get_attribute('outerHTML'))[0]
        df['Category'] = category.strip()
        frames.append(df)
        print('done, moving to next table')
finally:
    # Always shut the browser down, even if a wait times out mid-scrape.
    driver.quit()

# pd.concat raises on an empty list; fall back to an empty frame instead.
big_df = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()
print(big_df)
big_df.to_csv('fanduel.csv')
CodePudding user response:
This is how you might achieve your end goal:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time as t
import pandas as pd
# Scrape the defense-vs-position table for every combination of "Game
# Recency" drop-down option and position tab, and combine the results into a
# single DataFrame (`big_df`) with `Category` and `Recency` columns recording
# where each row came from. The frame is printed and written to fanduel.csv.

# Show every column, untruncated, when the combined frame is printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# NOTE(review): the attribute predicates inside "[@]" are empty, which is not
# valid XPath -- they look like they were stripped when this snippet was
# copied. Restore the original predicates (e.g. the element's class or id)
# before running.
RECENCY_SELECT_XPATH = '//select[@]'
POSITION_TABS_XPATH = '//ul[@]/li'
DATA_TABLE_XPATH = '//table[@id="data-table"]'

chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver")  ## path to where you saved chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(driver, 20)
url = "https://www.fantasypros.com/daily-fantasy/nba/fanduel-defense-vs-position.php"

# Collect one frame per (recency, tab) pair and concatenate once at the end;
# concatenating inside the loop re-copies the growing frame every iteration.
frames = []
try:
    driver.get(url)
    # Read the option labels up front as plain strings, so the loop below
    # does not hold on to <option> elements across selections.
    select_recency_options = [
        x.text
        for x in wait.until(
            EC.presence_of_all_elements_located(
                (By.XPATH, RECENCY_SELECT_XPATH + '/option')
            )
        )
    ]
    for option in select_recency_options:
        # Re-locate the drop-down on every pass rather than reusing a handle.
        select_recency = Select(
            wait.until(EC.element_to_be_clickable((By.XPATH, RECENCY_SELECT_XPATH)))
        )
        select_recency.select_by_visible_text(option)
        print('selected', option)
        # Fixed pause after changing the selection before reading the tabs.
        t.sleep(2)
        tables_list = wait.until(
            EC.presence_of_all_elements_located((By.XPATH, POSITION_TABS_XPATH))
        )
        for x in tables_list:
            x.click()
            category = x.text
            print('selected', category)
            t.sleep(2)
            table = wait.until(
                EC.element_to_be_clickable((By.XPATH, DATA_TABLE_XPATH))
            )
            df = pd.read_html(table.get_attribute('outerHTML'))[0]
            df['Category'] = category.strip()
            df['Recency'] = option
            frames.append(df)
            print('done, moving to next table')
finally:
    # Always shut the browser down, even if a wait times out mid-scrape.
    driver.quit()

# pd.concat raises on an empty list; fall back to an empty frame instead.
big_df = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()
# `display` only exists inside IPython/Jupyter; as a plain script it raises
# NameError before the CSV is written. `print` works in both environments.
print(big_df)
big_df.to_csv('fanduel.csv')
The result is a (bigger) dataframe:
Team PTS REB AST 3PM STL BLK TO FD PTS Category Recency
0 HOUHouston Rockets 23.54 9.10 5.10 2.54 1.88 1.15 2.65 48.55 ALL Season
1 OKCOklahoma City Thunder 22.22 9.61 5.19 2.70 1.67 1.18 2.52 47.57 ALL Season
2 PORPortland Trail Blazers 22.96 8.92 5.31 2.74 1.63 0.99 2.65 46.84 ALL Season
3 SACSacramento Kings 23.00 9.10 5.03 2.58 1.61 0.95 2.50 46.65 ALL Season
4 ORLOrlando Magic 22.35 9.39 4.94 2.62 1.57 1.04 2.50 46.36 ALL Season
... ... ... ... ... ... ... ... ... ... ... ...
715 TORToronto Raptors 23.33 13.97 2.77 0.57 0.84 1.88 3.38 49.03 C Last 30
716 NYKNew York Knicks 19.78 15.40 2.94 0.53 0.90 1.92 2.17 48.96 C Last 30
717 BKNBrooklyn Nets 19.69 13.60 3.16 0.86 1.10 2.25 2.06 48.74 C Last 30
718 BOSBoston Celtics 17.79 11.95 3.75 0.41 1.66 1.80 2.54 45.60 C Last 30
719 MIAMiami Heat 17.41 14.19 2.16 0.50 1.01 1.52 1.75 43.52 C Last 30
720 rows × 11 columns