I am trying to be able to go to a tournament href, input that into the url then cycle through the rounds before closing that driver and opening a new one to do the same thing. For some reason i can't figure where to put the beginning url.
from selenium import webdriver
import time
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
PATH = "C:\Program Files (x86)\Chrome\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.maximize_window()
ytournaments = ['/dpworld-tour/abu-dhabi-hsbc-championship-2021/']
roundids = [1, 2, 3, 4]
for tournamentid in ytournaments:
for roundid in roundids:
page = driver.get(f"https://www.europeantour.com{ytournaments}leaderboard?holebyhole=true&round={roundid}")
time.sleep(5)
html = driver.page_source
soup = BeautifulSoup(html, 'lxml')
Tour = 'European Tour'
Year = '2021'
tournamentm = soup.find('h1', class_='event-hero__title').text
tournament = tournamentm.strip()
coursem = soup.find('p', class_='event-hero__location').text
course = coursem.strip()
datem = soup.find('p', class_='event-hero__date').text
date = datem.strip()
dfs = pd.read_html(driver.page_source)
df = dfs[0]
ndf = np.squeeze(dfs)
data = pd.DataFrame(ndf)
data["tournament"] = tournament
data["course"] = course
data["date"] = date
data["roundid"] = roundid
data["Tour"] = Tour
data["Year"] = Year
filename = f'{tournament}_{roundid}_{Year}.csv'
data.to_csv(filename)
driver.quit()
driver.quit()
CodePudding user response:
You do not need to .quit()
the driver
with every iteration, cause you are changing its target url.
Main issue should be to construct the correct url
, so change {ytournaments}
:
page = driver.get(f"https://www.europeantour.com{ytournaments}leaderboard?holebyhole=true&round={roundid}")
to {tournamentid}
page = driver.get(f"https://www.europeantour.com{tournamentid}leaderboard?holebyhole=true&round={roundid}")