Here's the full code (I added Adblock Plus as a Chrome extension; its path needs to be adjusted to test locally):
import pandas as pd
import bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
import time
import os
# Script from the question: opens each squad page in Chrome (with the Adblock
# Plus extension loaded), accepts the cookie banner, then for a number of
# seasons clicks every "Get table as CSV (for Excel)" button and follows the
# "Previous Season" link.

# activate adblock plus
path_to_extension = '/home/andreas/.config/google-chrome/Default/Extensions/cfhdojbkjhnklbpkdaibdccddilifddb/3.11.4_0'
options = Options()
options.add_argument('load-extension=' + path_to_extension)
# uses Chrome driver in usr/bin/ from https://chromedriver.chromium.org/downloads
driver = webdriver.Chrome(options=options)
# wait and switch back to the tab with the desired source
# (presumably the extension opens its own tab on start-up)
time.sleep(5)
driver.switch_to.window(driver.window_handles[0])
NO_OF_PREV_SEASONS = 5
df = pd.DataFrame()
urls = ['https://fbref.com/en/squads/247c4b67/Arminia-Stats']
for url in urls:
    driver.get(url)
    html = driver.page_source
    soup = bs4.BeautifulSoup(html, 'html.parser')
    # click button -> accept cookies
    element = driver.find_element(By.XPATH, '//button[text()="AGREE"]')
    element.click()
    for i in range(NO_OF_PREV_SEASONS):
        elements = driver.find_elements(By.XPATH, '//button[text()="Get table as CSV (for Excel)"]')
        for element in elements:
            element.click()
            # todo: get data
        # click button -> navigate to next page
        time.sleep(5)
        element = driver.find_element(By.LINK_TEXT, "Previous Season")
        element.click()
driver.quit()
CodePudding user response:
The button is inside the drop-down list (i.e. <span>Share & Export</span>), so you need to hover over it first.
e.g.
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

# Hover over the "Share & Export" menu so its drop-down (which contains the
# CSV button) is shown, then move to the button and click it.
# `driver` is the WebDriver instance created earlier in the script.
action_chain = ActionChains(driver)
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3.
hover = driver.find_element(By.XPATH, "// span[contains(text(),'Share & Export')]")
action_chain.move_to_element(hover).perform()  # hover to show drop down list
driver.execute_script("window.scrollTo(0, 200)")  # scroll down a bit
time.sleep(1)  # wait for scrolling
button = driver.find_element(By.XPATH, "// button[contains(text(),'Get table as CSV (for Excel)')]")
action_chain.move_to_element(button).click().perform()  # move to button and click
time.sleep(3)
CodePudding user response:
This also happens to me sometimes. One way to overcome this problem is by getting the X and Y coordinates of this button and clicking on it.
import pyautogui

# Click each element at its centre using OS-level mouse events instead of
# WebElement.click(). `elements` is the list returned by driver.find_elements().
# NOTE(review): element.location is reported relative to the rendered page,
# while pyautogui.click() takes screen coordinates -- an offset for the browser
# window position / toolbar may be needed; confirm on the target machine.
for element in elements:
    element_pos = element.location
    element_size = element.size
    x_coordinate, y_coordinate = element_pos['x'], element_pos['y']
    e_width, e_height = element_size['width'], element_size['height']
    # centre of the element = top-left corner + half of its size
    click_x = x_coordinate + e_width / 2
    click_y = y_coordinate + e_height / 2
    pyautogui.click(click_x, click_y)
Another solution you may try is to click on the tag that contains this button.
CodePudding user response:
There are several issues here:
- You have to click to open the "Share & Export" tab and then click the "Get table as CSV" button.
- You have to scroll the page to access the tables after the first one.
So, your code can be something like this:
import pandas as pd
import bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
import time
import os

# Answer script: for each squad page, accept the cookie banner, then for every
# table's "Share & Export" menu entry scroll to it, open it and click a button
# inside the opened menu.

# activate adblock plus
path_to_extension = '/home/andreas/.config/google-chrome/Default/Extensions/cfhdojbkjhnklbpkdaibdccddilifddb/3.11.4_0'
options = Options()
options.add_argument('load-extension=' + path_to_extension)
options.add_argument("window-size=1920,1080")
# uses Chrome driver in usr/bin/ from https://chromedriver.chromium.org/downloads
driver = webdriver.Chrome(options=options)
actions = ActionChains(driver)
# Explicit wait used below for the menu button; it was referenced but never
# created in the original snippet. The 20 s timeout is an arbitrary upper bound.
wait = WebDriverWait(driver, 20)
# wait and switch back to the tab with the desired source
time.sleep(5)
driver.switch_to.window(driver.window_handles[0])
NO_OF_PREV_SEASONS = 5
df = pd.DataFrame()
urls = ['https://fbref.com/en/squads/247c4b67/Arminia-Stats']
for url in urls:
    driver.get(url)
    html = driver.page_source
    soup = bs4.BeautifulSoup(html, 'html.parser')
    # click button -> accept cookies
    element = driver.find_element(By.XPATH, '//button[text()="AGREE"]')
    element.click()
    for i in range(NO_OF_PREV_SEASONS):
        # one "hasmore" list item per table heading (the Share & Export menu)
        elements = driver.find_elements(By.XPATH, "//div[@class='section_heading_text']//li[@class='hasmore']")
        for element in elements:
            # move to the menu first so tables further down the page are scrolled into view
            actions.move_to_element(element).perform()
            time.sleep(0.5)
            element.click()
            # NOTE(review): this locator matches the button whose tip is
            # "Get a link directly to this table on this page", whereas the
            # accompanying text talks about the "Get table as CSV" button --
            # confirm which button is intended.
            wait.until(EC.visibility_of_element_located((By.XPATH, "//button[@tip='Get a link directly to this table on this page']"))).click()
            # todo: get data