I want to scrape all tables from a site. Browser automation is required to reach the tables, so please take that into account. After some research, my attempt is the following:
# Scrape the "id-table-spiel" table from bskv.sportwinner.de after driving
# the UI: open settings, pick the club, then select the last match week.
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Selenium 3 style; in Selenium 4 pass a Service object instead of executable_path.
driver = Firefox(executable_path='/Users/.../PycharmProjects/Sportwinner/geckodriver')
driver.get("https://bskv.sportwinner.de/")

# Open the settings dialog (hover first so the button is interactable).
settings_button = driver.find_element(By.ID, "id-button-einstellungen")
ActionChains(driver).move_to_element(settings_button).perform()
settings_button.click()

# Enter the club name and confirm.
club_input = driver.find_element(By.ID, "id-klub-name")
club_input.click()
club_input.send_keys("Dreieck Schweinfurt")
driver.find_element(By.ID, "id-button-einstellungen-option-ok").click()

# Pick "Letzte Spielwoche" from the league dropdown; wait until it is clickable
# instead of a fixed time.sleep(1).
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "id-dropdown-liga"))
).click()
driver.find_element(By.LINK_TEXT, "Letzte Spielwoche").click()

# BUG FIXES vs. the original attempt:
#  * an id selector needs a leading '#' — "id-table-spiel tr" matched nothing,
#    which is why there was no output at all;
#  * find_elements_by_css_selector() was removed in Selenium 4 — use
#    find_elements(By.CSS_SELECTOR, ...);
#  * wait for the rows to render — the table is loaded asynchronously.
rows = WebDriverWait(driver, 10).until(
    EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "#id-table-spiel tr"))
)
for row in rows:
    print(row.text)
Since I only heard about Selenium a couple of hours ago, I am a total newbie. I have no idea whether this works, because I don't see any output. Can anybody help me with my attempt (I guess it's not correct) and show me how I can see the result? I am using PyCharm to run the code.
CodePudding user response:
Once you reach the desired page — by selecting "Letzte Spielwoche" in the drop-down menu "Eine Liga auswählen" — so that the tables are visible, you can use this code:
# Wait until the results table is rendered, then scroll each row into view
# and print its text content.
wait = WebDriverWait(driver, 30)
wait.until(EC.visibility_of_element_located((By.ID, "id-table-spiel")))

# One WebElement per <tr> of the table.
rows = driver.find_elements(By.XPATH, "//table[@id='id-table-spiel']//descendant::tr")

# BUG FIXES vs. the original answer:
#  * re-querying each row with find_elements() (plural) returns a LIST, and
#    execute_script()/get_attribute() on a list raises AttributeError;
#  * 'j = j 1' had lost its '+' sign and was a SyntaxError.
# Iterating the rows we already found avoids the manual index entirely.
for row in rows:
    driver.execute_script("arguments[0].scrollIntoView(true);", row)
    print(row.get_attribute('innerText'))
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
CodePudding user response:
The execution was so fast that the script was not able to extract details from the table.
You need to apply an implicit wait
or an explicit wait
so that the table data shows up and the details can be extracted.
# Imports Required
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
...
driver = webdriver.Chrome(executable_path="chromedriver.exe")  # Have tried in Chrome

# Implicit wait: applies to every subsequent find_element*/find_elements call.
driver.implicitly_wait(20)

# Or apply an explicit wait like below — block until at least one table row exists.
wait = WebDriverWait(driver, 30)
wait.until(EC.presence_of_element_located((By.XPATH, "//table[@id='id-table-spiel']//tbody/tr")))

# BUG FIX: find_elements_by_xpath() was removed in Selenium 4 —
# use find_elements(By.XPATH, ...) instead.
tableContent = driver.find_elements(By.XPATH, "//table[@id='id-table-spiel']//tbody/tr//div")
for row in tableContent:
    print(row.get_attribute("innerText"))  # row.text works too.
You can try like this:
# Each <a> inside the table is the "+" expander in front of a match row.
# BUG FIXES vs. the original answer: find_elements_by_xpath() was removed in
# Selenium 4 (use find_elements(By.XPATH, ...)), and 'i 1' had lost its '+'.
tableContent = driver.find_elements(By.XPATH, "//table[@id='id-table-spiel']//tbody/tr//a")
for i in range(len(tableContent)):
    tableContent[i].click()  # Clicks on the "+" icon
    # Rows of the detail view that just opened; XPath positions are 1-based,
    # hence i + 1.
    innerrows = driver.find_elements(
        By.XPATH, "//tr[@class='detail-view'][{}]//tr".format(i + 1))
    for inrow in innerrows:
        # The cell data live in <div> tags; collect each row into a list.
        data = [cell.text for cell in inrow.find_elements(By.XPATH, ".//div")]
        print(data)
['', '', '1', '', '2', '', '3', '', '4', '', 'Kegel', '', 'SP', '', 'MP', '', '', '', 'MP', '', 'SP', '', 'Kegel', '', '4', '', '3', '', '2', '', '1', '', '', '']
['Krug, Tobias', '141', '141', '136', '86', '141', '152', '124', '131', 'Brandl, Gerald']
['Keller, Ralf', '148', '135', '139', '130', '140', '111', '154', '145', 'Haschke, Jens']