Good day everyone:
I’d like to get the basketball game data from the web include league , date, time and score ….
The first level for loop works fine to get every league title
for league in leagues:
But the second level for loop
for row in _rows:
I always get all leagues rows ,I just need data for league by league
What should I do to fix it?
Any help will greatly appreciated.
from selenium import webdriver
#from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.common.exceptions import NoSuchElementException
driver = webdriver.Chrome()
driver.set_window_size(1500,1350)
# open url (sorry for the url , cause system always report its a spam)
driver.get("https://" "we" "b2." "sa8" "8" "88.n" "et" "/sp" "ort/Ga" "mes.aspxdevice=pc")
# jump to basketball
locator = (By.XPATH, '//*[@id="menuList"]/div/ul/li[3]/div[2]/a[1]')
pointer = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(locator),
"element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
time.sleep(1)
# date menu
locator = (By.XPATH, '//*[@id="chooseDate"]')
pointer = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(locator),
"element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
# jump to date 1
locator = (By.XPATH, '//*[@id="dateOption"]/a[1]/span[1]')
pointer = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(locator),
"element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
# close AD by double clicl
locator = (By.ID, 'btn_close')
pointer = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(locator),
"element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
actions = ActionChains(driver)
actions.click(pointer).perform()
# list all leagues schedule
leagues = []
leagues = driver.find_elements(By.XPATH, '//*[@id="scheduleBottom"]/table[*]')
for league in leagues:
#print("Block.text=",Block.text,"\n")
#_rows = Block.find_elements(By.TAG_NAME, "tr")
league_Title = league.find_element(By.TAG_NAME ,'caption')
_rows = []
_rows = league.find_elements(By.XPATH, "//*[contains(@id, '_mainRow') or contains(@id, '_secondRow')]")
print("\nleague : ",league_Title.text, 'len(_rows)=',len(_rows))
for row in _rows:
print(league_Title,row.text) #," / _rows=",_rows)
# first_rows = Block.find_element(By.XPATH, "//*[contains(@id, '_mainRow')]")
# second_rows = Block.find_element(By.XPATH, "//*[contains(@id, '_secondRow')]")
print("\trow : ",row.text)
time.sleep(1)
time.sleep(120)
driver.quit()
CodePudding user response:
I think find_element() or find() is for Only one element on page. You will get just the first element of list of elements, if you use find_element() for multi elements on page. And find_elements or findAll() is for all elements on page. This function will return data in Array format. hope this help you some.
CodePudding user response:
I can't run code because page shows Error 404
.
You have to use dot .
at the beginning of xpath
to use path relative to league
_rows = league.find_elements(By.XPATH, ".//...rest...") # <-- dot before `//`
You use absolute xpath
and it searchs in full HTML.