I have the following code:
# Import the necessary modules from the Selenium library
import selenium.webdriver.common.by
from selenium import webdriver, common
# Run Chrome headless (no graphical window) so the script works without a display.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
browser = webdriver.Chrome(options=options)
try:
    browser.get("https://dex.pokemonshowdown.com/pokemon/")
    # The element with class 'utilichart' is the chart of Pokemon;
    # every `li` inside it is one row of that chart.
    chart = browser.find_element(by=selenium.webdriver.common.by.By.CLASS_NAME, value='utilichart')
    rows = chart.find_elements(by=selenium.webdriver.common.by.By.TAG_NAME, value='li')
    for row in rows:
        try:
            # The `a` element of a row is the link to a Pokemon.
            link = row.find_element(by=selenium.webdriver.common.by.By.TAG_NAME, value='a')
        except selenium.common.exceptions.NoSuchElementException:
            # Rows without a link are skipped.
            continue
        entry = link.get_attribute('data-entry')
        if entry is None:
            # get_attribute() returns None when the attribute is absent;
            # slicing None would raise TypeError, so skip such links.
            continue
        # Drop the first 8 characters of `data-entry`; what remains is the
        # Pokemon name (presumably a fixed prefix is stripped - confirm
        # against the page markup).
        poke = entry[8:]
        print(poke)
finally:
    # Always shut the browser down, even on error, so no headless Chrome
    # process is left running after the script ends.
    browser.quit()
The problem is that, although there are thousands of <li> elements inside the chart, only the first 21 are picked up.
I've tried to get the full page HTML using browser.page_source and analyzing it line by line. It turns out that even that doesn't contain the full page. Also, the page_source I get has a noticeable lack of '\n's. I've tried to give the page some time to load using browser.implicitly_wait(), but it doesn't work.
CodePudding user response:
The problem is that you must scroll down until you reach the end of the page so that all the rows get loaded, for example:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome()
try:
    url = 'https://dex.pokemonshowdown.com/pokemon/'
    driver.get(url)
    # Wait (up to 30 s) for the search text box, then send PAGE_DOWN to it
    # repeatedly to scroll the page down so that further rows get loaded.
    text_input = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, "//*[@class='textbox searchbox']")))
    # 99 key presses, as in the original answer; this fixed count is assumed
    # to be enough to reach the end of the list - raise it if rows are missing.
    for _ in range(1, 100):
        text_input.send_keys(Keys.PAGE_DOWN)
    # Collect every Pokemon link that is now present in the page.
    pokemons = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.XPATH, "//li/a[contains(@href,'/pokemon/')]")))
    # Iterate over the elements already found instead of re-querying the
    # document by 1-based XPath index: `range(1, len(pokemons))` never
    # reached the last link, and each lookup re-scanned the whole page.
    for pokemon in pokemons:
        # The third `span` child of the link holds the text printed as the name.
        name = pokemon.find_element(By.XPATH, "./span[3]").text
        print(name)
finally:
    # Close the browser even if a wait times out, so no Chrome process leaks.
    driver.quit()