Home > other >  Loop through pages with a static URL
Loop through pages with a static URL

Time:03-03

I have this line of code below that scrapes/prints 250 stock symbols from page 1.

# Wait up to 30 s until every <table> whose id starts with 'gridview-1070-record' is visible, then print the list of their texts (the stock symbols, per the question).
print([my_elem.text for my_elem in WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table[id^='gridview-1070-record']")))])

Then I have this line of code to click next page and bring me to page 2.

# Wait until the element with id "button-1157" (the next-page button, per the question) is visible, then click it.
wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="button-1157"]'))).click()

Then I scrape/print the next 250 stock symbols from Page 2 and keep going through all the pages by repeating the 2 lines of code.

# Same scrape as for page 1: wait up to 30 s for the matching tables to be visible, then print their texts.
print([my_elem.text for my_elem in WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table[id^='gridview-1070-record']")))])

Can someone show me how to code a loop so I don't have to keep listing these 2 lines for all 60 pages?

Full Code

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
import pandas as pd
import requests
  
# Build the Chrome options: maximized window, automation switches disabled.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
ser = Service("./chromedriver.exe")
# The options must be handed to the driver; without options=... none of the
# settings configured above are applied to the browser session.
browser = driver = webdriver.Chrome(service=ser, options=options)

# Make navigator.webdriver read as undefined in every new document.
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
  "source": """
    Object.defineProperty(navigator, 'webdriver', {
      get: () => undefined
    })
  """
})
driver.execute_cdp_cmd("Network.enable", {})
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})

# One shared 30-second explicit wait is enough for the whole script.
wait = WebDriverWait(driver, 30)
driver.get("https://stockrover.com")
wait.until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[1]/div/section[2]/div/ul/li[2]"))).click()

# Wait for the login form instead of looking it up immediately after the click.
user = wait.until(EC.visibility_of_element_located((By.NAME, "username")))
password = driver.find_element(By.NAME, "password")
# NOTE(review): credentials are hard-coded in the source; consider loading
# them from the environment or a secrets store instead.
user.clear()
user.send_keys("vibajajo64")
password.clear()
password.send_keys("vincer64")
driver.find_element(By.NAME, "Sign In").click()

# Locators shared by every page of the results grid.
symbol_tables = (By.CSS_SELECTOR, "table[id^='gridview-1070-record']")
next_button = (By.XPATH, '//*[@id="button-1157"]')
pages_to_scrape = 3  # same number of pages the copy-pasted version printed

for page in range(pages_to_scrape):
    if page:
        # Move to the next page before every scrape except the first one.
        wait.until(EC.visibility_of_element_located(next_button)).click()
    rows = wait.until(EC.visibility_of_all_elements_located(symbol_tables))
    print([row.text for row in rows])

CodePudding user response:

You can try the approach below.

As already mentioned, you can use a for loop to go through the pages.

# Get the number of pages (18 when this was written): the second
# whitespace-separated token of the pager label is parsed as the page count.
pages = driver.find_element(By.XPATH,"//div[contains(@id,'tbtext')][2]").text.split()
num_pages = int(pages[1])

# Scrape every page, including the last one; "next" is clicked only between
# pages, so there are num_pages prints and num_pages - 1 clicks.
for page in range(num_pages):
    print([my_elem.text for my_elem in WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table[id^='gridview-1070-record']")))])
    if page < num_pages - 1:
        wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="button-1157"]'))).click()

Alternatively, you can keep extracting the details and clicking the Next button until the Next button is disabled.

# Imported here so the snippet is self-contained.
from selenium.common.exceptions import WebDriverException

# Scrape and click "next" until Selenium reports a failure.
# NOTE(review): this relies on the wait/click raising once the last page is
# reached - confirm that a disabled button really raises on this site.
try:
    while True:
        # Print the stock symbols
        print([my_elem.text for my_elem in WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table[id^='gridview-1070-record']")))])
        # Click on next page button
        wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="button-1157"]'))).click()
except WebDriverException:
    # Catch only Selenium errors (timeouts, un-clickable elements, ...) so that
    # Ctrl-C and ordinary programming errors are not reported as the end of paging.
    print("Next button disabled")

Update: to store all the stocks in one list, use the following.

# Imported here so the snippet is self-contained.
from selenium.common.exceptions import WebDriverException

# Accumulates the text of every matching table across all visited pages.
stocks_list = []
# NOTE(review): this relies on the wait/click raising once the last page is
# reached - confirm that a disabled button really raises on this site.
try:
    while True:
        # Collect the stock symbols of the current page
        stocks_list.extend([my_elem.text for my_elem in WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table[id^='gridview-1070-record']")))])
        # Click on next page button
        wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="button-1157"]'))).click()
except WebDriverException:
    # Catch only Selenium errors (timeouts, un-clickable elements, ...) so that
    # Ctrl-C and ordinary programming errors are not reported as the end of paging.
    print("Next button disabled")
print(stocks_list) # Prints entire list of stocks
  • Related