How can I navigate through each page without using driver.current_url? In my full code, I get a bunch of errors once I navigate to the next page inside a loop. Without the loop it runs fine, but it can only go through one page. I want to navigate through as many pages as possible. Any help is appreciated, thanks.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
# Script: open the Seek search results and, for each results page, visit every
# job link and click the "quick apply" button when one is present.
from time import sleep  # sleep() was called below without ever being imported

from selenium.common.exceptions import WebDriverException

# Raw string so the backslashes in the Windows path are not read as escapes.
driver_service = Service(executable_path=r"C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()  # load web driver
wait = WebDriverWait(driver, 5)

# driver.get() returns None, so its result is not worth keeping.
driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template + '?page={}'

# At the top of each iteration the browser is on results page `page - 1`;
# the page loaded by the last iteration is therefore not scraped.
for page in range(2, 5):
    # Collect the hrefs as plain strings up front: the link elements become
    # stale as soon as the browser navigates away from the results page.
    link_job = [
        x.get_attribute('href')
        for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")
    ]
    for job in link_job:
        driver.get(job)
        try:
            quick_apply = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable(
                    (By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")
                )
            )
            quick_apply.click()
        except WebDriverException:
            # Covers the wait timing out (no quick-apply button) and a failed
            # click, without also swallowing KeyboardInterrupt as a bare
            # `except:` would.
            print("No records found " + job)
        sleep(3)
    driver.get(template.format(page))
CodePudding user response:
If I understand you correctly, you want to determine dynamically how many pages there are and loop over each of them. I managed to achieve this by using a while loop and checking on each page whether the "Next" button at the bottom is present. If it is not, the last page has been reached and you can exit the loop.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from time import sleep
# Script: walk every Seek results page until there is no "Next" button, and on
# each page visit every job link and click "quick apply" when it is present.
from selenium.common.exceptions import WebDriverException

driver_service = Service(executable_path="C:\\Users\\Stefan\\bin\\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()  # load web driver
wait = WebDriverWait(driver, 5)

try:
    # driver.get() returns None, so its result is not worth keeping.
    driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
    url_template = driver.current_url
    template = url_template + '?page={}'
    page = 1

    while True:
        # Collect the hrefs as plain strings up front: the link elements become
        # stale as soon as the browser navigates away from the results page.
        link_job = [
            x.get_attribute('href')
            for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")
        ]
        # Look for the "Next" button while the results page is still loaded.
        # find_elements() returns an empty list instead of raising, so no
        # try/except is needed. The answer is only acted on after this page's
        # jobs have been processed, so the last page is not skipped.
        has_next = bool(driver.find_elements(By.XPATH, "//a[@title='Next']"))

        for job in link_job:
            driver.get(job)
            try:
                quick_apply = WebDriverWait(driver, 3).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")
                    )
                )
                quick_apply.click()
            except WebDriverException:
                # Covers the wait timing out (no quick-apply button) and a
                # failed click, without also swallowing KeyboardInterrupt.
                print("No records found " + job)
            sleep(3)

        if not has_next:
            # last page reached
            break

        # Advance the counter; resetting it to 1 here would reload the first
        # page forever.
        page += 1
        driver.get(template.format(page))
finally:
    # quit() ends the whole driver session, and the finally clause runs it
    # even when the loop fails part-way.
    driver.quit()
CodePudding user response:
It seems your problem is a StaleElementReferenceException,
raised when you go back from a job page to the job search results page.
The simplest way to overcome this problem is to keep the URL of the job search results page.
In fact, this is the only point of your code that I changed, and it works.
I also replaced driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")
with wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[@data-automation='jobTitle']")))
for better performance.
The code below works, but the website itself responds badly.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Script: load Seek results pages 1..19 by URL and, on each page, visit every
# job link and click "quick apply" when it is present.
from selenium.common.exceptions import TimeoutException, WebDriverException

options = Options()
options.add_argument("start-maximized")
# Raw string so the backslashes in the Windows path are not read as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 10)
url = 'https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA?page={p}'

for p in range(1, 20):
    # Fill in the page number; passing `url` unformatted would request the
    # literal "?page={p}" on every iteration.
    driver.get(url.format(p=p))
    try:
        # Collect the hrefs as plain strings up front: the link elements become
        # stale as soon as the browser navigates away from the results page.
        link_job = [
            x.get_attribute('href')
            for x in wait.until(
                EC.visibility_of_all_elements_located(
                    (By.XPATH, "//a[@data-automation='jobTitle']")
                )
            )
        ]
    except TimeoutException:
        # No job links appeared within the wait: stop paging instead of
        # crashing (presumably this is past the last results page).
        break

    for job in link_job:
        driver.get(job)
        try:
            wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")
                )
            ).click()
            print("applied")
        except WebDriverException:
            # Covers the wait timing out (no quick-apply button) and a failed
            # click, without also swallowing KeyboardInterrupt.
            print("No records found " + job)
    # No reload of the results page is needed here: the remaining links are
    # already plain strings, and the next iteration loads the next page itself.