Hello, I am trying to use Selenium to automatically scrape the product titles and prices. I am using ActionChains and move_to_element, but it gives me a timeout exception. Is there a better way to do it? The titles are in the tab.
https://denago.com/collections/ebikes
# For a dynamic web page, install Selenium and a Chromium driver first.
# The leading `!` marks each line as a shell command (IPython/Jupyter notebook
# syntax); these lines are not valid in a plain Python script.
!apt-get update
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!pip install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Set up a headless Chrome driver.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# Define the web driver as a Chrome driver.
driver = webdriver.Chrome('chromedriver', options=options)
driver.implicitly_wait(10)
driver.get('https://denago.com/collections/ebikes')
# Wait up to 5 seconds for the menu entry to become visible, then hover over it.
ourbike = WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[6]/div/header/nav/ul/li[1]/a/span")))
ActionChains(driver).move_to_element(ourbike).perform()
# Print the text of every element carrying the 'mm-title' class.
Titles = driver.find_elements(By.CLASS_NAME, 'mm-title')
for title in Titles:
    print(title.text)
CodePudding user response:
There are a couple of problems:
- The browser opens at the default size, which is small, so the element you want to hover over is absent from the page. You need to set
options.add_argument('window-size=1200,1980')
- There is a cookie message that overlaps elements on the page. It's better to close it:
driver.find_element(By.ID, 'CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll').click()
- The element you tried to hover over could not be found by the XPath you used. It can easily be found with
(By.XPATH, '(//li[@itemid="m9RVB"])')
, but there are two such elements on the page and the first one is hidden. You need to hover over the second one, so add [2] to the locator: (By.XPATH, '(//li[@itemid="m9RVB"])[2]')
So, here is the code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Set up a headless Chrome driver.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# Use a large window: at the small default size the menu entry to hover over
# is absent from the page.
options.add_argument('window-size=1200,1980')
# Let Selenium locate chromedriver itself; the positional executable path
# is no longer accepted by recent Selenium 4 releases.
driver = webdriver.Chrome(options=options)
try:
    driver.implicitly_wait(10)
    driver.get('https://denago.com/collections/ebikes')
    # Dismiss the cookie dialog, which otherwise overlaps elements on the page.
    driver.find_element(By.ID, 'CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll').click()
    # Two elements match this itemid and the first is hidden, hence the [2].
    ourbike = WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, '(//li[@itemid="m9RVB"])[2]')))
    ActionChains(driver).move_to_element(ourbike).perform()
    # Print the text of every element carrying the 'mm-title' class.
    Titles = driver.find_elements(By.CLASS_NAME, 'mm-title')
    for title in Titles:
        print(title.text)
finally:
    # Always close the browser, even when a wait times out or a click fails.
    driver.quit()
CodePudding user response:
I think you are looking for something like this:
# Needed libs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Create a headless Chrome driver.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
try:
    # Maximize the window; otherwise the page is laid out differently.
    driver.maximize_window()
    # Navigate to the collection page.
    driver.get('https://denago.com/collections/ebikes')
    # Wait for the first title to be present; that is taken as "page loaded".
    WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "(//h5)[1]")))
    # Collect all title elements to learn how many products there are.
    titles = driver.find_elements(By.XPATH, '//h5')
    # XPath positions are 1-based, so count from 1 up to the number of titles.
    for position in range(1, len(titles) + 1):
        product_name = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, f'(//h5)[{position}]'))).text
        # NOTE(review): the attribute predicate in `//div[@]` is incomplete in
        # this copy of the answer; supply the price container's attribute
        # (e.g. its class) before running.
        product_price = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, f'(//div[@])[{position}]'))).text
        print(f"Product {position}: {product_name} - Price: {product_price}")
finally:
    # Always close the browser, even when a wait times out.
    driver.quit()
CodePudding user response:
There are 5 bikes on that page. Here is a more Pythonic (and more Selenium-ish) way of getting those titles (and other info on each bike, if you want):
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: no sandbox, notifications disabled, fixed window size.
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver")  # path to where you saved the chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
# One shared explicit wait (25 s timeout) for every lookup below.
wait = WebDriverWait(driver, 25)
try:
    driver.get('https://denago.com/collections/ebikes')
    # Best effort: accept the cookie dialog if it shows up, otherwise carry on.
    try:
        wait.until(EC.element_to_be_clickable((By.ID, "CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
        print('accepted cookies')
    except Exception:
        print('no cookie button!')
    # NOTE(review): the attribute predicate in `//div[@]` is incomplete in this
    # copy of the answer; supply the product container's attribute (e.g. its
    # class) before running.
    bikes = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[@]//h5/a')))
    for bike in bikes:
        print(bike.text.strip())
finally:
    # Always close the browser so no Chrome process is left behind.
    driver.quit()
Printout in terminal:
accepted cookies
DENAGO CITY MODEL 1 STEP-THRU EBIKE
DENAGO CITY MODEL 1 TOP-TUBE EBIKE
DENAGO COMMUTE MODEL 1 STEP-THRU EBIKE
DENAGO FAT TIRE STEP-THRU EBIKE
DENAGO COMMUTE MODEL 1 TOP-TUBE EBIKE
Selenium docs: https://www.selenium.dev/documentation/