I can get the "href" attributes on the first page, but not on the other pages. What is wrong with my code? Could I get all the "href" values across every page if I change the XPath?
# NOTE: "!pip install selenium" is Jupyter shell syntax, not valid Python.
# Install from a terminal instead:  pip install selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
import pandas as pd

# Selenium 4 removed the executable_path keyword; the driver path is now
# passed through a Service object.
browser = webdriver.Chrome(service=Service('./chromedriver.exe'))
browser.implicitly_wait(5)

# https://tw.mall.yahoo.com/store/屈臣氏Watsons:watsons (original page)
url = "https://tw.mall.yahoo.com/search/product?p=屈臣氏&pg=2"
browser.get(url)

# Product links (商品連結).
# The search-result URL renders a different layout than the store page, so
# the store-page XPath ('MainListing__StoreBoothWrap') matches nothing here.
# Target the result grid instead.
link_xpath = "//ul[@class='gridList']/li/a"

# find_elements_by_xpath was removed in Selenium 4; use find_elements(By...).
product_links = browser.find_elements(By.XPATH, link_xpath)
print(len(product_links))
for link in product_links:
    print(link.get_attribute("href"))
CodePudding user response:
You can use the locators below to extract the product links.
XPath:
//ul[@class='gridList']/li/a
CSS selector:
ul.gridList > li > a
The code:
# Imports required.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# NOTE(review): 'driver' must be the webdriver instance created earlier
# (the question's code names it 'browser') — confirm before running.
wait = WebDriverWait(driver, 30)

for page in range(1, 5):  # iterate over result pages 1 to 4
    driver.get("https://tw.mall.yahoo.com/search/product?p=屈臣氏&pg={}".format(page))
    # Wait until the product grid is present before scraping the page.
    wait.until(EC.presence_of_element_located((By.XPATH, "//ul[@class='gridList']")))
    # Get the products.
    product_links = driver.find_elements(By.XPATH, "//ul[@class='gridList']/li/a")
    # enumerate() pairs each link with its index — no need for zip(range(...)).
    for j, link in enumerate(product_links):
        print(f"{j} : {link.get_attribute('href')}")