I want to scrape data from this website (ignore the perfumes that it loads when you scroll down).
For each perfume, I want to get its size. In order to see its size, I need to click on the perfume, which leads me to another page. Assuming I can get the size of a perfume when I'm at its URL, how can I write a program that gives me the URL of every perfume's page on the website?
This is the code that finds the perfume's size when I have the right URL:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By  # locator constants for the Selenium 4 find_element API

# Listing-page URL from the question (the path contains Hebrew text); it is
# kept for reference and is not used by this snippet.
urlM = 'https://www.myperfume.co.il/155567-כל-המותגים-לגב' \
       'ר?order=up_title&page=0'

# Google Sheets access: authorize with a service account and open the sheet.
scope = ["https://spreadsheets.google.com/feeds", 'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
client = gspread.authorize(creds)
spreadsheet = client.open("Perfumes")

options = ChromeOptions()
# The `options.headless = True` setter is deprecated in recent Selenium 4
# releases; passing the command-line switch works across versions.
options.add_argument("--headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Placeholder: replace with the URL of a single perfume's page.
perfume_url = "[THE PERFUME'S URL]"

try:
    driver.get(perfume_url)
    # `find_element_by_xpath` was removed in Selenium 4.3; use find_element(By.XPATH, ...).
    info = driver.find_element(By.XPATH, '//*[(@id = "item_current_sub_title")]//span').text
finally:
    # Always close the browser, even if the page or the element is missing.
    driver.quit()

# Only the first line of the sub-title is of interest. split() is used instead
# of slicing with find('\n'), which returns -1 (and drops the last character)
# when the text has no newline.
first_line = info.split('\n', 1)[0]

# Strip the 'גודל' label and keep only letters and digits. The characters are
# accumulated; the original `res = i` kept just the last matching character.
res = ''.join(ch for ch in first_line.replace('גודל', '') if ch.isdigit() or ch.isalpha())
print(res)
CodePudding user response:
Here you will need the following:
For each product, hover over the product to make the "more details" and "add to cart" buttons appear.
Click the "more details" button.
In the opened page get the product size (and any other details).
Get back to the main page.
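In order to do that for many products, you will have to get the list of products again each time you return to the main page. Otherwise you will get a StaleElementReferenceException, because the elements located before navigating away no longer belong to the current page.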
So, your code can be something like this:
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time  # time.sleep() is used below but was missing from the snippet's imports

# XPath matching every product block on the listing page.
PRODUCT_XPATH = "//div[contains(@class,'layout_list_item')]"

# NOTE: `driver` is expected to already exist and to be on the listing page.
actions = ActionChains(driver)
wait = WebDriverWait(driver, 20)

wait.until(EC.visibility_of_element_located((By.XPATH, PRODUCT_XPATH)))
time.sleep(1)
# `find_elements_by_xpath` was removed in Selenium 4.3; use find_elements(By.XPATH, ...).
products = driver.find_elements(By.XPATH, PRODUCT_XPATH)

# Iterate by index: the elements must be located again after every navigation,
# because references obtained before leaving the page become stale.
for i in range(len(products)):
    wait.until(EC.visibility_of_element_located((By.XPATH, PRODUCT_XPATH)))
    time.sleep(1)
    product = driver.find_elements(By.XPATH, PRODUCT_XPATH)[i]
    # hover over the product block so its buttons appear
    actions.move_to_element(product).perform()
    # click the "more details" button
    product.find_element(By.XPATH, ".//p[contains(@class,'extra_button')]").click()
    # in the details page get the product sub-title containing the product size
    product_size = wait.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "div#item_current_sub_title"))
    ).text
    # get back to the main page
    driver.back()
UPD
This is exactly what I run:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.webdriver.chrome.service import Service  # Selenium 4 way to pass the driver path

# Listing page to scrape (the URL path contains Hebrew text).
urlM = 'https://www.myperfume.co.il/155567-כל-המותגים-לגב' \
       'ר?order=up_title&page=0'

# XPath matching every product block on the listing page.
PRODUCT_XPATH = "//div[contains(@class,'layout_list_item')]"

# The `executable_path` keyword of webdriver.Chrome was removed in Selenium 4.10;
# the driver location is now supplied through a Service object.
driver = webdriver.Chrome(service=Service(executable_path='chromedriver.exe'))
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)

try:
    driver.maximize_window()
    driver.get(urlM)

    wait.until(EC.visibility_of_element_located((By.XPATH, PRODUCT_XPATH)))
    time.sleep(1)
    # `find_elements_by_xpath` was removed in Selenium 4.3; use find_elements(By.XPATH, ...).
    products = driver.find_elements(By.XPATH, PRODUCT_XPATH)

    # Iterate by index: the elements must be located again after every
    # navigation, because references obtained earlier become stale.
    for i in range(len(products)):
        wait.until(EC.visibility_of_element_located((By.XPATH, PRODUCT_XPATH)))
        time.sleep(1)
        product = driver.find_elements(By.XPATH, PRODUCT_XPATH)[i]
        # hover over the product block so its buttons appear
        actions.move_to_element(product).perform()
        # click the "more details" button
        product.find_element(By.XPATH, ".//p[contains(@class,'extra_button')]").click()
        # in the details page get the product sub-title containing the product size
        product_size = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "div#item_current_sub_title"))
        ).text
        # only the first line of the sub-title is printed
        product_size = product_size.split('\n')[0]
        print(product_size)
        # get back to the main page
        driver.back()
finally:
    # Always close the browser, even if a wait times out or a click fails.
    driver.quit()
And it prints the product sizes, like גודל: 100 ML