I have a website with a product set that comes in 2 colors and 6 variations, for a total of 12 products. How can I collect the information for every combination when the 'color' selector sits at the same level as the 'variation' selector? Thank you very much.
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import time, re
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Scrape the SKU and price shown on the product page after clicking each
# swatch button, one button per page load.
#
# Steps:
#   1. Fetch the page once with requests/BeautifulSoup and count the swatch
#      <div> elements under `.select-swap`.
#   2. Build one positional XPath per swatch button.
#   3. For each XPath, reload the page in Selenium, click the button, then
#      read and print the SKU and price.
PATH = "C:/Users/aston/Documents/playground_python/chromedriver.exe"
driver = webdriver.Chrome(PATH)
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0'}
test_url = 'https://moho.com.vn/products/bo-ban-an-go-cao-su-tu-nhien-moho-vline-601'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# One entry per swatch <div>; only the number of entries is used below.
btn_count = [
    swatch['data-value'].count('class')
    for swatch in test.select('.select-swap > div')
]
vari = len(btn_count)

# NOTE(review): the attribute test inside `//div[@]` is empty in this file,
# which is not a valid XPath predicate. Restore the original attribute
# condition (e.g. a class or id test) before running.
xpath = []
for b in range(1, vari):
    btn_path = f'//div[@]/div/div/div[{b}]/label'
    print(btn_path)
    xpath.append(btn_path)

# Position 7 is deliberately skipped (reason not visible here -- TODO confirm).
# Guarded so that a page with fewer buttons does not raise ValueError.
skipped_path = '//div[@]/div/div/div[7]/label'
if skipped_path in xpath:
    xpath.remove(skipped_path)

for i in xpath:
    # Reload so that every click starts from the page's default selection.
    driver.get(test_url)
    time.sleep(1)
    driver.find_element(By.XPATH, i).click()
    # Fixed pause before reading the SKU and price after the click.
    time.sleep(1)
    sku = driver.find_element(By.XPATH, './/span[@id="pro_sku"]').text
    # Price locator uses the `pro-price` class, the same element the
    # colour/option loop later in this file reads.
    price = driver.find_element(By.XPATH, './/span[@class="pro-price"]').text
    # Print the scraped values, not the literal strings "sku"/"price".
    print({sku: price})
First, I count the buttons and build the XPath for each of the 12 variations of the product set. Second, for each XPath, I read the SKU and price of the product. At the moment I only get a df like this: {SKU: MFDTSVLBR2.N16: 6,590,000₫, SKU: MFDTSVLDC4.B16: 7,390,000₫, SKU: MFDTSVLBR1DC2.N16: 7,190,000₫, SKU: MFDTSVLBR2.N16: 6,590,000₫, SKU: MFDTSVLBD2.N16: 5,790,000₫, SKU: MFDTSVLBR1DB1.N16: 6,190,000₫}. The df is missing 1 variation of the product set in the natural color (SKU: MFDTSVLDC4.N16: 7,390,000₫) and 5 variations of the product set in the brown color. I want to know how to write the 'xpath' code so that it selects on 2 conditions (one for the color and one for the variation of the set).
CodePudding user response:
I hope this is the data you are trying to extract.
# Click every colour swatch and, under each colour, every option swatch,
# then read the SKU and price shown for that combination. Each combination
# is printed and also collected in `data`.
driver.get("https://moho.com.vn/products/bo-ban-an-go-cao-su-tu-nhien-moho-vline-601")

# Labels of the colour swatches.
colors = driver.find_elements(By.XPATH, "//div[@id='variant-swatch-0']//label")

# One dict per colour/option pair: color, option, sku, price.
data = []

for color in colors:
    color.click()
    # Option swatches are looked up again after each colour click.
    options = driver.find_elements(By.XPATH, "//div[@data-option='option2']/div[2]/div")
    for option in options:
        option.click()
        # NOTE(review): there is no wait between the click and the reads
        # below; if the page refreshes SKU/price asynchronously these may be
        # stale values -- confirm, and add an explicit wait if needed.
        sku = driver.find_element(By.XPATH, "//span[@id='pro_sku']").get_attribute("innerText")
        price = driver.find_element(By.XPATH, "//span[@class='pro-price']").get_attribute("innerText")
        color_name = color.text
        option_name = option.text
        data.append(
            {"color": color_name, "option": option_name, "sku": sku, "price": price}
        )
        print(f"{color_name}: {option_name} - {sku} - {price}")
Màu Tự Nhiên: 4 Ghế Đơn - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Màu Tự Nhiên: 1 Ghế Băng Dài 2 Ghế Đơn - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Màu Tự Nhiên: 1 Ghế Băng Tựa 2 Ghế Đơn - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Màu Tự Nhiên: 2 Ghế Băng Tựa - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Màu Tự Nhiên: 2 Ghế Băng Dài - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Màu Tự Nhiên: 1 Ghế Băng Dài 1 Ghế Băng Tựa - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Nâu: 4 Ghế Đơn - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Nâu: 1 Ghế Băng Dài 2 Ghế Đơn - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Nâu: 1 Ghế Băng Tựa 2 Ghế Đơn - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Nâu: 2 Ghế Băng Tựa - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Nâu: 2 Ghế Băng Dài - SKU:MFDTSVLDC4.N16 - 7,390,000₫
Nâu: 1 Ghế Băng Dài 1 Ghế Băng Tựa - SKU:MFDTSVLDC4.N16 - 7,390,000₫