I am trying to scrape each product page from this website: https://www.aliexpress.com/wholesale?catId=0&initiative_id=SB_20220315022920&SearchText=bluetooth earphones
Especially I want to get comments and custumer countries as I mentionned in the photo: enter image description here
The main issue is that my code does not inspect the right elements and this is what I am struggling with . First, I tried my scraping on this product : https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f={"sku_id":"12000027213624098"}&pdp_pi=-1;40.81;-1;-1@salePrice;MAD;search-mainSearch
Here is my code :
from selenium import webdriver
from selenium.webdriver.common.by import By
from lxml import html
import cssselect
from time import sleep
from itertools import zip_longest
import csv
driver = webdriver.Edge(executable_path=r"C:/Users/OUISSAL/Desktop/wscraping/XEW/scraping/codes/msedgedriver")
url = "https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f={"sku_id":"12000027213624098"}&pdp_pi=-1;40.81;-1;-1@salePrice;MAD;search-mainSearch"
with open ("data.csv", "w", encoding="utf-8") as csvfile:
wr = csv.writer(csvfile)
wr.writerow(["Comment","Custumer country"])
driver.get(url)
driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
review_buttom = driver.find_element_by_xpath('//li[@ae_button_type="tab_feedback"]')
review_buttom.click()
html_source = driver.find_element_by_xpath('//div[@id="transction-feedback"]')
tree = html.fromstring(html_source)
#tree = html.fromstring(driver.page_source)
for rvw in tree.xpath('//div[@]'):
country = rvw.xpath('//div[@]//b/text()')
if country:
country = country[0]
else:
country = ''
print('country:', country)
comment = rvw.xpath('//dt[@id="buyer-feedback"]//span/text()')
if comment:
comment = comment[0]
else:
comment = ''
print('comment:', comment)
driver.close()
Thank you !!
CodePudding user response:
What happens?
There is one main issue, the feedback you are looking for is in an iframe
, so you wont get your information by calling the elements directly.
How to fix?
Scroll into view of element that holds the iframe
navigate to its source and interact with its pagination to get all the feedbacks.
Example
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
url = 'https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f={"sku_id":"12000027213624098"}&pdp_pi=-1;40.81;-1;-1@salePrice;MAD;search-mainSearch'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.maximize_window()
driver.get(url)
wait = WebDriverWait(driver, 10)
driver.execute_script("arguments[0].scrollIntoView();", wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.tab-content'))))
driver.get(wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#product-evaluation'))).get_attribute('src'))
data=[]
while True:
for e in driver.find_elements(By.CSS_SELECTOR, 'div.feedback-item'):
try:
country = e.find_element(By.CSS_SELECTOR, '.user-country > b').text
except:
country = None
try:
comment = e.find_element(By.CSS_SELECTOR, '.buyer-feedback span').text
except:
comment = None
data.append({
'country':country,
'comment':comment
})
try:
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#complex-pager a.ui-pagination-next'))).click()
except:
break
pd.DataFrame(data).to_csv('filename.csv',index=False)