The link I am using is https://www.influenster.com/reviews/loreal-paris-elvive-extraordinary-oil-deep-nourishing-shampoo-and-conditioner-set-126-fl-oz. Please guide me on how I can get the star rating, as there is no aria-label or numerical value to scrape.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import configparser
from datetime import datetime
# Read the target URL and the START/END dates from config.ini.
parser = configparser.RawConfigParser()
parser.read('config.ini')
url = parser['PROPERTIES']['URL']
END_DATE = datetime.strptime(parser['DATE']['END'], '%Y-%m-%d')
START_DATE = datetime.strptime(parser['DATE']['START'], '%Y-%m-%d')

# Setting up driver options
options = webdriver.ChromeOptions()
# Setting up path to the chromedriver executable file
CHROMEDRIVER_PATH = r'C:\Users\HP\Desktop\INTERNSHIP\influenster\chromedriver.exe'
# Adding options
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
# Setting up chrome service
service = ChromeService(executable_path=CHROMEDRIVER_PATH)
# Establishing the Chrome web driver using the service and options above
driver = webdriver.Chrome(service=service, options=options)
wait = WebDriverWait(driver, 20)

count = 0  # NOTE(review): not used in the visible code -- confirm before removing
item_list = []
try:
    driver.get(url)
    # Accept the cookie banner once it is clickable, then wait until the
    # review items are visible before reading them.
    wait.until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"))).click()
    reviews = wait.until(EC.visibility_of_all_elements_located(
        (By.CSS_SELECTOR, ".conversations-left .item")))

    for review in reviews:
        # The find_element_by_* helpers were removed in Selenium 4; use
        # find_element(By.XPATH, ...) and look the name link up only once.
        name_link = review.find_element(By.XPATH, ".//a[contains(@class,'name')]")
        item = {
            # stars
            'username': name_link.text,
            'userurl': name_link.get_attribute("href"),
            'title': 'NA',
            # 'review_text':review.find_element_by_xpath(".//div[contains(@class,'review-text')]").text,
            # 'permalink': 'NA',
            # 'date':curr_date,
            # 'subproduct name': 'NA',
            # 'subproduct link': 'NA',
        }
        item_list.append(item)
finally:
    # Always shut the browser and the chromedriver process down, even when a
    # wait times out or an element lookup fails.
    driver.quit()

print(item_list)
CodePudding user response:
This is one way of achieving your stated goal (get the product star rating):
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
# Browser configuration.
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")

webdriver_service = Service("chromedriver/chromedriver")  # path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
actions = ActionChains(browser)
wait = WebDriverWait(browser, 20)

url = 'https://www.influenster.com/reviews/loreal-paris-elvive-extraordinary-oil-deep-nourishing-shampoo-and-conditioner-set-126-fl-oz'
try:
    browser.get(url)

    # Dismissing the cookie banner is best effort: any failure here is
    # reported and the script carries on.
    try:
        wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
        print('accepted cookies')
    except Exception:
        print('no cookie button!')

    # Locate the star widget relative to the page's <h1> and hover over it.
    # NOTE(review): presumably the hover is what makes the "out of 5 stars"
    # text appear in the DOM -- confirm against the live page.
    stars = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//h1/following-sibling::div/following-sibling::div/div')))
    actions.move_to_element(stars).perform()
    print('moved to stars')

    star_rating = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//*[contains(text(),"out of 5 stars")]')))
    print(star_rating.text)
finally:
    # Close the browser and stop chromedriver even if a wait times out.
    browser.quit()
Result in terminal:
accepted cookies
moved to stars
4.5 out of 5 stars
Selenium documentation can be found at https://www.selenium.dev/documentation/
EDIT: If you want the stars for each individual review, that is sadly not doable with Selenium. Happily, it is doable with another method:
import cloudscraper
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm
# Product page URL; used both as the referer header and as the base of the
# paginated review requests.
PRODUCT_URL = 'https://www.influenster.com/reviews/loreal-paris-elvive-extraordinary-oil-deep-nourishing-shampoo-and-conditioner-set-126-fl-oz'

headers = {
    'x-requested-with': 'XMLHttpRequest',
    'content-type': 'application/x-www-form-urlencoded; charset=utf-8',
    'referer': PRODUCT_URL,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
}

scraper = cloudscraper.create_scraper(disableCloudflareV1=True)

# Collect one DataFrame per page and concatenate once at the end; calling
# pd.concat inside the loop re-copies every previously fetched row each time.
page_frames = []
for x in tqdm(range(1, 12)):
    url = f'{PRODUCT_URL}?paginate=true&review_page={x}&sort=featured'
    r = scraper.get(url, headers=headers)
    # Each response is JSON whose 'items' entry holds that page's reviews.
    page_frames.append(pd.DataFrame(r.json()['items']))

reviews_df = pd.concat(page_frames, axis=0, ignore_index=True)
print(reviews_df)
Result in terminal:
id stars like_count got_from_influenster user_liked timestamp author badge text media merchant product varieties incentivized comment_count user_profile_questions share_links
0 78801375 5 55 False False 2021-07-09 20:42:45.554925 00:00 {'id': 8354487, 'username': 'genesist19', 'sho... None ☑️ I really like the products that come in Kit... [{'id': 78801376, 'comment_count': 0, 'text': ... None {'id': 3598670, 'name': 'LOréal Paris Elvive E... [] None 0 [] {'Facebook': 'https://www.facebook.com/sharer/...
1 75217735 5 31 False False 2021-03-03 14:41:18.119159 00:00 {'id': 8825695, 'username': 'gina78', 'short_n... None <a href="https://www.influenster.com/reviews/l... [{'id': 75217738, 'comment_count': 0, 'text': ... Walmart {'id': 3598670, 'name': 'LOréal Paris Elvive E... [] None 0 [] {'Facebook': 'https://www.facebook.com/sharer/...
2 81276486 5 26 False False 2021-10-28 06:49:49.490998 00:00 {'id': 9540955, 'username': 'ayas30', 'short_n... None I really trust this brand, I alway purchase th... [] Rite Aid {'id': 3598670, 'name': 'LOréal Paris Elvive E... [] None 0 [] {'Facebook': 'https://www.facebook.com/sharer/...
3 77920545 5 20 False False 2021-06-08 03:50:20.528189 00:00 {'id': 9037996, 'username': 'member-dee631', '... None it was amazing it made me glow like the sun wa... [{'id': 77920550, 'comment_count': 1, 'text': ... None {'id': 3598670, 'name': 'LOréal Paris Elvive E... [] None 0 [] {'Facebook': 'https://www.facebook.com/sharer/...
4 71949054 5 14 False False 2020-11-19 20:57:11.267771 00:00 {'id': 8502511, 'username': 'gaiam12', 'short_... None Hi