Home > Mobile >  I am performing web scraping on influenster.com but I am not able to scrape the star rating
I am performing web scraping on influenster.com but I am not able to scrape the star rating

Time:09-16

The link I am using is https://www.influenster.com/reviews/loreal-paris-elvive-extraordinary-oil-deep-nourishing-shampoo-and-conditioner-set-126-fl-oz. Please guide me on how I can get the stars as there is no aria label or numerical value to scrape.

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import configparser
from datetime import datetime

# Scrape the reviewer name and profile link of every review shown on the
# product page. The page URL and the date range are read from config.ini.
parser = configparser.RawConfigParser()
parser.read('config.ini')

url = parser['PROPERTIES']['URL']
END_DATE = datetime.strptime(parser['DATE']['END'], '%Y-%m-%d')
START_DATE = datetime.strptime(parser['DATE']['START'], '%Y-%m-%d')

# Path to the chromedriver executable file
CHROMEDRIVER_PATH = r'C:\Users\HP\Desktop\INTERNSHIP\influenster\chromedriver.exe'

# Driver options
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)

# Chrome web driver built from the service and options above
service = ChromeService(executable_path=CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service, options=options)
try:
    wait = WebDriverWait(driver, 20)
    driver.get(url)

    # Accept the cookie banner, then wait until the review items are visible.
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"))).click()
    reviews = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".conversations-left .item")))
    count = 0
    item_list = []

    for review in reviews:
        # find_element_by_xpath() was removed in Selenium 4.3; the supported
        # form is find_element(By.XPATH, ...). The anchor is looked up once
        # and reused for both the user name and the profile URL.
        name_link = review.find_element(By.XPATH, ".//a[contains(@class,'name')]")
        item = {
            # stars
            'username': name_link.text,
            'userurl': name_link.get_attribute("href"),
            'title': 'NA',
            # 'review_text': review.find_element(By.XPATH, ".//div[contains(@class,'review-text')]").text,
            # 'permalink': 'NA',
            # 'date': curr_date,
            # 'subproduct name': 'NA',
            # 'subproduct link': 'NA',
        }
        item_list.append(item)
    print(item_list)
finally:
    # Always shut down the browser and the chromedriver process, even when a
    # wait times out or an element is missing.
    driver.quit()

CodePudding user response:

This is one way of achieving your stated goal (get the product star rating):

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains


# Print the overall star rating of the product page: hover over the star
# widget and read the "... out of 5 stars" text that is then present in the DOM.
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')

chrome_options.add_argument("window-size=1280,720")

webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
try:
    actions = ActionChains(browser)
    wait = WebDriverWait(browser, 20)
    url = 'https://www.influenster.com/reviews/loreal-paris-elvive-extraordinary-oil-deep-nourishing-shampoo-and-conditioner-set-126-fl-oz'
    browser.get(url)

    # Accepting cookies is best-effort: the banner may never appear. Only
    # Selenium failures (timeout, intercepted click, ...) are tolerated here,
    # so unrelated programming errors are no longer silently swallowed.
    try:
        wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
        print('accepted cookies')
    except WebDriverException:
        print('no cookie button!')

    # Locate the star widget relative to the <h1> heading and move the mouse
    # over it before looking for the rating text.
    stars = wait.until(EC.presence_of_element_located((By.XPATH, '//h1/following-sibling::div/following-sibling::div/div')))
    actions.move_to_element(stars).perform()
    print('moved to stars')
    star_rating = wait.until(EC.presence_of_element_located((By.XPATH, '//*[contains(text(),"out of 5 stars")]')))
    print(star_rating.text)
finally:
    # Release the browser and the chromedriver process even if a wait fails.
    browser.quit()

Result in terminal:

accepted cookies
moved to stars
4.5 out of 5 stars

Selenium documentation can be found at https://www.selenium.dev/documentation/

EDIT: If you want the star rating of each individual review, that is sadly not doable with Selenium. Happily, it is doable with another method, by requesting the paginated review data directly:

import cloudscraper
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm

# Download the paginated review data of one product as JSON and collect every
# page's 'items' list into a single DataFrame.
PRODUCT_URL = 'https://www.influenster.com/reviews/loreal-paris-elvive-extraordinary-oil-deep-nourishing-shampoo-and-conditioner-set-126-fl-oz'

headers = {
    'x-requested-with': 'XMLHttpRequest',
    'content-type': 'application/x-www-form-urlencoded; charset=utf-8',
    'referer': PRODUCT_URL,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
}

scraper = cloudscraper.create_scraper(disableCloudflareV1=True)
page_frames = []
# Requests review pages 1 through 11.
# NOTE(review): presumably the number of pages this product had when the
# answer was written - adjust the range for other products.
for x in tqdm(range(1, 12)):
    url = f'{PRODUCT_URL}?paginate=true&review_page={x}&sort=featured'

    r = scraper.get(url, headers=headers)
    # Fail with a clear HTTP error instead of an obscure JSON decoding error
    # when the site rejects or blocks the request.
    r.raise_for_status()
    df = pd.DataFrame(r.json()['items'])
    page_frames.append(df)

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously collected rows on every iteration.
reviews_df = pd.concat(page_frames, axis=0, ignore_index=True)
print(reviews_df)

Result in terminal:

id  stars   like_count  got_from_influenster    user_liked  timestamp   author  badge   text    media   merchant    product varieties   incentivized    comment_count   user_profile_questions  share_links
0   78801375    5   55  False   False   2021-07-09 20:42:45.554925 00:00    {'id': 8354487, 'username': 'genesist19', 'sho...   None    ☑️ I really like the products that come in Kit...   [{'id': 78801376, 'comment_count': 0, 'text': ...   None    {'id': 3598670, 'name': 'LOréal Paris Elvive E...   []  None    0   []  {'Facebook': 'https://www.facebook.com/sharer/...
1   75217735    5   31  False   False   2021-03-03 14:41:18.119159 00:00    {'id': 8825695, 'username': 'gina78', 'short_n...   None    <a href="https://www.influenster.com/reviews/l...   [{'id': 75217738, 'comment_count': 0, 'text': ...   Walmart {'id': 3598670, 'name': 'LOréal Paris Elvive E...   []  None    0   []  {'Facebook': 'https://www.facebook.com/sharer/...
2   81276486    5   26  False   False   2021-10-28 06:49:49.490998 00:00    {'id': 9540955, 'username': 'ayas30', 'short_n...   None    I really trust this brand, I alway purchase th...   []  Rite Aid    {'id': 3598670, 'name': 'LOréal Paris Elvive E...   []  None    0   []  {'Facebook': 'https://www.facebook.com/sharer/...
3   77920545    5   20  False   False   2021-06-08 03:50:20.528189 00:00    {'id': 9037996, 'username': 'member-dee631', '...   None    it was amazing it made me glow like the sun wa...   [{'id': 77920550, 'comment_count': 1, 'text': ...   None    {'id': 3598670, 'name': 'LOréal Paris Elvive E...   []  None    0   []  {'Facebook': 'https://www.facebook.com/sharer/...
4   71949054    5   14  False   False   2020-11-19 20:57:11.267771 00:00    {'id': 8502511, 'username': 'gaiam12', 'short_...   None    Hi            
  • Related