Home > Back-end >  Python Selenium: Changing from three loops to one loop repeat the same information
Python Selenium: Changing from three loops to one loop repeat the same information

Time:10-11

I am extracting google reviews of a resturant. I am interested in extracting reviewer name, rating given by reviewer, and text of the review. I used following code for the extraction:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time

driver = webdriver.Chrome('')
base_url = 'https://www.google.com/search?tbs=lf:1,lf_ui:9&tbm=lcl&sxsrf=AOaemvJFjYToqQmQGGnZUovsXC1CObNK1g:1633336974491&q=10 famous restaurants in Dunedin&rflfq=1&num=10&sa=X&ved=2ahUKEwiTsqaxrrDzAhXe4zgGHZPODcoQjGp6BAgKEGo&biw=1280&bih=557&dpr=2#lrd=0xa82eac0dc8bdbb4b:0x4fc9070ad0f2ac70,1,,,&rlfi=hd:;si:5749134142351780976,l,CiAxMCBmYW1vdXMgcmVzdGF1cmFudHMgaW4gRHVuZWRpbiJDUjEvZ2VvL3R5cGUvZXN0YWJsaXNobWVudF9wb2kvcG9wdWxhcl93aXRoX3RvdXJpc3Rz2gENCgcI5Q8QChgFEgIIFkiDlJ7y7YCAgAhaMhAAEAEQAhgCGAQiIDEwIGZhbW91cyByZXN0YXVyYW50cyBpbiBkdW5lZGluKgQIAxACkgESaXRhbGlhbl9yZXN0YXVyYW50mgEkQ2hkRFNVaE5NRzluUzBWSlEwRm5TVU56ZW5WaFVsOUJSUkFCqgEMEAEqCCIEZm9vZCgA,y,2qOYUvKQ1C8;mv:[[-45.8349553,170.6616387],[-45.9156414,170.4803685]]'
driver.get(base_url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()

total_reviews_text =driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
num_reviews = int (total_reviews_text.split()[0])

all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
time.sleep(2)
total_reviews = len(all_reviews)

 while total_reviews < num_reviews:
        driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
        WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
        #all_reviews = driver.find_elements_by_css_selector('div.gws-localreviews__google-review')
        time.sleep(5)
        all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
        print(total_reviews)
        total_reviews  =5

person_info = driver.find_elements_by_xpath("//div[@id='reviewSort']//div[contains(@class,'google-review')]")
rating_info = driver.find_elements_by_xpath("//div[@class='PuaHbe']")
review_text = driver.find_elements_by_xpath("//div[@class='Jtu6Td']")

for person in person_info:
    name = person.find_element_by_xpath("./div/div/div/a").text
    print(name)

for rating in rating_info:  
    rating_txt = person.find_element_by_xpath("./g-review-stars/span").get_attribute('aria-label')
    print(rating_txt)

for text in review_text:
    texts = text.find_element_by_xpath("./span").text
    print(texts)

The above code worked as per expectations. I want to make slight change in above code. Instead of using three loops to display name, rating, and review_text. I wanted to extract the same information using one loop. So I made following changes in the above code:

reviews_info = driver.find_elements_by_xpath("//div[@class='jxjCjc']")
for review_info in reviews_info:
    name = review_info.find_element_by_xpath("./div/div/a").text
    rating = review_info.find_element_by_xpath("//div[@class='PuaHbe']//g-review-stars//span").get_attribute('aria-label')
    text = review_info.find_element_by_xpath("//div[@class='Jtu6Td']//span").text
    print(name)
    print(rating)
    print(text)
    print()

The problem with a change in code is that it displays the same information (i.e. rating and text) for all reviewers names. I am not sure where am I making the mistake. Any help to fix the issue would be really appreciated.

CodePudding user response:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome()
base_url = 'https://www.google.com/search?tbs=lf:1,lf_ui:9&tbm=lcl&sxsrf=AOaemvJFjYToqQmQGGnZUovsXC1CObNK1g:1633336974491&q=10 famous restaurants in Dunedin&rflfq=1&num=10&sa=X&ved=2ahUKEwiTsqaxrrDzAhXe4zgGHZPODcoQjGp6BAgKEGo&biw=1280&bih=557&dpr=2#lrd=0xa82eac0dc8bdbb4b:0x4fc9070ad0f2ac70,1,,,&rlfi=hd:;si:5749134142351780976,l,CiAxMCBmYW1vdXMgcmVzdGF1cmFudHMgaW4gRHVuZWRpbiJDUjEvZ2VvL3R5cGUvZXN0YWJsaXNobWVudF9wb2kvcG9wdWxhcl93aXRoX3RvdXJpc3Rz2gENCgcI5Q8QChgFEgIIFkiDlJ7y7YCAgAhaMhAAEAEQAhgCGAQiIDEwIGZhbW91cyByZXN0YXVyYW50cyBpbiBkdW5lZGluKgQIAxACkgESaXRhbGlhbl9yZXN0YXVyYW50mgEkQ2hkRFNVaE5NRzluUzBWSlEwRm5TVU56ZW5WaFVsOUJSUkFCqgEMEAEqCCIEZm9vZCgA,y,2qOYUvKQ1C8;mv:[[-45.8349553,170.6616387],[-45.9156414,170.4803685]]'
driver.get(base_url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()

total_reviews_text =driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
num_reviews = int (total_reviews_text.split()[0])
print("NUm reviews=", num_reviews)

all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
time.sleep(2)
total_reviews = len(all_reviews)
print("Total reviews=", total_reviews)
s = "(//div[@id='reviewSort']//div[contains(@class,'google-review')])[0]"
b = '0'
a = 1  # Index of Review button

for i in range(10):
    c = str(a)
    s = s.replace(b, c)  # Updating Xpath's index in every loop so that it can focus on new review everytime.
    b = str(a)
    a = a   1
    WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
    time.sleep(5)
    all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
    total_reviews  =1
    Info = driver.find_element_by_xpath(s).text
    print(Info)
    print("<------------------------------------------------------>\n\n")

Output:- Click Here to See Program Output

  • Related