I am trying to extract google reviews of a resturant using Python Selenium. I tried to extract the reviews posted by each reviewers. Here is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
driver = webdriver.Chrome('')
base_url = 'https://www.google.com/search?tbs=lf:1,lf_ui:9&tbm=lcl&sxsrf=AOaemvJFjYToqQmQGGnZUovsXC1CObNK1g:1633336974491&q=10 famous restaurants in Dunedin&rflfq=1&num=10&sa=X&ved=2ahUKEwiTsqaxrrDzAhXe4zgGHZPODcoQjGp6BAgKEGo&biw=1280&bih=557&dpr=2#lrd=0xa82eac0dc8bdbb4b:0x4fc9070ad0f2ac70,1,,,&rlfi=hd:;si:5749134142351780976,l,CiAxMCBmYW1vdXMgcmVzdGF1cmFudHMgaW4gRHVuZWRpbiJDUjEvZ2VvL3R5cGUvZXN0YWJsaXNobWVudF9wb2kvcG9wdWxhcl93aXRoX3RvdXJpc3Rz2gENCgcI5Q8QChgFEgIIFkiDlJ7y7YCAgAhaMhAAEAEQAhgCGAQiIDEwIGZhbW91cyByZXN0YXVyYW50cyBpbiBkdW5lZGluKgQIAxACkgESaXRhbGlhbl9yZXN0YXVyYW50mgEkQ2hkRFNVaE5NRzluUzBWSlEwRm5TVU56ZW5WaFVsOUJSUkFCqgEMEAEqCCIEZm9vZCgA,y,2qOYUvKQ1C8;mv:[[-45.8349553,170.6616387],[-45.9156414,170.4803685]]'
driver.get(base_url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()
total_reviews_text =driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
num_reviews = int (total_reviews_text.split()[0])
all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
time.sleep(2)
total_reviews = len(all_reviews)
while total_reviews < num_reviews:
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
#all_reviews = driver.find_elements_by_css_selector('div.gws-localreviews__google-review')
time.sleep(5)
all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
print(total_reviews)
total_reviews =5
review_info = driver.find_elements_by_xpath("//div[@class='PuaHbe']")
for person in person_infos:
rating = person.find_element_by_xpath("./span").get_attribute('aria-label')
print(rating)
However, the above code produces/print 'none'. I am not sure where I made the mistake. Any help to fix the issue would be appreciated.
CodePudding user response:
You are using a wrong XPath locator.
Instead of
rating = person.find_element_by_xpath("./span").get_attribute('aria-label')
Try using
rating = person.find_element_by_xpath("./g-review-stars/span").get_attribute('aria-label')