Home > Software engineering >  Get total number of review of permanently closed place in google map without API
Get total number of review of permanently closed place in google map without API

Time:03-10

I am learning Python as well as web scraping, and I want to get the number of reviews of a permanently closed restaurant from Google Maps, but I cannot manage to do it. Would you please help? Thank you.

from bs4 import BeautifulSoup

# Google Maps search URL for the restaurant, with the query string as copied from the browser.
url = 'https://www.google.com/maps?q=asia halal restaurant aichi japan open date&safe=strict&rlz=1C1GCEA_enID892ID892&sxsrf=ALeKk01NqaBLM8bXeVVS6M6tv9kAy0G6qQ:1616997971678&gs_lcp=Cgdnd3Mtd2l6EAM6BwgjELADECc6BQghEKABOgQIIRAVOgcIIRAKEKABUIUIWKojYOckaABwAHgAgAHHAogB7RGSAQcxLjUuNC4ymAEAoAEBqgEHZ3dzLXdpesgBAcABAQ&uact=5&um=1&ie=UTF-8&sa=X&ved=2ahUKEwjbhef-7NTvAhWa93MBHaFHCzYQ_AUoAXoECAEQAw'

import requests
# Fetch the page with a plain HTTP GET and parse the returned HTML.
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
# Collect the text nodes that are exactly 'クチコミ' (Japanese for "reviews").
# NOTE(review): presumably the Maps side panel is rendered by JavaScript, so this
# text may not exist in the raw response at all — confirm by inspecting page.content.
ps = soup.find_all(string = 'クチコミ')
# Bare expression: only displays the result in a notebook/REPL.
ps

I also tried to find the element by 'class' and by 'span aria-label', based on what Chrome's developer tools show (see the picture below), but I still cannot get it.

browser picture for html class

#ps = soup.find_all(class_='h0ySl-wcwwM-E70qVe-list')
#ps = soup.find_all('span aria-label')
#total_rev = ps.get_text()
#total_rev

Here is the code that I tried using selenium

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
# Start Chrome through a chromedriver binary located at a machine-specific path.
driver = webdriver.Chrome('F:/Download/SW/chromedriver_win32/chromedriver.exe')

# Google Maps place page of the permanently closed restaurant.
url = 'https://www.google.com/maps/place/アッバシ スイーツ/@35.0903185,136.8551766,17z/data=!3m1!4b1!4m5!3m4!1s0x600378381c4bb1f7:0x8e9d356b9ded5bcc!8m2!3d35.0903185!4d136.8573653'
# Load the page in the controlled browser.
driver.get(url)

I have tried to get the number of reviews using this code on a "still operating" restaurant, but when it comes to a permanently closed one, I cannot get the number of reviews.

# Wait up to 5 seconds for a <button> whose text contains "section-star", then click it.
# NOTE(review): click() returns None, so span_review never holds the element.
# NOTE(review): "section-star" looks like a class name rather than visible text,
# so contains(text(), ...) may never match — confirm against the page HTML.
span_review = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "section-star")]'))).click()


#Find the total number of reviews
# Reads the element at a fixed absolute XPath and keeps the first space-separated token of its text.
total_number_of_reviews = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]/div[2]/div/div[2]/div[2]').text.split(" ")[0]
# Drop thousands separators (e.g. "1,234") before converting the token to int.
total_number_of_reviews = int(total_number_of_reviews.replace(',','')) if ',' in total_number_of_reviews else int(total_number_of_reviews)#Find scroll layout


# Second attempt: locate the element by its class name.
total_reviews = driver.find_element_by_class_name("h0ySl-wcwwM-E70qVe-list")
total_reviews #= driver.get('aria-label')
# NOTE(review): Selenium's WebElement exposes get_attribute(), not get_text();
# this call likely raises AttributeError. aria-label is an attribute, not text.
total_reviews = total_reviews.get_text('aria-label')
total_reviews
total_reviews
# [0:] is a full slice, so this takes the element's entire text.
total_number_of_reviews = total_reviews.text[0:]

total_number_of_reviews

Hopefully I can learn

Thanks!

CodePudding user response:

I can't find anything matching your XPath in the HTML. There is no <button> with the text section-star; there is only an <li> element.

Also, aria-label is not text but an attribute, so you have to use .get_attribute('aria-label') to read it.

But I found another XPath, //button[@jsaction="pane.rating.moreReviews"], and it works for me for both the permanently closed and the still operating place.


Tested on Firefox and Chrome, Linux.

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait

# Start the browser. The chromedriver path is machine-specific; use one of the
# commented alternatives if the driver is already on PATH.
driver = webdriver.Chrome('F:/Download/SW/chromedriver_win32/chromedriver.exe')
#driver = webdriver.Chrome()
#driver = webdriver.Firefox()

all_urls = [
    # permanent closed
    'https://www.google.com/maps/place/アッバシ スイーツ/@35.0903185,136.8551766,17z/data=!3m1!4b1!4m5!3m4!1s0x600378381c4bb1f7:0x8e9d356b9ded5bcc!8m2!3d35.0903185!4d136.8573653',
    # still operating
    'https://www.google.com/maps/place/Seaside Restaurant Higashiyama Garden - Port Bldg./@35.0841323,136.8474088,14z/data=!3m1!5s0x6003790a61e056e7:0x7f307de064680a96!4m9!1m2!2m1!1srestaurants!3m5!1s0x600379a07cd9fcc7:0x89f84cc9f0422e30!8m2!3d35.0895485!4d136.8809243!15sCgtyZXN0YXVyYW50c1oNIgtyZXN0YXVyYW50c5IBCnJlc3RhdXJhbnQ',
]

try:
    for url in all_urls:
        driver.get(url)
        # Wait up to 15 seconds for the "more reviews" button to become clickable.
        reviews_button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable(
                (By.XPATH, '//button[@jsaction="pane.rating.moreReviews"]')
            )
        )
        # The value of interest is in the aria-label attribute, not in the
        # element's text, so it must be read with get_attribute().
        total_reviews = reviews_button.get_attribute('aria-label')
        print(total_reviews)
finally:
    # Always shut the browser and the driver process down, even when a wait
    # times out, so no orphaned chromedriver/browser instances are left behind.
    driver.quit()
  • Related