Home > Software engineering >  Navigating to next page of reviews on booking.com using beautifulsoup
Navigating to next page of reviews on booking.com using beautifulsoup

Time:04-14

I'm trying to scrape all the reviews of a hotel on booking.com, and the following is my code. With this code I'm able to scrape the first page of reviews. I want to know how to navigate to the next page from here.

import urllib.request
from bs4 import BeautifulSoup
# Hotel page whose reviews are scraped.
url='https://www.booking.com/hotel/us/tides-inn-lauderdale-by-the-sea.html#tab-reviews'
# Build the request with a browser-like User-Agent header.
req = urllib.request.Request(
    url,
    headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
    }
)

# Use the response as a context manager so the underlying connection is
# closed as soon as the body has been read, rather than left open until
# garbage collection.
with urllib.request.urlopen(req) as f:
    html = f.read().decode('utf-8')

soup = BeautifulSoup(html, 'html.parser')

# Collect the text of every <span class="c-review__body"> in the parsed page.
reviews = [
    text.getText()
    for text in soup.find_all('span', class_="c-review__body")
]

CodePudding user response:

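To get all the reviews, you have to request the external (API) URL directly, because the page you requested loads its reviews dynamically. The data returned by that URL is still HTML, so it can be parsed from the DOM in the same way.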

import requests
from bs4 import BeautifulSoup
# Review-list endpoint queried once per page of reviews.
url='https://www.booking.com/reviewlist.html'

# Accumulates the review texts from every page fetched below.
reviews = []
params={
    'aid': '304142',
    'label': 'gen173nr-1DCAso7AFCH3RpZGVzLWlubi1sYXVkZXJkYWxlLWJ5LXRoZS1zZWFIM1gEaBSIAQGYATG4ARfIAQzYAQPoAQH4AQKIAgGoAgO4ArbR15IGwAIB0gIkZTYwMWNiZjEtMTRhZC00YmQ4LThlMDItMmY1Zjg3OWMxNjUy2AIE4AIB',
    'sid': '10e690a22047577aa3c492ae38e9d49c',
    'cc1': 'us',
    'dist': '1',
    'pagename': 'tides-inn-lauderdale-by-the-sea',
    'type': 'total',
    'rows': '10',
    '_': '1649797306493',
    'offset': '130'  # placeholder; overwritten for every page in the loop below
    }

# The headers never change between requests, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
}

# NOTE(review): this assumes the endpoint paginates with `offset` (index of
# the first review to return) and `rows` (page size) -- confirm against the
# live API. The page is advanced by stepping `offset` while `rows` stays
# constant; stepping `rows` with a fixed `offset` would re-request from the
# same starting point every time.
page_size = int(params['rows'])
for offset in range(0, 133, page_size):
    params['offset'] = offset
    req = requests.get(url, params=params, headers=headers)

    soup = BeautifulSoup(req.text,'html.parser')

    # Collect the text of every <span class="c-review__body"> on this page.
    reviews.extend(
        text.getText()
        for text in soup.find_all('span', class_="c-review__body")
    )
print(reviews)
  • Related