import requests
from bs4 import BeautifulSoup
# Download the restaurant listing page and parse it.
url = "https://boulder.noshdelivery.co/restaurants"
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
# Stop on 4xx/5xx responses instead of parsing an error page as a listing.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Element carrying the "dd_rest_list" class; all later lookups are scoped to it.
restaurant_wrapper = soup.find(class_="dd_rest_list")
# Every element carrying the vendor-name class inside the wrapper.
restaurants = restaurant_wrapper.find_all(class_="menu__vendor-name")
# find() (unlike find_all()) yields a single element, so each of these two
# names refers to one <span> only.
restaurant_street_address = restaurant_wrapper.find("span", itemprop="streetAddress")
restaurant_address_locality = restaurant_wrapper.find("span", itemprop="addressLocality")
def extract_restaurant_data(restaurant):
    """Return a dict with ``title``, ``streetAddress`` and ``addressLocality``.

    The ``restaurant`` argument is not consulted. The title is looked up with
    ``find`` on the module-level ``restaurant_wrapper``, and the two address
    values are read from the module-level ``restaurant_street_address`` and
    ``restaurant_address_locality`` elements. All three texts are stripped of
    surrounding whitespace.
    """
    name_node = restaurant_wrapper.find(class_="menu__vendor-name")
    title_text = name_node.text.strip()
    street_text = restaurant_street_address.text.strip()
    locality_text = restaurant_address_locality.text.strip()
    return {
        "title": title_text,
        "streetAddress": street_text,
        "addressLocality": locality_text,
    }
# Call the extractor once per vendor-name element, then show the whole list.
results = list(map(extract_restaurant_data, restaurants))
print(results)
I would like to know why this code prints exactly the same info for every entry and does not iterate over the list of restaurants.
My output is this:
{'title': '5280 Cafe At Rallysport', 'streetAddress': '2727 29th St.',
'addressLocality': 'Boulder'},
{'title': '5280 Cafe At Rallysport', 'streetAddress': '2727 29th St.', 'addressLocality': 'Boulder'}........
The info is the same. I do not know why my code does not iterate over the different names from the list of "restaurants"
CodePudding user response:
You only did one find on the data, so you only ever get the first match.
Do a find_all on each section and then zip them together!
# Collect every match for each field separately, scoped to the wrapper.
name_tags = restaurant_wrapper.find_all(class_="menu__vendor-name")
street_tags = restaurant_wrapper.find_all("span", itemprop="streetAddress")
locality_tags = restaurant_wrapper.find_all("span", itemprop="addressLocality")

# Pair the i-th name with the i-th street address and the i-th locality.
# zip() stops at the shortest input. NOTE(review): this assumes every
# restaurant has exactly one of each element -- confirm against the page.
restaurant_details = zip(name_tags, street_tags, locality_tags)

results = [
    {
        "title": name_tag.text.strip(),
        "streetAddress": street_tag.text.strip(),
        "addressLocality": locality_tag.text.strip(),
    }
    for name_tag, street_tag, locality_tag in restaurant_details
]
print(results)
CodePudding user response:
Your function has restaurant_wrapper.find(class_="menu__vendor-name") written in it,
so each time it runs it returns only the first occurrence of the class menu__vendor-name.
To print a new restaurant's detail in each iteration you would have to access each web element individually.
The code below would allow you to get the details for all restaurants.
# All <div class="dd_restwords"> elements inside the wrapper
# (presumably one per restaurant -- verify against the page markup).
restwords = restaurant_wrapper.find_all("div", class_="dd_restwords")
def extract_restaurant_data(restaurant):
    """Return the name and address of a single restaurant element.

    Args:
        restaurant: An element exposing ``find(name, attrs)`` (for example a
            BeautifulSoup tag) that contains the vendor-name ``div`` and the
            ``streetAddress`` / ``addressLocality`` ``span`` elements.

    Returns:
        A dict with the keys ``title``, ``streetAddress`` and
        ``addressLocality``, each holding the element's text with
        surrounding whitespace removed.

    Raises:
        AttributeError: If one of the three elements is absent, because
            BeautifulSoup's ``find`` then returns ``None``.
    """
    # Every lookup is made on ``restaurant`` itself, so each call reads the
    # element it was given.
    title = restaurant.find("div", {"class": "menu__vendor-name"}).text
    street_address = restaurant.find("span", {"itemprop": "streetAddress"}).text
    address_locality = restaurant.find("span", {"itemprop": "addressLocality"}).text
    # Strip the surrounding whitespace so the values match the stripped
    # output of the other snippets.
    return {
        "title": title.strip(),
        "streetAddress": street_address.strip(),
        "addressLocality": address_locality.strip(),
    }
# Extract and print one record per element in restwords, in order.
for record in map(extract_restaurant_data, restwords):
    print(record)