i am learning how to scrape data from booking using python and i get this error for this line :
for h in data["hotels"]:
'NoneType' object is not iterable
This is my code :
e = Extractor.from_yaml_file('C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/booking.yml')
def scrape(url):
headers = {
'Connection': 'keep-alive',
'Pragma': 'no-cache',
'Cache-Control': 'no-cache',
'DNT': '1',
'Upgrade-Insecure-Requests': '1',
# You may want to change the user agent if you get blocked
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36',
'Accept': 'text/html,application/xhtml xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Referer': 'https://www.booking.com/index.en-gb.html',
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
}
# Download the page using requests
print("Downloading %s"%url)
r = requests.get(url, headers=headers)
# Pass the HTML of the page and create
return e.extract(r.text,base_url=url)
with open("C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/urls.txt",'r') as urllist, open('C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/data.csv','w') as outfile:
fieldnames = [
"name",
"location",
"price",
"room_type",
"beds",
"number_of_ratings",
"url"
]
writer = csv.DictWriter(outfile, fieldnames=fieldnames,quoting=csv.QUOTE_ALL)
writer.writeheader()
for url in urllist.readlines():
data = scrape(url)
if data:
for h in data["hotels"]:
writer.writerow(h)
CodePudding user response:
You can try this:
for h in data["hotels"] if h else None:
CodePudding user response:
You can check if data has hotels or not before looping
if data and data['hotels'] is not None:
for h in data["hotels"]:
writer.writerow(h)