import bs4 as bs
import urllib.request
import csv
import requests
import re
from urllib.request import urlopen
from urllib.error import HTTPError
import pandas as pd
import time
import urllib.error
# ISBN-13 identifiers of the books to look up on Book Depository.
book_ids = [
    "9781408110416",
    "9789604249671",
    "9781405950305",
]
def get_description(book_id):
    """Fetch the Book Depository product page for *book_id* and return the
    image/description container element.

    Parameters
    ----------
    book_id : str
        ISBN-13 string appended to the product-page URL.

    Returns
    -------
    bs4.element.Tag or str
        The ``div.item-img-content`` tag when present, otherwise the
        placeholder string ``"[No description"``.

    Raises
    ------
    urllib.error.HTTPError
        Propagated from ``urlopen`` when the page is missing (e.g. 404).
    """
    # BUG FIX: the original line was `'...' book_id`, which is a
    # SyntaxError — a string literal juxtaposed with a name does not
    # concatenate in Python; an explicit `+` is required.
    my_urls = ('https://www.bookdepository.com/'
               'Enid-Blytons-Christmas-Tales-Enid-Blyton/' + book_id)
    source = urlopen(my_urls).read()
    soup = bs.BeautifulSoup(source, 'lxml')
    description = soup.find('div', class_='item-img-content')
    if description:
        return description
    # Placeholder when the page has no matching element.
    return "[No description"
# Look up every book in turn, pausing between requests to avoid
# hammering the server.
for book_id in book_ids:
    print(book_id)
    print(get_description(book_id))
    time.sleep(2)
The error I get -
HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
Hello, I am working on a script to scrape image URLs from Book Depository. My main problem is that certain books return a 404 error because they are not available on the platform.
How can I make the script simply move on to the next book ID instead of stopping the entire loop?
Thanks in advance.
CodePudding user response:
You can put the code in the get_description()
function inside a try
/except
block. Something like this:
def get_description(book_id):
    """Fetch the Book Depository product page for *book_id* and return the
    image/description container element, skipping missing pages.

    Parameters
    ----------
    book_id : str
        ISBN-13 string appended to the product-page URL.

    Returns
    -------
    bs4.element.Tag or str or None
        The ``div.item-img-content`` tag when present, the placeholder
        string ``"[No description"`` when the element is absent, or
        ``None`` when the page itself is missing (HTTP error).
    """
    # BUG FIX: the original line was `'...' book_id` — a string literal
    # juxtaposed with a name is a SyntaxError; an explicit `+` is required.
    my_urls = ('https://www.bookdepository.com/'
               'Enid-Blytons-Christmas-Tales-Enid-Blyton/' + book_id)
    try:
        source = urlopen(my_urls).read()
        soup = bs.BeautifulSoup(source, 'lxml')
        description = soup.find('div', class_='item-img-content')
        if description:
            return description
        return "[No description"
    except HTTPError:
        # A 404 means the book is not listed on the platform; return None
        # (same value the original bare `except: pass` produced) so the
        # caller's loop moves on to the next book instead of crashing.
        # Narrowed from a bare `except`, which would have silently
        # swallowed unrelated bugs (NameError, KeyboardInterrupt, ...).
        return None
Now if the code encounters an error, it'll just skip that book_id
and move on to the next one.