Hey, how can I change this code to visit each page and get the info I want (the book name and the URL of the book)?
The URL is: http://books.toscrape.com/
I wrote this code (with Google's help), but I want to get all the books from all the pages (50 pages).
# import web grabbing client and
# HTML parser
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import requests
# Scrape the first page of the catalogue and print "<title> - <absolute URL>"
# for every book listed on it.

# variable to store website link as string
booksURL = 'http://books.toscrape.com/'

# grab the page; the context manager closes the connection even if read() fails
with uReq(booksURL) as urlClient:
    page_html = urlClient.read()

# call BeautifulSoup for parsing
page_soup = soup(page_html, "html.parser")

# grabs all the products under list tag
bookshelf = page_soup.find_all(
    "li", {"class": "col-xs-6 col-sm-4 col-md-3 col-lg-3"})

for books in bookshelf:
    # the full title is stored in the "title" attribute of the <h3> link
    book_title = books.h3.a["title"]
    # href of the first link inside the product entry
    book_url = books.find("a")["href"]
    # the href is appended to the site root to form the full link
    print(book_title, "-", booksURL + book_url)
I tried to add this code, but I don't know how to add it to my script:
# Fetch and parse every catalogue page. range() excludes its stop value, so
# range(1, 51) yields pages 1..50 (starting at 0 would request page-0.html).
for i in range(1, 51):
    url = "https://books.toscrape.com/catalogue/page-{}.html".format(i)
    r = requests.get(url)
    # BeautifulSoup is imported under the alias `soup`; the result is stored
    # under a different name so the alias is not overwritten on the first pass.
    page_soup = soup(r.content, "html.parser")
CodePudding user response:
This might work. I have removed uReq
because I prefer using requests ;)
# import web grabbing client and
# HTML parser
from bs4 import BeautifulSoup as soup
import requests
# Local import: used to turn each book's href into an absolute URL.
from urllib.parse import urljoin

# Walk every catalogue page and print "<title> - <URL>" for each book.
# range() excludes its stop value, so range(1, 51) covers pages 1..50.
for i in range(1, 51):
    url = "https://books.toscrape.com/catalogue/page-{}.html".format(i)
    response = requests.get(url)
    # call BeautifulSoup for parsing
    page_soup = soup(response.content, "html.parser")
    # grabs all the products under list tag
    bookshelf = page_soup.find_all(
        "li", {"class": "col-xs-6 col-sm-4 col-md-3 col-lg-3"})
    for books in bookshelf:
        # the full title is stored in the "title" attribute of the <h3> link
        book_title = books.h3.a["title"]
        # hrefs are presumably relative to the listing page (TODO confirm);
        # urljoin resolves them against it and leaves absolute links as-is
        book_url = urljoin(url, books.find("a")["href"])
        print(book_title, "-", book_url)