Here is my code:
def parser():
    """Scrape quotes.toscrape.com page by page and store quotes not yet saved.

    For each page, every quote whose text is not already present in ``Quote``
    is stored with its author; the ``Author`` row is created on first use.

    The crawl ends when either:
      * 5 new quotes have been stored from the current page and another
        quote on that page is about to be processed, or
      * the page has no "next" pagination link (last page).
    """
    # Local import keeps this block self-contained; urljoin turns the
    # relative href ('/page/2/') into an absolute URL for requests.
    from urllib.parse import urljoin

    url = 'https://quotes.toscrape.com'
    flag = True
    while flag:
        responce = requests.get(url)
        soup = BeautifulSoup(responce.text, 'html.parser')

        # Query the document once per page instead of once per quote.
        quote_l = soup.find_all('span', {'class': 'text'})
        # NOTE(review): assumes quotes and authors appear pairwise in the
        # same order on the page -- the original relied on the same index.
        author_l = soup.find_all('small', {'class': 'author'})

        q_count = 0  # new quotes stored from this page
        for quote, author in zip(quote_l, author_l):
            if q_count >= 5:
                flag = False
                break
            if Quote.objects.filter(quote=quote.string).exists():
                continue
            # Fetch the author, creating the row only if it is missing.
            a, _ = Author.objects.get_or_create(name=author.string)
            Quote.objects.create(quote=quote.string, author_id=a.id)
            q_count += 1

        # The last page has no <li class="next">; stop instead of
        # dereferencing None.
        next_page = soup.find('li', {'class': 'next'})
        if next_page is None:
            break
        url = urljoin(url, next_page.a['href'])
I need to get the next page, but I get this exception: 'NoneType' object has no attribute 'a'.
How can I fix that, and how can I optimize my code? Thanks.
CodePudding user response:
Upon reaching the last page there will be no Next button so you need an exit condition check prior to attempting to access the href for next page. One possibility would be to add the following lines before your current last line:
# Look up the pagination link; find() yields None when the page has no "next" item.
next_page = soup.find('li', {'class': 'next'})
if next_page is None:
    flag = False  # or return
Or simply `return` at that point.
You'd also update the last line to use the variable, of course, and ensure you are not continuously extending url with suffixes of next page. For example, one could add the suffix during the requests call:
def parser():
    """Walk the paginated site, requesting each page until no "next" link remains.

    The base URL stays fixed; only ``suffix`` (the href of the current
    page's "next" link) changes between requests, so the URL never
    accumulates path segments from earlier pages.
    """
    flag = True
    url = 'https://quotes.toscrape.com'
    suffix = ''  # empty for the first page
    while flag:
        responce = requests.get(url + suffix)
        soup = BeautifulSoup(responce.text, 'html.parser')
        # other code
        next_page = soup.find('li', {'class': 'next'})
        if not next_page:
            # No "next" item on this page: nothing more to fetch.
            return
        suffix = next_page.a['href']