I am using BeautifulSoup to filter data from a website. To do that, I loop over several search terms and feed each one into the site's built-in search box.
When a search term returns no results, the scraping loop below breaks with this error:
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_25352/1005464644.py in <cell line: 10>()
21
22 soup = BeautifulSoup(driver.page_source, "html.parser")
---> 23 results = soup.find('ul', {'class':'result-list'}).find_all('li')
24
25 for i in results:
AttributeError: 'NoneType' object has no attribute 'find_all'
Because the search returned no hits, there is no result list on the page: soup.find('ul', {'class':'result-list'}) returns None, and calling .find_all('li') on None raises the AttributeError.
How can I catch this error?
Thanks for your help.
Here is the code:
# Imports
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import requests
import pandas as pd

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://www.google.com")

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'}

ausschreibungsliste = []

# search terms (CPV codes)
sb_1 = ['66512200', '85140000', '75000000', '75130000', '75131000', '79200000', '79210000', '79510000']

for z in sb_1:
    time.sleep(1)
    driver.get('https://www.service.bund.de/Content/DE/Ausschreibungen/Suche/Formular.html')
    was_sb1 = driver.find_element("xpath", '//input[@id="f4641464d4642144"]')
    was_sb1.send_keys(z)
    was_sb1.send_keys(Keys.RETURN)

    while True:
        soup = BeautifulSoup(driver.page_source, "html.parser")
        results = soup.find('ul', {'class':'result-list'}).find_all('li')

        for i in results:
            # build one record per result item
            # tender title
            ausschreibung = i.find_all('h3')[0].get_text().strip().replace(u'Ausschreibung', u'').replace(u'\xad', u'')
            # contracting authority
            organisation = i.find_all('p')[0].get_text().strip().replace(u'Vergabestelle ', u'')
            # publication date
            verdatum = i.find_all('p')[1].get_text().strip().replace(u'Veröffentlicht ', u'')
            # submission deadline
            frist = i.find_all('p')[2].get_text().replace(u'Angebotsfrist ', u'')
            # type
            typ = 'Ausschreibung'
            # website
            website = 'service.bund.de'
            # checked from
            pruefdatum_format = 'fehlt'
            # file created
            jetzt = 'fehlt'
            i_info = {
                'Vergabedatum': verdatum,
                'Frist': frist,
                'Organisation': organisation,
                'Ausschreibung': ausschreibung,
                'Typ': typ,
                'Website': website,
                'Prüfung ab': pruefdatum_format,
                'Datei erzeugt': jetzt
            }
            ausschreibungsliste.append(i_info)

        # go to the next results page; stop when there is none
        try:
            time.sleep(2)
            driver.find_element("xpath", '//*[@id="main"]/div/div/section[2]/div[1]/div/form/fieldset/div[2]/div[1]/ul/li[2]/a').click()
        except NoSuchElementException:
            break
CodePudding user response:
Something along the lines of:
if soup.find('ul', {'class':'result-list'}):
    results = soup.find('ul', {'class':'result-list'}).find_all('li')
    for i in results:
        # etc.
else:
    pass
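A slightly tidier variant of the same guard (a minimal sketch; the record-building code is elided) looks the element up once and reuses it instead of calling find() twice:

result_list = soup.find('ul', {'class':'result-list'})
if result_list:
    # only reached when the search produced at least one hit
    for i in result_list.find_all('li'):
        ...  # build i_info as before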
CodePudding user response:
I have now found a solution based on the comments of Scott Hunter and AudioBaten.
Here is the (shortened) code:
ausschreibungsliste = []

# CPV codes to search for
cpvcode = ['32441300', '64226000', '66512200']

for z in cpvcode:
    time.sleep(1)
    driver.get('the_url')
    suchfeld = driver.find_element("xpath", '//input[@id="f4641464d4642144"]')
    suchfeld.clear()
    suchfeld.send_keys(z)
    suchfeld.send_keys(Keys.RETURN)
    try:
        while True:
            # re-parse the page source after every page load
            soup = BeautifulSoup(driver.page_source, "html.parser")
            results = soup.find('ul', {'class':'result-list'}).find_all('li')
            for i in results:
                # build the record
                .... etc. ....
                i_info = {
                    'Vergabedatum': verdatum,
                    'Frist': frist,
                    'Organisation': organisation,
                    'Ausschreibung': ausschreibung,
                    'CPV-Code': z,
                    'Link': linkausschreibung,
                    'Typ': typ,
                    'Website': website,
                    'Prüfung ab': pruefdatum_format,
                    'Datei erzeugt': jetzt
                }
                ausschreibungsliste.append(i_info)
            # load the next page until the last page is reached
            if not soup.select_one('span.disabled'):
                driver.find_element("xpath", '//*[@id="main"]/div/div/section[2]/div[1]/div/form/fieldset/div[2]/div[1]/ul/li[2]/a').click()
            else:
                print('Ausschreibungen gefunden :', len(ausschreibungsliste))
                break
    except AttributeError:
        # no result list on the page: this search term produced no hits, try the next one
        continue
Thanks for your help.
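Since pandas is already imported in the original script, the collected list of dicts can be turned into a table at the end. A minimal sketch (the CSV filename is just an example, not from the original code):

import pandas as pd

# one row per tender, columns taken from the i_info dict keys
df = pd.DataFrame(ausschreibungsliste)
print(df.head())
df.to_csv('ausschreibungen.csv', index=False)  # hypothetical filename, adjust as needed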