I am trying to scrape specific text from the page below, but I keep getting errors. I am also unable to write the results to a CSV file.
My goal is to scrape each athlete's nickname, full name, weight, and record, based on the HTML tags. Here is my code:
from bs4 import BeautifulSoup
import requests
from csv import writer
import json
# NOTE(review): indentation was lost when this snippet was pasted; presumably
# the statements following `with ...:` and `for ...:` belong inside those blocks.
# Request headers carrying a browser-like User-Agent string.
# NOTE(review): the User-Agent literal below is split across two lines as pasted;
# a single-quoted Python string cannot span lines, so it must be rejoined to run.
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)
Chrome/104.0.0.0 Safari/537.36'
}
url="https://www.ufc.com/athletes/all"
# Download the page and parse the response body with the built-in HTML parser.
page = requests.get(url,headers=headers)
soup = BeautifulSoup(page.content, 'html.parser')
# Collect every <div> whose class is "c-listing-athlete__text".
lists = soup.find_all('div', class_="c-listing-athlete__text")
# Write the header row, then one row per matched <div>, to UFCstats.csv.
with open(r"UFCstats.csv", 'w', encoding='utf8', newline='') as f:
thewriter = writer(f)
header = ['Nickname', 'Fullname', 'Weight', 'Record']
thewriter.writerow(header)
# NOTE: the loop variable `list` shadows the built-in of the same name.
for list in lists:
# find() returns None when no matching tag exists, so chaining another
# .find()/.text onto it raises AttributeError. Presumably some cards lack a
# nickname <span> or the nested "field__item" <div> - verify against the page.
nickname = list.find('span', class_="c-listing-athlete__nickname").find('div', class_="field__item").text
# The remaining fields are disabled by enclosing them in a string literal.
# NOTE(review): inside it, find() is given 'div.span.span' as a tag name; find()
# does not interpret CSS selectors, so that lookup would presumably match nothing.
"""
fullName = list.find('span', class_="c-listing-athlete__name")
weight = list.find('div', class_="field__item").text
record = list.find('div.span.span', class_="c-listing-athlete__record")
info =[nickname, fullName, weight, record]
"""
# Only the nickname is written, although the header row declares four columns.
info =[nickname]
print(info)
thewriter.writerow(info)
Error:
AttributeError: 'NoneType' object has no attribute 'text'
The URL page source code:
Source Code of URL. Please assist, thanks.
CodePudding user response:
In some cases, BeautifulSoup cannot find the requested element on an athlete card, so `find()` returns `None`;
accessing `.text` on that `None` is what raises the exception. Try the following code and see whether it resolves your issue:
from bs4 import BeautifulSoup
import requests
from csv import writer
# Browser-like User-Agent sent with the request. The product tokens are written
# without spaces around "/" (e.g. "Chrome/104.0.0.0"), which is the expected form.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    ),
}
url = "https://www.ufc.com/athletes/all"

# CSV column name -> class of the <span> looked up for that column on each card.
# Insertion order defines the column order of the output file.
FIELD_CLASSES = {
    'Nickname': "c-listing-athlete__nickname",
    'Fullname': "c-listing-athlete__name",
    'Weight': "c-listing-athlete__title",
    'Record': "c-listing-athlete__record",
}


def extract_athlete(athlete_card):
    """Return ``[nickname, fullname, weight, record]`` for one athlete card.

    Each value is the stripped text of the matching ``<span>`` inside
    *athlete_card*. A field whose ``<span>`` is absent yields ``""`` rather
    than raising, because ``find()`` returns ``None`` when nothing matches.

    Args:
        athlete_card: An object exposing BeautifulSoup's
            ``find(name, class_=...)`` method (normally a ``Tag``).

    Returns:
        A list of four strings in the order of ``FIELD_CLASSES``.
    """
    row = []
    for css_class in FIELD_CLASSES.values():
        tag = athlete_card.find('span', class_=css_class)
        row.append(tag.text.strip() if tag is not None else "")
    return row


def main():
    """Download the athlete listing and write one CSV row per athlete card.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # Fail fast on a hung connection or an error page instead of silently
    # producing a CSV that contains only the header row.
    page = requests.get(url, headers=headers, timeout=30)
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    lists = soup.find_all('div', class_="c-listing-athlete__text")

    # The context manager closes the file even if a row fails part-way through.
    with open(r"UFCstats.csv", 'w', encoding='utf8', newline='') as f:
        csv_writer = writer(f)
        csv_writer.writerow(list(FIELD_CLASSES))
        for athlete_card in lists:
            info = extract_athlete(athlete_card)
            print(info)
            csv_writer.writerow(info)


if __name__ == "__main__":
    main()