I'm working through some simple web scraping tutorials, but I'm stuck.
In particular, 'title' is the only element whose text is extracted successfully. For the other two, 'price' and 'status', I always get the same error:
AttributeError: 'NoneType' object has no attribute 'text'
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Search-results page on ebay.it; the query string asks for "monitor" (_nkw=monitor).
url = 'https://www.ebay.it/sch/i.html?_from=R40&_trksid=p2380057.m570.l1313&_nkw=monitor&_sacat=0'
def get_data(url, timeout=30):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: address of the page to fetch.
        timeout: seconds to wait for the server before giving up.

    Returns:
        A BeautifulSoup object built with the 'html.parser' backend.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
    """
    # Without a timeout, requests can wait on an unresponsive server forever.
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    return soup
def parse(soup):
    """Collect the title, price and status of every listing found in *soup*.

    Args:
        soup: parsed results page, as returned by ``get_data``.

    Returns:
        A list of dicts with the keys 'title', 'price' (float) and 'status'.

    Raises:
        AttributeError: when a listing lacks one of the three tags.
    """
    productlist = []
    results = soup.find_all('div', {'class' : 's-item__info clearfix'})
    for item in results:
        # NOTE: find() returns None when the tag is absent, so the .text
        # access below is what raises
        # "AttributeError: 'NoneType' object has no attribute 'text'".
        product = {
            'title': item.find('h3', {'class': 's-item__title'}).text,
            'price': float(item.find('span', {'class': 's-item__price'}).text.replace('EUR','').strip()),
            'status': item.find('span',{'class':'SECONDARY_INFO'}).text,
        }
        productlist.append(product)
    return productlist
def output(productlist, filename='output.csv'):
    """Save *productlist* as a CSV file and return it as a DataFrame.

    Args:
        productlist: list of dicts, one per product.
        filename: path of the CSV file to write; defaults to 'output.csv'.

    Returns:
        The pandas DataFrame built from *productlist*.
    """
    productsdf = pd.DataFrame(productlist)
    # index=False keeps the DataFrame's row numbers out of the file.
    productsdf.to_csv(filename, index=False)
    print('Saved to CSV')
    return productsdf
# Run the pipeline: fetch the results page, extract the products, save them to CSV.
soup = get_data(url)
productlist =parse(soup)
ug = output(productlist)
Thanks to anyone who can help me.
CodePudding user response:
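Change the selector you use to collect the items so that it only matches listings inside the results container (`#srp-river-results`). The original selector presumably also matched an element that has no price or status, for which `find()` returns `None`: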
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Search-results page on ebay.it; the query string asks for "monitor" (_nkw=monitor).
url = "https://www.ebay.it/sch/i.html?_from=R40&_trksid=p2380057.m570.l1313&_nkw=monitor&_sacat=0"
def get_data(url, timeout=30):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: address of the page to fetch.
        timeout: seconds to wait for the server before giving up.

    Returns:
        A BeautifulSoup object built with the "html.parser" backend.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
    """
    # Without a timeout, requests can wait on an unresponsive server forever.
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    return soup
def _parse_price(label):
    """Turn a price label such as "EUR 1.234,56" into a float, or None."""
    parts = label.replace("EUR", "").split()
    if not parts:
        return None
    # A price range ("EUR 10,00 a EUR 20,00") keeps only its first amount.
    amount = parts[0]
    if "," in amount and "." in amount:
        # Both separators present: the right-most one marks the decimals,
        # the other one only groups thousands and is dropped.
        grouping = "." if amount.rfind(",") > amount.rfind(".") else ","
        amount = amount.replace(grouping, "")
    try:
        return float(amount.replace(",", "."))
    except ValueError:
        return None


def _text_of(item, name, css_class):
    """Return the text of the first matching tag inside *item*, or None."""
    tag = item.find(name, {"class": css_class})
    return tag.text if tag is not None else None


def parse(soup):
    """Collect the title, price and status of every listing in the results.

    Only elements inside ``#srp-river-results`` are considered. A field whose
    tag is missing, or a price that cannot be read as a number, is stored as
    ``None`` instead of aborting the whole run.

    Args:
        soup: parsed results page, as returned by ``get_data``.

    Returns:
        A list of dicts with the keys "title", "price" (float or None) and
        "status".
    """
    productlist = []
    for item in soup.select("#srp-river-results .s-item__info"):
        price_label = _text_of(item, "span", "s-item__price")
        productlist.append(
            {
                "title": _text_of(item, "h3", "s-item__title"),
                "price": None if price_label is None else _parse_price(price_label),
                "status": _text_of(item, "span", "SECONDARY_INFO"),
            }
        )
    return productlist
def output(productlist, filename=None):
    """Return *productlist* as a DataFrame, optionally saving it as CSV.

    Args:
        productlist: list of dicts, one per product.
        filename: path of a CSV file to write; nothing is written when it is
            None (the default).

    Returns:
        The pandas DataFrame built from *productlist*.
    """
    productsdf = pd.DataFrame(productlist)
    if filename is not None:
        # index=False keeps the DataFrame's row numbers out of the file.
        productsdf.to_csv(filename, index=False)
        print("Saved to CSV")
    return productsdf
# Run the pipeline: fetch the results page, extract the products, print them as a table.
soup = get_data(url)
productlist = parse(soup)
ug = output(productlist)
print(ug)
Prints:
title price status
0 FASCIO a due monitor 2 x 17" Dual stand incluso 65.26 Ricondizionato
1 MONITOR USATO RICONDIZIONATO DA 17" 19" 22" SCHERMO LCD PER PC O DVR VARI MARCHI 35.00 Ricondizionato
2 Terra LCD/LED monitor 27" 2760w, Earphone, audio, HDMI, DVI, VGA 20.00 Di seconda mano
3 Nuova inserzione22" LG Business monitor LED TFT 55,9 cm Nero USB ALTOPARLANTI 45.90 Ricondizionato
4 LG 24mb56hq-b 60cm 24" IPS MONITOR LED HDMI VGA 5ms altezza regolabile, VESA 25.50 Di seconda mano
5 MONITOR PC HP 22" ELITEDISPLAY E222 1920X1080 LED HD HDMI VGA DP USB GRADO A 80.00 Di seconda mano
6 Lenovo ThinkCentre tio24gen3 23,8 pollici Full HD IPS Monitor Led-Nero Nuovo OVP 66.00 Nuovo (Altro)
7 DELL E2216H 22" LED-LCD (TFT) TN FHD (1080p) del monitor 39.55 Ricondizionato
...