Home > OS >  Problems extracting text with the BeautifulSoup function
Problems extracting text with the BeautifulSoup function

Time:10-26

I'm working through some simple web-scraping tutorials, but I'm stuck and can't make progress.

In particular, 'title' is the only element whose text is extracted successfully. For the other two, 'price' and 'status', I always get the same error:

AttributeError: 'NoneType' object has no attribute 'text'

import requests
from bs4 import BeautifulSoup
import pandas as pd
  
url = 'https://www.ebay.it/sch/i.html?_from=R40&_trksid=p2380057.m570.l1313&_nkw=monitor&_sacat=0'
   
def get_data(url):
    """Download the page at ``url`` and return it as a parsed BeautifulSoup tree."""
    response = requests.get(url)
    return BeautifulSoup(response.text, 'html.parser')

def parse(soup):
    """Collect title, price and status from every listing block in ``soup``.

    Args:
        soup: Parsed page whose ``find_all`` yields the listing blocks.

    Returns:
        A list with one dict per listing, holding the keys ``'title'``
        (str), ``'price'`` (float) and ``'status'`` (str).

    Raises:
        AttributeError: If a matched block lacks one of the three elements;
            ``find`` then returns ``None``, which has no ``.text``.
        ValueError: If the price text left after removing ``'EUR'`` is not
            a valid float literal.
    """
    productlist = []
    results = soup.find_all('div', {'class': 's-item__info clearfix'})
    for item in results:
        # Drop the currency code so the remainder can be read as a number.
        price_text = item.find('span', {'class': 's-item__price'}).text
        product = {
            'title': item.find('h3', {'class': 's-item__title'}).text,
            'price': float(price_text.replace('EUR', '').strip()),
            'status': item.find('span', {'class': 'SECONDARY_INFO'}).text,
        }
        productlist.append(product)
    return productlist



def output(productlist):
    """Turn the scraped records into a DataFrame and export it as CSV.

    The frame is written to ``output.csv`` in the current working directory
    (without the index column), a confirmation line is printed, and the
    frame is returned to the caller.
    """
    frame = pd.DataFrame(productlist)
    frame.to_csv('output.csv', index=False)
    print('Saved to CSV')
    return frame

  soup = get_data(url)
    productlist =parse(soup)
    ug = output(productlist)

Thanks to anyone who wants to help me

CodePudding user response:

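Change the selector that picks the items so that it only matches listings inside the results container (`#srp-river-results`). The original `find_all` evidently also matches item blocks that have no price or status element; for those, `find` returns `None`, and calling `.text` on `None` raises the `AttributeError`: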

import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://www.ebay.it/sch/i.html?_from=R40&_trksid=p2380057.m570.l1313&_nkw=monitor&_sacat=0"


def get_data(url, timeout=30):
    """Download ``url`` and return the page as a parsed BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests.get`` may block indefinitely.

    Returns:
        The response body parsed with the built-in ``html.parser``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error page is never parsed as if it were results.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    return BeautifulSoup(r.text, "html.parser")


def parse(soup):
    """Extract title, price and status for each listing in the results list.

    Only blocks inside the ``#srp-river-results`` container are considered.
    A field whose element is absent from a listing is reported as ``None``
    instead of raising ``AttributeError``, so one incomplete listing does
    not abort the whole scrape.

    Args:
        soup: Parsed page whose ``select`` yields the listing blocks.

    Returns:
        A list with one dict per listing, holding the keys ``"title"``
        (str or None), ``"price"`` (float or None) and ``"status"``
        (str or None).
    """
    return [
        {
            "title": _find_text(item, "h3", "s-item__title"),
            "price": _parse_price(_find_text(item, "span", "s-item__price")),
            "status": _find_text(item, "span", "SECONDARY_INFO"),
        }
        for item in soup.select("#srp-river-results .s-item__info")
    ]


def _find_text(item, tag, css_class):
    """Return the text of the first matching child of ``item``, or None."""
    node = item.find(tag, {"class": css_class})
    return None if node is None else node.text


def _parse_price(raw):
    """Convert price text such as ``'EUR 1.234,56'`` to a float, or None.

    Returns None when ``raw`` is None, holds no token besides the currency
    code, or its first token is not numeric.
    """
    if raw is None:
        return None
    tokens = raw.replace("EUR", "").split()
    if not tokens:
        return None
    # A range such as '10,00 a 20,00' is represented by its first amount.
    amount = tokens[0]
    if "," in amount:
        # Decimal comma present: any '.' is a thousands separator, drop it.
        amount = amount.replace(".", "").replace(",", ".")
    try:
        return float(amount)
    except ValueError:
        return None


def output(productlist):
    """Return the scraped records as a pandas DataFrame, one row per record.

    Nothing is written to disk and nothing is printed here.
    """
    return pd.DataFrame(productlist)


# Run the whole pipeline: download the search page, extract the listings,
# build the DataFrame, and show it.
soup = get_data(url)
productlist = parse(soup)
ug = output(productlist)  # ug holds the DataFrame returned by output()
print(ug)

Prints:

                                                                                            title   price              status
0                                                 FASCIO a due monitor 2 x 17" Dual stand incluso   65.26      Ricondizionato
1                MONITOR USATO RICONDIZIONATO DA 17" 19" 22" SCHERMO LCD PER PC O DVR VARI MARCHI   35.00      Ricondizionato
2                                Terra LCD/LED monitor 27" 2760w, Earphone, audio, HDMI, DVI, VGA   20.00     Di seconda mano
3                   Nuova inserzione22" LG Business monitor LED TFT 55,9 cm Nero USB ALTOPARLANTI   45.90      Ricondizionato
4                    LG 24mb56hq-b 60cm 24" IPS MONITOR LED HDMI VGA 5ms altezza regolabile, VESA   25.50     Di seconda mano
5                    MONITOR PC HP 22" ELITEDISPLAY E222 1920X1080 LED HD HDMI VGA DP USB GRADO A   80.00     Di seconda mano
6                Lenovo ThinkCentre tio24gen3 23,8 pollici Full HD IPS Monitor Led-Nero Nuovo OVP   66.00       Nuovo (Altro)
7                                        DELL E2216H 22" LED-LCD (TFT) TN FHD (1080p) del monitor   39.55      Ricondizionato

...
  • Related