Home > Net >  Python scraped data returns None
Python scraped data returns None

Time:10-05

I am trying to scrape some data from an e-commerce website, but when I print the name, every single result is None. What is the solution?

import requests
from bs4 import BeautifulSoup


baseurl = 'https://www.yoox.com'

r = requests.get(baseurl)

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
}
productlinks = []
# Walk the listing pages (range(1) only fetches page 0) and collect every link found.
for x in range(1):
    # The URL contains literal braces and single quotes: the braces are doubled so the
    # f-string does not treat them as a replacement field, and the string is
    # double-quoted so the inner single quotes do not terminate it.
    r = requests.get(f"https://www.yoox.com/us/men/shoponline/sneakers_c#/dept=shoesmen&gender=U&page={x}&attributes={{'ctgr':['snkrs5']}}&season=X")
    soup = BeautifulSoup(r.content, 'lxml')
    productlist = soup.find_all('div', class_='col-8-24')

    for item in productlist:
        for link in item.find_all('a', href=True):
            # href values are appended to the site root to build the full link.
            productlinks.append(baseurl + link['href'])

# Fetch each collected link and print the div matched by the class lookup
# (soup.find() returns None when nothing matches).
for link in productlinks:
    r = requests.get(link, headers=headers)
    soup = BeautifulSoup(r.content, 'lxml')
    name = soup.find('div', class_='brand font-bold text-uppercase')
    print(name)

CodePudding user response:

You are looking up the wrong class attribute: 'brand font-bold text-uppercase' is not in the HTML. Also, you are doubling up on the links, so you may want to eliminate the duplicates. I had different code to pull the data, but changed it to reuse your code.

import requests
from bs4 import BeautifulSoup
import re

baseurl = 'https://www.yoox.com'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
}


def _text_or_none(tag):
    """Return the text of a tag returned by soup.find(), or None when nothing was found."""
    return tag.text if tag is not None else None


productlinks = []
# Walk the listing pages (range(1) only fetches page 0) and collect every link found.
for x in range(1):
    # The braces in the URL are literal, so they are doubled inside the f-string, and
    # the string is double-quoted so the inner single quotes do not terminate it.
    # NOTE(review): everything after '#' is a URL fragment, which HTTP clients do not
    # send to the server, so page={x} probably does not affect this response - confirm.
    url = f"https://www.yoox.com/us/men/shoponline/sneakers_c#/dept=shoesmen&gender=U&page={x}&attributes={{'ctgr':['snkrs5']}}&season=X"
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    productlist = soup.find_all('div', class_='col-8-24')

    for item in productlist:
        for link in item.find_all('a', href=True):
            productlinks.append(baseurl + link['href'])

# Keep only the 'SearchResult' links and drop repeats so each product is requested
# once; dict.fromkeys() preserves first-seen order.
productlinks = list(dict.fromkeys(href for href in productlinks if 'SearchResult' in href))
for link in productlinks:
    r = requests.get(link, headers=headers)
    soup = BeautifulSoup(r.content, 'lxml')
    # Each lookup goes through _text_or_none so a selector that no longer matches
    # yields None in the output instead of raising AttributeError on `.text`.
    name = _text_or_none(soup.find('h1', class_='MuiTitle3-title3'))
    current_price = _text_or_none(soup.find('div', class_='MuiTitle4-title4 currentPrice_135Ct'))
    product_category = _text_or_none(soup.find('h2', class_='MuiBody1-body1 microcat_q5Pet'))
    product_list = {
        'link': link,
        'name': name,
        'current_price': current_price,
        'product-cateegory': product_category
    }

    print(product_list)

CodePudding user response:

Try this:

import time

from bs4 import BeautifulSoup
from selenium import webdriver

from webdriver_manager.microsoft import EdgeChromiumDriverManager

driver = webdriver.Edge(EdgeChromiumDriverManager().install())
url = "https://www.yoox.com/us/men/shoponline/sneakers_c#/dept=shoesmen&gender=U&page=1&attributes={'ctgr':" \
      "['snkrs5']}&season=X "

try:
    driver.get(url)
    # Fixed pause before reading page_source; presumably this gives the page's
    # scripts time to render the product listing.
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always shut the browser down, even if loading or parsing the page fails.
    driver.quit()

# Every div carrying the brand classes; its text is the brand name printed below.
brand_divs = soup.find_all('div', attrs={"class": "brand font-bold text-uppercase"})

for name in brand_divs:
    print(name.text)

This will give:

ADIDAS ORIGINALS
NEW BALANCE
NEW BALANCE
NEW BALANCE
NEW BALANCE
ADIDAS ORIGINALS
ADIDAS ORIGINALS
NEW BALANCE
NEW BALANCE
PUMA
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
PUMA
PUMA
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS
ADIDAS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS
ADIDAS ORIGINALS
ADIDAS
NEW BALANCE
ADIDAS ORIGINALS
REEBOK
NEW BALANCE
ADIDAS
REEBOK
PUMA
REEBOK
REEBOK
ADIDAS ORIGINALS
NEW BALANCE
REEBOK
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
ADIDAS ORIGINALS
REEBOK
REEBOK
NEW BALANCE
PUMA
ADIDAS ORIGINALS
PUMA
NEW BALANCE
REEBOK
REEBOK
NEW BALANCE
REEBOK
REEBOK
ADIDAS ORIGINALS
REEBOK
PUMA
PS PAUL SMITH
CAMPER
PS PAUL SMITH
CAMPER
CAMPER
CAMPER
SELECTED HOMME
JACK & JONES
JACK & JONES
ARTIGIANI AURELIO GIOCONDI
ARTIGIANI AURELIO GIOCONDI
MALDINI
LEMARÉ
MALDINI
LEMARÉ
STEFANO BONFIGLIOLI
ARTIGIANI AURELIO GIOCONDI
KENZO
CAMPER
SELECTED HOMME
STEFANO BONFIGLIOLI
STEFANO BONFIGLIOLI
STEFANO BONFIGLIOLI
CAMPER
KENZO
PS PAUL SMITH
FILLING PIECES
CAMPER
PS PAUL SMITH
RARE
RARE
ZESPÀ
CALVIN KLEIN JEANS
CAMPER
CAMPER
CAMPER
CAMPER
LEMARÉ
LEMARÉ
LEMARÉ
FILLING PIECES
ARTIGIANI AURELIO GIOCONDI
SELECTED HOMME
SELECTED HOMME
SELECTED HOMME
SELECTED HOMME
KENZO
PS PAUL SMITH
PS PAUL SMITH
VIVIENNE WESTWOOD
SELECTED HOMME
PS PAUL SMITH
KENZO
CAMPER
  • Related