Issue With Web Scraping In Python-CodePudding

For some reason, when I run this script it crashes without showing any error before I get any results. Can someone please help me get it to work? I don't know why this happens; I think it may have something to do with how the `items` variable is obtained, but I just can't figure it out. Any help would be appreciated.

Here Is The Script:

from bs4 import BeautifulSoup
import requests
import re
import time

print("Computer Deal Finder")

print("\nBy:  ViridianTelamon.")

print("\nThis Program Will Help You Find The Best  Computers, Adapters, Electronics, And Computer Components Using The Website New Egg.")

item_thing = input("\nEnter The Item You Want To Find The Best Deals On:  ")

time.sleep(2)

#url = f"https://www.amazon.com/s?k={item}&page=1&crid=1BE844NMMQSV7&sprefix={item},aps,1923&ref=nb_sb_noss_1"

url = f"https://www.newegg.ca/p/pl?d={item_thing}&N=4131"
page = requests.get(url).text
doc = BeautifulSoup(page, "html.parser")

#page_text = doc.find(class_="s-pagination-item s-pagination-selected")

page_text = doc.find(class_="list-tool-pagination-text").strong
pages = int(str(page_text).split("/")[-2].split(">")[-1][:-1])

items_found = []

for page in range(1, pages + 1):
    url = f"https://www.newegg.ca/p/pl?d={item_thing}&N=4131page={page}"
    page = requests.get(url).text
    doc = BeautifulSoup(page, "html.parser")

    items = doc.find_all(text=re.compile(item_thing))

    #items = div.find_all(text=re.compile(item_thing))

    for item in items:
        parent = item.parent
        link = None
        if parent.name != "a":
            continue

        link = parent['href']

        next_parent = item.find_parent(class_="item-container")
        try:
            price = next_parent.find(class_="price-current").find("strong").string
            items_found[item] = {"Price:  ": int(price.replace(",", "")), "URL:  ": link}
        except:
            pass

#sorted_items = sorted(items_found.items(), key=lambda x: x[1]['price'])
sorted_items = sorted(items_found, key=lambda x: x[1]['price'])

print("\n--------------------")

for item in sorted_items:
    print("\n"f"Name:  {item[0]}")
    print("\n"f"Price:  ${items[1]['price']}")
    print("\n"f"URL:  items[1]['link']")
    print("\n--------------------")
    time.sleep(0.2)

CodePudding user response：

Most likely the site's content is generated by JavaScript. Since JavaScript only runs on the client side, that data cannot be scraped with bs4 alone.

Maybe this link will be useful:

https://pythonprogramming.net/javascript-dynamic-scraping-parsing-beautiful-soup-tutorial/

CodePudding user response：

I suggest you test the result of your .find() calls as not all items contain the information you need. For example:

from bs4 import BeautifulSoup
import requests
import re
import time

# Search term to look up on Newegg; hard-coded here for the example.
item_thing = "adapter"

# Fetch the first results page only to discover how many pages exist.
url = f"https://www.newegg.ca/p/pl?d={item_thing}&N=4131"
page = requests.get(url, timeout=10).text
doc = BeautifulSoup(page, "html.parser")

# NOTE(review): the parsing below assumes the pagination <strong> renders as
# "current/total" with markup around the slash -- confirm against the live page.
pagination = doc.find(class_="list-tool-pagination-text")
if pagination is not None and pagination.strong is not None:
    page_text = pagination.strong
    # Keep the piece after the last "/" separator and strip the trailing "<"
    # of the closing tag, leaving just the total page count.
    pages = int(str(page_text).split("/")[-2].split(">")[-1][:-1])
else:
    # .find() returns None when the element is missing; fall back to a single
    # page instead of failing with AttributeError on None.
    pages = 1

# Each entry is [price, link, name] so that sorted() orders by price first.
items_found = []

for page in range(1, pages + 1):
    print(f"Getting page {page}")
    url = f"https://www.newegg.ca/p/pl?d={item_thing}&N=4131&page={page}"
    req = requests.get(url, timeout=10)
    doc = BeautifulSoup(req.content, "html.parser")

    for div in doc.find_all('div', class_="item-container"):
        a_tag = div.find('a', class_='item-title', href=True)
        if a_tag is None:
            # Not every container carries a title link; without one there is
            # neither a name nor a URL to report, so skip it.
            continue

        price = 0   # assume unknown price
        li_price = div.find(class_='price-current')

        if li_price:
            strong = li_price.find('strong')

            if strong:
                try:
                    price = float(strong.text.replace(',', ''))
                except ValueError:
                    # Non-numeric price text: keep the "unknown" default.
                    pass

        items_found.append([price, a_tag['href'], a_tag.text])

# Cheapest first; items with an unknown price (0) sort to the top.
for price, link, name in sorted(items_found):
    print(f"Name:  {name}")
    print(f"Price:  ${price}")
    print(f"URL:  {link}")
    print("--------------------")

This would give you results starting:

Name:  axGear Universal Brass 3.5mm Male to 6.5mm Female Stereo Audio Adapter Jack Connector
Price:  $3.0
URL:  https://www.newegg.ca/p/231-0099-00023?Description=adapter&cm_re=adapter-_-9SIAD1NC9E3870-_-Product
--------------------
Name:  axGear USB-C Female to USB 3.0 Male Adapter Converter Type C to USB 3 F/M
Price:  $7.0
URL:  https://www.newegg.ca/p/231-0099-00018?Description=adapter&cm_re=adapter-_-9SIAD1NB4E4533-_-Product
--------------------
Name:  ORICO USB to Bluetooth 4.0 Portable Adapter Wireless Receiver Adapter Dongle -White
Price:  $8.0
URL:  https://www.newegg.ca/orico-bta-403/p/0XM-000H-00009?Description=adapter&cm_re=adapter-_-0XM-000H-00009-_-Product
--------------------