For some reason, when I try to get results from this script, it just crashes and shows no error at all before I get any output. Could someone please help me get this to work? I don't know why this happens; I think it may have something to do with how the `items` variable is obtained, but I just can't figure it out! Any help would be appreciated.
Here Is The Script:
from bs4 import BeautifulSoup
import requests
import re
import time
# Computer Deal Finder: asks for a search term, walks every Newegg result
# page for that term, collects each matching product link together with its
# price, and prints the matches from cheapest to most expensive.
print("Computer Deal Finder")
print("\nBy: ViridianTelamon.")
print("\nThis Program Will Help You Find The Best Computers, Adapters, Electronics, And Computer Components Using The Website New Egg.")
item_thing = input("\nEnter The Item You Want To Find The Best Deals On: ")
time.sleep(2)

url = f"https://www.newegg.ca/p/pl?d={item_thing}&N=4131"
first_page_html = requests.get(url).text
doc = BeautifulSoup(first_page_html, "html.parser")

# The pagination label is stringified and split to pull out the number that
# follows the last "/" (the total page count).
# NOTE(review): assumes markup shaped like
# <strong>1<!-- -->/<!-- -->18</strong> -- confirm against the live page.
pagination = doc.find(class_="list-tool-pagination-text")
if pagination is None or pagination.strong is None:
    # No pagination control on the page: treat the results as a single page
    # instead of failing with an AttributeError on None.
    pages = 1
else:
    pages = int(str(pagination.strong).split("/")[-2].split(">")[-1][:-1])

# Maps product name -> {"price": int, "link": str}. This has to be a dict
# (not a list) because entries are stored by name and sorted via .items().
items_found = {}

# Escape the search term so characters such as "+" or "(" typed by the user
# are matched literally rather than interpreted as regex syntax. Compiled
# once here because it does not change between pages.
name_pattern = re.compile(re.escape(item_thing))

for page_number in range(1, pages + 1):
    url = f"https://www.newegg.ca/p/pl?d={item_thing}&N=4131&page={page_number}"
    html = requests.get(url).text
    doc = BeautifulSoup(html, "html.parser")

    for item in doc.find_all(text=name_pattern):
        parent = item.parent
        # Only text that sits directly inside a link is a product title.
        if parent.name != "a":
            continue
        link = parent.get("href")
        if link is None:
            continue

        container = item.find_parent(class_="item-container")
        try:
            price = container.find(class_="price-current").find("strong").string
            items_found[str(item)] = {
                "price": int(price.replace(",", "")),
                "link": link,
            }
        except (AttributeError, ValueError):
            # AttributeError: the container, price element, or its text is
            # missing; ValueError: the price text is not a whole number.
            # Either way this listing has no usable price, so skip it.
            continue

sorted_items = sorted(items_found.items(), key=lambda entry: entry[1]["price"])

print("\n--------------------")

for name, details in sorted_items:
    print(f"\nName: {name}")
    print(f"\nPrice: ${details['price']}")
    print(f"\nURL: {details['link']}")
    print("\n--------------------")
    time.sleep(0.2)
CodePudding user response:
Most likely the site's content is generated by JavaScript. Since JavaScript only runs on the client side, that data cannot be scraped with bs4 alone.
Maybe this link will be useful:
https://pythonprogramming.net/javascript-dynamic-scraping-parsing-beautiful-soup-tutorial/
CodePudding user response:
I suggest you test the result of your `.find()` calls, as not all items contain the information you need. For example:
from bs4 import BeautifulSoup
import requests
import re
import time
# Example scraper: fetches every Newegg result page for a fixed search term,
# records the price, link and title of each product container, and prints
# the products sorted by ascending price.
item_thing = "adapter"
url = f"https://www.newegg.ca/p/pl?d={item_thing}&N=4131"
page = requests.get(url).text
doc = BeautifulSoup(page, "html.parser")

# The pagination label is stringified and split to pull out the number that
# follows the last "/" (the total page count).
# NOTE(review): assumes markup shaped like
# <strong>1<!-- -->/<!-- -->18</strong> -- confirm against the live page.
page_text = doc.find(class_="list-tool-pagination-text").strong
pages = int(str(page_text).split("/")[-2].split(">")[-1][:-1])

# Each entry is [price, link, name]; price comes first so that a plain
# sorted() orders the entries by price.
items_found = []

for page in range(1, pages + 1):
    print(f"Getting page {page}")
    url = f"https://www.newegg.ca/p/pl?d={item_thing}&N=4131&page={page}"
    req = requests.get(url)
    doc = BeautifulSoup(req.content, "html.parser")

    for div in doc.find_all('div', class_="item-container"):
        li_price = div.find(class_='price-current')
        price = 0  # assume unknown price

        # Test each .find() result before using it: not every container
        # carries a price element.
        if li_price:
            strong = li_price.find('strong')
            if strong:
                price = float(strong.text.replace(',', ''))

        a_tag = div.find('a', class_='item-title', href=True)
        if a_tag is None:
            # A container without a titled link has nothing to report, and
            # indexing None would raise TypeError.
            continue

        items_found.append([price, a_tag['href'], a_tag.text])

for price, link, name in sorted(items_found):
    print(f"Name: {name}")
    print(f"Price: ${price}")
    print(f"URL: {link}")
    print("--------------------")
This would give you results starting:
Name: axGear Universal Brass 3.5mm Male to 6.5mm Female Stereo Audio Adapter Jack Connector
Price: $3.0
URL: https://www.newegg.ca/p/231-0099-00023?Description=adapter&cm_re=adapter-_-9SIAD1NC9E3870-_-Product
--------------------
Name: axGear USB-C Female to USB 3.0 Male Adapter Converter Type C to USB 3 F/M
Price: $7.0
URL: https://www.newegg.ca/p/231-0099-00018?Description=adapter&cm_re=adapter-_-9SIAD1NB4E4533-_-Product
--------------------
Name: ORICO USB to Bluetooth 4.0 Portable Adapter Wireless Receiver Adapter Dongle -White
Price: $8.0
URL: https://www.newegg.ca/orico-bta-403/p/0XM-000H-00009?Description=adapter&cm_re=adapter-_-0XM-000H-00009-_-Product
--------------------