import requests
from bs4 import BeautifulSoup
from itertools import zip_longest
# Browser-like headers sent along with the product-page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0',
    'Accept-Language': 'en-US, en;q=0.5',
}

# Download the product page and parse the raw response bytes with lxml.
amazn = requests.get("https://a.co/d/cTzyJwv", headers=headers)
amazn_src = amazn.content
soup = BeautifulSoup(amazn_src, "lxml")

# Find every span carrying this exact class attribute value.
gpu_s3r = soup.find_all(
    "span",
    {"class": "a-price aok-align-center reinventPricePriceToPayMargin priceToPay"},
)

# Collect the complete text of each matched span; `.text` joins the text of
# all elements nested inside the span.
gpu_s3r_ap = [price_span.text for price_span in gpu_s3r]
The output is ['$389.00$389.00'].
How can I make gpu_s3r_ap contain only ['$389.00']?
I mean, how can I keep the price from being duplicated?
CodePudding user response:
Improve your selector:
[...]
for price_span in gpu_s3r:
    # 'span[]' is not a valid CSS selector (an attribute selector needs a
    # name). Select the nested span with class "a-offscreen" instead, so only
    # that element's text is appended rather than the whole container's text.
    gpu_s3r_ap.append(price_span.select_one('span.a-offscreen').text)
BeautifulSoup documentation: https://beautiful-soup-4.readthedocs.io/en/latest/
CodePudding user response:
You need to select the nested span with the class "a-offscreen". Use find() instead of find_all() for the outer span, then add this line:
# Narrow the outer price element down to its nested "a-offscreen" span.
gpu_s3r = gpu_s3r.find("span", class_="a-offscreen")
full code
# Locate the outer price container. The class string is kept on one line:
# a quoted string broken across two lines is a syntax error.
gpu_s3r = soup.find(
    "span",
    {"class": "a-price aok-align-center reinventPricePriceToPayMargin priceToPay"},
)

# find() returns None when nothing matches, so only descend into the nested
# "a-offscreen" span when the outer container was actually found.
if gpu_s3r is not None:
    gpu_s3r = gpu_s3r.find("span", {"class": "a-offscreen"})

gpu_s3r_ap = []
if gpu_s3r is not None:
    gpu_s3r_ap.append(gpu_s3r.text)
print(gpu_s3r_ap)
output
['$389.61']