I am not getting the price; the script gives me an empty output. This is the page link: https://www.amazon.com/dp/B00M0DWQYI?th=1
from selenium import webdriver
import time
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

# Open one Amazon product page and print {'price': <text>} for it.
url = 'https://www.amazon.com/dp/B00M0DWQYI?th=1'
# Raw string so the backslashes of the Windows path are not treated as escapes.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
# Selenium 4 receives the chromedriver location through a Service object.
driver = webdriver.Chrome(service=Service(PATH))
item = dict()
try:
    driver.get(url)
    # Wait for the node to exist in the DOM instead of querying immediately.
    price_span = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, "//div[@id='corePrice_feature_div']//span[@class='a-offscreen']")
        )
    )
    # WebElement.text only returns rendered text. The "a-offscreen" span is
    # presumably hidden from view (hence the empty result), so read the DOM
    # text content instead.
    item['price'] = (price_span.get_attribute("textContent") or '').strip()
except TimeoutException:
    # Same fallback as before, but only for "element never appeared".
    item['price'] = ''
finally:
    # Always close the browser, even when the lookup fails.
    driver.quit()
print(item)
CodePudding user response:
You may want to wait for that element to properly load, prior to locating it:
[...]
# Poll for up to 10 seconds before giving up with a TimeoutException.
wait = WebDriverWait(driver, 10)
# The "a-offscreen" span is presumably never displayed, so a clickability
# wait would time out and .text (rendered text only) would be empty.
# Wait for presence in the DOM and read its textContent instead.
price_span = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@id='corePrice_feature_div']//span[@class='a-offscreen']")))
item['price'] = (price_span.get_attribute("textContent") or '').strip()
Selenium documentation can be found at https://www.selenium.dev/documentation/
EDIT: Here is a complete example of how you can get that information:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time as t

# Locator for the visible price container inside the core price block.
# NOTE(review): the attribute predicate was lost in the original ("span[@]",
# which is not valid XPath). It is reconstructed here as "span carrying the
# class token a-price"; verify against the live page markup.
PRICE_XPATH = (
    '//div[@id="corePrice_feature_div"]'
    '//span[contains(concat(" ", normalize-space(@class), " "), " a-price ")]'
)

chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1920,1080")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(driver, 5)
items = dict()
try:
    driver.get('https://www.amazon.com/dp/B00M0DWQYI?th=1')
    # NOTE(review): the short pause and reload are kept from the original;
    # presumably they work around an incomplete first load. Confirm.
    t.sleep(1)
    driver.refresh()
    price_element = wait.until(EC.element_to_be_clickable((By.XPATH, PRICE_XPATH)))
    # The rendered text is expected to have the whole and fractional parts on
    # separate lines, so the newline is turned into a decimal point.
    items['price'] = price_element.text.replace('\n', '.')
finally:
    # Release the browser process whether or not the price was found.
    driver.quit()
print(items)
Result in terminal:
{'price': '$32.98'}
CodePudding user response:
A solution using requests and BeautifulSoup (bs4): just send a cookie and a user-agent as request headers.
from bs4 import BeautifulSoup
import requests

# Fetch the product page over plain HTTP and print the price text.
url = "https://www.amazon.com/dp/B00M0DWQYI?th=1"
# A cookie and a browser-like user-agent are sent with the request.
headers = {
    'cookie': 'csm-sid=710-2771389-1033974',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
}
# A timeout keeps the script from hanging forever on a stalled connection.
resp = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
resp.raise_for_status()
soup = BeautifulSoup(resp.content, 'html.parser')
# NOTE(review): the attribute selector was lost in the original ("span[]",
# which is not a valid CSS selector). It is reconstructed here as the
# off-screen child of the a-price span; verify against the live page markup.
price_tag = soup.select_one('span.a-price > span.a-offscreen')
if price_tag is None:
    # select_one returns None when nothing matches; say so explicitly rather
    # than failing with an AttributeError on ".text".
    raise SystemExit("Price element not found in the response")
price = price_tag.text
print(price)
Output:
$32.98
CodePudding user response:
You need to wait for the element to become visible and then extract its text.
The following Selenium code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# Print the visible text of the core price block of one product page.
options = Options()
options.add_argument("start-maximized")
# Raw string so "\w" and "\c" in the Windows path are not read as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
try:
    url = 'https://www.amazon.com/dp/B00M0DWQYI'
    driver.get(url)
    wait = WebDriverWait(driver, 10)
    # Block until the price container is visible, then print its rendered text.
    print(wait.until(EC.visibility_of_element_located((By.XPATH, "//div[@id='corePrice_feature_div']"))).text)
finally:
    # Close the browser even if the wait times out.
    driver.quit()
The output is
$32
98