I am writing a selenium based webscraper in python and it keeps throwing:
no such element: unable to locate element
Even though I can see the element in the selenium browser that is launched. here is the link it keeps failing on: https://www.neimanmarcus.com/p/givenchy-g-chain-ring-prod250190244?childItemId=NMY5X1R_&navpath=cat000000_cat4870731_cat50910737_cat2650734&page=0&position=11
Here is my code for the driver:
def getDriver():
try:
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=options)
return driver
except Exception:
traceback.print_exc()
print(Exception)
Here is the scraper:
def getProduct(domain, url):
# find the yaml file for the domain
driver = getDriver()
noYaml = False
try:
markupPath = 'markups/%s.yml' % domain
except Exception as error:
noYaml = True
if(noYaml == False):
with open(markupPath, 'r') as file:
yamlElements = yaml.safe_load(file)
titleXpath = '//span[contains(concat(" ",normalize-space(@class)," ")," Titlestyles__ProductName-fRyAwr ")]'
priceXpath = '//span[contains(concat(" ",normalize-space(@class)," ")," Pricingstyles__RetailPrice-eYMMwV ")]'
print("XXXXXXPATHHHHHHHHHHHHHS: ",titleXpath, priceXpath)
driver.get(url)
driver.implicitly_wait(10)
try:
# WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH,titleXpath)))
title = driver.find_element(By.XPATH, titleXpath)
except Exception as error:
print("ERROR: ", error)
title = None
try:
# WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH,priceXpath)))
price = driver.find_element_by_xpath(priceXpath)
# price = driver.find_element(By.XPATH,priceXpath)
print(price)
except Exception as error:
print("ERRROR: ", error)
price = None
driver.execute_script("window.stop();")
data={}
if(title != None):
print(title.get_attribute('innerHTML'))
data['title'] = title.get_attribute('innerHTML')
if(price != None):
print(price.get_attribute('innerHTML'))
data['price'] = price.get_attribute('innerHTML')
if 'price' not in data or data['price'] == "" or data['price'] == None:
driver.quit()
return False
getProduct(domain, url)
driver.quit()
return data```
I have been stuck here for a while and can't figure out why it is throwing an error
Message: no such element: Unable to locate element: {"method":"xpath","selector":"//span[contains(concat(" ",normalize-space(@class)," ")," Pricingstyles__RetailPrice-eYMMwV ")]"}
CodePudding user response:
It seems like your xpaths are missing the last bit of the string. You may want to consider simplifying them as well by doing this:
titleXpath = '//span[@]'
priceXpath = '//span[@]'
And also, if you are still recieving the error you may want to look into adding a user agent to your webdriver options. Hope this helps
CodePudding user response:
You are having trouble debugging your xpaths, and I am too--they are hard to read. These CSS Selectors work for me:
CssSelectorTitle = 'span.Titlestyles__ProductName-fRyAwr'
CssSelectorPrice = 'span.Pricingstyles__RetailPrice-eZcFGu'
CodePudding user response:
It appears that website will automatically redirect visitor according to the country determined from visitor's IP, and it will also change the classes for item name and price accordingly. Here is a more robust solution, which will account for such changes, and will also wait for the element to load in page:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
url='https://www.neimanmarcus.com/en-jp/p/givenchy-g-chain-ring-prod250190244?childItemId=NMY5X1R_&navpath=cat000000_cat4870731_cat50910737_cat2650734&page=0&position=11'
browser.get(url)
try:
WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.ID, "closeButton"))).click()
except Exception as e:
print('no pop-up, moving on')
title = WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[@data-test='pdp-title']")))
price = WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@data-test='pdp-pricing']")))
print(title.text.strip(), price.text.strip())
Result:
G-Chain Ring JPY 46709
CodePudding user response:
The second part of the xpaths i.e. fRyAwr
, eYMMwV
are dynamically generated and is bound to change sooner/later. They may change next time you access the application afresh or even while next application startup. So can't be used in locators.
Solution
To print the product name and price instead of presence_of_element_located() you need to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following locator strategies:
Using CSS_SELECTOR:
driver.execute("get", {'url': 'https://www.neimanmarcus.com/en-in/p/givenchy-g-chain-ring-prod250190244?childItemId=NMY5X1R_&navpath=cat000000_cat4870731_cat50910737_cat2650734&page=0&position=11'}) # Close Now shipping to India popup WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a#closeButton"))).click() # Close cookie consent WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.close_button_panel>i.close_icon"))).click() print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "span[data-test='pdp-title']"))).text) print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div[data-test='pdp-pricing']>span"))).text)
Using XPATH:
driver.execute("get", {'url': 'https://www.neimanmarcus.com/en-in/p/givenchy-g-chain-ring-prod250190244?childItemId=NMY5X1R_&navpath=cat000000_cat4870731_cat50910737_cat2650734&page=0&position=11'}) # Close Now shipping to India popup WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//a[@id='closeButton']"))).click() # Close cookie consent WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='close_button_panel']/i[@class='close_icon']"))).click() print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//span[@data-test='pdp-title']"))).text) print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@data-test='pdp-pricing']/span"))).text)
Console output:
G-Chain Ring INR 27573
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC