Using this url I want to locate div tags which has attribute data-asin
. When I use //div[@data-asin]
in Chrome Inspect mode it gives 21 elements. But while trying to get these elements via Selenium in both ways, explicit wait and direct length gives 0. As I guess Selenium remote browser is unable to get anyone of these elements as a DOM tree. code is below
import pandas as pd
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
#reading from csv file url-s
def readCSV(path_csv):
df=pd.read_csv(path_csv)
return df
fileCSV=readCSV(r'C:\Users\Admin\Downloads\urls.csv')
length_of_column_urls=fileCSV['linkamazon'].last_valid_index()
def create_driver():
chrome_options = Options()
chrome_options.headless = True
chrome_options.add_argument("start-maximized")
# options.add_experimental_option("detach", True)
chrome_options.add_argument("--no-sandbox")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
webdriver_service = Service(r'C:\Users\Admin\Downloads\chromedriver107v\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
return driver
#going to urls 1-by-1
def goToUrl_Se(driver):
global counter
counter = 0
for i in range(0, length_of_column_urls 1):
xUrl = fileCSV.iloc[i, 1]
print(xUrl,i)
# going to url(amazn) via Selenium WebDriver
driver.get(xUrl)
parse_data()
counter =1
driver.quit()
#fetch-parse the data from url page
def parse_data():
global asin, title, bookform, priceNewProd,author
wait=WebDriverWait(driver,timeout=77)
try:
x_index=wait.until(EC.visibility_of_all_elements_located((By.TAG_NAME,'//div[@data-asin]')))###Attention here
print(len(x_index))
except:
y_index=driver.find_elements(By.TAG_NAME,'//div[@data-asin]')###Anf attention here
print(len(y_index))
driver=create_driver()
goToUrl_Se(driver)
CodePudding user response:
You have to mention XPATH not TAG_NAME:
try:
x_index=wait.until(EC.visibility_of_all_elements_located((By.XPATH,'//div[@data-asin]')))###Attention here
print(len(x_index))
except:
y_index=driver.find_elements(By.XPATH,'//div[@data-asin]')###Anf attention here
print(len(y_index))