I am trying to scrape titles.
The script should go inside every link and scrape the title, but it shows me an error.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from time import sleep
PATH="C:\Program Files (x86)\chromedriver.exe"
url='https://www.supplyvan.com/power-tools/cordless-powertools/cordless-drills.html'
driver =webdriver.Chrome(PATH)
wait = WebDriverWait(driver, 20)
driver.get(url)
list_button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "h4.card-title"))).click()
title=driver.find_element_by_xpath('h1').text()
print(title)
CodePudding user response:
You need to change the selector to get the h1 tag text. Also note that .text is a property, not a method, and the old find_element_by_xpath helpers are removed in Selenium 4, so use find_element(By...) instead.
In this snippet, the scraper visits the first link and prints the title:
# click the single link
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "h4.card-title"))).click()
time.sleep(2)
# parse the h1 tag text
title = driver.find_element(By.CSS_SELECTOR, 'h1.productView-title').text
print(title)
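If the fixed sleep turns out to be flaky, here is a small sketch that waits for the title element explicitly instead of sleeping (same selector as above; it assumes the product page keeps the h1.productView-title markup and reuses the imports from the full script below):

# wait for the product title to become visible instead of sleeping a fixed time
title = WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, "h1.productView-title"))
).text
print(title)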
The driver will visit every link and scrape the title:
# parse all the links
page_links = [element.get_attribute('href') for element in
              driver.find_elements(By.CSS_SELECTOR, "h4.card-title > a")]

# visit all the links
for link in page_links:
    driver.get(link)
    time.sleep(2)
    title = driver.find_element(By.CSS_SELECTOR, 'h1.productView-title').text
    # parse title for all the links
    print(title)
    time.sleep(2)
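A small variation, in case you want to keep the results instead of only printing them; the titles list is an assumption, not part of the original code:

# collect the titles into a list as the driver visits each link
titles = []
for link in page_links:
    driver.get(link)
    time.sleep(2)
    titles.append(driver.find_element(By.CSS_SELECTOR, 'h1.productView-title').text)
print(titles)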
Full code with both snippets included:
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)
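If you would rather not depend on webdriver_manager, Selenium 4.6+ ships with Selenium Manager and can resolve a matching driver on its own; a minimal sketch, assuming Selenium >= 4.6:

# Selenium Manager downloads a matching chromedriver automatically
chrome_driver = webdriver.Chrome(options=options)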
def supplyvan_scraper():
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        URL = 'https://www.supplyvan.com/power-tools/cordless-powertools/cordless-drills.html'
        driver.get(URL)
        time.sleep(3)

        # opt #1: visit the first link and print the title (uncomment to see)
        # click the single link
        # WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "h4.card-title"))).click()
        # time.sleep(2)
        #
        # # parse the h1 tag text
        # title = driver.find_element(By.CSS_SELECTOR, 'h1.productView-title').text
        # print(title)

        # opt #2: visit all links, print titles
        # parse all the links
        page_links = [element.get_attribute('href') for element in
                      driver.find_elements(By.CSS_SELECTOR, "h4.card-title > a")]

        # visit all the links
        for link in page_links:
            driver.get(link)
            time.sleep(2)
            # parse the title for each link
            title = driver.find_element(By.CSS_SELECTOR, 'h1.productView-title').text
            print(title)
            # driver.back()
            time.sleep(2)

        time.sleep(2)
        driver.quit()


supplyvan_scraper()
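To run the script without opening a browser window, uncomment the headless option. Newer Chrome releases expect the new headless flag; which form you need depends on your Chrome version, so treat this as an assumption:

options.add_argument("--headless=new")  # older Chrome builds use plain "--headless"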
Output for all the visited links:
Bosch Professional Cordless Drill, GSR-120-Li, 12V, Blue/Black
Makita LXT Cordless Drill Driver, DDF481RTJ, 18V, 13MM
Bosch Cordless Drill, GSR-1000, 10.8V
.....