import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Chrome launch flags for the scraping session.
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects the size as WIDTH,HEIGHT (comma-separated); the previous
# "1920x1080" form is not a valid value for this switch.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# Shared browser instance; webdriver_manager resolves a matching chromedriver
# binary and returns its path for the Service.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
def _find_or_none(scope, by, value):
    """Return the first element under *scope* matching the locator, or None.

    *scope* is a driver or an element; a missing element is reported as
    ``None`` instead of raising, so callers can treat fields as optional.
    """
    # Local import keeps this block self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import NoSuchElementException

    try:
        return scope.find_element(by, value)
    except NoSuchElementException:
        return None


def _href_or_none(scope, xpath):
    """Return the ``href`` of the element at *xpath* under *scope*, or None."""
    anchor = _find_or_none(scope, By.XPATH, xpath)
    return anchor.get_attribute('href') if anchor is not None else None


def supplyvan_scraper():
    """Print title, email link and website link for each lawyer in the results.

    Opens the search-results page, collects the profile links listed on it,
    visits each profile and prints one line per ``lawyer-info`` section.
    Fields that are not present on a profile are printed as ``None``.

    Uses the module-level ``chrome_driver``; the ``with`` block quits the
    browser when the function finishes, so no explicit ``quit()`` is needed.
    """
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        URL = 'https://zoekeenadvocaat.advocatenorde.nl/zoeken?q=&type=advocaten&limiet=10&sortering=afstand&filters[rechtsgebieden]=[]&filters[specialisatie]=0&filters[toevoegingen]=0&locatie[adres]=Holland&locatie[geo][lat]=52.132633&locatie[geo][lng]=5.291266&locatie[straal]=56&locatie[hash]=67eb2b8d0aab60ec69666532ff9527c9&weergave=lijst&pagina=1'
        driver.get(URL)
        time.sleep(3)

        # Collect the hrefs up front: the elements go stale once we navigate.
        page_links = [
            element.get_attribute('href')
            for element in driver.find_elements(
                By.XPATH, "//span[@class='h4 no-margin-bottom']//a")
        ]

        # Visit all the links.
        for link in page_links:
            driver.get(link)
            time.sleep(2)

            # Looked up fresh for every page so a missing heading yields None
            # rather than a NameError or the previous page's title.
            heading = _find_or_none(driver, By.CSS_SELECTOR, '.title h3')
            title = heading.text if heading is not None else None

            details = driver.find_elements(
                By.XPATH, "//section[@class='lawyer-info']")
            for detail in details:
                # The leading "." scopes the XPath to this section (a bare
                # "//" searches the whole document), and the trailing "/a"
                # targets the link itself: the surrounding <div> has no href.
                email = _href_or_none(
                    detail,
                    ".//div[@class='row'][3]//div[@class='column small-9']/a")
                website = _href_or_none(
                    detail,
                    ".//div[@class='row'][4]//div[@class='column small-9']/a")
                print(title, email, website)

            # Pause between profile pages.
            time.sleep(2)


supplyvan_scraper()
I am trying to scrape the email and website, but the script gives me None
for the website
and the email.
What would be a suitable XPath for the email and website? Is there a feasible approach? Kindly tell me. This is the page link
CodePudding user response:
Change xpath for email:
//div[@class='row'][3]//div[@class='column small-9']/a
and for website:
//div[@class='row'][4]//div[@class='column small-9']/a