Show error 'WebElement' object has no attribute 'startsWith'-CodePudding

import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Chrome start-up flags for the scraping session.
options = webdriver.ChromeOptions()

# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects "--window-size=<width>,<height>"; the "1920x1080" spelling
# is not parsed as a size.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# ChromeDriverManager().install() supplies the chromedriver path for Service.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)


def supplyvan_scraper():
    """Print the hrefs of the list-group links on the IFEP lawyers panel page.

    Uses the module-level ``chrome_driver``. As written, the function fails
    inside the loop with ``AttributeError: 'WebElement' object has no
    attribute 'startsWith'`` (see the note on the ``if`` line).
    """
    with chrome_driver as driver:
        # Implicit wait: element lookups are retried for up to 15 seconds.
        driver.implicitly_wait(15)
        URL = 'https://www.ifep.ro/justice/lawyers/lawyerspanel.aspx'
        driver.get(URL)
        # Fixed pause after navigation, presumably to let the page finish rendering.
        time.sleep(3)
        
        # Every <a> nested anywhere under a <div class="list-group">.
        link=driver.find_elements(By.XPATH, "//div[@class='list-group']//a")
        for links in link:
            # NOTE(review): `links` is a WebElement, not a str, and Python's str
            # method is spelled `startswith`; the check has to run on the string
            # returned by links.get_attribute("href") instead.
            if(links.startsWith("https://www.ifep.ro/")):
                print(links.get_attribute("href"))

The error is raised on this line. The page contains some unwanted links that I want to filter out. This is the page link:

CodePudding user response：

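This is because a WebElement is not a string. You first have to read the href attribute from the WebElement (which gives you a string) and then call startswith on that string. Note that the Python method is spelled startswith, all lowercase, not startsWith.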

Here is the complete code:

import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Only links that point into the site itself are printed.
SITE_PREFIX = "https://www.ifep.ro/"

chrome_options = Options()
# chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-gpu")
# Chrome expects "--window-size=<width>,<height>".
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--disable-extensions")

# Selenium 4 takes the chromedriver path through a Service object; the old
# executable_path= keyword is no longer accepted by current releases.
driver = webdriver.Chrome(service=Service("./chromedriver"), options=chrome_options)
try:
    driver.get("https://www.ifep.ro/justice/lawyers/lawyerspanel.aspx")
    driver.maximize_window()
    time.sleep(3)

    # find_elements(By.XPATH, ...) replaces the removed find_elements_by_xpath().
    links = driver.find_elements(By.XPATH, "//div[@class='list-group']//a")
    for link in links:
        # get_attribute() returns a str, or None when the anchor has no href,
        # so guard before calling str.startswith.
        link_href = link.get_attribute("href")
        if link_href and link_href.startswith(SITE_PREFIX):
            print(link_href)
finally:
    # Always close the browser, even if scraping raised.
    driver.quit()

You can use this modified code only:

# Read each anchor's href (a str, or None when the attribute is missing)
# and print only the links that point into the site.
# find_elements(By.XPATH, ...) replaces the removed find_elements_by_xpath().
links = driver.find_elements(By.XPATH, "//div[@class='list-group']//a")
for link in links:
    link_href = link.get_attribute("href")
    if link_href and link_href.startswith("https://www.ifep.ro/"):
        print(link_href)

Output:

https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=33353&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=34493&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=15868&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=33526&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=33459&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=9100&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=27125&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=24811&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=1932&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=7746&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=18864&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=23966&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=3840&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=16192&Signature=387599
https://www.ifep.ro/justice/lawyers/LawyerFile.aspx?RecordId=16350&Signature=387599

CodePudding user response：

By trying to filter links by a partial @href, you are solving an XY problem. There is no need to filter the links at all - just use the correct XPath so that only the required links are selected:

# Select only anchors that are direct children of a list-group <div> sitting
# directly inside a table cell, so no href filtering is needed afterwards.
links = driver.find_elements(By.XPATH, "//td/div[@class='list-group']/a")
for link in links:
    print(link.get_attribute("href"))

CodePudding user response：

Links have multiple attributes, target, location, text...

You most likely want text

 link.text   (on a single element; in the Python bindings the text is a property - getText() is the Java method name)

should work