import urllib3
import certifi
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
import requests
from bs4 import BeautifulSoup
import time
import ssl
# Pool manager configured with certifi's CA bundle; it is not referenced
# again in the visible part of this script.
http = urllib3.PoolManager(ca_certs=certifi.where())

# Start Chrome in incognito mode using the local chromedriver binary.
chrome_options = Options()
chrome_options.add_argument("--incognito")
driver = webdriver.Chrome(options=chrome_options, executable_path="D:\\python works\\driver\\chromedriver.exe")
URL = "https://physicians.wustl.edu/people/christopher-d-abraham-md/"
try:
    driver.get(URL)
    time.sleep(10)  # fixed pause before querying the page
    # Reads the text of the whole affiliations <div>, so the <h3> heading
    # comes back together with the <li> entries.
    a = driver.find_element_by_xpath("//div[@class='wuphys-ppl affiliations']").text
    print(a)
    time.sleep(10)
finally:
    # Always close the browser and end the driver session, even when the
    # page load or the element lookup raises.
    driver.quit()
HTML
<div class="wuphys-ppl affiliations">
<h3>Hospital Affiliations</h3>
<ul><li>Barnes-Jewish Hospital</li>
<li>Barnes-Jewish West County Hospital</li>
<li>Christian Hospital - Northeast</li></ul>
</div>
**Required_output:**
Barnes-Jewish Hospital
Barnes-Jewish West County Hospital
Christian Hospital - Northeast
The output should be Barnes-Jewish Hospital, Barnes-Jewish West County Hospital, Christian Hospital - Northeast. How can I get only the list items under the h3 heading using Selenium?
CodePudding user response:
Your XPath should look like this to get only the texts from the <li> elements and not from the <h3>:
//div[@class='wuphys-ppl affiliations']/ul//li
To get all texts you have to use find_elements_by_xpath() and iterate over the returned list of elements:
# Select every <li> under the affiliations <div>'s <ul>; the <h3> heading is
# not matched by this XPath.
a = driver.find_elements_by_xpath("//div[@class='wuphys-ppl affiliations']/ul//li")
# find_elements_* returns a list of elements, so print each one's text.
for e in a:
    print(e.text)
Output:
Barnes-Jewish Hospital
Barnes-Jewish West County Hospital
Christian Hospital - Northeast