I have a dataframe df with two columns (PMID, DOI). I want to pass each PMID to a website as the input value, get the DOI href link as output, and store it in the second column (DOI). An example is shown below:
PMID | DOI
20022636 10.1016/j.molimm.2009.11.027
20023032 10.1128/JB.01375-09
2002360
2002352
# Look up the DOI for every PMID in `df` through the pmid2cite web converter
# and store it in the DOI column.
#
# NOTE(review): newer Selenium 4 releases expect the driver path to be wrapped
# in a Service object rather than passed positionally -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get("https://www.pmid2cite.com/pmid-to-doi-converter")

    # Dismiss the consent dialog first; the form cannot be used until then.
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//p[text()='Consent']"))
    ).click()

    # Resolve the DOI column position once so the write below stays purely
    # positional and does not depend on the dataframe's index labels.
    doi_col = df.columns.get_loc('DOI')

    # enumerate() yields row positions, which is what .iloc expects; the
    # labels returned by iterrows() only match positions for a default index.
    for pos, pmid in enumerate(df['PMID']):
        pmid_text = str(pmid)  # send_keys expects text, not a numeric value
        print(pmid_text)

        pmid_input = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "#formInput"))
        )
        # Clear any previous value so PMIDs are never concatenated.
        pmid_input.clear()
        pmid_input.send_keys(pmid_text)

        # find_element(By.XPATH, ...) replaces the removed
        # find_element_by_xpath helper.
        driver.find_element(By.XPATH, "/html/body/div[5]/div[2]/form/button").click()

        # Wait for the result link(s) and read their text content.
        res = [
            my_elem.get_attribute("innerHTML")
            for my_elem in WebDriverWait(driver, 20).until(
                EC.visibility_of_all_elements_located(
                    (By.XPATH, "/html/body/div[5]/div[3]/p[1]/span[2]/a")
                )
            )
        ]
        df.iloc[pos, doi_col] = res[0]
finally:
    # Always close the browser so no chromedriver process is left behind.
    driver.quit()
print('Done')
CodePudding user response:
You are getting the error
ElementNotInteractableException: Message: element not interactable
because a consent form appears when the web page loads.
You need to accept the consent form first;
only then can you interact with the other elements.
Use WebDriverWait() to wait for the consent form and click it, and then wait for the input element.
# Open the converter page.
converter_url = "https://www.pmid2cite.com/pmid-to-doi-converter"
driver.get(converter_url)

# Both waits below use the same 10-second timeout.
wait = WebDriverWait(driver, 10)

# Accept the consent dialog before touching the form.
consent_button = wait.until(
    EC.element_to_be_clickable((By.XPATH, "//p[text()='Consent']"))
)
consent_button.click()

# Once the input box is clickable, type the PMID into it.
pmid_field = wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "#formInput"))
)
pmid_field.send_keys("20022636")
You need the following imports.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
CodePudding user response:
Here's the answer; it returns the DOI for every PMID in a given dataframe.
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException
# Look up the DOI for every PMID in `df` through the pmid2cite web converter.
# PMIDs whose lookup times out are recorded as 'NA'.
#
# NOTE(review): newer Selenium 4 releases expect the driver path to be wrapped
# in a Service object rather than passed positionally -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get("https://www.pmid2cite.com/pmid-to-doi-converter")

    # Dismiss the consent dialog first; the form cannot be used until then.
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//p[text()='Consent']"))
    ).click()

    # Resolve the DOI column position once so the writes below stay purely
    # positional and do not depend on the dataframe's index labels.
    doi_col = df.columns.get_loc('DOI')

    # enumerate() yields row positions, which is what .iloc expects; the
    # labels returned by iterrows() only match positions for a default index.
    for pos, pmid in enumerate(df['PMID']):
        try:
            pmid_input = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "#formInput"))
            )
            # Clear any previous value so PMIDs are never concatenated.
            pmid_input.clear()
            # send_keys expects text, not a numeric value.
            pmid_input.send_keys(str(pmid))

            # find_element(By.XPATH, ...) replaces the removed
            # find_element_by_xpath helper.
            driver.find_element(By.XPATH, "/html/body/div[5]/div[2]/form/button").click()

            # Wait for the result link(s) and read their text content.
            res = [
                my_elem.get_attribute("innerHTML")
                for my_elem in WebDriverWait(driver, 20).until(
                    EC.visibility_of_all_elements_located(
                        (By.XPATH, "/html/body/div[5]/div[3]/p[1]/span[2]/a")
                    )
                )
            ]
            print(res)
            df.iloc[pos, doi_col] = res[0]
        except TimeoutException:
            # No input box or no result link appeared in time for this PMID.
            df.iloc[pos, doi_col] = 'NA'
finally:
    # Always close the browser so no chromedriver process is left behind.
    driver.quit()
print('Done')
PMID DOI
0 277290620 10.1186/s13075-016-1133-8
1 110534327 10.1074/jbc.M004690200
2 97257504 NA
3 78270291 NA