Home > other >  Selenium timeoutexception error in one of the iterations
Selenium timeoutexception error in one of the iterations

Time:01-25

wait = WebDriverWait(driver, 20) #wait 20s to let webpage load

driver.get('https://beta.clinicaltrials.gov/') #getting website url

driver.maximize_window()
time.sleep(1)

country = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content"]/div/ctg-home/div/div[2]/ctg-advanced-search-home/div[2]/div[1]/fieldset/div[2]/div[3]/ctg-location-search-input/form/div[2]/div/label')))
country.click()

searchBar = driver.find_element("id",'location-input')
searchBar.send_keys("Singapore") #input country name into searchBar

search_dropdown = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="mat-option-14"]/span'))) #wait till xpath is visible
search_dropdown.click()


search_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content"]/div/ctg-home/div/div[2]/ctg-advanced-search-home/div[2]/div[2]/div/div[2]/button')))
search_button.click()


#finding filter button for recruiting status
filter_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="filter-button-statusGroup"]')))
filter_button.click()

#clicking on 'recruiting' status
recruiting = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="adv-check-status"]/div[2]/div[2]/div/label')))
recruiting.click()

#scraping each clinical trial details
clinical_trial = {} #empty dict to store details
name_list = []
phone_list = []
email_list = []
enrollment = []
condition_list = []

#loop to go through all the clinical trials in the search page (10 per page)
for i in range(1,11):
    time.sleep(2) #wait 2s to let page load
    xpath = '//*[@id="content"]/div/ctg-search-results/div[2]/div/div[2]/div/div[2]/div[1]/ctg-search-hit-card[{}]/div/header/a'.format(i)
    trials = driver.find_element("xpath", xpath)
    trials.click()
    #time.sleep(5) #wait 5s to let page load
    
    #getting contact person name
    name = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[2]/ctg-study-contacts-and-locations/div/div/div/ctg-study-contact-info/p[1]/span')))
    name_list.append(name.text) #adding each name to the list
    
    #phone number of contact person
    phone = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[2]/ctg-study-contacts-and-locations/div/div/div/ctg-study-contact-info/p[2]/span')))
    phone_list.append(phone.text) #adding each phone number to the list
    
    #email of contact person
    email = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[2]/ctg-study-contacts-and-locations/div/div/div/ctg-study-contact-info/p[3]/ctg-study-contact-email/span/a')))
    email_list.append(email.text) #adding each email address to the list
    
    #number of enrollment
    enrollment_num = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[1]/ctg-study-overview/div[3]/div[2]/div[3]/div[2]')))
    enrollment.append(enrollment_num.text) #adding each enrollment number to the list
    
    #condition of study
    conditions = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[1]/ctg-study-overview/div[3]/div[2]/div[1]/div[2]')))
    condition_list.append(conditions.text) #adding conditions of the study to list
    
    driver.back() #return to search page


#adding all the different list details to the contact_details dict
clinical_trial["name"] = name_list
clinical_trial["phone_num"] = phone_list
clinical_trial["email_address"] = email_list
clinical_trial["Enrollment"] = enrollment
clinical_trial["Conditions"] = condition_list

I am having an issue with selenium somehow not finding the xpath for enrollment_num in the loop. The loop runs through the 10 clickable links on the webpage, however it gives a TimeoutException error at the 9th link. Why is that so? When i change the loop to iterate through 8 links instead of the usual 10 links, it works fine. Its just that one link which creates the error.

CodePudding user response:

Page number 9 is different from all the other pages. The difference is hard to spot. Tipp: to compare strings i use Notepad with the compare plugin. This page does not have these 2 elements:

  1. enrollment_num =...ctg-study-overview/div[3]/div[2]/di...'

    here it is:

    enrollment_num =...ctg-study-overview/div[2]/div[2]/di...

  2. conditions = ...ctg-study-overview/div[3]/di...

    here it is:

    ...ctg-study-overview/div[2]/di...

This is why it runs into a timeout. You could build a try:except:else around these to avoid the program from crashing. Below a quick fix. Of course you should tidy it up. I hope this helps.

# number of enrollment
try:
    enrollment_num = wait.until(EC.visibility_of_element_located((By.XPATH,
                                                                  '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[1]/ctg-study-overview/div[3]/div[2]/div[3]/div[2]')))
    enrollment.append(enrollment_num.text)  # adding each enrollment number to the list

except:
    print("enrollement div[3] but div[2]")
    enrollment_num = wait.until(EC.visibility_of_element_located((By.XPATH,
                                                                  '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[1]/ctg-study-overview/div[2]/div[2]/div[3]/div[2]')))
    enrollment.append(enrollment_num.text)  # adding each enrollment number to the list
else:
    pass


# condition of study
try:
    conditions = wait.until(EC.visibility_of_element_located((By.XPATH,
                                                              '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[1]/ctg-study-overview/div[3]/div[2]/div[1]/div[2]')))
    condition_list.append(conditions.text)  # adding conditions of the study to list
except:
    print("condition_list non div[3] but div[2]")
    conditions = wait.until(EC.visibility_of_element_located((By.XPATH,
                                                              '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[1]/ctg-study-overview/div[2]/div[2]/div[1]/div[2]')))
    condition_list.append(conditions.text)  # adding conditions of the study to list
else:
    pass
  • Related