Home > Software engineering >  Python Web scraping, automatically clicking on "load more" button until no more button, an
Python Web scraping, automatically clicking on "load more" button until no more button, an

Time:09-22

I want to download all the tables from this website (https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3), which lists all the doctors in Paris. However, to get all the names, you have to click the "afficher plus de résultats" button repeatedly until it is no longer available, and only then scrape all the tables (names, addresses, etc.).

I tried to do this with Selenium but did not succeed. Does anyone know how to do it, or have some code that does?

  • My code (it does not work):

from selenium import webdriver
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

# Shared WebDriver handle used by the functions below. It is populated by
# startWebDriver(); launching Chrome here as well would open an extra browser
# that gets overwritten and is never quit.
driver = None

def executeTest():
    """Open the Doctolib search page and click one element located by XPath.

    Uses the module-level ``driver``, which must already hold a started
    WebDriver session. The element is found through a hard-coded absolute
    XPath (presumably the "afficher plus de résultats" button) and is
    clicked exactly once.
    """
    global driver
    # Local import: ``By`` is not among this script's top-level imports.
    from selenium.webdriver.common.by import By

    driver.get('https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3')
    # Fixed pause to give the page time to render before looking up the element.
    time.sleep(7)
    # find_element_by_xpath() was removed in Selenium 4.3;
    # find_element(By.XPATH, ...) is the supported equivalent.
    element = driver.find_element(
        By.XPATH,
        '/html/body/div[3]/div/div[5]/div/div[1]/div[1]/div[2]/div[4]/div/div/button/span',
    )
    element.click()
    # Pause again so the click has time to take effect before the caller continues.
    time.sleep(3)

def startWebDriver():
    """Start a Chrome session and store it in the module-level ``driver``.

    Chrome is launched with the ``--disable-infobars`` argument.
    """
    global driver
    options = Options()
    options.add_argument("--disable-infobars")
    # ``chrome_options=`` is the deprecated spelling of this keyword and is
    # rejected by current Selenium releases; ``options=`` is the supported one.
    driver = webdriver.Chrome(options=options)

if __name__ == "__main__":
    startWebDriver()
    try:
        executeTest()
    finally:
        # Always close the browser, even when the scrape raises, so no
        # Chrome process is left running.
        driver.quit()

'''

CodePudding user response:

You need to use an infinite loop that checks whether the button still exists and breaks out of the loop when it does not. Then collect all the information.

code:

from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import pandas as pd
import time


# Imported here so this snippet can tell "the button is gone" apart from real errors.
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

# NOTE: `driver` must be an already-started Selenium WebDriver; this snippet
# does not create one.

# CSS selector of the "load more results" button label.
LOAD_MORE_SELECTOR = "div.dl-card-content >button>span.dl-button-label"

driver.get('https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3')

# Accept the cookie banner if one is shown within 10 seconds.
try:
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button#didomi-notice-agree-button>span"))
    ).click()
except TimeoutException:
    # No banner appeared (e.g. consent was already given); carry on.
    pass

# Keep clicking "load more" until the button no longer shows up.
while True:
    try:
        # until() returns the located element, so no second lookup is needed.
        button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, LOAD_MORE_SELECTOR))
        )
    except TimeoutException:
        # The button did not appear within 10 seconds: everything is loaded.
        break
    try:
        # Click through JavaScript rather than a native WebElement.click().
        driver.execute_script("arguments[0].click();", button)
    except StaleElementReferenceException:
        # The button was re-rendered between lookup and click; locate it again.
        continue
    # Short pause to let the next batch of results load.
    time.sleep(1)

# Collect one list per column from the fully expanded result list.
names=[name.text for name in driver.find_elements(By.CSS_SELECTOR, "div.dl-search-result-presentation h3[data-design-system='oxygen']")]
addresses=[address.text for address in driver.find_elements(By.CSS_SELECTOR, "div.dl-search-result-presentation div.dl-margin-l-96 >span")]
cityPostcode=[city.text for city in driver.find_elements(By.CSS_SELECTOR, "div.dl-search-result-presentation div.dl-margin-l-96 >div[class='dl-text dl-text-body dl-text-regular dl-text-s']")]
# The three lists must have equal length, otherwise pandas raises ValueError.
df=pd.DataFrame({"Name":names, "Address" : addresses, "City" : cityPostcode})
print(df)
df.to_csv("doctos.csv")

Output:

                                                 Name                        Address         City
0                 Centre de santé Kersanté Rosa Parks           72 Rue Cesária Évora  75019 Paris
1                          Dr Niloufar ASSEF-ZAMANIAN   12 Rue Notre Dame des Champs  75006 Paris
2                                   Dr Emilie COUPAUD      299/301 Rue de Belleville  75019 Paris
3   Centre de Santé Convention - Ministère des Aff...        27 Rue de la Convention  75015 Paris
4                                       Dr Marc WYDRA          4 Rue du Docteur Roux  75015 Paris
..                                                ...                            ...          ...
64                               Dr Audrey CORNILLEAU              7b Rue de Lesseps  75020 Paris
65                                   Dr André AZUELOS  43 Rue de la Chaussée d'Antin  75009 Paris
66                                  Dr Déborah SMADJA         113 Avenue Victor Hugo  75116 Paris
67                                   Dr Philippe Levy                   35 Rue Vital  75116 Paris
68                                  Institut Pasquier                44 Rue Pasquier  75008 Paris

[69 rows x 3 columns]
  • Related