I want to download all the tables from this website (https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3), which lists all the doctors in Paris. However, to get all the names, you have to click the "afficher plus de résultats" button repeatedly until it is no longer available, and then scrape all the tables (names, addresses, etc.).
I tried using Selenium, but I did not succeed. Does anyone know how to do this? Does anyone have code that does it?
- Mine does not work:
from selenium import webdriver
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
# Shared WebDriver handle used by executeTest(); startWebDriver() creates the
# actual Chrome session. Launching a browser here as well would leave that
# first session orphaned (never quit) once startWebDriver() rebinds the name.
driver = None
def executeTest():
    """Open the Doctolib search page and click "show more results" until it is gone.

    Uses the module-level ``driver``. After loading the page, the button is
    looked up and clicked repeatedly; the loop ends as soon as the lookup
    finds nothing, which is taken to mean that every result has been loaded.
    """
    global driver
    # Imported locally so this function does not depend on an extra
    # top-of-file import being present.
    from selenium.webdriver.common.by import By

    driver.get('https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3')
    time.sleep(7)  # crude wait for the initial page render

    # NOTE(review): absolute XPath copied from the browser; it is fragile and
    # may stop matching if the page layout changes -- confirm against the site.
    more_results_xpath = '/html/body/div[3]/div/div[5]/div/div[1]/div[1]/div[2]/div[4]/div/div/button/span'

    while True:
        # find_elements() returns an empty list instead of raising when the
        # button is absent, so "no more results" ends the loop cleanly.
        # (The old find_element_by_xpath helper no longer exists in Selenium 4.)
        matches = driver.find_elements(By.XPATH, more_results_xpath)
        if not matches:
            break
        matches[0].click()
        time.sleep(3)  # give the next batch of results time to load
def startWebDriver():
    """Start a Chrome session and store it in the module-level ``driver``."""
    global driver
    options = Options()
    options.add_argument("--disable-infobars")
    # `options=` is the supported keyword; the older `chrome_options=` spelling
    # is deprecated and rejected by current Selenium releases.
    driver = webdriver.Chrome(options=options)
if __name__ == "__main__":
    startWebDriver()
    try:
        executeTest()
    finally:
        # Close the browser even when the scrape raises, so no Chrome
        # process is left running after a failure.
        driver.quit()
'''
CodePudding user response:
You need to use an infinite loop and check whether the button exists; if it does not, break out of the loop. Then collect all the information.
code:
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import pandas as pd
import time
# Scrape the Doctolib result list: accept cookies, expand every page of
# results, then collect name / address / city into a CSV file.
# Expects an already-started Selenium WebDriver bound to `driver`.

# Imported here so the script only catches the Selenium errors it expects.
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import TimeoutException

driver.get('https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3')

# Accept the cookie banner. If it never shows up (e.g. consent was already
# given), carry on instead of aborting the whole scrape.
try:
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button#didomi-notice-agree-button>span"))
    ).click()
except TimeoutException:
    pass

# Keep clicking the "show more results" button until it no longer appears.
while True:
    try:
        # until() returns the located element, so no second lookup is needed.
        button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "div.dl-card-content >button>span.dl-button-label")
            )
        )
        # JavaScript click: works even when something overlays the button.
        driver.execute_script("arguments[0].click();", button)
        time.sleep(1)
    except TimeoutException:
        # Button not present within 10 s: all results are loaded.
        break
    except StaleElementReferenceException:
        # The button was re-rendered between lookup and click; look it up again.
        continue

names = [
    name.text
    for name in driver.find_elements(
        By.CSS_SELECTOR, "div.dl-search-result-presentation h3[data-design-system='oxygen']"
    )
]
addresses = [
    address.text
    for address in driver.find_elements(
        By.CSS_SELECTOR, "div.dl-search-result-presentation div.dl-margin-l-96 >span"
    )
]
cityPostcode = [
    city.text
    for city in driver.find_elements(
        By.CSS_SELECTOR,
        "div.dl-search-result-presentation div.dl-margin-l-96 >div[class='dl-text dl-text-body dl-text-regular dl-text-s']",
    )
]

# NOTE(review): the three lists are matched up by position; pandas raises
# ValueError if their lengths differ (e.g. a result card without an address).
df = pd.DataFrame({"Name": names, "Address": addresses, "City": cityPostcode})
print(df)
df.to_csv("doctos.csv")
Output:
Name Address City
0 Centre de santé Kersanté Rosa Parks 72 Rue Cesária Évora 75019 Paris
1 Dr Niloufar ASSEF-ZAMANIAN 12 Rue Notre Dame des Champs 75006 Paris
2 Dr Emilie COUPAUD 299/301 Rue de Belleville 75019 Paris
3 Centre de Santé Convention - Ministère des Aff... 27 Rue de la Convention 75015 Paris
4 Dr Marc WYDRA 4 Rue du Docteur Roux 75015 Paris
.. ... ... ...
64 Dr Audrey CORNILLEAU 7b Rue de Lesseps 75020 Paris
65 Dr André AZUELOS 43 Rue de la Chaussée d'Antin 75009 Paris
66 Dr Déborah SMADJA 113 Avenue Victor Hugo 75116 Paris
67 Dr Philippe Levy 35 Rue Vital 75116 Paris
68 Institut Pasquier 44 Rue Pasquier 75008 Paris
[69 rows x 3 columns]