Home > Net >  Selenium and chromedriver
Selenium and chromedriver

Time:06-06

I am scraping some data from flashscore.es (the original post showed a screenshot of the page here).

This is my script:

from selenium import webdriver
import time
import pandas as pd

country = 'china'
ligue = 'cba'
year = '2021-2022'
# Results page for the chosen country and league.
url = 'https://www.flashscore.es/baloncesto/' + country + '/' + ligue + '/resultados/'
driver = webdriver.Chrome()
call = driver.get(url)
data = driver.find_elements_by_class_name('event__time')

# Collect the visible text of every matched element.
data_clean = [element.text for element in data]

# Put multi-line element text on a single line, with ';' between the pieces.
data_clean = [x.replace("\n", ";") for x in data_clean]
dataframe = pd.DataFrame(data_clean)
# The output file is named after the country, league and season.
dataframe.to_csv(country + ligue + year + '.csv', index=False)

CodePudding user response:

Split each element's text on '\n' and keep only the first piece, which is the first line of the div's text.

Working code -

import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Command-line switches for the Chrome instance the scraper drives.
options = webdriver.ChromeOptions()
for chrome_flag in (
    # "--headless",  # uncomment to pass the headless switch as well
    "--no-sandbox",
    "--disable-gpu",
    "--window-size=1920x1080",
    "--disable-extensions",
):
    options.add_argument(chrome_flag)

# ChromeDriverManager().install() supplies the driver executable handed to
# the Service, so no driver path is hard-coded here.
driver_service = Service(ChromeDriverManager().install())
chrome_driver = webdriver.Chrome(service=driver_service, options=options)


def flashscore_scraper(
    country: str = 'china',
    ligue: str = 'cba',
    year: str = '2021-2022',
) -> None:
    """Scrape the event times of a league's results page into a CSV file.

    Loads the flashscore.es basketball results page for ``country`` and
    ``ligue``, reads the text of every ``div.event__time`` element, keeps
    only the first line of each, and writes the values to
    ``<country><ligue><year>.csv`` (a relative path).

    Args:
        country: Country segment of the results URL.
        ligue: League segment of the results URL.
        year: Season label; it only appears in the output file name.

    Note:
        The module-level ``chrome_driver`` is consumed as a context manager.
        Presumably the browser is shut down when the ``with`` block exits,
        so a second call in the same process would need a fresh driver --
        verify against the Selenium version in use.
    """
    url = f'https://www.flashscore.es/baloncesto/{country}/{ligue}/resultados/'

    with chrome_driver as driver:
        # Let element lookups wait for the page content instead of failing
        # immediately while the results are still loading.
        driver.implicitly_wait(15)
        driver.get(url)

        cells = driver.find_elements(By.CSS_SELECTOR, 'div.event__time')

        # An element's text can span several lines; only the first one is
        # kept. The loop variable is deliberately not called `time`, which
        # would shadow the imported `time` module.
        first_lines = [cell.text.split('\n')[0] for cell in cells]

        dataframe = pd.DataFrame(first_lines)
        dataframe.to_csv(f'{country}{ligue}{year}.csv', index=False)


flashscore_scraper()


Output -

0
26.04. 13:35
24.04. 13:35
22.04. 13:35
20.04. 13:35
17.04. 13:35
16.04. 13:35
15.04. 13:35
14.04. 13:35
13.04. 13:35
12.04. 13:35
10.04. 13:35
10.04. 09:00
09.04. 13:35
09.04. 09:00
08.04. 13:35
08.04. 09:00
07.04. 13:35
07.04. 09:00
05.04. 13:35
04.04. 13:35
04.04. 09:00
03.04. 13:35
03.04. 09:00
02.04. 13:35
02.04. 09:00
01.04. 13:35
01.04. 09:00
22.03. 14:00
22.03. 14:00
22.03. 09:30
22.03. 09:30
22.03. 05:00
22.03. 05:00
21.03. 13:35
21.03. 13:35
21.03. 09:00
21.03. 09:00
20.03. 14:00
20.03. 14:00
20.03. 09:30
20.03. 09:30
20.03. 05:00
20.03. 05:00
19.03. 13:35
19.03. 13:35
19.03. 09:00
19.03. 09:00
18.03. 13:35
18.03. 13:35
18.03. 09:00
18.03. 09:00
17.03. 14:00
17.03. 14:00
17.03. 09:30
17.03. 09:30
17.03. 05:00
17.03. 05:00
16.03. 13:35
16.03. 13:35
15.03. 14:00
15.03. 14:00
15.03. 09:30
15.03. 09:30
15.03. 05:00
15.03. 05:00
14.03. 13:35
14.03. 13:35
13.03. 14:00
13.03. 14:00
13.03. 09:30
13.03. 09:30
13.03. 05:00
13.03. 05:00
12.03. 13:35
12.03. 13:35
12.03. 09:00
12.03. 09:00
11.03. 13:35
11.03. 13:35
11.03. 09:00
11.03. 09:00
10.03. 14:00
10.03. 14:00
10.03. 09:30
10.03. 09:30
10.03. 05:00
10.03. 05:00
09.03. 13:35
09.03. 13:35
08.03. 14:00
08.03. 13:35
08.03. 09:30
08.03. 09:00
08.03. 05:00
08.03. 05:00
07.03. 13:35
07.03. 13:35
06.03. 14:00
06.03. 13:35
06.03. 09:30
06.03. 09:00
06.03. 05:00
06.03. 05:00
05.03. 13:35
05.03. 13:35
05.03. 09:00
05.03. 09:00

  • Related