How to write scraped data to multiple columns in excel with pandas data frames?


How do I scrape the data into columns A, B, C and D? At the moment everything lands in a single column instead of separate Team, Team Odds, Lay Team and Lay Odds columns. How do I fix the scraping so the data is appended across multiple columns rather than one? Thanks.

import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

options = Options()
options.add_argument('--start-maximized')
options.add_experimental_option("detach", True)
# webdriver-manager downloads a matching chromedriver, so no local .exe path is needed
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.get('https://www.betfair.com.au/exchange/plus/en/football-betting-1/13')
WebDriverWait(driver, 100).until(lambda driver: driver.execute_script('return document.readyState') == 'complete')
element = WebDriverWait(driver,20).until(lambda driver: driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1) , bf-coupon-table:nth-child(1) tr:nth-child(1) .matched-amount-value"))
print('h')

scroll = driver.find_element(By.CSS_SELECTOR, ".coupon-page-navigation__label--next")
driver.execute_script("arguments[0].scrollIntoView();", scroll)

try:
    # dismiss the cookie banner if present
    driver.find_element(By.CSS_SELECTOR, "#cookie-holder span").click()
except Exception:
    pass

i = 1
while i < 2:
    try:
        time.sleep(2)
        #
        #bf-coupon-page-navigation > ul > li:nth-child(4) > abf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']
        #driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > abf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")
        WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR,  ".name:nth-child(1)")))
        WebDriverWait(driver, 15).until(lambda driver: driver.execute_script('return document.readyState') == 'complete')
        element = WebDriverWait(driver, 15).until(lambda driver: driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1) , bf-coupon-table:nth-child(1) tr:nth-child(1) .matched-amount-value"))
        scroll = driver.find_element(By.CSS_SELECTOR, ".coupon-page-navigation__label--next")
        driver.execute_script("arguments[0].scrollIntoView();", scroll)
        NextStory = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'bf-coupon-page-navigation > ul > li:nth-child(4) > a')))
        match = driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1)")

        # NOTE: "CSS_SELECTOR" is a literal placeholder left in the question, so this list is empty
        matches = driver.find_elements(By.CSS_SELECTOR, "CSS_SELECTOR")

        Team1 = driver.find_elements(By.CSS_SELECTOR, ".name:nth-child(1)")
        for value in Team1:
            print(value.text)

        BackOdds = driver.find_elements(By.CSS_SELECTOR, ".coupon-runner:nth-child(1) .back-selection-button .bet-button-price")
        for value2 in BackOdds:
            print(value2.text)

        Team2 = driver.find_elements(By.CSS_SELECTOR, ".name:nth-child(2)")
        for value1 in Team2:
            print(value1.text)

        LayOdds = driver.find_elements(By.CSS_SELECTOR, ".coupon-runner:nth-child(1) .lay-selection-button .bet-button-price")
        for value3 in LayOdds:
            print(value3.text)
        #bf-coupon-page-navigation > ul > li:nth-child(4) > a
        driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")

        WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR,  "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")))

        NextStory.click()


        for match in matches:
            #print(match.text)
            driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")
            WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")))
            NextStory.click()
    except Exception:
        # any failure (e.g. no next page) ends the loop
        i = 6
        print('error')

# PANDAS: WRITE SCRAPED DATA TO EXCEL
######################################################################
ls = []

for value in Team1:
  df1 = ls.append(value.text)

for value2 in BackOdds:
  df2 = ls.append(value2.text)

for value1 in Team2:
  df3 = ls.append(value1.text)

for value3 in LayOdds:
  df4 = ls.append(value3.text)

df1 = pd.DataFrame(data=ls)
df2 = pd.DataFrame(data=ls)
df3 = pd.DataFrame(data=ls)
df4 = pd.DataFrame(data=ls)
#print(df1)
#print(df2)
#print(df3)
#print(df4)

#df1.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', startcol=1)
#df2.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', startcol=2)
#df3.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', startcol=3)
#df4.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', startcol=4)

with pd.ExcelWriter('output.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df1.to_excel(writer, engine='openpyxl', sheet_name='Sheet_name_1', startcol=2)
    df2.to_excel(writer, engine='openpyxl', sheet_name='Sheet_name_1', startcol=2)
    df3.to_excel(writer, engine='openpyxl', sheet_name='Sheet_name_1', startcol=3)
    df4.to_excel(writer, engine='openpyxl', sheet_name='Sheet_name_1', startcol=4)

print('Script Completed')

CodePudding user response:

Well, I probably wouldn't have done the code this way (there's an API request that returns the data, so you don't actually need Selenium, although Selenium will work; see a sample of the data here in nice JSON format). But the Selenium code below will get you the data into lists; then pandas can construct the DataFrame.
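
For completeness, a rough sketch of that request-based approach follows; the endpoint URL and the JSON field names are placeholder assumptions, since the real request and response shape would have to be copied from the browser's Network tab rather than from any documented API:

import pandas as pd
import requests

# placeholder endpoint -- replace with the request the coupon page actually
# makes (visible in the browser dev tools, Network tab)
API_URL = 'https://www.betfair.com.au/...'

resp = requests.get(API_URL, headers={'User-Agent': 'Mozilla/5.0'})
resp.raise_for_status()
payload = resp.json()

# assumed response structure and field names, for illustration only
rows = [{
    'Team1': event.get('homeTeam'),
    'Back Odds': event.get('backPrice'),
    'Team2': event.get('awayTeam'),
    'Lay Odds': event.get('layPrice'),
} for event in payload.get('events', [])]

df = pd.DataFrame(rows)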

Code:

...

try:
    # dismiss the cookie banner if present
    driver.find_element(By.CSS_SELECTOR, "#cookie-holder span").click()
except Exception:
    pass


team1List = []
backOddsList = []
team2List = []
layOddsList = []

i = 1
while i < 2:
    try:
        time.sleep(2)
        #
        #bf-coupon-page-navigation > ul > li:nth-child(4) > abf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']
        #driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > abf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")
        WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR,  ".name:nth-child(1)")))
        WebDriverWait(driver, 15).until(lambda driver: driver.execute_script('return document.readyState') == 'complete')
        element = WebDriverWait(driver, 15).until(lambda driver: driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1) , bf-coupon-table:nth-child(1) tr:nth-child(1) .matched-amount-value"))
        scroll = driver.find_element(By.CSS_SELECTOR, ".coupon-page-navigation__label--next")
        driver.execute_script("arguments[0].scrollIntoView();", scroll)
        NextStory = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'bf-coupon-page-navigation > ul > li:nth-child(4) > a')))
        match = driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1)")

        # NOTE: "CSS_SELECTOR" is a literal placeholder carried over from the question, so this list is empty
        matches = driver.find_elements(By.CSS_SELECTOR, "CSS_SELECTOR")

        Team1 = driver.find_elements(By.CSS_SELECTOR, ".name:nth-child(1)")
        for value in Team1:
            print(value.text)
            team1List.append(value.text)
            
        BackOdds = driver.find_elements(By.CSS_SELECTOR, ".coupon-runner:nth-child(1) .back-selection-button .bet-button-price")
        for value2 in BackOdds:
            print(value2.text)
            backOddsList.append(value2.text)

        Team2 = driver.find_elements(By.CSS_SELECTOR, ".name:nth-child(2)")
        for value1 in Team2:
            print(value1.text)
            team2List.append(value1.text)

        LayOdds = driver.find_elements(By.CSS_SELECTOR, ".coupon-runner:nth-child(1) .lay-selection-button .bet-button-price")
        for value3 in LayOdds:
            print(value3.text)
            layOddsList.append(value3.text)
        #bf-coupon-page-navigation > ul > li:nth-child(4) > a
        driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")

        WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR,  "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")))

        NextStory.click()


        for match in matches:
            #print(match.text)
            driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")
            WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")))
            NextStory.click()
    except Exception:
        # any failure (e.g. no next page) ends the loop
        i = 6
        print('error')

# PANDAS: WRITE SCRAPED DATA TO EXCEL
######################################################################
df = pd.DataFrame(
    {
     'Team1':team1List,
     'Back Odds':backOddsList,
     'Team2': team2List,
     'Lay Odds': layOddsList})

df.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', index=False)
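
All four lists must be the same length for the dict-of-lists DataFrame constructor; the scrape still appends an empty string when a price cell is blank, which keeps the columns aligned (see the blank Lay Odds in row 3 below). A quick sanity check, suggested here rather than part of the original answer:

# guard against the four columns drifting out of sync across pages
assert len(team1List) == len(backOddsList) == len(team2List) == len(layOddsList)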

Output:

print(df)
              Team1 Back Odds                  Team2 Lay Odds
0         FC Anyang       2.5        Daejeon Citizen     2.84
1   Seoul E-Land FC      2.82              Gyeongnam     3.25
2         MFk Snina      1.08              FC Kosice       25
3   Rimavska Sobota      1.01  DUKLA BANSKA BYSTRICA         
4        Nove Zamky      1.08                Samorin     12.5
..              ...       ...                    ...      ...
91          Denmark      1.47                Tunisia     1.54
92           Mexico      2.74                 Poland     3.25
93           France      1.22              Australia     1.28
94          Morocco       4.6                Croatia      5.6
95          Germany      1.36                  Japan     1.48

[96 rows x 4 columns]
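
As an aside: if you do want each series at an explicit column of the sheet, as the startcol attempt in the question tried, route every write through a single ExcelWriter. Separate df.to_excel('output.xlsx', ...) calls each rewrite the whole file, so only the last one survives (and mode='a' additionally requires that output.xlsx already exists). A minimal sketch reusing the df built above:

# one writer, one sheet; each piece lands at its own startcol
with pd.ExcelWriter('output.xlsx', engine='openpyxl') as writer:
    for offset, name in enumerate(df.columns):
        df[[name]].to_excel(writer, sheet_name='Sheet_name_1',
                            startcol=offset, index=False)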