I am trying to upload data from scraping into a google sheet using pandas, but I get the following error:
worksheet.update([df.columns.values.tolist()] df.values.tolist())
AttributeError: 'list' object has no attribute 'columns'
Here is my code
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from google.oauth2.service_account import Credentials
import gspread
scope = ['https://www.googleapis.com/auth/spreadsheets']
creds = Credentials.from_service_account_file("123.json", scopes=scope)
gs = gspread.authorize(creds)
sheet = gs.open_by_url('https://docs.google.com/spreadsheets/d/14jRVDlF1QY-vo0An2T8_aj-8TAgcIgGD3mrbDAjk9_E/edit#gid=0')
worksheet = sheet.get_worksheet(0)
PATH ="C:/Users/XXX/Desktop/chromedriver.exe"
driver = webdriver.Chrome( PATH )
driver.get("https://fixturedownload.com/results/epl-2021")
Select(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//select[@name='timezone']")))).select_by_value("SE Asia Standard Time" )
driver.find_element(By.XPATH, "//input[@value='Set Timezone']").click()
data = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[2]/div/table"))).get_attribute("outerHTML")
df = pd.read_html(data)
worksheet.update([df.columns.values.tolist()] df.values.tolist())
CodePudding user response:
According to pandas documentation: https://pandas.pydata.org/docs/reference/api/pandas.read_html.html
The method read_html
returns a list of dataframes, that is why you are getting that error. So you'll need to change this on your code:
df = pd.read_html(data)
worksheet.update([df[0].columns.values.tolist()] df[0].values.tolist())
This is assuming that you only want the first data frame from the list, otherwise you will need to loop over the list of dataframes:
dfs = pd.read_html(data)
for df in dfs:
worksheet.update([df.columns.values.tolist()] df.values.tolist())