Been playing with this for 14 hours (I am a beginner)
Data is pulled from one database table to search on yahoo for all the data on that ticker and then its "meant" to upload it.
I orginally had it as panda df but got "ambiguous error" so I have now put it as [] again. New error. I rack my brains :( However, it does work if I leave it blank.
from __future__ import print_function
import yfinance as yf
import pandas as pd
import datetime
import warnings
import MySQLdb as mdb
import requests
import numpy as np
import MySQLdb as mdb
import requests
# Obtain a database connection to the MySQL instance
con = mdb.connect("localhost","sec_user","","securities_master")
def obtain_list_of_db_tickers():
"""
Obtains a list of the ticker symbols in the database.
"""
with con:
cur = con.cursor()
cur.execute("SELECT id, ticker FROM symbol")
data = cur.fetchall()
print(data)
return [(d[0], d[1]) for d in data]
def get_daily_historic_data_yahoo(ticker):
blow = yf.download(ticker)
data = []
data.append(yf.download(ticker).reset_index())
return data
def insert_daily_data_into_db(data_vendor_id, symbol_id, daily_data):
'''
Takes a list of tuples of daily data and adds it to the MySQL database.
Appends the vendor ID and symbol ID to the data.
daily_data: List of tuples of the OHLC data (with adj_close and volume)
'''
# Create the time now
now = datetime.datetime.utcnow()
df = pd.DataFrame(data=daily_data[0])
df.insert(0, 'data_vendor_id', data_vendor_id)
df.insert(1, 'symbol_id', symbol_id)
df.insert(3, 'created_date', now)
df.insert(4, 'last_updated_date', now)
daily_data = []
daily_data.append(df)
#df = daily_data
# Amend the data to include the vendor ID and symbol ID
# Connect to the MySQL instance
db_host = 'localhost'
db_user = ''
db_pass = ''
db_name = 'securities_master'
con = mdb.connect("localhost", "sec_user", "", "securities_master"
# host=db_host, user=db_user, passwd=db_pass, db=db_name
)
try:
mdb.connect
# If connection is not successful
except:
print("Can't connect to database")
return 0
# If Connection Is Successful
print("Connected")
final_str = """INSERT INTO daily_price (data_vendor_id, symbol_id, price_date, created_date,
last_updated_date, open_price, high_price, low_price, close_price, volume, adj_close_price) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
with con:
cur = con.cursor()
cur.executemany(final_str, daily_data)
con.commit()
if __name__ == "__main__":
# This ignores the warnings regarding Data Truncation
# from the Yahoo precision to Decimal(19,4) datatypes
warnings.filterwarnings('ignore')
# Loop over the tickers and insert the daily historical
# data into the database
tickers = obtain_list_of_db_tickers()
lentickers = len(tickers)
for i, t in enumerate(tickers):
print(
"Adding data for %s: %s out of %s" %
(t[1], i 1, lentickers)
)
yf_data = get_daily_historic_data_yahoo(t[1])
insert_daily_data_into_db('1', t[0], yf_data)
print("Successfully added Yahoo Finance pricing data to DB.")
Errors
Traceback (most recent call last):
File "/home/quant/price_retrieval.py", line 106, in <module>
insert_daily_data_into_db('1', t[0], yf_data)
File "/home/quant/price_retrieval.py", line 88, in insert_daily_data_into_db
cur.executemany(final_str, daily_data)
File "/home/quant/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 230, in executemany
return self._do_execute_many(
File "/home/quant/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 255, in _do_execute_many
v = values % escape(next(args), conn)
TypeError: not enough arguments for format string
CodePudding user response:
I'm no data scientist so there's probably a more elegant way to fix it directly with pandas. But the way I usually work with MySQL (and really any SQL drivers) is to give it lists of python tuples.
If you parse each row of the pandas data frame with for row in df.itertuples():
and craft each tuple carefully - making sure the types match the SQL table, all should work ;)
Example:
def insert_daily_data_into_db(data_vendor_id, symbol_id, daily_data):
'''
Takes a list of tuples of daily data and adds it to the MySQL database.
Appends the vendor ID and symbol ID to the data.
daily_data: List of tuples of the OHLC data (with adj_close and volume)
'''
# Create the time now
now = datetime.datetime.utcnow()
df = pd.DataFrame(data=daily_data[0])
daily_data = []
created_date = now
last_updated_date = now
for row in df.itertuples():
_index = row[0] # discard
date = row[1]
open = row[2]
high = row[3]
low = row[4]
close = row[5]
adj_close_price = row[6]
volume = row[7]
daily_data.append((int(data_vendor_id), symbol_id, date, created_date, last_updated_date, open, high, low, close, volume, adj_close_price))
# Connect to the MySQL instance
con = mdb.connect(host="localhost", user="user", password="yourpassword",
db="yourdbname", port=3306)
final_str = """
INSERT INTO daily_price (data_vendor_id, symbol_id, price_date, created_date,
last_updated_date, open_price, high_price, low_price, close_price, volume, adj_close_price)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
with con:
cur = con.cursor()
cur.executemany(final_str, daily_data)
con.commit()
I've tried not to tamper with your existing code too much. Just enough to make it work.
I think what was happening there for you was that you're technically passing it a list of pandas dataframes with only a single pandas dataframe in the list. Instead what you want is a list of tuples with 11 fields to unpack per tuple.
Maybe you mean to pass the dataframe directly i.e. not contained inside of a list but I still don't think that would be right because 1) there's an "Index" column in the dataframe which would give erroneous results 2) you'd need to call some methods on the dataframe to retrieve only the values (not the headers to the columns) and transform it to the correct list of tuples. It's probably very doable but I will leave that to you to find out.
I am also assuming your table schema is something like this:
CREATE TABLE IF NOT EXISTS daily_price (
data_vendor_id INT,
symbol_id INT,
price_date DATETIME,
created_date DATETIME,
last_updated_date TIMESTAMP,
open_price VARCHAR(256),
high_price VARCHAR(256),
low_price VARCHAR(256),
close_price VARCHAR(256),
volume INT,
adj_close_price VARCHAR(256)
);