I am quite new to Python and even newer to SQL... I am stuck on a pyodbc query which:
- should create a dataframe with data for the last week + 4.5 hours (example: from last Monday = 07/03/2022 00:00:00 until this Monday = 14/03/2022 04:30:00).
- The main table for the query is `tblLogs`; it has a `LogDateTime` column in the format: 2022-03-11 20:29:53.000.
- `tblLogs` has lots of columns, so I also pass the list of columns to select (`LogColumnForQuery`)...
At the moment I use the code below, but it only collects data between two dates... I was not able to figure out how to add a few hours after Sunday's midnight. Code example:
import pyodbc
import pandas as pd
# Export last week's rows of tblLogs (Monday 00:00 up to, but not including,
# this week's Monday 00:00) to a CSV file.
#
# driver, server, database, username and password are not defined in this
# snippet; they are expected to be set before this point.
cnx = pyodbc.connect(
    'DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database
    + ';UID=' + username + ';PWD=' + password
)
cursor = cnx.cursor()

# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
LogColumnForQuery = ', '.join(LogColumns)

# main query
# SET DATEFIRST 1 makes Monday weekday number 1, so with w = DATEPART(WEEKDAY, today):
#   lower bound = today - (w + 6) days -> 00:00 on last week's Monday (inclusive)
#   upper bound = today + (1 - w) days -> 00:00 on this week's Monday (exclusive)
sql_query = pd.read_sql_query(
    'SET DATEFIRST 1 SELECT ' + LogColumnForQuery +
    '''
FROM tblLogs
WHERE LogDateTime >= DATEADD(day, -(DATEPART(WEEKDAY, GETDATE()) + 6), CONVERT(DATE, GETDATE()))
AND LogDateTime < DATEADD(day, 1 - DATEPART(WEEKDAY, GETDATE()), CONVERT(DATE, GETDATE()));
''',
    cnx,
)

# read_sql_query already returns a DataFrame, so no extra wrapping is needed.
df = sql_query
df.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
I am trying to introduce the following, with no progress...
These dates and their formats are killing me ;) Please advise.
As per the code below, most likely my query is not correct and the column list is not in the correct format...
import pyodbc
import pandas as pd
from datetime import datetime, timedelta
# Export tblLogs rows from last week's Monday 00:00:00 up to this week's
# Monday 04:30:00 to a CSV file.
#
# The window bounds stay as datetime objects and are sent to SQL Server as
# bound parameters. Formatting them into the SQL text (e.g. with '%d/%m/%Y %X')
# produces unquoted, locale-dependent literals that the server cannot parse.
today = datetime.now()
this_week_start = today - timedelta(days=today.weekday())
lastMon = (this_week_start - timedelta(weeks=1)).replace(
    hour=0, minute=0, second=0, microsecond=0
)
thisMon = this_week_start.replace(hour=4, minute=30, second=0, microsecond=0)

# driver, server, database, username and password are not defined in this
# snippet; they are expected to be set before this point.
cnx = pyodbc.connect(
    'DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database
    + ';UID=' + username + ';PWD=' + password
)
cursor = cnx.cursor()

# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
LogColumnForQuery = ', '.join(LogColumns)

# Column names come from the fixed list above, so interpolating them is safe;
# the date bounds use pyodbc's '?' placeholders. The lower bound is inclusive
# so that rows stamped exactly at Monday 00:00:00 are kept.
sql_query = pd.read_sql_query(
    f'SELECT {LogColumnForQuery} FROM tblLogs WHERE '
    'LogDateTime >= ? AND LogDateTime < ?',
    cnx,
    params=[lastMon, thisMon],
)

# read_sql_query already returns a DataFrame, so no extra wrapping is needed.
df = sql_query
df.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
CodePudding user response:
I would use Python's `datetime` module and a `timedelta` to get what you need:
import datetime  # this snippet uses datetime.datetime / datetime.timedelta

# Export tblLogs rows from last week's Monday 00:00:00 through this week's
# Monday 04:30:00 to a CSV file.

# Midnight at the start of the current week's Monday (weekday() is 0 on Monday).
now = datetime.datetime.now()
monday_midnight = (now - datetime.timedelta(days=now.weekday())).replace(
    hour=0, minute=0, second=0, microsecond=0
)

# Window start: Monday 00:00 one week earlier.
last_monday = monday_midnight - datetime.timedelta(weeks=1)
# Window end: this Monday plus the extra 4.5 hours.
current_monday = monday_midnight + datetime.timedelta(hours=4, minutes=30)

# driver, server, database, username and password are not defined in this
# snippet; they are expected to be set before this point.
cnx = pyodbc.connect(
    'DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database
    + ';UID=' + username + ';PWD=' + password
)
cursor = cnx.cursor()

# creating list of columns to select
# The names are joined without quotes: single-quoted names would be read by
# SQL Server as string constants, not as column references.
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
LogColumnForQuery = ', '.join(LogColumns)

# BETWEEN takes the lower bound first; both bounds are bound as '?' parameters
# so no date formatting or quoting is involved.
query = f"""
SELECT
    {LogColumnForQuery}
FROM tblLogs
WHERE LogDateTime BETWEEN ? AND ?
"""
sql_query = pd.read_sql_query(query, cnx, params=[last_monday, current_monday])
sql_query.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
CodePudding user response:
Using Python `datetime` and SQLAlchemy Core you can avoid string formatting/quoting and other inconveniences:
import datetime
import pandas as pd
import sqlalchemy as sa
# Demonstrates selecting the "last week + 4.5 hours" window with SQLAlchemy
# Core, letting the driver bind the datetime values as query parameters.
engine = sa.create_engine("mssql+pyodbc://scott:tiger^5HHH@mssql_199")
table_name = "tblLogs"

# set up test environment
# WARNING: this drops and recreates the table; only run it against a scratch
# database, never against the real tblLogs.
with engine.begin() as conn:
    conn.exec_driver_sql(f"DROP TABLE IF EXISTS {table_name}")
    conn.exec_driver_sql(
        f"CREATE TABLE {table_name} (id int primary key, LogDateTime datetime2)"
    )

# test
# Reflect the table definition from the database.
tbl_logs = sa.Table(table_name, sa.MetaData(), autoload_with=engine)

start_of_today = datetime.datetime.combine(
    datetime.date.today(), datetime.datetime.min.time()
)
start_of_last_monday = start_of_today - datetime.timedelta(
    days=start_of_today.weekday()
)  # (same as today if today is a Monday)

# Window: 00:00 on the Monday one week earlier, through 04:30 on the most
# recent Monday. BETWEEN includes both end points.
start_time = start_of_last_monday - datetime.timedelta(days=7)
end_time = start_of_last_monday + datetime.timedelta(hours=4.5)

qry = sa.select(tbl_logs).where(
    tbl_logs.c.LogDateTime.between(start_time, end_time)
)

engine.echo = True  # log the rendered SQL and its bound parameters
df = pd.read_sql_query(qry, engine)
""" SQL rendered:
SELECT [tblLogs].id, [tblLogs].[LogDateTime]
FROM [tblLogs]
WHERE [tblLogs].[LogDateTime] BETWEEN ? AND ?
[generated in 0.00080s] (datetime.datetime(2022, 3, 7, 0, 0), datetime.datetime(2022, 3, 14, 4, 30))
"""