pyodbc query with dates variable (get data for last week 4hours)


I am quite new to Python and even newer to SQL... I am stuck on a pyodbc query which:

  1. should create a dataframe with data for the last week plus 4.5 hours (example: last Monday = 07/03/2022 00:00:00 till this Monday = 14/03/2022 04:30:00; see the snippet after this list).
  2. the main table for the query is tblLogs; it has a LogDateTime column in the format 2022-03-11 20:29:53.000.
  3. tblLogs has lots of columns, so I also pass a list of columns, LogColumnForQuery...
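
To make the window concrete, this is what I mean for one specific week (hard-coded values, illustration only):

from datetime import datetime

# illustration only: the exact window described in point 1
window_start = datetime(2022, 3, 7, 0, 0)     # last Monday, 00:00:00
window_end = datetime(2022, 3, 14, 4, 30)     # this Monday, 04:30:00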

At the moment I use the code below, but it only collects data between two dates... I was not able to figure out how to add a few hours after Sunday's midnight. Code example:

import pyodbc
import pandas as pd

cnx = pyodbc.connect('DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database +
                     ';UID=' + username + ';PWD=' + password)
cursor = cnx.cursor()
# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
LogColumnForQuery = ', '.join(LogColumns)

#main query
sql_query = pd.read_sql_query('SET DATEFIRST 1 SELECT ' + LogColumnForQuery +
                              '''
                                FROM tblLogs
                                WHERE LogDateTime >= DATEADD(day, -(DATEPART(WEEKDAY, GETDATE()) + 6), CONVERT(DATE, GETDATE()))
                                AND LogDateTime <  DATEADD(day, 1 - DATEPART(WEEKDAY, GETDATE()), CONVERT(DATE, GETDATE()));
                                '''
                              , cnx)

df = pd.DataFrame(sql_query)
df.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)

I am trying to introduce the following, with no progress...

These dates and their formats are killing me ;) please advise.

As per the code below - most likely my query is not correct and the column list is not in the correct format...

import pyodbc
import pandas as pd
from datetime import datetime, timedelta

today = datetime.now()
lastMon = (today - timedelta(days = today.weekday(), weeks=1))
thisMon = today - timedelta(days = today.weekday())
lastMon = lastMon.replace(hour=00, minute=00, second=00, microsecond=00)
thisMon = thisMon.replace(hour=4, minute=30, second=00, microsecond=00)
lastMon = datetime.strftime(lastMon, '%d/%m/%Y %X')
thisMon = datetime.strftime(thisMon, '%d/%m/%Y %X')

cnx = pyodbc.connect('DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database +
                     ';UID=' + username + ';PWD=' + password)
cursor = cnx.cursor()

# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
LogColumnForQuery = ', '.join(LogColumns)

sql_query = pd.read_sql_query(f'SELECT {LogColumnForQuery} FROM tblLogs WHERE '
                              f'LogDateTime > ({lastMon}) AND LogDateTime < ({thisMon})'
                              , cnx)

df = pd.DataFrame(sql_query)
df.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
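
Would passing the datetime objects as query parameters (and skipping the strftime calls above) be the right direction? A rough sketch of what I mean, not tested:

# sketch only: keep lastMon / thisMon as datetime objects and let pyodbc bind them
sql = (f'SELECT {LogColumnForQuery} FROM tblLogs '
       'WHERE LogDateTime >= ? AND LogDateTime < ?')
sql_query = pd.read_sql_query(sql, cnx, params=[lastMon, thisMon])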

CodePudding user response:

I would build a pandas date range and use a time delta to get what you need:

import datetime

import pandas as pd
import pyodbc

end_date = datetime.datetime.now()
first_date = end_date - datetime.timedelta(days = 6)  # last 7 days, so the range below contains exactly one Monday

df = pd.DataFrame(pd.date_range(first_date, end_date, freq = '1D'), columns = ['Date'])
df['Day'] = df['Date'].dt.day_name()
df['Date'] = df['Date'].apply(lambda x : datetime.datetime.strftime(x, '%d/%m/%Y %X'))
current_monday = df['Date'].loc[df['Day'] == 'Monday'].values[0]
current_monday = datetime.datetime.strptime(current_monday, '%d/%m/%Y %X')
minutes_to_lookback = (7 * 24 * 60) + (4 * 60) + 30  # 7 days plus 4.5 hours, in minutes
last_monday = current_monday - datetime.timedelta(minutes = minutes_to_lookback)

# render as unambiguous ISO-style strings for the SQL literals below
current_monday = datetime.datetime.strftime(current_monday, '%Y-%m-%d %H:%M:%S')
last_monday = datetime.datetime.strftime(last_monday, '%Y-%m-%d %H:%M:%S')

cnx = pyodbc.connect('DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database +
                     ';UID=' + username + ';PWD=' + password)
cursor = cnx.cursor()

# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
LogColumnForQuery = '[' + '], ['.join(LogColumns) + ']'  # bracket-quote the identifiers for T-SQL

query = f"""
SELECT 
    {LogColumnForQuery} 
FROM tblLogs 
WHERE 1=1
    and LogDateTime between '{last_monday}' and '{current_monday}'
"""

sql_query = pd.read_sql_query(query, cnx)
sql_query.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
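
One thing to keep in mind: BETWEEN is inclusive on both ends, so a row stamped exactly at the upper bound is also returned. If the upper bound should be exclusive (as in the >= / < pattern from the question), the WHERE clause could be written like this instead (sketch only, same variables as above):

query = f"""
SELECT 
    {LogColumnForQuery} 
FROM tblLogs 
WHERE LogDateTime >= '{last_monday}'
    and LogDateTime < '{current_monday}'
"""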

CodePudding user response:

Using Python datetime and SQLAlchemy Core you can avoid string formatting/quoting and other inconveniences:

import datetime

import pandas as pd
import sqlalchemy as sa

engine = sa.create_engine("mssql+pyodbc://scott:tiger^5HHH@mssql_199")

table_name = "tblLogs"

# set up test environment
with engine.begin() as conn:
    conn.exec_driver_sql(f"DROP TABLE IF EXISTS {table_name}")
    conn.exec_driver_sql(
        f"CREATE TABLE {table_name} (id int primary key, LogDateTime datetime2)"
    )

# test
tbl_logs = sa.Table(table_name, sa.MetaData(), autoload_with=engine)

start_of_today = datetime.datetime.combine(
    datetime.date.today(), datetime.datetime.min.time()
)
start_of_last_monday = start_of_today - datetime.timedelta(
    days=start_of_today.weekday()
)  # (same as today if today is a Monday)
start_time = start_of_last_monday - datetime.timedelta(days=7)
end_time = start_of_last_monday + datetime.timedelta(hours=4.5)

qry = sa.select(tbl_logs).where(
    tbl_logs.c.LogDateTime.between(start_time, end_time)
)

engine.echo = True
df = pd.read_sql_query(qry, engine)
""" SQL rendered:
SELECT [tblLogs].id, [tblLogs].[LogDateTime] 
FROM [tblLogs] 
WHERE [tblLogs].[LogDateTime] BETWEEN ? AND ?

[generated in 0.00080s] (datetime.datetime(2022, 3, 7, 0, 0), datetime.datetime(2022, 3, 14, 4, 30))
"""