I have the following dataframe and can calculate the splits between the Start
and End
timestamp. It doesn't work for periods longer than 1 day and i can't concact my df properly:
import pandas as pd
import datetime as dt
df = pd.DataFrame({'Start':['2022-06-07 06:24:48','2022-06-07 14:37:16','2022-06-07 08:00:59'],
'End':['2022-06-07 14:07:00','2022-06-08 02:51:21','2022-06-09 13:18:34'],
'Process':['PROD','VORG','STO'],
'Duration_Min':[462.20,734.08,3197.58]})
df['Start'] = pd.to_datetime(df['Start'])
df['End'] = pd.to_datetime(df['End'])
#Calculate the difference in days
df['difference']=df['End'].dt.date-df['Start'].dt.date
splits = df[df.End.dt.date > df.Start.dt.date].copy()
print(pd.concat([
df,
pd.DataFrame({
'Start': list(splits.Start) list(splits.End.dt.floor(freq='1D')),
'End': list(splits.Start.dt.ceil(freq='1D')) list(splits.End)})
]))
What I get:
Start End Process Duration_Min difference
0 2022-06-07 06:24:48 2022-06-07 14:07:00 PROD 462.20 0 days
1 2022-06-07 14:37:16 2022-06-08 02:51:21 VORG 734.08 1 days
2 2022-06-07 08:00:59 2022-06-09 13:18:34 STO 3197.58 2 days
0 2022-06-07 14:37:16 2022-06-08 00:00:00 NaN NaN NaT
1 2022-06-07 08:00:59 2022-06-08 00:00:00 NaN NaN NaT
2 2022-06-08 00:00:00 2022-06-08 02:51:21 NaN NaN NaT
3 2022-06-09 00:00:00 2022-06-09 13:18:34 NaN NaN NaT
I would like to cut the events so that new timestamps with new intervals are created when the day changes. Days
should be corosponding with weekday()
What I want:
Start End Process Duration_Min Days
0 2022-06-07 06:24:48 2022-06-07 14:07:00 PROD 462.200000 1
1 2022-06-07 14:37:16 2022-06-07 23:59:59 VORG 562.716667 1
2 2022-06-08 00:00:00 2022-06-08 02:51:21 VORG 171.350000 2
3 2022-06-07 08:00:59 2022-06-07 23:59:59 STO 959.000000 1
4 2022-06-08 00:00:00 2022-06-08 23:59:59 STO 1439.983333 2
5 2022-06-09 00:00:00 2022-06-09 13:18:34 STO 798.566667 3
CodePudding user response:
You can try this:
import pandas as pd
df = pd.DataFrame({'Start':['2022-06-07 06:24:48','2022-06-07 14:37:16','2022-06-07 08:00:59'],
'End':['2022-06-07 14:07:00','2022-06-08 02:51:21','2022-06-09 13:18:34'],
'Process':['PROD','VORG','STO'],
'Duration_Min':[462.20,734.08,3197.58]})
df['Start'] = pd.to_datetime(df['Start'])
df['End'] = pd.to_datetime(df['End'])
print(df)
Start End Process Duration_Min
0 2022-06-07 06:24:48 2022-06-07 14:07:00 PROD 462.20
1 2022-06-07 14:37:16 2022-06-08 02:51:21 VORG 734.08
2 2022-06-07 08:00:59 2022-06-09 13:18:34 STO 3197.58
def split_dates(row):
days = row.End.date() - row.Start.date()
if days.days == 0:
return
dtr = pd.date_range(start=row.Start, end=row.End, freq='D', normalize=True)
periods = []
for i, t in enumerate(dtr):
if i == 0:
periods.append([t pd.to_timedelta(str(row.Start.time()))])
periods[i].append(t pd.to_timedelta('23:59:59'))
periods[i].append(row.Process)
periods[i].append(round((periods[i][1] - periods[i][0]).seconds / 60, 2))
continue
if i == len(dtr) - 1:
periods.append([t])
periods[i].append(t pd.to_timedelta(str(row.End.time())))
periods[i].append(row.Process)
periods[i].append(round((periods[i][1] - periods[i][0]).seconds / 60, 2))
continue
periods.append([t])
periods[i].append(t pd.to_timedelta('23:59:59'))
periods[i].append(row.Process)
periods[i].append(round((periods[i][1] - periods[i][0]).seconds / 60, 2))
result = {'Start' : [], 'End' : [], 'Process' : [], 'Duration_Min' : []}
for p in periods:
result['Start'].append(p[0].strftime('%F %T'))
result['End'].append(p[1].strftime('%F %T'))
result['Process'].append(p[2])
result['Duration_Min'].append(p[3])
return result
res = pd.DataFrame()
for i, d in df.apply(split_dates, axis=1).items():
if d:
res = pd.concat([res, pd.DataFrame(d)])
df = df.drop(index=i)
df = pd.concat([df, res], ignore_index=True)
df.Start = pd.to_datetime(df.Start)
df.End = pd.to_datetime(df.End)
df.sort_values(by=['Start', 'End'], inplace=True, ignore_index=True)
df['Days'] = df.Start.dt.weekday
print(df)
Start End Process Duration_Min Days
0 2022-06-07 06:24:48 2022-06-07 14:07:00 PROD 462.20 1
1 2022-06-07 08:00:59 2022-06-07 23:59:59 STO 959.00 1
2 2022-06-07 14:37:16 2022-06-07 23:59:59 VORG 562.72 1
3 2022-06-08 00:00:00 2022-06-08 02:51:21 VORG 171.35 2
4 2022-06-08 00:00:00 2022-06-08 23:59:59 STO 1439.98 2
5 2022-06-09 00:00:00 2022-06-09 13:18:34 STO 798.57 3