I have the following dataset:
- user_id: name of user - eg: 'user1'
- start: when user started sleeping - eg: '2022-09-24 22:00:00+02:00'
- stop: when user stopped sleeping - eg: '2022-09-25 04:30:00+02:00'
Desired output:
I would like to plot a bar/line chart with:
- x axis: time of clock (0 to 24 hours)
- y axis: user_id
With each bar showing when user started and stopped sleeping.
Current results:
I am able to plot the bar chart with x axis "minutes from midnight". Here is the code:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from datetime import timedelta
import datetime
# Sample sleep records: one row per user with the time they went to bed
# ('start') and the time they got up ('stop').
# The UTC offset must carry its sign ("+02:00"). Without the "+", the trailing
# "02:00" is not an offset, so the strings would not parse to the intended
# timezone-aware timestamps.
df = pd.DataFrame({
    'user_id': ['user1', 'user2', 'user3', 'user4'],
    'start': [
        '2022-09-24 22:00:00+02:00',
        '2022-09-04 23:30:00+02:00',
        '2022-11-11 02:20:30+02:00',
        '2022-11-12 03:20:30+02:00',
    ],
    'stop': [
        '2022-09-25 04:30:00+02:00',
        '2022-09-05 07:15:30+02:00',
        '2022-11-11 11:20:30+02:00',
        '2022-11-12 10:40:30+02:00',
    ],
})
# change to datetime (unparseable values become NaT because of errors='coerce')
datetime_columns = ['start', 'stop']
df[datetime_columns] = df[datetime_columns].apply(pd.to_datetime, errors='coerce')
def get_time_from_midnight(datetime_series):
    """Return each timestamp's signed distance, in minutes, from the nearest "sleep" midnight.

    Timestamps whose hour is greater than 12 are measured against the
    *following* midnight, which yields a negative value (22:00 -> -120).
    Timestamps whose hour is 12 or less are measured against the midnight that
    started their own day (02:30 -> 150). This centres the values on midnight.

    Args:
        datetime_series: pandas Series of datetime values (accessed via ``.dt``).

    Returns:
        numpy array of floats, one per input row; rows matching neither
        condition (e.g. NaT) are NaN.
    """
    same_day_midnight = datetime_series.dt.normalize()
    # The "+" here is essential: evening times are compared with the NEXT midnight.
    next_day_midnight = same_day_midnight + pd.Timedelta(days=1)
    one_minute = pd.Timedelta(minutes=1)
    hour = datetime_series.dt.hour

    # if above 12 you get next day, if below you get same day
    conditions_midnight = [hour > 12, hour <= 12]
    choices_midnight = [
        (datetime_series - next_day_midnight) / one_minute,
        (datetime_series - same_day_midnight) / one_minute,
    ]
    return np.select(conditions_midnight, choices_midnight, default=np.nan)
# Derive the two quantities the bar chart needs: where each bar begins
# (minutes relative to midnight) and how long it is (minutes in bed).
df['start_minutes_from_midnight'] = get_time_from_midnight(df['start'])
df['time_in_bed'] = (df['stop'] - df['start']) / timedelta(minutes=1)

# One horizontal bar per user, anchored at the bed time via `base`.
sleep_bars = go.Bar(
    y=df['user_id'],
    x=df['time_in_bed'],
    base=df['start_minutes_from_midnight'],
    orientation='h',
)
fig = go.Figure()
fig.add_trace(sleep_bars)
fig.update_layout(
    title_text='Current result: sleep time each user',
    xaxis_title="minutes_from_midnight",
)
fig.show()
However, I am not able to show the "clock time" on the x-axis.
EDIT (current progress): I have found a possible solution with px.timeline; however, it doesn't work when the sleep interval crosses midnight, i.e. when it:
- starts before midnight (eg: 22:00)
- ends after midnight (eg: 4:30), in which case it reverses the order (the bar starts at 4:30 and ends at 22:00)
The ideal solution would be centred at midnight.
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
# Sample sleep records in the column layout px.timeline is given below
# (Task / Start / Finish). The UTC offset must carry its sign ("+02:00");
# without the "+", the trailing "02:00" is not an offset.
df = pd.DataFrame({
    'Task': ['user1', 'user2', 'user3', 'user4'],
    'Start': [
        '2022-09-24 22:00:00+02:00',
        '2022-09-04 23:30:00+02:00',
        '2022-11-11 02:20:30+02:00',
        '2022-11-12 03:20:30+02:00',
    ],
    'Finish': [
        '2022-09-25 04:30:00+02:00',
        '2022-09-05 07:15:30+02:00',
        '2022-11-11 11:20:30+02:00',
        '2022-11-12 10:40:30+02:00',
    ],
})
datetime_columns = ['Start', 'Finish']
#remove Year month day so it plots all within 24hr period
df[datetime_columns] = df[datetime_columns].apply(pd.to_datetime, errors='coerce')
df['Start'] = df['Start'].dt.strftime('%H:%M:%S')
df['Finish'] = df['Finish'].dt.strftime('%H:%M:%S')
# Re-parse the bare clock times so both columns are datetimes again.
# NOTE: once the date is dropped, an interval that crosses midnight has a
# Finish earlier than its Start (e.g. 22:00 -> 04:30), so its bar is reversed.
df[datetime_columns] = df[datetime_columns].apply(pd.to_datetime, errors='coerce')
fig = px.timeline(df, x_start="Start", x_end="Finish", y="Task")
fig.update_yaxes(autorange="reversed")  # otherwise tasks are listed from the bottom up
fig.update_xaxes(
    tickformat="%H:%M")
fig.update_layout(height=400, width=800)
fig.show()
CodePudding user response:
I think what you're looking for is a gantt chart. See the following example:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from datetime import timedelta
from datetime import datetime
import plotly.figure_factory as ff
from dateutil.parser import parse
# Sample sleep records with every interval placed on the same night
# (2022-09-24 -> 2022-09-25) so they share one 24-hour window.
# The UTC offset must carry its sign ("+02:00"); without the "+", the
# trailing "02:00" is not an offset.
df = pd.DataFrame({
    'Task': ['user1', 'user2', 'user3', 'user4'],
    'Start': [
        '2022-09-24 22:00:00+02:00',
        '2022-09-24 23:30:00+02:00',
        '2022-09-25 02:20:30+02:00',
        '2022-09-25 03:20:30+02:00',
    ],
    'Finish': [
        '2022-09-25 04:30:00+02:00',
        '2022-09-25 07:15:30+02:00',
        '2022-09-25 11:20:30+02:00',
        '2022-09-25 10:40:30+02:00',
    ],
})
datetime_columns = ['Start', 'Finish']
df[datetime_columns] = df[datetime_columns].apply(
    pd.to_datetime, errors='coerce')
fig = ff.create_gantt(df,
                      showgrid_x=True, showgrid_y=False,
                      show_colorbar=True, bar_width=0.025)
# Fix the x-axis to the 24 hours from noon to noon, which puts midnight in the
# middle of the chart. The bounds use the same "+02:00" offset as the data.
fig.update_layout(height=400,
                  width=800,
                  xaxis_range=[parse('2022-09-24 12:00:00+02:00'),
                               parse('2022-09-25 12:00:00+02:00')])
# Show only hours and minutes on the ticks, and keep the fixed range set above.
fig.update_xaxes(
    tickformat="%H:%M",
    autorange=False)
fig.show()
Notice that I 1) removed the day and month information from your dataframe, and 2) changed the tickformat
information to show only hours and minutes (otherwise it would put today's date as a default).