I have a timeseries dataset like so:
Time_TS Step
17:54:41 0
17:54:43 0
17:54:45 0
17:54:46 1
17:54:47 1
17:54:48 1
17:54:49 1
17:54:50 0
17:54:51 0
17:54:54 0
17:54:55 0
17:54:56 1
17:54:57 1
17:54:59 1
17:55:01 1
17:55:03 0
17:55:08 0
17:55:10 0
17:55:14 1
17:55:15 1
17:55:20 0
17:55:21 0
17:55:23 0
17:55:24 0
17:55:26 0
17:55:27 0
17:55:30 0
17:55:31 0
17:55:35 1
17:55:37 1
17:55:41 1
17:55:43 1
17:55:45 1
17:55:48 0
17:55:53 0
17:55:56 0
17:55:59 0
17:56:02 0
I would like to calculate the duration within each group where the Step=1 and then find the max duration within those grouped duration.
If I were to use typical first principles I would iteratively identify when the step
changed from 0
to 1
and 1
to 0
, store the timestamps and then subtract them.
But this doesn't seem elegant at all, and I was wondering if there was an easier way to do it in R.
CodePudding user response:
This can be done using dplyr
library(lubridate)
library(dplyr)
time <- c(
"17:54:41",
"17:54:43",
"17:54:45",
"17:54:46",
"17:54:47",
"17:54:48",
"17:54:49",
"17:54:50",
"17:54:51",
"17:54:54",
"17:54:55")
stage <- c(1,1,1,0,0,0,1,1,0,0,0)
df <- data.frame(time, stage)
newstage <- c(-1, diff(stage))!=0
df$stageid <- cumsum(newstage)
df$hms <- strptime(df$time, format = "%H:%M:%S")
time stage stageid hms
1 17:54:41 1 1 2022-07-13 17:54:41
2 17:54:43 1 1 2022-07-13 17:54:43
3 17:54:45 1 1 2022-07-13 17:54:45
4 17:54:46 0 2 2022-07-13 17:54:46
5 17:54:47 0 2 2022-07-13 17:54:47
6 17:54:48 0 2 2022-07-13 17:54:48
7 17:54:49 1 3 2022-07-13 17:54:49
8 17:54:50 1 3 2022-07-13 17:54:50
9 17:54:51 0 4 2022-07-13 17:54:51
10 17:54:54 0 4 2022-07-13 17:54:54
11 17:54:55 0 4 2022-07-13 17:54:55
I assigned an unique stage id to all data, then calculate the duration for each stage ID.
mysummary <- df %>% filter(stage==1) %>%
group_by(stageid) %>%
summarise(duration=difftime(max(df$hms),min(hms), units="secs"))
maxstage <- mysummary[which.max(mysummary$duration),]
# to go back to the original data just use the stage id
result <- df %>% filter(stageid == maxstage$stageid)
time stage stageid hms
1 17:54:41 1 1 2022-07-13 17:54:41
2 17:54:43 1 1 2022-07-13 17:54:43
3 17:54:45 1 1 2022-07-13 17:54:45
CodePudding user response:
library(dplyr)
df %>%
filter(Step != lag(Step, default = Inf)) %>%
mutate(duration = lead(Time_TS) - Time_TS) %>%
filter(Step == 1) %>%
filter(duration == max(duration))
This identifies the start of the occasion where Step == 1 and the longest duration happens before Step changes to 0.
Time_TS Step duration
1 17:55:35 1 13 secs
df <- data.frame(
Time_TS = c("17:54:41","17:54:43","17:54:45",
"17:54:46","17:54:47","17:54:48","17:54:49","17:54:50","17:54:51",
"17:54:54","17:54:55","17:54:56","17:54:57","17:54:59",
"17:55:01","17:55:03","17:55:08","17:55:10","17:55:14",
"17:55:15","17:55:20","17:55:21","17:55:23","17:55:24",
"17:55:26","17:55:27","17:55:30","17:55:31","17:55:35","17:55:37",
"17:55:41","17:55:43","17:55:45","17:55:48","17:55:53",
"17:55:56","17:55:59","17:56:02"),
Step = c(0L,0L,0L,1L,1L,1L,1L,0L,0L,0L,0L,
1L,1L,1L,1L,0L,0L,0L,1L,1L,0L,0L,0L,0L,0L,0L,0L,
0L,1L,1L,1L,1L,1L,0L,0L,0L,0L,0L)
) %>%
mutate(Time_TS = hms::as_hms(Time_TS))