Home > Back-end >  Insert group-final time measurement from last observation to set limit
Insert group-final time measurement from last observation to set limit

Time:05-08

This dataframe:

df <- structure(list(Subject = c("A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B"), 
               Timestamp = c("00:00:00.146 - 00:00:00.889", 
                             "00:00:00.146 - 00:00:00.889", "00:00:01.568 - 00:00:02.183", 
                             "00:00:01.568 - 00:00:02.183", "00:00:03.642 - 00:00:04.522", 
                             "00:00:03.642 - 00:00:04.522", "00:00:00.000 - 00:00:00.660", 
                             "00:00:01.247 - 00:00:02.229", "00:00:01.247 - 00:00:02.229", 
                             "00:00:03.697 - 00:00:04.926", "00:00:03.697 - 00:00:04.926"), 
               Starttime_ms = c(0, 146, 889, 1568, 2183, 3642, 0, 660, 1247, 2229, 3697), 
               Endtime_ms = c(146, 889, 1568, 2183, 3642, 4522,660, 1247, 2229, 3697, 4926), 
               Duration = c(146, 743, 679, 615, 1459, 880, 660, 587, 982, 1468, 1229), 
               Event = c("nf", "f", "nf", "f", "nf", "f", "f", "nf", "f", "nf", "f")), 
          row.names = c(NA, -11L), class = c("tbl_df", "tbl", "data.frame"))

contains time measurements of Subjects' ratings of an Event during a timespan from 0 ms to roughly but not exactly 5000 ms. What is missing is the Subject-final measurement up to the exact limit of 5000 ms. How can I insert a single row for that missing observation for each Subject? (The missing value for Event must be nf.)

Expected:

   Subject Timestamp                   Starttime_ms Endtime_ms Duration Event
   <chr>   <chr>                              <dbl>      <dbl>    <dbl> <chr>
 1 A       00:00:00.146 - 00:00:00.889            0        146      146 nf   
 2 A       00:00:00.146 - 00:00:00.889          146        889      743 f    
 3 A       00:00:01.568 - 00:00:02.183          889       1568      679 nf   
 4 A       00:00:01.568 - 00:00:02.183         1568       2183      615 f    
 5 A       00:00:03.642 - 00:00:04.522         2183       3642     1459 nf   
 6 A       00:00:03.642 - 00:00:04.522         3642       4522      880 f 
 7 A       00:00:04.522 - 00:00:05.000         4522       5000      478 nf  # <-- inserted
 8 B       00:00:00.000 - 00:00:00.660            0        660      660 f    
 9 B       00:00:01.247 - 00:00:02.229          660       1247      587 nf   
10 B       00:00:01.247 - 00:00:02.229         1247       2229      982 f    
11 B       00:00:03.697 - 00:00:04.926         2229       3697     1468 nf   
12 B       00:00:03.697 - 00:00:04.926         3697       4926     1229 f  
13 B       00:00:04.926 - 00:00:05.000         4926       5000       74 nf  # <-- inserted 

A dplyr solution is preferred.

EDIT:

Came up with this solution:

library(tidyr)
library(dplyr)
df %>%
  extract(Timestamp, into = c("start", "end"), regex = "(. ) - (. )") %>%
  group_by(Subject) %>%
  mutate(ID = row_number()) %>%
  filter(ID == max(ID)) %>%
  mutate(Duration = 5000 - Endtime_ms,
         Event = sub("f", "nf", Event),
         Starttime_ms = Endtime_ms,
         Endtime_ms = 5000,
         end = "00:00:05.000",
         start = format(as.POSIXct(Starttime_ms/1000, "UTC", origin = "1970-01-01"), "%H:%M:%OS3"),
         Timestamp = str_c(start, " - ", end)) %>%
  # deselect not needed cols:
  select(-c(start, end,ID)) %>%
  # join back to original df:
  bind_rows(.,df) %>%
  # re-order:
  arrange(Subject, Timestamp)

CodePudding user response:

Update:

library(dplyr)
library(tidyr)
library(lubridate)


df %>% 
  separate_rows(Timestamp, sep = " - ") %>% 
  mutate(Timestamp1 = lag(Timestamp, default = "00:00:00.000"), .before=Timestamp) %>% 
  group_by(Subject) %>% 
  slice(c(1:n(),n())) %>% 
  mutate(Timestamp = c(Timestamp[-n()], "00:00:05.000")) %>% 
  mutate(Timestamp1 = c(Timestamp1[-n()], nth(Timestamp, -2))) %>% 
  mutate(Starttime_ms = as.numeric(hms(Timestamp1))*1000,
         Endtime_ms = as.numeric(hms(Timestamp))*1000,
         Duration = Endtime_ms-Starttime_ms) %>% 
  mutate(Timestamp = paste(Timestamp1, Timestamp, sep = " - ")) %>% 
  left_join(df, by="Timestamp") %>% 
  mutate(Event.y = replace_na(Event.y, "nf")) %>% 
  select(1, 3:6, Event.y) %>% 
  filter(Endtime_ms.x !=0) %>% 
  rename_with(., ~str_replace(., '\\.\\w', ''))
     Subject Timestamp                   Starttime_ms Endtime_ms Duration Event
   <chr>   <chr>                              <dbl>      <dbl>    <dbl> <chr>
 1 A       00:00:00.000 - 00:00:00.146            0        146      146 nf   
 2 A       00:00:00.146 - 00:00:00.889          146        889      743 f    
 3 A       00:00:00.889 - 00:00:01.568          889       1568      679 nf   
 4 A       00:00:01.568 - 00:00:02.183         1568       2183      615 f    
 5 A       00:00:02.183 - 00:00:03.642         2183       3642     1459 nf   
 6 A       00:00:03.642 - 00:00:04.522         3642       4522      880 f    
 7 A       00:00:04.522 - 00:00:05.000         4522       5000      478 nf   
 8 B       00:00:00.000 - 00:00:00.660            0        660      660 f    
 9 B       00:00:00.660 - 00:00:01.247          660       1247      587 nf   
10 B       00:00:01.247 - 00:00:02.229         1247       2229      982 f    
11 B       00:00:02.229 - 00:00:03.697         2229       3697     1468 nf   
12 B       00:00:03.697 - 00:00:04.926         3697       4926     1229 f    
13 B       00:00:04.926 - 00:00:05.000         4926       5000       74 nf 
  • Related