Counter for occasion within a day based on date/time by ID-CodePudding

Now updated with the code for the data frame - Thanks for the tip!

# Example data from the EMA study: one row per recorded occasion, holding the
# participant id plus the date and time of the occasion as character strings.
# The frame is bound to `df` so that code operating on `df` can be run
# directly after this snippet; the trailing `df` still prints it.
df <- data.frame(
  id = rep(c(5L, 6L), times = c(7L, 5L)),
  date = c(
    "2022-03-03", "2022-03-03", "2022-03-03", "2022-03-04",
    "2022-03-04", "2022-03-05", "2022-03-05", "2022-03-04",
    "2022-03-04", "2022-03-05", "2022-03-05", "2022-03-05"
  ),
  time = c(
    "08:19:40", "11:05:31", "20:49:38", "08:28:11",
    "11:02:02", "08:42:56", "11:11:25", "08:16:48",
    "10:59:36", "08:16:35", "11:12:01", "13:23:31"
  ),
  stringsAsFactors = FALSE
)
df

I have the following data set from an EMA study.

id  date            time
1   2022-03-03  08:19:40
1   2022-03-03  11:05:31
1   2022-03-03  20:49:38
1   2022-03-04  08:28:11
1   2022-03-04  11:02:02
1   2022-03-05  08:42:56
1   2022-03-05  11:11:25
2   2022-03-04  08:16:48
2   2022-03-04  10:59:36
2   2022-03-05  08:16:35
2   2022-03-05  11:12:01
2   2022-03-05  13:23:31

I would like to create two variables: "day", which counts the nth day for each id (participant), and "beep", which counts the nth occasion within a day. The desired output looks like this:

id  date            time      day beep
1   2022-03-03  08:19:40      1   1
1   2022-03-03  11:05:31      1   2
1   2022-03-03  20:49:38      1   3
1   2022-03-04  08:28:11      2   1
1   2022-03-04  11:02:02      2   2
1   2022-03-05  08:42:56      3   1
1   2022-03-05  11:11:25      3   2
2   2022-03-04  08:16:48      1   1
2   2022-03-04  10:59:36      1   2
2   2022-03-05  08:16:35      2   1
2   2022-03-05  11:12:01      2   2
2   2022-03-05  13:23:31      2   3

CodePudding user response：

You can use dense_rank() twice with different levels of grouping: first rank date within each id to get "day", then rank time within each id and day to get "beep".

library(dplyr)

# Step 1: within each participant, number the distinct dates -> `day`.
# Step 2: within each participant/day pair, number the distinct times -> `beep`.
# The grouping is dropped at the end so that a plain tibble is returned.
df %>%
  group_by(id) %>%
  mutate(day = dense_rank(date)) %>%
  group_by(id, day) %>%
  mutate(beep = dense_rank(time)) %>%
  ungroup()

# A tibble: 12 × 5
      id date       time       day  beep
   <int> <chr>      <chr>    <int> <int>
 1     1 2022-03-03 08:19:40     1     1
 2     1 2022-03-03 11:05:31     1     2
 3     1 2022-03-03 20:49:38     1     3
 4     1 2022-03-04 08:28:11     2     1
 5     1 2022-03-04 11:02:02     2     2
 6     1 2022-03-05 08:42:56     3     1
 7     1 2022-03-05 11:11:25     3     2
 8     2 2022-03-04 08:16:48     1     1
 9     2 2022-03-04 10:59:36     1     2
10     2 2022-03-05 08:16:35     2     1
11     2 2022-03-05 11:12:01     2     2
12     2 2022-03-05 13:23:31     2     3

Data

# Example data for the pipeline above: 12 occasions from two participants,
# with date and time kept as character strings.
df <- data.frame(
  id = rep(c(1L, 2L), times = c(7L, 5L)),
  date = rep(
    c("2022-03-03", "2022-03-04", "2022-03-05", "2022-03-04", "2022-03-05"),
    times = c(3L, 2L, 2L, 2L, 3L)
  ),
  time = c(
    "08:19:40", "11:05:31", "20:49:38", "08:28:11",
    "11:02:02", "08:42:56", "11:11:25", "08:16:48",
    "10:59:36", "08:16:35", "11:12:01", "13:23:31"
  ),
  stringsAsFactors = FALSE
)

CodePudding user response：

This should help. For more details, check out the documentation for the function row_number().

library(tidyverse)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

# The example data as a single whitespace-delimited text block (header line
# followed by one line per occasion); it is parsed with read.table(text = df).
df <- "id  date            time
1   2022-03-03  08:19:40
1   2022-03-03  11:05:31
1   2022-03-03  20:49:38
1   2022-03-04  08:28:11
1   2022-03-04  11:02:02
1   2022-03-05  08:42:56
1   2022-03-05  11:11:25
2   2022-03-04  08:16:48
2   2022-03-04  10:59:36
2   2022-03-05  08:16:35
2   2022-03-05  11:12:01
2   2022-03-05  13:23:31"

# Parse the text table; `header = TRUE` takes the column names from line one.
df2 <- read.table(text = df, header = TRUE)
# Convert the character dates to Date objects.
df2$date <- ymd(df2$date)
# `day` numbers the distinct dates within each participant. `beep` numbers the
# occasions within each participant/date pair, ordered by time. Grouping by
# `date` alone would let the counter run on across participants who share a
# calendar date, so `id` has to be part of the grouping.
df2 %>%
  group_by(id) %>%
  mutate(day = dense_rank(date)) %>%
  group_by(id, date) %>%
  mutate(beep = row_number(time)) %>%
  ungroup()
#> # A tibble: 12 × 5
#>       id date       time       day  beep
#>    <int> <date>     <chr>    <int> <int>
#>  1     1 2022-03-03 08:19:40     1     1
#>  2     1 2022-03-03 11:05:31     1     2
#>  3     1 2022-03-03 20:49:38     1     3
#>  4     1 2022-03-04 08:28:11     2     1
#>  5     1 2022-03-04 11:02:02     2     2
#>  6     1 2022-03-05 08:42:56     3     1
#>  7     1 2022-03-05 11:11:25     3     2
#>  8     2 2022-03-04 08:16:48     1     1
#>  9     2 2022-03-04 10:59:36     1     2
#> 10     2 2022-03-05 08:16:35     2     1
#> 11     2 2022-03-05 11:12:01     2     2
#> 12     2 2022-03-05 13:23:31     2     3

Created on 2022-05-13 by the reprex package (v2.0.1)