Now updated with the code for the data frame - Thanks for the tip!
# Reproducible copy of the example data (dput() output), bound to `df` so it
# can be pasted and used directly. The ids are 1 and 2 so that the object
# matches the table and the desired output shown in the question.
df <- structure(
  list(
    id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L),
    date = c(
      "2022-03-03", "2022-03-03", "2022-03-03", "2022-03-04",
      "2022-03-04", "2022-03-05", "2022-03-05", "2022-03-04",
      "2022-03-04", "2022-03-05", "2022-03-05", "2022-03-05"
    ),
    time = c(
      "08:19:40", "11:05:31", "20:49:38", "08:28:11",
      "11:02:02", "08:42:56", "11:11:25", "08:16:48",
      "10:59:36", "08:16:35", "11:12:01", "13:23:31"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -12L)
)
I have the following data set from an EMA study.
id date time
1 2022-03-03 08:19:40
1 2022-03-03 11:05:31
1 2022-03-03 20:49:38
1 2022-03-04 08:28:11
1 2022-03-04 11:02:02
1 2022-03-05 08:42:56
1 2022-03-05 11:11:25
2 2022-03-04 08:16:48
2 2022-03-04 10:59:36
2 2022-03-05 08:16:35
2 2022-03-05 11:12:01
2 2022-03-05 13:23:31
I would like to create two variables: "day", which counts the nth day for each id (participant), and "beep", which counts the nth occasion within a day. The desired output looks like this:
id date time day beep
1 2022-03-03 08:19:40 1 1
1 2022-03-03 11:05:31 1 2
1 2022-03-03 20:49:38 1 3
1 2022-03-04 08:28:11 2 1
1 2022-03-04 11:02:02 2 2
1 2022-03-05 08:42:56 3 1
1 2022-03-05 11:11:25 3 2
2 2022-03-04 08:16:48 1 1
2 2022-03-04 10:59:36 1 2
2 2022-03-05 08:16:35 2 1
2 2022-03-05 11:12:01 2 2
2 2022-03-05 13:23:31 2 3
CodePudding user response:
You can use dense_rank() to rank date within each id, and then time within each id-and-day group (i.e. grouping at two different depths).
library(dplyr)

# `day`  = dense rank of the date within each id (participant).
# `beep` = dense rank of the time within each id-and-day combination.
df %>%
  group_by(id) %>%
  mutate(day = dense_rank(date)) %>%
  group_by(id, day) %>%
  mutate(beep = dense_rank(time)) %>%
  ungroup()
# A tibble: 12 × 5
id date time day beep
<int> <chr> <chr> <int> <int>
1 1 2022-03-03 08:19:40 1 1
2 1 2022-03-03 11:05:31 1 2
3 1 2022-03-03 20:49:38 1 3
4 1 2022-03-04 08:28:11 2 1
5 1 2022-03-04 11:02:02 2 2
6 1 2022-03-05 08:42:56 3 1
7 1 2022-03-05 11:11:25 3 2
8 2 2022-03-04 08:16:48 1 1
9 2 2022-03-04 10:59:36 1 2
10 2 2022-03-05 08:16:35 2 1
11 2 2022-03-05 11:12:01 2 2
12 2 2022-03-05 13:23:31 2 3
Data
# Example data: 7 observations for participant 1 and 5 for participant 2.
# Dates and times are kept as character strings.
df <- data.frame(
  id = rep(1:2, times = c(7L, 5L)),
  date = c(
    "2022-03-03", "2022-03-03", "2022-03-03",
    "2022-03-04", "2022-03-04",
    "2022-03-05", "2022-03-05",
    "2022-03-04", "2022-03-04",
    "2022-03-05", "2022-03-05", "2022-03-05"
  ),
  time = c(
    "08:19:40", "11:05:31", "20:49:38",
    "08:28:11", "11:02:02",
    "08:42:56", "11:11:25",
    "08:16:48", "10:59:36",
    "08:16:35", "11:12:01", "13:23:31"
  ),
  stringsAsFactors = FALSE
)
CodePudding user response:
This should help. For more details, check out the documentation for the function row_number().
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Example data as whitespace-separated text; parsed with read.table() below.
df <- "id date time
1 2022-03-03 08:19:40
1 2022-03-03 11:05:31
1 2022-03-03 20:49:38
1 2022-03-04 08:28:11
1 2022-03-04 11:02:02
1 2022-03-05 08:42:56
1 2022-03-05 11:11:25
2 2022-03-04 08:16:48
2 2022-03-04 10:59:36
2 2022-03-05 08:16:35
2 2022-03-05 11:12:01
2 2022-03-05 13:23:31"
df2 <- read.table(text = df, header = TRUE)
df2$date <- ymd(df2$date)
# Number the days within each participant first, then the occasions within
# each participant-day. Grouping by `date` alone would let the beep counter
# run on across participants that share a calendar date.
# row_number() numbers rows in their existing order, so the rows are expected
# to be sorted by time within each id and date (as they are here).
df2 %>%
  group_by(id) %>%
  mutate(day = dense_rank(date)) %>%
  group_by(date, .add = TRUE) %>%
  mutate(beep = row_number()) %>%
  ungroup()
#> # A tibble: 12 × 5
#> id date time day beep
#> <int> <date> <chr> <int> <int>
#> 1 1 2022-03-03 08:19:40 1 1
#> 2 1 2022-03-03 11:05:31 1 2
#> 3 1 2022-03-03 20:49:38 1 3
#> 4 1 2022-03-04 08:28:11 2 1
#> 5 1 2022-03-04 11:02:02 2 2
#> 6 1 2022-03-05 08:42:56 3 1
#> 7 1 2022-03-05 11:11:25 3 2
#> 8 2 2022-03-04 08:16:48 1 1
#> 9 2 2022-03-04 10:59:36 1 2
#> 10 2 2022-03-05 08:16:35 2 1
#> 11 2 2022-03-05 11:12:01 2 2
#> 12 2 2022-03-05 13:23:31 2 3
Created on 2022-05-13 by the reprex package (v2.0.1)