Here's my dataset:
# Example input: 20 rows covering two ids, a 0/1 treatment flag and a date
# per row (13 rows for id 1, 7 rows for id 2).
df <- data.frame(
  id = rep(c(1, 2), times = c(13, 7)),
  treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
  # Dates come in consecutive runs, so spell each one once with its run length.
  date = lubridate::ymd(rep(
    c("2019-07-07", "2019-07-06", "2019-07-05",
      "2019-04-20", "2019-04-19", "2019-03-14"),
    times = c(5, 2, 2, 4, 2, 5)
  ))
)
I need to create a variable that counts the rows since the most recent treatment within each id (0 on the treatment row itself and on rows before the first treatment). Like this:
# Desired result: the same input columns plus `dat`, the target counter that
# is 0 up to and including a treatment row and then increases by 1 per row
# until the next treatment or the next id.
df <- data.frame(
  id = rep(c(1, 2), times = c(13, 7)),
  treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
  date = lubridate::ymd(rep(
    c("2019-07-07", "2019-07-06", "2019-07-05",
      "2019-04-20", "2019-04-19", "2019-03-14"),
    times = c(5, 2, 2, 4, 2, 5)
  )),
  dat = c(0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 0, 0, 1, 0, 1, 2, 3)
)
Can you help me with this?
CodePudding user response:
library(dplyr)
df %>%
  # Running treatment count over the whole frame; paired with `id` below so
  # that every (id, grp) block starts either at a treatment row or at the
  # first row of an id.
  mutate(grp = cumsum(treatment)) %>%
  group_by(id, grp) %>%
  mutate(
    # Within a block: flag rows whose previous row was a treatment, keep the
    # flag switched on for the rest of the block, then count the flagged rows.
    dat2 = (treatment > 0) %>%
      lag(default = FALSE) %>%
      cumany() %>%
      cumsum()
  ) %>%
  ungroup()
# # A tibble: 20 x 6
# id treatment date dat grp dat2
# <dbl> <dbl> <date> <dbl> <dbl> <int>
# 1 1 0 2019-07-07 0 0 0
# 2 1 0 2019-07-07 0 0 0
# 3 1 1 2019-07-07 0 1 0
# 4 1 0 2019-07-07 1 1 1
# 5 1 0 2019-07-07 2 1 2
# 6 1 0 2019-07-06 3 1 3
# 7 1 1 2019-07-06 0 2 0
# 8 1 0 2019-07-05 1 2 1
# 9 1 0 2019-07-05 2 2 2
# 10 1 0 2019-04-20 3 2 3
# 11 1 0 2019-04-20 4 2 4
# 12 1 0 2019-04-20 5 2 5
# 13 1 0 2019-04-20 6 2 6
# 14 2 0 2019-04-19 0 2 0
# 15 2 1 2019-04-19 0 3 0
# 16 2 0 2019-03-14 1 3 1
# 17 2 1 2019-03-14 0 4 0
# 18 2 0 2019-03-14 1 4 1
# 19 2 0 2019-03-14 2 4 2
# 20 2 0 2019-03-14 3 4 3
You can of course delete the `grp` column after this (e.g. with `select(-grp)`).
An alternative:
df %>%
  # Each (id, grp) block begins at a treatment row, or at the first row of an
  # id when no treatment has been seen for that id yet.
  group_by(id, grp = cumsum(treatment)) %>%
  mutate(
    dat2 = if (first(treatment)) {
      # Block opens with a treatment: 0 on that row, then 1, 2, ...
      seq_len(n()) - 1
    } else {
      # Block has no leading treatment: the counter stays at 0.
      0
    }
  ) %>%
  ungroup()
CodePudding user response:
Here is a way.
# Reproducible copy of the question's data: id, 0/1 treatment flag and date.
df <- data.frame(
  id = c(rep(1, 13), rep(2, 7)),
  treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
  date = lubridate::ymd(c(
    rep("2019-07-07", 5), rep("2019-07-06", 2), rep("2019-07-05", 2),
    rep("2019-04-20", 4), rep("2019-04-19", 2), rep("2019-03-14", 5)
  ))
)
suppressPackageStartupMessages(library(dplyr))
df %>%
  group_by(id) %>%
  # Step 1: `days` temporarily holds the per-id running count of treatments.
  mutate(days = cumsum(treatment)) %>%
  # Step 2: add that count to the grouping, giving one group per (id, count).
  group_by(days, .add = TRUE) %>%
  # Step 3: overwrite `days` with the counter. `days` is constant inside a
  # group, so each group is either all 0 (no treatment yet) or a 0-based
  # row index starting at the treatment row.
  mutate(days = ifelse(days > 0, seq_len(n()) - 1L, 0)) %>%
  ungroup()
#> # A tibble: 20 × 4
#> id treatment date days
#> <dbl> <dbl> <date> <dbl>
#> 1 1 0 2019-07-07 0
#> 2 1 0 2019-07-07 0
#> 3 1 1 2019-07-07 0
#> 4 1 0 2019-07-07 1
#> 5 1 0 2019-07-07 2
#> 6 1 0 2019-07-06 3
#> 7 1 1 2019-07-06 0
#> 8 1 0 2019-07-05 1
#> 9 1 0 2019-07-05 2
#> 10 1 0 2019-04-20 3
#> 11 1 0 2019-04-20 4
#> 12 1 0 2019-04-20 5
#> 13 1 0 2019-04-20 6
#> 14 2 0 2019-04-19 0
#> 15 2 1 2019-04-19 0
#> 16 2 0 2019-03-14 1
#> 17 2 1 2019-03-14 0
#> 18 2 0 2019-03-14 1
#> 19 2 0 2019-03-14 2
#> 20 2 0 2019-03-14 3
Created on 2022-08-24 by the reprex package (v2.0.1)