Create a variable days after treatment-CodePudding

Here's my dataset:

# Sample data for the question: 13 rows for id 1 followed by 7 rows for id 2.
# Repeated ids and dates are written as run lengths instead of being typed out.
df <- data.frame(
  id = rep(c(1, 2), times = c(13, 7)),
  treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
  date = lubridate::ymd(rep(
    c("2019-07-07", "2019-07-06", "2019-07-05",
      "2019-04-20", "2019-04-19", "2019-03-14"),
    times = c(5, 2, 2, 4, 2, 5)
  ))
)

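I need to create a variable which reflects the days after treatment for each id, i.e. a counter that is 0 before the first treatment, resets to 0 on every treatment row, and increases by 1 on each following row. Like this: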

# Same sample data plus the expected result column `dat`.
# `dat` is spelled out in full because it is the target the answers must match.
df <- data.frame(
  id = rep(c(1, 2), times = c(13, 7)),
  treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
  date = lubridate::ymd(rep(
    c("2019-07-07", "2019-07-06", "2019-07-05",
      "2019-04-20", "2019-04-19", "2019-03-14"),
    times = c(5, 2, 2, 4, 2, 5)
  )),
  dat = c(0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 0, 0, 1, 0, 1, 2, 3)
)

Can you help me with this?

CodePudding user response：

library(dplyr)

# Number the rows that follow each treatment, restarting at every treatment row.
df %>%
  # Running total of `treatment` over the whole frame; combined with `id`
  # below it identifies one block per (id, treatment occurrence).
  mutate(grp = cumsum(treatment)) %>%
  group_by(id, grp) %>%
  # Inside a block: once the previous row was a treatment, every row from
  # there on is flagged TRUE, and the running sum of the flags is the counter.
  # Blocks that contain no treatment stay at 0.
  mutate(dat2 = cumsum(cumany(lag(treatment, default = 0) > 0))) %>%
  ungroup()
# # A tibble: 20 x 6
#       id treatment date         dat   grp  dat2
#    <dbl>     <dbl> <date>     <dbl> <dbl> <int>
#  1     1         0 2019-07-07     0     0     0
#  2     1         0 2019-07-07     0     0     0
#  3     1         1 2019-07-07     0     1     0
#  4     1         0 2019-07-07     1     1     1
#  5     1         0 2019-07-07     2     1     2
#  6     1         0 2019-07-06     3     1     3
#  7     1         1 2019-07-06     0     2     0
#  8     1         0 2019-07-05     1     2     1
#  9     1         0 2019-07-05     2     2     2
# 10     1         0 2019-04-20     3     2     3
# 11     1         0 2019-04-20     4     2     4
# 12     1         0 2019-04-20     5     2     5
# 13     1         0 2019-04-20     6     2     6
# 14     2         0 2019-04-19     0     2     0
# 15     2         1 2019-04-19     0     3     0
# 16     2         0 2019-03-14     1     3     1
# 17     2         1 2019-03-14     0     4     0
# 18     2         0 2019-03-14     1     4     1
# 19     2         0 2019-03-14     2     4     2
# 20     2         0 2019-03-14     3     4     3

You can of course delete grp after this.

An alternative:

# Same blocks as above, but without lag(): a block is numbered 0, 1, 2, ...
# only when its first row is a treatment row; otherwise the whole block is 0.
df %>%
  mutate(grp = cumsum(treatment)) %>%
  group_by(id, grp) %>%
  mutate(dat2 = if (first(treatment) != 0) row_number() - 1 else 0) %>%
  ungroup()

CodePudding user response：

Here is a way.

# Reproducible input: 13 rows for id 1, then 7 rows for id 2.
# Ids and dates are given as (value, run length) pairs via rep().
df <- data.frame(
  id = rep(c(1, 2), times = c(13, 7)),
  treatment = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0),
  date = lubridate::ymd(rep(
    c("2019-07-07", "2019-07-06", "2019-07-05",
      "2019-04-20", "2019-04-19", "2019-03-14"),
    times = c(5, 2, 2, 4, 2, 5)
  ))
)

suppressPackageStartupMessages(library(dplyr))

# Two passes over `days`: first it holds the per-id running total of
# `treatment` (used only to split each id into blocks), then it is
# overwritten with the position inside the block.
df %>%
  group_by(id) %>%
  mutate(days = cumsum(treatment)) %>%
  # Keep the `id` grouping and add the running total as a second key.
  group_by(days, .add = TRUE) %>%
  # Blocks before the first treatment (running total not above 0) stay at 0;
  # every other block is numbered 0, 1, 2, ... starting at its first row.
  mutate(days = ifelse(days <= 0, 0, row_number() - 1L)) %>%
  ungroup()
#> # A tibble: 20 × 4
#>       id treatment date        days
#>    <dbl>     <dbl> <date>     <dbl>
#>  1     1         0 2019-07-07     0
#>  2     1         0 2019-07-07     0
#>  3     1         1 2019-07-07     0
#>  4     1         0 2019-07-07     1
#>  5     1         0 2019-07-07     2
#>  6     1         0 2019-07-06     3
#>  7     1         1 2019-07-06     0
#>  8     1         0 2019-07-05     1
#>  9     1         0 2019-07-05     2
#> 10     1         0 2019-04-20     3
#> 11     1         0 2019-04-20     4
#> 12     1         0 2019-04-20     5
#> 13     1         0 2019-04-20     6
#> 14     2         0 2019-04-19     0
#> 15     2         1 2019-04-19     0
#> 16     2         0 2019-03-14     1
#> 17     2         1 2019-03-14     0
#> 18     2         0 2019-03-14     1
#> 19     2         0 2019-03-14     2
#> 20     2         0 2019-03-14     3

Created on 2022-08-24 by the reprex package (v2.0.1)