I have a grouped dataset with patients and a condition column (Caffeinefactor) with yes and no. I now want to create a new column that basically divides Patient in before and after the condition was yes for the first time. So if the Patient has the condition no,no,no,no,yes,yes,no,no,yes and I would like to ne column to look like this: before, before, before, before, after, after, after, after, after.
the first rows of my datagram look like this:
DF = structure(list(PATIENT.ID = c(210625L, 210625L, 210625L, 210625L,
210625L, 210625L, 210625L, 210625L, 210625L, 210625L, 210625L,
210625L, 210625L, 210625L, 210625L, 210625L, 210625L, 220909L,
220909L, 220909L, 220909L, 220909L, 220909L, 220909L, 220909L,
220909L, 220909L, 221179L, 221179L, 221179L, 221179L, 221179L,
221179L, 221179L, 221179L, 221179L, 221179L, 221179L, 221179L,
221179L, 221179L, 301705L, 301705L, 301705L, 301705L, 301705L,
301705L, 301705L, 301705L, 301705L, 301705L, 301705L, 301705L,
301705L, 301705L, 301705L, 303926L, 303926L, 303926L, 303926L
), PATIENT.TREATMENT.NUMBER = c(1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 17L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L), Caffeinefactor = c("no",
"no", "no", "no", "yes", "yes", "yes", "no", "yes", "yes", "yes",
"yes", "yes", "no", "no", "yes", "yes", "yes", "yes", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "no", "no", "no", "no",
"no", "no", "no", "no", "no", "no", "yes", "yes", "yes", "yes",
"yes", "no", "no", "no", "no", "no", "no", "yes", "no", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "no", "no", "no", "no"
)), row.names = c(NA, -60L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x7fe7f7002ee0>)
I would have started like this:
daf1 <- df %>%
setDT(df) %>%
dplyr::mutate(row_id = row_number()) %>%
dplyr::group_by(PATIENT.ID) %>%
dplyr::mutate(first_yes = cumsum(Caffeinefactor == "yes"),
last_no = (Caffeinefactor == "no") * row_number(),
last_no = ifelse(first_yes == 0, last_no, 0))
CodePudding user response:
You could do:
library(dplyr)
DF %>%
group_by(PATIENT.ID) %>%
mutate(new_col = c("before", "after")[sign(cumsum(Caffeinefactor == "yes")) 1])
#> # A tibble: 60 x 4
#> # Groups: PATIENT.ID [5]
#> PATIENT.ID PATIENT.TREATMENT.NUMBER Caffeinefactor new_col
#> <int> <int> <chr> <chr>
#> 1 210625 1 no before
#> 2 210625 2 no before
#> 3 210625 4 no before
#> 4 210625 5 no before
#> 5 210625 6 yes after
#> 6 210625 7 yes after
#> 7 210625 8 yes after
#> 8 210625 9 no after
#> 9 210625 10 yes after
#> 10 210625 11 yes after
#> # ... with 50 more rows
Created on 2022-03-23 by the reprex package (v2.0.1)
CodePudding user response:
library(purrr)
DF |>
group_by(PATIENT.ID) |>
mutate(brk = tail(accumulate(Caffeinefactor, ~{
if (.y == "yes")
"after"
else .x
}, .init = "before"),
-1))
##># Groups: PATIENT.ID [5]
##> PATIENT.ID PATIENT.TREATMENT.NUMBER Caffeinefactor brk
##> <int> <int> <chr> <chr>
##> 1 210625 1 no before
##> 2 210625 2 no before
##> 3 210625 4 no before
##> 4 210625 5 no before
##> 5 210625 6 yes after
##> 6 210625 7 yes after
##> 7 210625 8 yes after
##> 8 210625 9 no after
##> 9 210625 10 yes after
##>10 210625 11 yes after
##># … with 50 more rows