df<-data.frame('id'=c(1:6),
'start_date'=c('Mar-22','Feb-22','Jan-22','Dec-21','Nov-21','Oct-21'),
'Jan-22'= c(NA,NA,1,0,0,1),
'Feb-22'= c(NA,1,1,0,0,1))
I would like to figure out if I can nest case_when or if_else functions within the dplyr::mutate function to get a new column. The new column should show if the value of the month column 3 months after start date has a value of 1 or not. I can write a case_when as shown:
df2<-df%>%
mutate(3_month_followup=case_when(start_date=='Oct-21'&'Jan-22'==1~'yes))
This works fine, but only allows for testing one interval before pasting the answer. is there a way to include all 3 month intervals in this function?
Expected outcome:
#id start_date Jan-22 Feb-22 3mofollowup
#1 Mar-22 NA NA NA
#2 Feb-22 NA 1 NA
#3 Jan-22 1 1 NA
#4 Dec-21 0 0 NA
#5 Nov-21 0 0 no
#6 Oct-21 1 1 yes
CodePudding user response:
Okay, this took a little doing. I left the helper columns in to illustrate, but you can drop them, of course. (And probably condense some of them, if you'd like.)
library(dplyr)
library(purrr)
library(lubridate)
date_cols = names(df)[names(df) %>% endsWith("22")]
df %>%
mutate(
follow_up = pmap(select(., all_of(date_cols)), c),
follow_up_date = lapply(follow_up, \(x) date_cols[x == 1]),
start_date_date = as.Date(paste0("01-", start_date), format = "%d-%b-%y"),
`3modate` = start_date_date months(3),
`3mostring` = format(`3modate`, "%b-%y"),
`3mofollowup` = map2(`3mostring`, follow_up_date, ~ifelse(any(.x == .y), "yes", "no")),
`3mofollowup` = ifelse(!`3mostring` %in% date_cols, NA, `3mofollowup`)
)
# id start_date Jan-22 Feb-22 follow_up follow_up_date start_date_date 3modate 3mostring
# 1 1 Mar-22 NA NA NA, NA NA, NA 2022-03-01 2022-06-01 Jun-22
# 2 2 Feb-22 NA 1 NA, 1 NA, Feb-22 2022-02-01 2022-05-01 May-22
# 3 3 Jan-22 1 1 1, 1 Jan-22, Feb-22 2022-01-01 2022-04-01 Apr-22
# 4 4 Dec-21 0 0 0, 0 2021-12-01 2022-03-01 Mar-22
# 5 5 Nov-21 0 0 0, 0 2021-11-01 2022-02-01 Feb-22
# 6 6 Oct-21 1 1 1, 1 Jan-22, Feb-22 2021-10-01 2022-01-01 Jan-22
# 3mofollowup
# 1 NA
# 2 NA
# 3 NA
# 4 NA
# 5 no
# 6 yes
Commented version:
## identify date columns
date_cols = names(df)[names(df) %>% endsWith("22")]
df %>%
mutate(
## turn the date columns into a single list column
follow_up = pmap(select(., all_of(date_cols)), c),
## get the col names where the date cols have a 1
follow_up_date = lapply(follow_up, \(x) date_cols[x == 1]),
## make a Date class version of `start_date`
start_date_date = as.Date(paste0("01-", start_date), format = "%d-%b-%y"),
## add 3 months to the start date so we know when to look
`3modate` = start_date_date months(3),
## convert the 3modate to a string of the same format as the col names
`3mostring` = format(`3modate`, "%b-%y"),
## look for the string
## using any() here rather than just `%in%` to keep the NAs in the result
`3mofollowup` = map2(`3mostring`, follow_up_date, ~ifelse(any(.x == .y), "yes", "no")),
## check that the 3 month followup was possible
`3mofollowup` = ifelse(!`3mostring` %in% date_cols, NA, `3mofollowup`)
)