I have a dataframe apcd_hud_ex. I want to take some column names (e.g. x2014_03_15), and change the value of the columns based on the current value of the columns, the parsed date in the column names, and another column in the dataframe (SMOKEFREE_DATE). I can do it in a loop over the columns, but I would really like to know how to do it with dplyr and mutate. Any help would be much appreciated!
apcd_hud_ex = structure(list(studyid = 1:5, SMOKEFREE_DATE = structure(c(16283,
16283, 16071, 16071, 16648), class = "Date"), x2014_03_15 = c(1,
1, 1, 0, 1), x2014_04_15 = c(1, 1, 1, 1, 1), x2014_05_15 = c(1,
1, 1, 1, 1), x2014_06_15 = c(1, 1, 1, 1, 1), x2014_07_15 = c(1,
1, 1, 1, 1), x2014_08_15 = c(1, 1, 1, 1, 1), x2014_09_15 = c(1,
1, 1, 1, 1), x2014_10_15 = c(1, 1, 1, 1, 1), x2014_11_15 = c(1,
1, 1, 1, 1), x2014_12_15 = c(1, 1, 1, 1, 1), x2015_01_15 = c(1,
1, 1, 1, 1)), row.names = c(NA, -5L), class = c("tbl_df", "tbl",
"data.frame"))
> apcd_hud_ex
# A tibble: 5 x 13
studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15
<int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2014-08-01 1 1 1 1 1 1 1 1
2 2 2014-08-01 1 1 1 1 1 1 1 1
3 3 2014-01-01 1 1 1 1 1 1 1 1
4 4 2014-01-01 0 1 1 1 1 1 1 1
5 5 2015-08-01 1 1 1 1 1 1 1 1
# ... with 3 more variables: x2014_11_15 <dbl>, x2014_12_15 <dbl>, x2015_01_15 <dbl>
>
#function for loop
assign_PHRes_enrollIns_fn <- function(SFdate,insValue,insDate){
val = if_else(insValue == 0,
0,
if_else(as.Date(insDate) < as.Date(SFdate,"%Y-%m-%d"),
1,
2))
return(val)
}
#vectorized function
assign_PHRes_enrollIns_fn_vec <- Vectorize(assign_PHRes_enrollIns_fn)
dateCols = names(apcd_hud_ex)[which(names(apcd_hud_ex) == "x2014_03_15"):which(names(apcd_hud_ex) == "x2015_01_15")]
This loop over the column names (dateCols) works:
for(i in 1:length(dateCols)){
dateCol = dateCols[i]
insDate = as.Date(paste0(str_sub(dateCol,2,5),"/",str_sub(dateCol,7,8),"/",str_sub(dateCol,10,11)),"%Y/%m/%d")
apcd_hud_ex[,dateCol] = assign_PHRes_enrollIns_fn_vec(apcd_hud_ex[,"SMOKEFREE_DATE"],apcd_hud_ex[,dateCol],insDate)
}
Now the manipulated dataframe looks like this, which is what I want:
> apcd_hud_ex
# A tibble: 5 x 13
studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15
<int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2014-08-01 1 1 1 1 1 2 2 2
2 2 2014-08-01 1 1 1 1 1 2 2 2
3 3 2014-01-01 2 2 2 2 2 2 2 2
4 4 2014-01-01 0 2 2 2 2 2 2 2
5 5 2015-08-01 1 1 1 1 1 1 1 1
# ... with 3 more variables: x2014_11_15 <dbl>, x2014_12_15 <dbl>, x2015_01_15 <dbl>
However, I would like to learn how to do this with dynamic programming and dplyr. I've tried 2 functions:
newInsValCols_fn1 <- function(df,dateCols){
insDate = as.Date(paste0(str_sub(dateCols,2,5),"/",str_sub(dateCols,7,8),"/",str_sub(dateCols,10,11)),"%Y/%m/%d")
df1 <- df %>%
mutate({{dateCols}} := if_else({{dateCols}} == 0,
0,
if_else(as.Date(insDate) < as.Date(SMOKEFREE_DATE,"%Y-%m-%d"),
1,
2)))
return(df1)
}
newInsValCols_fn1(apcd_hud_ex,dateCols)
Which gives error:
Error: The LHS of `:=` must be a string or a symbol
So I tried using symbols:
newInsValCols_fn2 <- function(df,dateCols){
dateCols_syms = syms(dateCols)
insDate = as.Date(paste0(str_sub(dateCols,2,5),"/",str_sub(dateCols,7,8),"/",str_sub(dateCols,10,11)),"%Y/%m/%d")
df1 <- df %>%
mutate(!!dateCols_syms := if_else({{dateCols}} == 0,
0,
if_else(as.Date(insDate) < as.Date(SMOKEFREE_DATE,"%Y-%m-%d"),
1,
2)))
return(df1)
}
newInsValCols_fn2(apcd_hud_ex,dateCols)
which gives the same error:
Error: The LHS of `:=` must be a string or a symbol
I also tried using !!! instead of !!, but that resulted in the following error:
Error: The LHS of `:=` can't be spliced with `!!!`
Something in my understanding is lacking.
CodePudding user response:
Here's how I'd do it with dplyr
.
library(dplyr)
library(lubridate)
apcd_hud_ex %>%
mutate(across(
starts_with('x'),
~ case_when(. == 0 ~ 0,
ymd(gsub('x', '', cur_column())) < SMOKEFREE_DATE ~ 1,
TRUE ~ 2)
))
#> # A tibble: 5 x 13
#> studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15 x2014_11_15 x2014_12_15 x2015_01_15
#> <int> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2014-08-01 1 1 1 1 1 2 2 2 2 2 2
#> 2 2 2014-08-01 1 1 1 1 1 2 2 2 2 2 2
#> 3 3 2014-01-01 2 2 2 2 2 2 2 2 2 2 2
#> 4 4 2014-01-01 0 2 2 2 2 2 2 2 2 2 2
#> 5 5 2015-08-01 1 1 1 1 1 1 1 1 1 1 1
CodePudding user response:
You can use pivot_longer
to have just one column to modify, which is an alternative to mutate(across())
.
You can use case_when
to have multiple conditions, so you do not need to nest multiple if statements. The value will be the one of the first true statement.
library(tidyverse)
apcd_hud_ex <- structure(list(studyid = 1:5, SMOKEFREE_DATE = structure(c(
16283,
16283, 16071, 16071, 16648
), class = "Date"), x2014_03_15 = c(
1,
1, 1, 0, 1
), x2014_04_15 = c(1, 1, 1, 1, 1), x2014_05_15 = c(
1,
1, 1, 1, 1
), x2014_06_15 = c(1, 1, 1, 1, 1), x2014_07_15 = c(
1,
1, 1, 1, 1
), x2014_08_15 = c(1, 1, 1, 1, 1), x2014_09_15 = c(
1,
1, 1, 1, 1
), x2014_10_15 = c(1, 1, 1, 1, 1), x2014_11_15 = c(
1,
1, 1, 1, 1
), x2014_12_15 = c(1, 1, 1, 1, 1), x2015_01_15 = c(
1,
1, 1, 1, 1
)), row.names = c(NA, -5L), class = c(
"tbl_df", "tbl",
"data.frame"
))
apcd_hud_ex %>%
pivot_longer(starts_with("x")) %>%
mutate(
insDate = name %>% str_remove("^x") %>% str_replace_all("_", "-") %>% as.Date(),
value = case_when(
value == 0 ~ 0,
insDate < SMOKEFREE_DATE ~ 1,
insDate >= SMOKEFREE_DATE ~ 2
)
) %>%
select(-insDate) %>%
pivot_wider()
#> # A tibble: 5 × 13
#> studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15
#> <int> <date> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2014-08-01 1 1 1 1
#> 2 2 2014-08-01 1 1 1 1
#> 3 3 2014-01-01 2 2 2 2
#> 4 4 2014-01-01 0 2 2 2
#> 5 5 2015-08-01 1 1 1 1
#> # … with 7 more variables: x2014_07_15 <dbl>, x2014_08_15 <dbl>,
#> # x2014_09_15 <dbl>, x2014_10_15 <dbl>, x2014_11_15 <dbl>, x2014_12_15 <dbl>,
#> # x2015_01_15 <dbl>
Created on 2022-05-05 by the reprex package (v2.0.0)