I have an example df:
# Example input: two rows of character columns; var8 is missing in row 1.
df <- data.frame(
  var1 = c("A", "B"),
  var2 = c("C", "D"),
  var3 = c("E", "F"),
  var4 = c("G", "H"),
  var5 = c("I", "J"),
  var6 = c("K", "L"),
  var7 = c("M", "N"),
  var8 = c(NA_character_, "P")
)
and df2 is my desired output:
# Desired output: in the row where var8 is missing, var1 takes var5, var2
# becomes "Z", var3 takes var7, and the row is flagged "Y"; every other row
# is left unchanged and flagged "N".
df2 <- data.frame(
  var1 = c("I", "B"),
  var2 = c("Z", "D"),
  var3 = c("M", "F"),
  var4 = c("G", "H"),
  var5 = c("I", "J"),
  var6 = c("K", "L"),
  var7 = c("M", "N"),
  var8 = c(NA, "P"),
  # Spelled "imputed_flag" to match the mutate() and SAS logic shown below.
  imputed_flag = c("Y", "N")
)
Basically the logic is as follows:
# Current approach: every imputed column repeats the same is.na(var8) test
# inside its own ifelse() call.
df3 <- df %>%
# Where var8 is missing take var5, otherwise keep var1.
mutate(var1 = ifelse(is.na(var8), var5, var1),
# Where var8 is missing use the constant "Z", otherwise keep var2.
var2 = ifelse(is.na(var8), "Z", var2),
# Where var8 is missing take var7, otherwise keep var3.
var3 = ifelse(is.na(var8), var7, var3),
# Record which rows were imputed.
imputed_flag = ifelse(is.na(var8), "Y", "N"))
But is there an easier, more compact way in R to mutate several columns based on a single condition? The condition in this case is: if var8 is missing, impute certain values for other variables. I don't know another way of doing this without a bunch of ifelse statements that all repeat the same condition.
In SAS we can do something like this:
/* When var8 is missing, assign all the imputed values inside one DO block. */
if missing(var8) then do;
var1 = var5;
var2 = "Z";
var3 = var7;
imputed_flag = "Y";
end;
/* When var8 is present, only the flag is set. */
if not missing(var8) then imputed_flag = "N";
Here, when a single if condition is met, several variables can be assigned within one if statement.
I'm looking for an elegant R solution like this SAS one, if possible.
CodePudding user response:
I think simple subset assignment in base R is reasonable here:
# Logical mask of the rows whose var8 is missing.
i <- is.na(df$var8)
# Overwrite var1:var3 on those rows in one assignment: var1 takes var5,
# var2 takes the constant "Z" (upper case, as in the desired output), and
# var3 takes var7.
df[i, 1:3] <- data.frame(var1 = df$var5[i], var2 = "Z", var3 = df$var7[i])
# Append the mask itself as the flag column (logical rather than "Y"/"N").
cbind(df, imputed_flag = i)
#> var1 var2 var3 var4 var5 var6 var7 var8 imputed_flag
#> 1 I Z M G I K M <NA> TRUE
#> 2 B D F H J L N P FALSE
Created on 2022-08-17 by the reprex package (v2.0.1)
CodePudding user response:
Seems like `Map` would be more concise:
# Columns to overwrite, and the mask of rows whose var8 is missing.
nm1 <- paste0("var", 1:3)
i1 <- is.na(df$var8)
# Pair each target column (x) with its replacement source (y): var5, the
# constant 'Z', and var7. Take y where var8 is missing, otherwise keep x.
df[nm1] <- Map(function(x, y) ifelse(i1, y, x),
               df[nm1], c(df['var5'], 'Z', df['var7']))
# 1 + i1 turns FALSE/TRUE into index 1/2, i.e. "N" for kept rows and "Y" for
# imputed rows.
df$imputed_flag <- c("N", "Y")[1 + i1]
Checking against the desired output:
> names(df2)[length(df2)] <- "imputed_flag"
> all.equal(df, df2)
[1] TRUE
Another option (inspired by @Allan Cameron's post) is to create a temporary column in 'df' and then use the row index together with the reordered column names to update the values:
# Columns to overwrite, and the mask of rows whose var8 is missing.
nm1 <- paste0("var", 1:3)
i1 <- is.na(df$var8)
# Temporary column holding the constant, so that all three replacement
# values can be pulled from df by column name.
df$z <- "Z"
# On the masked rows, copy var5 / z / var7 into var1 / var2 / var3.
df[i1, nm1] <- df[i1, c("var5", "z", "var7")]
# 1 + i1 turns FALSE/TRUE into index 1/2, i.e. "N" for kept rows and "Y" for
# imputed rows.
df$imputed_flag <- c("N", "Y")[1 + i1]
# Drop the temporary column again.
df$z <- NULL
With `across2` (from `dplyover`), we could do:
library(dplyover)
library(dplyr)
library(stringr)
df %>%
  # Helper column z holds the constant so it can be paired with var2 below.
  mutate(z = 'Z',
         # Pair var1:var3 (.x) with var5, z, var7 (.y); take .y where var8
         # is missing, otherwise keep .x.
         across2(var1:var3, c(var5, z, var7),
                 ~ case_when(is.na(var8)~ .y, TRUE ~ .x),
                 # Strip everything from the first "_" of the generated
                 # names so the results land in var1:var3.
                 .names_fn = ~ str_remove(.x, "_.*")),
         # 1 + is.na(var8) turns FALSE/TRUE into index 1/2 ("N"/"Y").
         imputed_flag = c("N", "Y")[1 + is.na(var8)])
var1 var2 var3 var4 var5 var6 var7 var8 z imputed_flag
1 I Z M G I K M <NA> Z Y
2 B D F H J L N P Z N