I have a dataset like the following simplified one:
# Example data: two measurements (suffix _1 / _2) for each of x and y,
# with missing values scattered through both members of every pair.
x_1 <- c( 1, NA,  2,  3, NA,  4,  5)
x_2 <- c( 2,  1, NA, NA, NA,  4,  6)
y_1 <- c( 2,  4,  6,  8, NA, 10, NA)
y_2 <- c(NA,  4, NA,  8, 10, 11, 13)

# Assemble the four vectors into one data frame, naming each column explicitly.
df <- data.frame(x_1 = x_1, x_2 = x_2, y_1 = y_1, y_2 = y_2)
x_1 x_2 y_1 y_2
1 1 2 2 NA
2 NA 1 4 4
3 2 NA 6 NA
4 3 NA 8 8
5 NA NA NA 10
6 4 4 10 11
7 5 6 NA 13
The goal is to coalesce each pair of corresponding variables (x_1/x_2 and y_1/y_2) into a single variable (x and y), and to set the result to NA wherever both values are present but not the same (e.g. the first row, where x_1 is 1 and x_2 is 2). For x, I did this with the following:
# Merge x_1 and x_2 into a single column x:
#   1. take the first non-missing value of the pair;
#   2. blank it (NA) wherever x_2 is also present and disagrees with it;
#   3. drop the two source columns.
df <- df %>%
  mutate(
    x = coalesce(x_1, x_2),
    x = ifelse(!is.na(x) & !is.na(x_2) & x != x_2, NA, x)
  ) %>%
  select(!c(x_1, x_2))
Now, I have to do this with 21 variables, so I thought I would put the variable names in a character vector and feed them through the pipeline with a for loop like this:
# Attempt to repeat the single-pair pipeline for every variable stem.
# Stems of the column pairs to merge (x_1/x_2 and y_1/y_2).
cols <- c("x", "y")
for(i in cols){
# Build the two column names for this stem as character strings,
# e.g. "x_1" and "x_2" when i is "x".
var_1 <- paste(i, "1", sep = "_")
var_2 <- paste(i, "2", sep = "_")
df <- df %>%
# NOTE(review): `i` on the left-hand side is a bare name, and var_1 / var_2
# hold strings rather than column references. This appears to be why the
# result is one column literally named "i" instead of new columns "x" and
# "y" -- confirm against dplyr's data-masking rules.
mutate(i = coalesce(var_1, var_2)) %>%
mutate(i = ifelse(!is.na(i) &
!is.na(var_2) &
i != var_2,
NA,
i)) %>%
# Drops the two source columns named by the strings stored in var_1 / var_2.
select(!c(var_1, var_2))
}
What happens is that the code runs, but instead of the new variables "x" and "y" there is only a single variable named "i", containing nothing but NA values. It seems as if R does not recognise the "i" in the pipeline as the loop variable; however, it does recognise "var_1" and "var_2" (because those columns are being removed from the dataset).
Does anyone know why that is and how I can fix it?
Thanks a lot in advance.
CodePudding user response:
#' Merge the column pair `<var>_1` / `<var>_2` of a data frame into `<var>`.
#'
#' The new column holds the first non-missing value of the pair. Rows where
#' both values are present but differ are set to `NA`. The two source columns
#' are dropped from the result.
#'
#' @param x A data frame containing the columns `<var>_1` and `<var>_2`.
#' @param var A single string: the stem shared by the two source columns and
#'   the name of the merged output column.
#' @return `x` with `<var>_1` and `<var>_2` replaced by one column `<var>`.
fun <- function(x, var) {
  name_1 <- paste0(var, "_1")
  name_2 <- paste0(var, "_2")
  x %>%
    mutate(
      # replace() keeps the type and attributes of the coalesced column.
      # `%in% TRUE` maps NA comparisons to FALSE, so only pairs where both
      # values are present and unequal are blanked.
      !!var := replace(
        coalesce(.data[[name_1]], .data[[name_2]]),
        (.data[[name_1]] != .data[[name_2]]) %in% TRUE,
        NA
      )
    ) %>%
    # all_of() selects strictly by the names held in the character vector,
    # instead of relying on tidyselect looking up variables in the calling
    # environment.
    select(!all_of(c(name_1, name_2)))
}
# Stems of the column pairs to merge.
cols <- c("x", "y")
# Thread the data frame through `fun` once per stem, left to right: the
# result of handling one stem is the input for the next.
Reduce(f = fun, x = cols, init = df)
# x y
# 1 NA 2
# 2 1 4
# 3 2 6
# 4 3 8
# 5 NA 10
# 6 4 NA
# 7 NA 13
CodePudding user response:
If you want to avoid rlang:
library(tidyverse)
library(stringr)
# Example data: two measurements (suffix _1 / _2) for each of x and y,
# with missing values scattered through both members of every pair.
x_1 <- c( 1, NA,  2,  3, NA,  4,  5)
x_2 <- c( 2,  1, NA, NA, NA,  4,  6)
y_1 <- c( 2,  4,  6,  8, NA, 10, NA)
y_2 <- c(NA,  4, NA,  8, 10, 11, 13)

# Assemble the four vectors into one data frame, naming each column explicitly.
df <- data.frame(x_1 = x_1, x_2 = x_2, y_1 = y_1, y_2 = y_2)
# Collapse the first two columns of `d` into a single vector.
#
# Each element is the first non-missing value of the pair; where both values
# are present but differ, the element is set to NA.
my_coalesce <- function(d) {
  first <- pull(d, 1)
  second <- pull(d, 2)
  merged <- coalesce(first, second)
  # which() keeps only positions where the comparison is TRUE, so pairs with
  # a missing member are left untouched.
  replace(merged, which(first != second), NA)
}
# Stems of the column pairs to merge.
cols <- c("x", "y")
# For every stem, pull out its <stem>_1 / <stem>_2 pair, merge it with
# my_coalesce(), and collect the merged vectors as the columns of a tibble.
map(cols, function(stem) {
  df %>%
    # Select the pair by exact name. A prefix match such as starts_with("x")
    # would also pick up any other column whose name begins with the stem,
    # which becomes likely once there are many variables.
    select(all_of(str_c(stem, "_", 1:2))) %>%
    my_coalesce()
}) %>%
  set_names(cols) %>%
  as_tibble()