Suppose I have the following (simplified) data frame:
set.seed(123)
df <- data.frame("extracolumns" = rep("random", 6),
"which.x" = c("x1", "x2", "x3", "x2", "x3", "x1"),
"which.y" = c("y2", "y2", "y2", "y1", "y1", "y1"),
"which.z" = c("z3", "z3", "z3", "z1", "z2", "z1"),
"x1" = rnorm(6),
"x2" = rnorm(6),
"x3" = rnorm(6),
"y1" = rnorm(6),
"y2" = rnorm(6),
"y3" = rnorm(6),
"z1" = rnorm(6),
"z2" = rnorm(6),
"z3" = rnorm(6)) %>%
mutate_if(is.numeric, round, 2)
which gives
extracolumns which.x which.y which.z x1 x2 x3 y1 y2 y3 z1 z2 z3
1 random x1 y2 z3 -0.56 0.46 0.40 0.70 -0.63 0.43 0.55 -1.27 0.78
2 random x2 y2 z3 -0.23 -1.27 0.11 -0.47 -1.69 -0.30 -0.06 2.17 -0.08
3 random x3 y2 z3 1.56 -0.69 -0.56 -1.07 0.84 0.90 -0.31 1.21 0.25
4 random x2 y1 z1 0.07 -0.45 1.79 -0.22 0.15 0.88 -0.38 -1.12 -0.03
5 random x3 y1 z2 0.13 1.22 0.50 -1.03 -1.14 0.82 -0.69 -0.40 -0.04
6 random x1 y1 z1 1.72 0.36 -1.97 -0.73 1.25 0.69 -0.21 -0.47 1.37
I want to mutate values in df such that each row contains NA values for the columns that appear in 'which.x', 'which.y' and 'which.z'. Something like:
for(i in 1:nrow(df)) {
df[i, match(df$which.x, colnames(df))[i]] <- NA
df[i, match(df$which.y, colnames(df))[i]] <- NA
df[i, match(df$which.z, colnames(df))[i]] <- NA
}
which gives the desired output:
> df
extracolumns which.x which.y which.z x1 x2 x3 y1 y2 y3 z1 z2 z3
1 random x1 y2 z3 NA 0.46 0.40 0.70 NA 0.43 0.55 -1.27 NA
2 random x2 y2 z3 -0.23 NA 0.11 -0.47 NA -0.30 -0.06 2.17 NA
3 random x3 y2 z3 1.56 -0.69 NA -1.07 NA 0.90 -0.31 1.21 NA
4 random x2 y1 z1 0.07 NA 1.79 NA 0.15 0.88 NA -1.12 -0.03
5 random x3 y1 z2 0.13 1.22 NA NA -1.14 0.82 -0.69 NA -0.04
6 random x1 y1 z1 NA 0.36 -1.97 NA 1.25 0.69 NA -0.47 1.37
I would like to obtain the same desired result, but more efficiently/elegantly -- perhaps using functions like across()
and case_when()
-- but I cannot make it work.
Thank you in advance!
CodePudding user response:
A literal path:
df %>%
mutate(
across(x1:x3, ~ if_else(cur_column() == which.x, .[NA], .)),
across(y1:y3, ~ if_else(cur_column() == which.y, .[NA], .)),
across(z1:z3, ~ if_else(cur_column() == which.z, .[NA], .))
)
# extracolumns which.x which.y which.z x1 x2 x3 y1 y2 y3 z1 z2 z3
# 1 random x1 y2 z3 NA 0.46 0.40 0.70 NA 0.43 0.55 -1.27 NA
# 2 random x2 y2 z3 -0.23 NA 0.11 -0.47 NA -0.30 -0.06 2.17 NA
# 3 random x3 y2 z3 1.56 -0.69 NA -1.07 NA 0.90 -0.31 1.21 NA
# 4 random x2 y1 z1 0.07 NA 1.79 NA 0.15 0.88 NA -1.12 -0.03
# 5 random x3 y1 z2 0.13 1.22 NA NA -1.14 0.82 -0.69 NA -0.04
# 6 random x1 y1 z1 NA 0.36 -1.97 NA 1.25 0.69 NA -0.47 1.37
I use .[NA]
in place of just NA
to be clear on the type of NA
: R has at least 8 different types of NA
, so we need to be explicit about which one; in this case, it'd be NA_real_
.
CodePudding user response:
Using pivot_longer
then pivot_wider
:
df %>%
pivot_longer(starts_with("which")) %>%
mutate(across(x1:z3, ~ replace(.x, cur_column() == value, NA)),
id = cumsum(name == "which.x")) %>%
pivot_wider(id_cols = id, unused_fn = min)
# A tibble: 6 × 14
id which.x which.y which.z extracolumns x1 x2 x3 y1 y2 y3 z1 z2 z3
<int> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 x1 y2 z3 random NA -1.43 1.92 -0.92 NA -0.59 1.42 0.81 NA
2 2 x2 y2 z3 random -0.38 NA 0.56 -0.07 NA 1.29 -0.72 -0.67 NA
3 3 x3 y2 z3 random -1.34 -2.02 NA -0.23 NA 0.74 0.24 -0.55 NA
4 4 x2 y1 z1 random 0.72 NA 1.59 NA -0.09 0.32 NA -0.88 0.1
5 5 x3 y1 z2 random -1.38 -1.83 NA NA -0.07 1.1 0.08 NA 0.67
6 6 x1 y1 z1 random NA 0.61 1.15 NA 1.1 -0.84 NA 0.28 -0.45
Or, with rowwise
:
df %>%
rowwise() %>%
mutate(across(x1:z3, ~ replace(.x, any(across(starts_with("which")) == cur_column()), NA)))
Or with if_any
:
df %>%
mutate(across(x1:z3, \(x){
cur <- cur_column()
replace(x, if_any(starts_with("which"), \(y) y == cur), NA)
}
))
CodePudding user response:
We can do it in one call to across()
and use if_any()
inside ifelse()
:
library(dplyr)
df %>%
mutate(across(x1:z3,
\(x) {
this_col <- cur_column()
ifelse(
if_any(starts_with("which"), ~ .x == this_col),
NA,
x)
}))
#> extracolumns which.x which.y which.z x1 x2 x3 y1 y2 y3
#> 1 random x1 y2 z3 NA 0.46 0.40 0.70 NA 0.43
#> 2 random x2 y2 z3 -0.23 NA 0.11 -0.47 NA -0.30
#> 3 random x3 y2 z3 1.56 -0.69 NA -1.07 NA 0.90
#> 4 random x2 y1 z1 0.07 NA 1.79 NA 0.15 0.88
#> 5 random x3 y1 z2 0.13 1.22 NA NA -1.14 0.82
#> 6 random x1 y1 z1 NA 0.36 -1.97 NA 1.25 0.69
#> z1 z2 z3
#> 1 0.55 -1.27 NA
#> 2 -0.06 2.17 NA
#> 3 -0.31 1.21 NA
#> 4 NA -1.12 -0.03
#> 5 -0.69 NA -0.04
#> 6 NA -0.47 1.37
Created on 2022-09-27 by the reprex package (v2.0.1)