Mutate column values to NA whenever column name occurs in different column-CodePudding

Suppose I have the following (simplified) data frame:

set.seed(123)
df <- data.frame("extracolumns" = rep("random", 6),
                 "which.x" = c("x1", "x2", "x3", "x2", "x3", "x1"),
                 "which.y" = c("y2", "y2", "y2", "y1", "y1", "y1"),
                 "which.z" = c("z3", "z3", "z3", "z1", "z2", "z1"),
                 "x1" = rnorm(6),
                 "x2" = rnorm(6),
                 "x3" = rnorm(6),
                 "y1" = rnorm(6),
                 "y2" = rnorm(6),
                 "y3" = rnorm(6),
                 "z1" = rnorm(6),
                 "z2" = rnorm(6),
                 "z3" = rnorm(6)) %>%
  mutate_if(is.numeric, round, 2)

which gives

  extracolumns which.x which.y which.z    x1    x2    x3    y1    y2    y3    z1    z2    z3
1       random      x1      y2      z3 -0.56  0.46  0.40  0.70 -0.63  0.43  0.55 -1.27  0.78
2       random      x2      y2      z3 -0.23 -1.27  0.11 -0.47 -1.69 -0.30 -0.06  2.17 -0.08
3       random      x3      y2      z3  1.56 -0.69 -0.56 -1.07  0.84  0.90 -0.31  1.21  0.25
4       random      x2      y1      z1  0.07 -0.45  1.79 -0.22  0.15  0.88 -0.38 -1.12 -0.03
5       random      x3      y1      z2  0.13  1.22  0.50 -1.03 -1.14  0.82 -0.69 -0.40 -0.04
6       random      x1      y1      z1  1.72  0.36 -1.97 -0.73  1.25  0.69 -0.21 -0.47  1.37

I want to mutate values in df such that each row contains NA values for the columns that appear in 'which.x', 'which.y' and 'which.z'. Something like:


for(i in 1:nrow(df)) {
  
  df[i, match(df$which.x, colnames(df))[i]] <- NA
  df[i, match(df$which.y, colnames(df))[i]] <- NA
  df[i, match(df$which.z, colnames(df))[i]] <- NA
  
}

which gives the desired output:

> df
  extracolumns which.x which.y which.z    x1    x2    x3    y1    y2    y3    z1    z2    z3
1       random      x1      y2      z3    NA  0.46  0.40  0.70    NA  0.43  0.55 -1.27    NA
2       random      x2      y2      z3 -0.23    NA  0.11 -0.47    NA -0.30 -0.06  2.17    NA
3       random      x3      y2      z3  1.56 -0.69    NA -1.07    NA  0.90 -0.31  1.21    NA
4       random      x2      y1      z1  0.07    NA  1.79    NA  0.15  0.88    NA -1.12 -0.03
5       random      x3      y1      z2  0.13  1.22    NA    NA -1.14  0.82 -0.69    NA -0.04
6       random      x1      y1      z1    NA  0.36 -1.97    NA  1.25  0.69    NA -0.47  1.37

I would like to obtain the same desired result, but more efficiently/elegantly -- perhaps using functions like across() and case_when() -- but I cannot make it work.

Thank you in advance!

CodePudding user response：

A literal path:

df %>%
  mutate(
    across(x1:x3, ~ if_else(cur_column() == which.x, .[NA], .)), 
    across(y1:y3, ~ if_else(cur_column() == which.y, .[NA], .)), 
    across(z1:z3, ~ if_else(cur_column() == which.z, .[NA], .))
  ) 
#   extracolumns which.x which.y which.z    x1    x2    x3    y1    y2    y3    z1    z2    z3
# 1       random      x1      y2      z3    NA  0.46  0.40  0.70    NA  0.43  0.55 -1.27    NA
# 2       random      x2      y2      z3 -0.23    NA  0.11 -0.47    NA -0.30 -0.06  2.17    NA
# 3       random      x3      y2      z3  1.56 -0.69    NA -1.07    NA  0.90 -0.31  1.21    NA
# 4       random      x2      y1      z1  0.07    NA  1.79    NA  0.15  0.88    NA -1.12 -0.03
# 5       random      x3      y1      z2  0.13  1.22    NA    NA -1.14  0.82 -0.69    NA -0.04
# 6       random      x1      y1      z1    NA  0.36 -1.97    NA  1.25  0.69    NA -0.47  1.37

I use .[NA] in place of just NA to be clear on the type of NA: R has at least 8 different types of NA, so we need to be explicit about which one; in this case, it'd be NA_real_.

CodePudding user response：

Using pivot_longer then pivot_wider:

df %>% 
  pivot_longer(starts_with("which")) %>% 
  mutate(across(x1:z3, ~ replace(.x, cur_column() == value, NA)),
         id = cumsum(name == "which.x")) %>% 
  pivot_wider(id_cols = id, unused_fn = min)

# A tibble: 6 × 14
     id which.x which.y which.z extracolumns    x1    x2    x3    y1    y2    y3    z1    z2    z3
  <int> <chr>   <chr>   <chr>   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1 x1      y2      z3      random       NA    -1.43  1.92 -0.92 NA    -0.59  1.42  0.81 NA   
2     2 x2      y2      z3      random       -0.38 NA     0.56 -0.07 NA     1.29 -0.72 -0.67 NA   
3     3 x3      y2      z3      random       -1.34 -2.02 NA    -0.23 NA     0.74  0.24 -0.55 NA   
4     4 x2      y1      z1      random        0.72 NA     1.59 NA    -0.09  0.32 NA    -0.88  0.1 
5     5 x3      y1      z2      random       -1.38 -1.83 NA    NA    -0.07  1.1   0.08 NA     0.67
6     6 x1      y1      z1      random       NA     0.61  1.15 NA     1.1  -0.84 NA     0.28 -0.45

Or, with rowwise:

df %>% 
  rowwise() %>% 
  mutate(across(x1:z3, ~ replace(.x, any(across(starts_with("which")) == cur_column()), NA)))

Or with if_any:

df %>% 
  mutate(across(x1:z3, \(x){
    cur <- cur_column()
    replace(x, if_any(starts_with("which"), \(y) y == cur), NA)
    }
    ))

CodePudding user response：

We can do it in one call to across() and use if_any() inside ifelse():

library(dplyr)
  
df %>% 
  mutate(across(x1:z3,
                \(x) {
                  this_col <- cur_column()
                  ifelse(
                    if_any(starts_with("which"), ~ .x == this_col),
                    NA,
                    x)
                  }))

#>   extracolumns which.x which.y which.z    x1    x2    x3    y1    y2    y3
#> 1       random      x1      y2      z3    NA  0.46  0.40  0.70    NA  0.43
#> 2       random      x2      y2      z3 -0.23    NA  0.11 -0.47    NA -0.30
#> 3       random      x3      y2      z3  1.56 -0.69    NA -1.07    NA  0.90
#> 4       random      x2      y1      z1  0.07    NA  1.79    NA  0.15  0.88
#> 5       random      x3      y1      z2  0.13  1.22    NA    NA -1.14  0.82
#> 6       random      x1      y1      z1    NA  0.36 -1.97    NA  1.25  0.69
#>      z1    z2    z3
#> 1  0.55 -1.27    NA
#> 2 -0.06  2.17    NA
#> 3 -0.31  1.21    NA
#> 4    NA -1.12 -0.03
#> 5 -0.69    NA -0.04
#> 6    NA -0.47  1.37

^{Created on 2022-09-27 by the reprex package (v2.0.1)}