Add a new column that specify multiple columns name that have value-CodePudding

I have a dataframe with multiple columns like the following.

df
ID var_1   var_2   var_3   var_4   var_5   
 A     0       1       0       1       0    
 C     0       0       2       0       1    
 D     0       1       0       2       0    
 E     2       0       1       0       0

How do I add a new column that specify the name of the columns with value more than zero?

Desired output would be something like this:

df
ID var_1   var_2   var_3   var_4   var_5      vars_with_value      
 A     0       1       0       1       0       var_2, var_4    
 C     0       0       2       1       1    var_3, var_4, var_5     
 D     0       1       0       0       0          var_2
 E     2       0       1       0       0       var_1, var_3

Any help would be much appreciated, thanks.

CodePudding user response：

You could use apply over rows, and paste the names:

df$vars_with_value <- apply(df[, -1], 1, function(x) paste0(names(df[, -1])[x > 0], collapse = " "))

Or by a similar logic using dplyr's rowwise:

library(dplyr)

df |>
  rowwise() |>
  mutate(vars_with_value = paste0(names(cur_data() |> select(-ID))[which(c_across(-ID) > 0)], collapse = " ")) |>
  ungroup()

Output:

# A tibble: 4 × 7
  ID    var_1 var_2 var_3 var_4 var_5 vars_with_value
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>          
1 A         0     1     0     1     0 var_2 var_4    
2 C         0     0     2     0     1 var_3 var_5    
3 D         0     1     0     2     0 var_2 var_4    
4 E         2     0     1     0     0 var_1 var_3

CodePudding user response：

additional solution option

library(tidyverse)

df <- data.frame(
  stringsAsFactors = FALSE,
  ID = c("A", "C", "D", "E"),
  var_1 = c(0L, 0L, 0L, 2L),
  var_2 = c(1L, 0L, 1L, 0L),
  var_3 = c(0L, 2L, 0L, 1L),
  var_4 = c(1L, 0L, 2L, 0L),
  var_5 = c(0L, 1L, 0L, 0L)
)

df %>% 
  pivot_longer(-ID) %>% 
  filter(value != 0) %>% 
  group_by(ID) %>% 
  summarise(res = str_c(name, collapse = ", ")) %>% 
  right_join(df, by = "ID")
#> # A tibble: 4 x 7
#>   ID    res          var_1 var_2 var_3 var_4 var_5
#>   <chr> <chr>        <int> <int> <int> <int> <int>
#> 1 A     var_2, var_4     0     1     0     1     0
#> 2 C     var_3, var_5     0     0     2     0     1
#> 3 D     var_2, var_4     0     1     0     2     0
#> 4 E     var_1, var_3     2     0     1     0     0

^{Created on 2022-08-08 by the reprex package (v2.0.1)}