Home > Enterprise >  Replace values ones for row ids in multiple columns with dplyr case_when
Replace values ones for row ids in multiple columns with dplyr case_when

Time:09-17

I have the following dataframe:

have <- structure(list(a1 = c(1, 1, 0, 1, 1, 1, 1, 1, 1, 1), a2 = c(1, 
1, 0, 1, 1, 0, 1, 1, 1, 1), b1 = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 
0), b2 = c(1, 1, 0, 0, 0, 1, 0, 0, 0, 0), c1 = c(0, 0, 0, 0, 
1, 0, 0, 0, 0, 0), c2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), id = 1:10), 
row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))

For variables a1:c2, whenever value is 1, I need to replace it for the id value (of the same row). Whenever value is zero, I need to replace it for missing (NA_real).

Preferably with dplyr and case_when, that I am more used to.

So, the expected output is:

want <- structure(list(a1 = c(1, 2, NA, 4, 5, 6, 7, 8, 9, 10), a2 = c(1, 
2, NA, 4, 5, NA, 7, 8, 9, 10), b1 = c(NA, NA, NA, NA, NA, 6, NA, NA, NA, 
NA), b2 = c(1, 2, NA, NA, NA, 6, NA, NA, NA, NA), c1 = c(NA, NA, NA, NA, 
5, NA, NA, NA, NA, NA), c2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), id = 1:10), 
row.names = c(NA,-10L), class = c("tbl_df", "tbl", "data.frame"))

What I have tried is:

library(dplyr)

want <- have %>%
   mutate(across(c(a1:c2), 
         .fns = ~ case_when(. == 1 ~ id,
                            T ~ NA_real_))
   )

But it doesnt work, I get the error:

Error in `mutate()`:
! Problem while computing `..1 = across(c(a1:c2), .fns = ~case_when(. == 1 ~ id,
  T ~ NA_real_))`.
Caused by error in `across()`:
! Problem while computing column `a1`.
Caused by error in `` names(message) <- `*vtmp*` ``:
! 'names' attribute [1] must be the same length as the vector [0]
Backtrace:
  1. have %>% ...
  8. dplyr::case_when(a1 == 1 ~ id, T ~ NA_real_)
  9. dplyr:::replace_with(...)
 10. dplyr:::check_type(val, x, name, error_call = error_call)
 11. rlang::abort(msg, call = error_call)
     ...
 16. rlang::cnd_message(c)
 17. rlang:::cnd_message_format(cnd, ...)
 18. rlang (local) cli_format(glue_escape(lines))
 19. rlang:::.rlang_cli_format(x, cli::format_error)
 20. cli::cli_format(x, .envir = emptyenv())

CodePudding user response:

Could be an issue with the data types; I can reproduce the error using dplyr_1.0.9 on macOS with NA_real_ and 'fix it' using NA_integer_, i.e.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
have <- structure(list(a1 = c(1, 1, 0, 1, 1, 1, 1, 1, 1, 1),
                       a2 = c(1, 1, 0, 1, 1, 0, 1, 1, 1, 1), 
                       b1 = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0), 
                       b2 = c(1, 1, 0, 0, 0, 1, 0, 0, 0, 0),
                       c1 = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0), 
                       c2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                       id = 1:10),
                  row.names = c(NA, -10L), 
                  class = c("tbl_df", "tbl", "data.frame"))

want <- structure(list(a1 = c(1, 2, NA, 4, 5, 6, 7, 8, 9, 10),
                       a2 = c(1, 2, NA, 4, 5, NA, 7, 8, 9, 10),
                       b1 = c(NA, NA, NA, NA, NA, 6, NA, NA, NA, NA), 
                       b2 = c(1, 2, NA, NA, NA, 6, NA, NA, NA, NA), 
                       c1 = c(NA, NA, NA, NA, 5, NA, NA, NA, NA, NA),
                       c2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), id = 1:10), 
                  row.names = c(NA,-10L), class = c("tbl_df", "tbl", "data.frame"))
want
#> # A tibble: 10 × 7
#>       a1    a2    b1    b2    c1 c2       id
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <int>
#>  1     1     1    NA     1    NA NA        1
#>  2     2     2    NA     2    NA NA        2
#>  3    NA    NA    NA    NA    NA NA        3
#>  4     4     4    NA    NA    NA NA        4
#>  5     5     5    NA    NA     5 NA        5
#>  6     6    NA     6     6    NA NA        6
#>  7     7     7    NA    NA    NA NA        7
#>  8     8     8    NA    NA    NA NA        8
#>  9     9     9    NA    NA    NA NA        9
#> 10    10    10    NA    NA    NA NA       10

have %>%
  mutate(across(c(a1:c2), 
                .fns = ~ case_when(. == 1 ~ id,
                                   T ~ NA_real_))
  )
#> Error in `mutate()`:
#> ! Problem while computing `..1 = across(c(a1:c2), .fns = ~case_when(. ==
#>   1 ~ id, T ~ NA_real_))`.
#> Caused by error in `across()`:
#> ! Problem while computing column `a1`.
#> Caused by error in `` names(message) <- `*vtmp*` ``:
#> ! 'names' attribute [1] must be the same length as the vector [0]

have %>%
  mutate(across(c(a1:c2), 
                .fns = ~ case_when(. == 1 ~ id,
                                   T ~ NA_integer_))
  )
#> # A tibble: 10 × 7
#>       a1    a2    b1    b2    c1    c2    id
#>    <int> <int> <int> <int> <int> <int> <int>
#>  1     1     1    NA     1    NA    NA     1
#>  2     2     2    NA     2    NA    NA     2
#>  3    NA    NA    NA    NA    NA    NA     3
#>  4     4     4    NA    NA    NA    NA     4
#>  5     5     5    NA    NA     5    NA     5
#>  6     6    NA     6     6    NA    NA     6
#>  7     7     7    NA    NA    NA    NA     7
#>  8     8     8    NA    NA    NA    NA     8
#>  9     9     9    NA    NA    NA    NA     9
#> 10    10    10    NA    NA    NA    NA    10


sessionInfo()
#> R version 4.1.3 (2022-03-10)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur/Monterey 10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] dplyr_1.0.9
#> 
#> loaded via a namespace (and not attached):
#>  [1] pillar_1.8.1      compiler_4.1.3    highr_0.9         R.methodsS3_1.8.2
#>  [5] R.utils_2.12.0    tools_4.1.3       digest_0.6.29     evaluate_0.15    
#>  [9] lifecycle_1.0.1   tibble_3.1.8      R.cache_0.16.0    pkgconfig_2.0.3  
#> [13] rlang_1.0.5       reprex_2.0.1      cli_3.3.0         DBI_1.1.3        
#> [17] rstudioapi_0.13   yaml_2.3.5        xfun_0.31         fastmap_1.1.0    
#> [21] withr_2.5.0       styler_1.7.0      stringr_1.4.0     knitr_1.39       
#> [25] generics_0.1.3    fs_1.5.2          vctrs_0.4.1       tidyselect_1.1.2 
#> [29] glue_1.6.2        R6_2.5.1          fansi_1.0.3       rmarkdown_2.14   
#> [33] purrr_0.3.4       magrittr_2.0.3    ellipsis_0.3.2    htmltools_0.5.3  
#> [37] assertthat_0.2.1  utf8_1.2.2        stringi_1.7.8     R.oo_1.25.0

Created on 2022-09-16 by the reprex package (v2.0.1)

  • Related