R: Randomly shuffle the contents of all columns that have a specific column name-CodePudding

In the R data frame below, I would like to randomly shuffle the contents of all columns whose names start with 'b' (while preserving column names and positions):

> df1
  a1 a2 b1 b2
1  1  4 50 55
2  1  4 60 65
3  0  4 70 75
4  0  4 80 85

So one result may be:

> df1
  a1 a2 b1 b2
1  1  4 55 50
2  1  4 60 65
3  0  4 75 70
4  0  4 85 80

CodePudding user response：

Does this work:

# Shuffle, independently within each column, the values of every column of
# `df` whose name starts with "b". Columns are overwritten in place, so the
# column names, order and types are all preserved.
# The pattern is anchored ("^b") so that a name that merely contains a "b"
# (e.g. "ab1") is left alone.
b_cols <- grep("^b", colnames(df))
for (j in b_cols) {
  # Index with sample.int() rather than calling sample(x, length(x)):
  # sample() treats a single number x >= 1 as the range 1:x, which would
  # replace the value of a one-row column with a random integer.
  df[[j]] <- df[[j]][sample.int(nrow(df))]
}
df
  a1 a2 b1 b2
1  1  4 80 55
2  1  4 70 85
3  0  4 50 75
4  0  4 60 65

CodePudding user response：

Tidyverse solution

library(tidyverse)

# Example data from the question, parsed from whitespace-separated text.
df_example <- read_table('a1 a2 b1 b2
 1  4 50 55
  1  4 60 65
  0  4 70 75
 0  4 80 85')

# Permute every column whose name starts with "b", each column independently.
# Values are picked by a random permutation of row positions instead of
# sample(x, length(x)): sample() interprets a single number x >= 1 as 1:x,
# so a one-row table would otherwise get a random integer, not its own value.
shuffled <- df_example |>
  mutate(across(starts_with("b"), \(col) col[sample.int(length(col))]))

shuffled
#> # A tibble: 4 x 4
#>      a1    a2    b1    b2
#>   <dbl> <dbl> <dbl> <dbl>
#> 1     1     4    60    65
#> 2     1     4    50    75
#> 3     0     4    80    85
#> 4     0     4    70    55

Created on 2021-12-10 by the reprex package (v2.0.1)

CodePudding user response：

# Example data frame from the question.
df1 <- data.frame(
  a1 = c(1, 1, 0, 0),
  a2 = rep(4, 4),
  b1 = c(50, 60, 70, 80),
  b2 = c(55, 65, 75, 85)
)

# Keep the original names so we can verify below that they are untouched.
col_names <- names(df1)

# Columns whose names start with "b". startsWith() anchors the match, so a
# name that only contains a "b" somewhere is not selected.
b_cols <- startsWith(names(df1), "b")

if (any(b_cols)) {
  # Matrix copy of the "b" columns so each row can be permuted as one vector.
  # (Columns of different types are coerced to a common type here.)
  b_vals <- as.matrix(df1[b_cols])
  for (i in seq_len(nrow(b_vals))) {
    # Random permutation of the row's values across the "b" columns.
    # rev() would only reverse them, which is deterministic, and
    # sample.int() on positions stays correct with a single "b" column.
    b_vals[i, ] <- b_vals[i, sample.int(ncol(b_vals))]
  }
  # Write back in place: column names and positions stay exactly as they were,
  # even when "b" columns are interleaved with the others.
  df1[b_cols] <- b_vals
}

stopifnot(identical(names(df1), col_names))

# The result is random; call set.seed() first for a reproducible shuffle.

  a1 a2 b1 b2
1  1  4 55 50
2  1  4 65 60
3  0  4 75 70
4  0  4 85 80

CodePudding user response：

Yet another solution:

library(tidyverse)

# Example data frame from the question (integer columns).
df <- data.frame(
  a1 = c(1L, 1L, 0L, 0L),
  a2 = c(4L, 4L, 4L, 4L),
  b1 = c(50L, 60L, 70L, 80L),
  b2 = c(55L, 65L, 75L, 85L)
)

# Fixed seed so the shuffle is reproducible.
set.seed(143)

# For each row, collect the values of the columns starting with "b" into a
# list-column `b`, shuffle them, drop the original b<digits> columns and
# spread `b` back out into b1, b2, ...
shuffled <- df %>%
  rowwise() %>%
  mutate(
    b = across(starts_with("b")) %>%
      list() %>%
      map(~ {
        row_vals <- unlist(.x, use.names = FALSE)
        # Permute by position: sample() on a single number x >= 1 would
        # return a permutation of 1:x instead of the value itself.
        row_vals[sample.int(length(row_vals))]
      })
  ) %>%
  # The pattern must be "b" followed by digits; a stray trailing space in the
  # regex matches no column, leaving b1/b2 in place and making unnest_wider()
  # fail on duplicate names. Anchoring keeps the list-column `b` itself.
  select(!matches("^b\\d+$")) %>%
  unnest_wider(b, names_sep = "")

shuffled

#> # A tibble: 4 × 4
#>      a1    a2    b1    b2
#>   <int> <int> <int> <int>
#> 1     1     4    55    50
#> 2     1     4    65    60
#> 3     0     4    70    75
#> 4     0     4    85    80