Home > database >  How to use case_when and for loop to reorganize my tibbles in a list?
How to use case_when and for loop to reorganize my tibbles in a list?

Time:01-29

I have a list of 15 tibbles. Each tibble is in one of 3 formats, which differ in their number of columns. What I am trying to do is loop over all of the tibbles and change each one depending on its structure. I have the names of the tibbles in each group stored in a separate vector, and I'm trying to use case_when to pick the transformation based on which group vector contains the tibble's name.

I'm pretty new to R and coding in general so any help is appreciated!

library(tidyverse)

# Example data: four tibbles with three, five, five and four columns
df.1 <- tibble(x = 1:5, y = 1, z = 4:8)
df.2 <- tibble(x = 1:8, y = 2, z = 5, r = 7, d = 9)
df.3 <- tibble(x = 1:4, y = 3, z = 8, r = 2, d = 8)
df.4 <- tibble(x = 1:3, y = 1, z = 0, r = 4)
# Name the elements while building the list; the names drive the grouping
df.list <- list(a = df.1, b = df.2, c = df.3, d = df.4)

# Each group vector holds the names of the tibbles that share one layout
group1 <- "a"
group2 <- c("b", "c")
group3 <- "d"

# Here's the for loop I try to run to rearrange each tibble based on its grouping

for(i in seq_along(df.list)){
  # case_when() evaluates every right-hand side below on the current tibble,
  # not only the one whose condition is TRUE, so each select() has to succeed
  # on every tibble in the list.
  df.list[[i]] <- case_when(
    # group1: keep columns 3 and 2 (z, y) and rename them
    names(df.list[i])%in%group1 ~ df.list[[i]] %>% 
      dplyr::select(c(3, 2)) %>%
      rename("yy" = "y", "zz" = "z"),
    # group2: keep columns 3 and 4 (z, r) -- column 4 does not exist in the
    # 3-column tibble "a", which is what the reported error complains about.
    # NOTE(review): this condition indexes df.list[1], unlike the other two
    # branches which use df.list[i] -- looks like a typo for i.
    names(df.list[1])%in%group2 ~ df.list[[i]] %>% 
      dplyr::select(c(3,4)) %>%
      rename("zz" = "z", "rr" = "r"),
    # group3: keep columns 1, 4 and 3 (x, r, z) and rename z
    names(df.list[i])%in%group3 ~ df.list[[i]] %>%
      dplyr::select(c(1, 4, 3)) %>%
      rename("zz"= "z")
  )
}

From this, I expect each of my tibbles to have only the columns I selected for that group and have those columns renamed. I get an error like:

Error in `dplyr::select()`:
! Can't subset columns past the end.
ℹ Location 4 doesn't exist.
ℹ There are only 3 columns.

CodePudding user response:

case_when/ifelse/if_else require all of their arguments to be of the same length and type, which is not the case here. We may need an if/else if/else chain inside the loop instead

library(dplyr)
# Walk the list by position and reshape each tibble according to the group its
# name belongs to. Only the matching branch runs, so select() is never asked
# for a column position the tibble lacks; a name in no group is left as is.
for (pos in seq_along(df.list)) {
  tbl_name <- names(df.list)[pos]
  current <- df.list[[pos]]

  if (tbl_name %in% group1) {
    kept <- dplyr::select(current, c(3, 2))
    current <- rename(kept, "yy" = "y", "zz" = "z")
  } else if (tbl_name %in% group2) {
    kept <- dplyr::select(current, c(3, 4))
    current <- rename(kept, "zz" = "z", "rr" = "r")
  } else if (tbl_name %in% group3) {
    kept <- dplyr::select(current, c(1, 4, 3))
    current <- rename(kept, "zz" = "z")
  }

  df.list[[pos]] <- current
}

-output

> df.list
$a
# A tibble: 5 × 2
     zz    yy
  <int> <dbl>
1     4     1
2     5     1
3     6     1
4     7     1
5     8     1

$b
# A tibble: 8 × 2
     zz    rr
  <dbl> <dbl>
1     5     7
2     5     7
3     5     7
4     5     7
5     5     7
6     5     7
7     5     7
8     5     7

$c
# A tibble: 4 × 2
     zz    rr
  <dbl> <dbl>
1     8     2
2     8     2
3     8     2
4     8     2

$d
# A tibble: 3 × 3
      x     r    zz
  <int> <dbl> <dbl>
1     1     4     0
2     2     4     0
3     3     4     0

It is also possible to do this with a key/mapping dataset

library(tibble)
library(purrr)
# Lookup table with one row per tibble name: col2 holds the column positions
# to keep, col3 the new_name = "old_name" pairs to apply afterwards.
key_dat <- tibble(
  col1 = c("a", "b", "c", "d"),
  col2 = list(c(3, 2), c(3, 4), c(3, 4), c(1, 4, 3)),
  col3 = list(
    c(yy = "y", zz = "z"),
    c(zz = "z", rr = "r"),
    c(zz = "z", rr = "r"),
    c(zz = "z")
  )
)

# imap() passes each tibble together with its list name; the name selects the
# matching row of key_dat. The result is returned (printed), not assigned.
imap(df.list, function(tbl, nm) {
  key_row <- match(nm, key_dat$col1)
  kept <- dplyr::select(tbl, all_of(key_dat$col2[[key_row]]))
  rename(kept, !!!key_dat$col3[[key_row]])
})

-output

$a
# A tibble: 5 × 2
     zz    yy
  <int> <dbl>
1     4     1
2     5     1
3     6     1
4     7     1
5     8     1

$b
# A tibble: 8 × 2
     zz    rr
  <dbl> <dbl>
1     5     7
2     5     7
3     5     7
4     5     7
5     5     7
6     5     7
7     5     7
8     5     7

$c
# A tibble: 4 × 2
     zz    rr
  <dbl> <dbl>
1     8     2
2     8     2
3     8     2
4     8     2

$d
# A tibble: 3 × 3
      x     r    zz
  <int> <dbl> <dbl>
1     1     4     0
2     2     4     0
3     3     4     0

As the question is about case_when, we need to make sure that each of the expressions in case_when can work for every list element. To do that, wrap the column positions in any_of inside select, and do the renaming with rename_with together with any_of

# Reshape each tibble with case_when(). Every right-hand side is evaluated for
# every tibble, so each branch must run without error even on tibbles it is
# not chosen for: any_of() silently skips column positions/names that are
# missing. Each result is wrapped in list() so that every branch yields a
# length-one vector, which is what case_when() needs to combine them.
for (i in seq_along(df.list)) {
  tmp <- df.list[[i]]
  nm1 <- names(df.list)[i]

  tmp1 <- case_when(
    nm1 %in% group1 ~ list(
      tmp %>%
        dplyr::select(any_of(c(3, 2))) %>%
        rename_with(~ strrep(.x, 2), any_of(c("y", "z")))
    ),
    nm1 %in% group2 ~ list(
      tmp %>%
        dplyr::select(any_of(c(3, 4))) %>%
        rename_with(~ strrep(.x, 2), any_of(c("z", "r")))
    ),
    nm1 %in% group3 ~ list(
      tmp %>%
        dplyr::select(any_of(c(1, 4, 3))) %>%
        rename_with(~ strrep(.x, 2), any_of(c("z")))
    ),
    # Fallback: without it a name in no group yields list(NULL), and assigning
    # NULL with [[<- would delete that element from df.list and shift the
    # positions of the ones after it. Keep the tibble unchanged instead.
    TRUE ~ list(tmp)
  )

  # Unwrap the single tibble from the length-one list
  df.list[[i]] <- tmp1[[1]]
}

-output

> df.list
$a
# A tibble: 5 × 2
     zz    yy
  <int> <dbl>
1     4     1
2     5     1
3     6     1
4     7     1
5     8     1

$b
# A tibble: 8 × 2
     zz    rr
  <dbl> <dbl>
1     5     7
2     5     7
3     5     7
4     5     7
5     5     7
6     5     7
7     5     7
8     5     7

$c
# A tibble: 4 × 2
     zz    rr
  <dbl> <dbl>
1     8     2
2     8     2
3     8     2
4     8     2

$d
# A tibble: 3 × 3
      x     r    zz
  <int> <dbl> <dbl>
1     1     4     0
2     2     4     0
3     3     4     0
  • Related