I have a list of 15 tibbles. Each tibble is in one of 3 formats with different numbers of columns. I am trying to write a loop over all of the tibbles that changes each one depending on its structure. The names of the tibbles in each group are stored in separate vectors, and I'm trying to use case_when to select the transformation to apply when a tibble's name is in a given group vector.
I'm pretty new to R and coding in general so any help is appreciated!
library(tidyverse)
#Example of what my data looks like
# Sample tibbles: one with 3 columns, two with 5 columns, one with 4 columns
df.1 <- tibble(x = seq_len(5), y = 1, z = 4:8)
df.2 <- tibble(x = seq_len(8), y = 2, z = 5, r = 7, d = 9)
df.3 <- tibble(x = seq_len(4), y = 3, z = 8, r = 2, d = 8)
df.4 <- tibble(x = seq_len(3), y = 1, z = 0, r = 4)
# Name the list elements at construction time
df.list <- list(a = df.1, b = df.2, c = df.3, d = df.4)
# Each group vector holds the list names of the tibbles sharing one layout
group1 <- "a"
group2 <- c("b", "c")
group3 <- "d"
#Here's the for loop I try to run to rearrange each tibble based on its grouping
# Attempt to reshape each tibble in df.list according to the group its name
# belongs to, using case_when() to choose the select()/rename() pipeline.
# This is the loop that raises the "Can't subset columns past the end" error
# quoted below: every branch's right-hand side has to work for every tibble,
# so select() by position fails on tibbles with fewer columns.
for(i in seq_along(df.list)){
df.list[[i]] <- case_when(
# group1 branch: keep columns 3 and 2, renaming y -> yy and z -> zz.
names(df.list[i])%in%group1 ~ df.list[[i]] %>%
dplyr::select(c(3, 2)) %>%
rename("yy" = "y", "zz" = "z"),
# group2 branch: keep columns 3 and 4, renaming z -> zz and r -> rr.
# NOTE(review): this condition indexes df.list[1] while the other two
# branches use df.list[i] -- looks like a typo; confirm.
names(df.list[1])%in%group2 ~ df.list[[i]] %>%
dplyr::select(c(3,4)) %>%
rename("zz" = "z", "rr" = "r"),
# group3 branch: keep columns 1, 4 and 3, renaming z -> zz.
names(df.list[i])%in%group3 ~ df.list[[i]] %>%
dplyr::select(c(1, 4, 3)) %>%
rename("zz"= "z")
)
}
From this, I expect each of my tibbles to have only the columns I selected for that group and have those columns renamed. I get an error like:
Error in `dplyr::select()`:
! Can't subset columns past the end.
ℹ Location 4 doesn't exist.
ℹ There are only 3 columns.
CodePudding user response:
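`case_when`/`ifelse`/`if_else` require all the arguments to be of the same length and type, which is not the case here.
In addition, every right-hand-side expression passed to `case_when` has to work for every tibble, so `select()` fails on the tibbles that lack the requested columns.
We may need an `if`/`else if`/`else` chain inside the loop here: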
library(dplyr)
# Reshape a single tibble according to the group its list name falls in.
# A name that is in none of the groups leaves the tibble untouched.
reshape_for_group <- function(tbl, tbl_name) {
  if (tbl_name %in% group1) {
    picked <- dplyr::select(tbl, c(3, 2))
    return(rename(picked, "yy" = "y", "zz" = "z"))
  }
  if (tbl_name %in% group2) {
    picked <- dplyr::select(tbl, c(3,4))
    return(rename(picked, "zz" = "z", "rr" = "r"))
  }
  if (tbl_name %in% group3) {
    picked <- dplyr::select(tbl, c(1, 4, 3))
    return(rename(picked, "zz"= "z"))
  }
  tbl
}

# Walk the list by position and overwrite each element with its reshaped form.
tbl_names <- names(df.list)
for (idx in seq_along(df.list)) {
  df.list[[idx]] <- reshape_for_group(df.list[[idx]], tbl_names[idx])
}
-output
> df.list
$a
# A tibble: 5 × 2
zz yy
<int> <dbl>
1 4 1
2 5 1
3 6 1
4 7 1
5 8 1
$b
# A tibble: 8 × 2
zz rr
<dbl> <dbl>
1 5 7
2 5 7
3 5 7
4 5 7
5 5 7
6 5 7
7 5 7
8 5 7
$c
# A tibble: 4 × 2
zz rr
<dbl> <dbl>
1 8 2
2 8 2
3 8 2
4 8 2
$d
# A tibble: 3 × 3
x r zz
<int> <dbl> <dbl>
1 1 4 0
2 2 4 0
3 3 4 0
It is also possible to do this with a key/mapping dataset:
library(tibble)
library(purrr)
# Lookup table with one row per list name:
#   col1 - name of the tibble in df.list
#   col2 - column positions to keep, in output order
#   col3 - rename specification as c(new_name = "old_name")
key_dat <- tibble(
  col1 = c("a", "b", "c", "d"),
  col2 = list(
    c(3, 2),
    c(3, 4),
    c(3, 4),
    c(1, 4, 3)
  ),
  col3 = list(
    c(yy = "y", zz = "z"),
    c(zz = "z", rr = "r"),
    c(zz = "z", rr = "r"),
    c(zz = "z")
  )
)

# For each named tibble, find its row in key_dat, keep the listed columns,
# then apply the listed renames. The resulting list is returned (and printed).
imap(df.list, function(tbl, tbl_name) {
  key_row <- match(tbl_name, key_dat$col1)
  keep_cols <- key_dat$col2[[key_row]]
  new_names <- key_dat$col3[[key_row]]
  picked <- dplyr::select(tbl, all_of(keep_cols))
  rename(picked, !!!new_names)
})
-output
$a
# A tibble: 5 × 2
zz yy
<int> <dbl>
1 4 1
2 5 1
3 6 1
4 7 1
5 8 1
$b
# A tibble: 8 × 2
zz rr
<dbl> <dbl>
1 5 7
2 5 7
3 5 7
4 5 7
5 5 7
6 5 7
7 5 7
8 5 7
$c
# A tibble: 4 × 2
zz rr
<dbl> <dbl>
1 8 2
2 8 2
3 8 2
4 8 2
$d
# A tibble: 3 × 3
x r zz
<int> <dbl> <dbl>
1 1 4 0
2 2 4 0
3 3 4 0
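As the question is about `case_when`, we may need to make sure that each of the expressions in `case_when` can work for each of the `list` elements.
To do that, wrap the column positions with `any_of` in `select`, use `rename_with` together with `any_of` for the renaming, and wrap each result in `list()` so that every branch returns a length-1 value: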
# Reshape every tibble in df.list with case_when(), choosing the pipeline by
# the group the tibble's list name belongs to.
#
# Every branch's right-hand side must be valid for every tibble, so each
# pipeline is written to tolerate missing columns: any_of() in select() skips
# positions that do not exist, and rename_with() + any_of() only renames
# columns that are present (the new name is the old name doubled, e.g.
# "z" -> "zz"). Each result is wrapped in list() so that all branches return
# a length-1 list.
for (i in seq_along(df.list)) {
  tmp <- df.list[[i]]
  nm1 <- names(df.list)[i]
  tmp1 <- case_when(
    nm1 %in% group1 ~ list(
      tmp %>%
        dplyr::select(any_of(c(3, 2))) %>%
        rename_with(~ strrep(.x, 2), any_of(c("y", "z")))
    ),
    nm1 %in% group2 ~ list(
      tmp %>%
        dplyr::select(any_of(c(3, 4))) %>%
        rename_with(~ strrep(.x, 2), any_of(c("z", "r")))
    ),
    nm1 %in% group3 ~ list(
      tmp %>%
        dplyr::select(any_of(c(1, 4, 3))) %>%
        rename_with(~ strrep(.x, 2), any_of(c("z")))
    ),
    # Fallback: a name that is in none of the groups keeps its tibble as is.
    # Without it case_when() yields a missing (NULL) list element, and
    # assigning NULL with [[<- would delete that entry from df.list.
    TRUE ~ list(tmp)
  )
  # Unwrap the single-element list produced by case_when().
  df.list[[i]] <- tmp1[[1]]
}
-output
> df.list
$a
# A tibble: 5 × 2
zz yy
<int> <dbl>
1 4 1
2 5 1
3 6 1
4 7 1
5 8 1
$b
# A tibble: 8 × 2
zz rr
<dbl> <dbl>
1 5 7
2 5 7
3 5 7
4 5 7
5 5 7
6 5 7
7 5 7
8 5 7
$c
# A tibble: 4 × 2
zz rr
<dbl> <dbl>
1 8 2
2 8 2
3 8 2
4 8 2
$d
# A tibble: 3 × 3
x r zz
<int> <dbl> <dbl>
1 1 4 0
2 2 4 0
3 3 4 0