In the example below, I select the unique values of a specific column from a list of lists. I think I have successfully done what I want, but I would like to know whether the datasets can be organised in a tidier way (e.g., without using nested lists to organise the data like this) and whether the tidyverse can be used fully to achieve the same output (e.g., with purrr or similar). As you can see, I partly use the tidyverse, but I'm interested in seeing whether it's possible to avoid the for loop and the lapply, and maybe to avoid using lists to organise the data like this.
# Load libraries
library(tidyverse)
library(stringi)

# Fix the RNG seed so the randomly generated example data are reproducible
set.seed(123)

# Generate datasets to create example: each tibble has a random numeric
# column (col1) and a column of random 5-character strings (col2)
dat1 <- tibble(col1 = runif(10), col2 = stri_rand_strings(10, 5))
dat2 <- tibble(col1 = runif(10), col2 = stri_rand_strings(10, 5))
dat3 <- tibble(col1 = runif(10), col2 = stri_rand_strings(10, 5))
dat4 <- tibble(col1 = runif(10), col2 = stri_rand_strings(10, 5))

# Generate lists from the generated datasets (3 of them for example)
list1 <- list(id1 = dat1, id2 = dat2)
list2 <- list(id1 = dat3)
list3 <- list(id1 = dat4)

# Generate list of lists
final_list <- list(dataset1 = list1, dataset2 = list2, dataset3 = list3)

# Select unique cases from the list of lists
# First loop over each list (1, 2 and 3); within each list, lapply keeps the
# distinct values of col2 of every tibble and bind_rows stacks them.
# distinct(col2) already drops every other column, so no select() is needed.
# The per-list results are stored in a preallocated list and combined once at
# the end instead of growing `data` with rbind() on every iteration.
unique_by_list <- vector("list", length(final_list))
for (i in seq_along(final_list)) {
  single_cases <- bind_rows(
    lapply(final_list[[i]], function(x) x %>% distinct(col2))
  )
  unique_by_list[[i]] <- single_cases
}
data <- bind_rows(unique_by_list)
CodePudding user response:
You could do:
# Build the unique col2 values per tibble in one pipeline.
# Starts from `final_list` (the list of lists defined in the question) rather
# than naming each inner list by hand, so added datasets are picked up too.
data1 <- final_list %>%
  # flatten one level: list of lists of tibbles -> single list of tibbles
  unlist(recursive = FALSE) %>%
  # give every tibble its own id so distinct() only de-duplicates within it
  setNames(paste0("id", seq_along(.))) %>%
  # stack all tibbles, recording the source tibble in column `id`
  bind_rows(.id = "id") %>%
  distinct(id, col2) %>%
  # the id was only needed for de-duplication; keep col2 alone
  select(-id)
And here is the complete reproducible example, showing that your `data` and my `data1` are identical:
library(dplyr)
library(stringi)

# Seed the RNG so the random example data are the same on every run
set.seed(123)

# Four example tibbles: random numbers in col1, random 5-character strings
# in col2
dat1 <- tibble(col1 = runif(10), col2 = stri_rand_strings(10, 5))
dat2 <- tibble(col1 = runif(10), col2 = stri_rand_strings(10, 5))
dat3 <- tibble(col1 = runif(10), col2 = stri_rand_strings(10, 5))
dat4 <- tibble(col1 = runif(10), col2 = stri_rand_strings(10, 5))

# Nested structure from the question: a named list of lists of tibbles
list1 <- list(id1 = dat1, id2 = dat2)
list2 <- list(id1 = dat3)
list3 <- list(id1 = dat4)
final_list <- list(dataset1 = list1, dataset2 = list2, dataset3 = list3)

# Approach from the question, kept unchanged in logic as the reference result:
# for each inner list, take the distinct col2 values of every tibble and
# append them to `data`
data <- NULL
for (i in final_list) {
  single_cases <- bind_rows(
    lapply(i, function(x) x %>% select(col2) %>% distinct(col2))
  )
  data <- rbind(data, single_cases)
}

# Pipeline approach: flatten `final_list` one level into a list of tibbles,
# give each tibble a unique id, stack them, de-duplicate col2 within each id,
# then drop the helper id column
data1 <- final_list %>%
  unlist(recursive = FALSE) %>%
  setNames(paste0("id", seq_along(.))) %>%
  bind_rows(.id = "id") %>%
  distinct(id, col2) %>%
  select(-id)

# Both approaches produce exactly the same tibble
identical(data, data1)
#> [1] TRUE