Subsetting a list with lapply-CodePudding

I have a list of lists. The outer list contains three elements, A, B and C, and each of them is itself a list holding two data frames (data and char). See below:

# Example input: three named groups (A, B, C). Each group is a list with
#   data - a 10 x 3 data frame of random normal values (columns a, b, c)
#   char - a 10 x 2 data frame: random letters (L3) and an alternating Color
ListAll <- list(
  A = list(
    data = data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10)),
    char = data.frame(L3 = sample(letters, 10), Color = rep(c("red", "blue"), 5))
  ),
  B = list(
    data = data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10)),
    char = data.frame(L3 = sample(letters, 10), Color = rep(c("blue", "red"), 5))
  ),
  C = list(
    data = data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10)),
    char = data.frame(L3 = sample(letters, 10), Color = rep(c("red", "blue"), 5))
  )
)

> ListAll
$A
$A$data
             a          b          c
1  -2.53712026 -0.6694815  2.0284354
2   0.82611084  1.4536211  2.1929939
3   0.17368971  0.2681694 -0.7438975
4  -0.08234772 -0.9999586 -0.2801080
5  -4.02607175 -0.7535428 -0.3388808
6  -1.63180889 -0.1910306  1.0356928
7  -1.33313693 -0.9250792  0.5803261
8   0.88087036 -0.3548668 -0.3731106
9  -0.77262301  0.7583444 -1.9437539
10  0.06084087 -1.0525777 -0.2681174

$A$char
   L3 Color
1   l   red
2   g  blue
3   m   red
4   h  blue
5   q   red
6   d  blue
7   a   red
8   s  blue
9   o   red
10  t  blue


$B
$B$data
             a           b           c
1  -0.36633900 -0.47905641 -0.78860596
2  -0.37072977  1.96669745 -0.36168884
3   0.07639764 -1.82932608  0.65566462
4   0.29858227  0.09976703 -0.11711302
5   0.77847744  0.27128421  0.75654492
6   0.89522882  1.28713178 -0.96608108
7   0.11443759 -0.99962094 -0.11501575
8  -0.11920869  0.43800235 -1.09136876
9  -0.58772063 -1.94333868 -1.72354096
10 -2.33637437 -0.52737344  0.02056622
$B$char
   L3 Color
1   k  blue
2   d   red
3   s  blue
4   z   red
5   x  blue
6   u   red
7   p  blue
8   t   red
9   l  blue
10  v   red


$C
$C$data
            a           b           c
1  -0.6366169  0.06446953 -0.02150794
2  -3.3466860 -0.87783521  0.46069701
3  -0.1865610 -0.69112686  0.09725638
4  -0.3280284 -0.59064344  0.35889595
5   0.3922027 -0.04336874  0.97091651
6  -0.7290331 -1.60534590  0.11241533
7   1.3195515  0.18069203 -0.32894485
8   0.6107493 -0.03220736 -2.40375652
9   0.9425496  2.32873530  0.77154078
10 -0.4656241 -0.35425911  0.53269487

$C$char
   L3 Color
1   o  red
2   x  blue
3   g  red
4   k  blue
5   a  red
6   p  blue
7   h  red
8   w  blue
9   n  red
10  s  blue
11  o  red
12  x  blue
13  g  red
14  k  blue
15  a  red
16  p  blue
17  h  red
18  w  blue
19  n  red
20  s  blue

From this list, I would like to take the data frame data together with only column 2 (Color) of the data frame char for each of A, B and C, and combine everything into a single data frame, like this:

  .id            a           b           c      Color
1    A -0.269213166  1.25959696  0.60815096      red
2    A -0.673893796  1.20581201  1.34714525      blue  
3    A  0.241470346 -1.52928110  0.26428783      red
4    A  1.208567627 -1.07829166  1.80748543      blue
5    A -0.007692734 -0.86438548 -1.64096665      red
6    A -2.017163360 -0.48675333 -1.67662652      blue   
7    A -0.633148300 -2.18180234 -0.04390119      red
8    A  1.463925875 -0.73571593  0.42213125      blue
9    A  0.003649150 -1.65524103 -0.98250787      red
10   A  1.138650189  0.84001979 -0.94617824      blue
11   B -0.251018082  1.01930668  0.54220304      blue
12   B -0.095962972  0.62334767  0.11791738      red 
13   B -0.696342795 -1.56159037 -0.26576622      blue  
14   B  0.870936972 -0.01171982 -0.39033396      red
15   B  0.187987981 -0.17495084 -1.35257925      blue
16   B -0.346946647 -0.43859714 -1.11467490      red 
17   B -1.042017149 -0.78778432  0.81739490      blue
18   B -2.174102096 -0.47316699 -2.30049184      red 
19   B  0.722957032 -0.74404910  1.46612141      blue 
20   B  0.491792528  0.49700033 -0.72193117      red
21   C -0.016289926  0.07519843  0.71536429      red 
22   C -0.234032445 -1.36138903  0.15231631      blue 
23   C  1.192632593 -0.96278902  1.60867489      red
24   C -0.037066726  1.32362311  0.32692901      blue
25   C -0.600207546 -1.27953645  1.07315190      red  
26   C -1.153873839 -1.15354816 -0.99859212      blue 
27   C  0.217616268  0.48432038  0.05511501      red
28   C  0.817652920 -0.69406053 -0.24095757      blue
29   C  0.645816458  1.44073689  0.92930204      red 
30   C  1.880040619  0.62055981  0.43319005      blue

I tried:

# Attempt 1: `|` is R's elementwise logical OR, not an operator that puts two
# data frames side by side, so this does not yield data + Color.
L = lapply(ListAll, function(x) x$data|x$char[2])

# Attempt 2: the same idea with positional indexing ([[1]] is `data`, [[2]] is
# `char`); it has the same problem with `|`.
L = lapply(ListAll, function(x) x[[1]]|x[[2]][2])

and to the dataframe, I would run:

# Turn the list L into a single data frame (ldply is presumably plyr::ldply;
# no library() call is shown here - confirm the package is attached).
ldply(L, .fun=identity)

but lapply only works when I extract one element at a time:

# Extracting a single element per group works: the `data` frames ...
lapply(ListAll, function(x) x$data)

# ... or the second column of each `char` frame (kept as a one-column data frame).
lapply(ListAll, function(x) x$char[2])

How can I subset my list to get exactly what I need?

CodePudding user response：

Here is a base R way.
First, loop through the list with lapply, extracting the wanted data and column and cbind-ing them together. Then a second lapply loop creates the .id column. Finally, the resulting data frames are rbind'ed together with do.call.

# Fixed seed so the random example data (and the printed result) is reproducible.
set.seed(2022)

# Example input: three named groups, each with a numeric `data` frame and a
# `char` frame holding random letters (L3) and an alternating Color column.
ListAll <- list(
  A = list(
    data = data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10)),
    char = data.frame(L3 = sample(letters, 10), Color = rep(c("red", "blue"), 5))
  ),
  B = list(
    data = data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10)),
    char = data.frame(L3 = sample(letters, 10), Color = rep(c("blue", "red"), 5))
  ),
  C = list(
    data = data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10)),
    char = data.frame(L3 = sample(letters, 10), Color = rep(c("red", "blue"), 5))
  )
)

# Step 1: per group, bind the `data` columns with the `Color` column of `char`.
# `Color` is picked by name, so its position inside `char` does not matter.
L <- lapply(ListAll, \(x) cbind(x$data, x$char["Color"]))

# Step 2: prepend the group name as `.id` (a length-one value is recycled to
# every row) and stack all groups. Putting `.id` first inside cbind() avoids
# hard-coded column positions, so this works for any number of `data` columns.
# Iterating over indices returns an unnamed list, which keeps the plain
# 1..n row names after rbind().
L <- do.call(rbind, lapply(seq_along(L), \(i) {
  cbind(.id = names(L)[i], L[[i]])
}))

L
#>    .id           a           b           c Color
#> 1    A  0.90014199  1.00618570  0.36446078   red
#> 2    A -1.17334577 -0.18514603  0.38365100  blue
#> 3    A -0.89748536 -0.98182671  1.11340572   red
#> 4    A -1.44450140  0.09290795  1.21150979  blue
#> 5    A -0.33101358 -0.05278440 -0.34832546   red
#> 6    A -2.90062899 -0.08032790 -0.85955345  blue
#> 7    A -1.05925573 -0.65410367  0.65002719   red
#> 8    A  0.27795474 -0.95068351  0.32805913  blue
#> 9    A  0.74948592  1.01956176 -0.51794657   red
#> 10   A  0.24158254  0.85904641 -0.23898215  blue
#> 11   B -0.16872026  0.36817344  0.41592754  blue
#> 12   B -0.26903769  1.69319002 -1.33844239   red
#> 13   B  0.80776842  0.99583702 -1.29197474  blue
#> 14   B -1.12471724  0.18675214 -0.30907421   red
#> 15   B -1.43078802  1.23833740  0.15651209  blue
#> 16   B  0.06035668  0.30937330 -0.83391675   red
#> 17   B -0.79298250  0.63571793 -0.02454928  blue
#> 18   B  0.34027593  0.02318327 -1.13735155   red
#> 19   B -0.25946873  1.17786360  1.07205425  blue
#> 20   B -1.30484865 -0.45354660  2.31449858   red
#> 21   C  1.29847651 -0.38394914  0.51793514   red
#> 22   C  1.35567805 -0.97585695  0.71217646  blue
#> 23   C  1.35203582 -0.78390718  1.58281914   red
#> 24   C -1.62961506 -1.09959065 -0.41238653  blue
#> 25   C  0.61440940 -1.37465043  0.85369834   red
#> 26   C  0.99671740 -0.45036800 -1.17205398  blue
#> 27   C -0.88329764 -0.02765994 -0.30639866   red
#> 28   C  0.42093350  0.06869818  0.53687936  blue
#> 29   C  2.16138816  0.30745958 -0.29881872   red
#> 30   C -1.22360972  0.24673189 -0.75251562  blue

Created on 2022-05-04 by the reprex package (v2.0.1)

CodePudding user response：

If I interpret the OP correctly, I assume they want two separate row-bound data.frames: one for the data and one for the colors in char.

method 1

Apply two times over your nested list and rbind the results.

# Pull the `data` frame out of every group and stack them row-wise.
data <- do.call("rbind", lapply(ListAll, `[[`, "data"))
# Keep only the second column of each `char` frame (still a data frame), then stack.
char <- do.call("rbind", lapply(ListAll, function(grp) grp[["char"]][2]))

method 2

You can also unlist, but note the recursive = FALSE! We then just need the odd elements to get data and the even elements to get char. Note that this might be faster, but it becomes risky if the list does not always consist of these strict data/char pairs.

# Flatten exactly one level: the result is a list of data frames that
# alternates data, char, data, char, ...
# It is called `flat` rather than `t` so that base::t() is not shadowed.
flat <- unlist(ListAll, recursive = FALSE)
# A length-two logical index is recycled along the list, selecting the odd
# positions (`data`) and the even positions (`char`). TRUE/FALSE are used
# instead of T/F because T and F are ordinary variables that can be reassigned,
# and no 1:length() sequence is needed (which would be c(1, 0) for an empty list).
data <- do.call(rbind, flat[c(TRUE, FALSE)])
char <- do.call(rbind, flat[c(FALSE, TRUE)])
# Drop the first column of `char`, leaving only the remaining column(s).
char <- char[-1]