Home > other >  Extracting elements from a nested list to a data.matrix
Extracting elements from a nested list to a data.matrix

Time:02-25

I have a list of 2 elements, each of which contains 3 elements. Each of those 3 elements is itself a list of length 2, and each of its elements is a tibble. The number of rows is the same for all tibbles and they share the first three columns, but the columns after the third differ between tibbles.

Here is the structure:

# Reproducible example data: 2 top-level elements x 3 sub-elements, each
# sub-element a named pair of tibble-classed data frames (one per ENSG id).
# Built inside local() so the helper objects do not leak into the workspace.
my_list <- local({
  sample_ids <- c("GTEX-1122O", "GTEX-11EM3", "GTEX-11EQ9")
  expr_vals <- c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03")

  # All SNP columns that occur; each gene keeps a leading subset of them.
  snp_cols <- list(
    `1_43222779_A_G_b37` = c("1", "1", "2"),
    `1_43222856_A_G_b37` = c("0", "0", "0"),
    `1_43223126_C_T_b37` = c("0", "1", "0"),
    `1_43223317_T_C_b37` = c("1", "0", "0")
  )

  # Assemble one tibble-classed data frame for gene `ensg` that carries the
  # first `n_snps` SNP columns after the three id/expression columns.
  # NOTE(review): row.names declares 6 rows although every column holds 3
  # values; this is reproduced exactly as posted -- confirm it is intended.
  build_gene <- function(ensg, n_snps) {
    id_cols <- list(
      name = sample_ids,
      ENSG = rep(ensg, 3L),
      expr = expr_vals
    )
    structure(
      c(id_cols, snp_cols[seq_len(n_snps)]),
      row.names = c(NA, -6L),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }

  gene_pair <- list(
    ENSG0000014 = build_gene("ENSG0000014", 4L),
    ENSG0000015 = build_gene("ENSG0000015", 2L)
  )

  # Every sub-element holds the same pair of genes.
  triple <- list(gene_pair, gene_pair, gene_pair)
  list(triple, triple)
})

For each of the 3 elements, I first need to extract the tibbles, dropping their first three columns, and create a new data.matrix from them. I then want to automate this, presumably in a loop, to get the same kind of data.matrix for each of the 3 elements. The output should therefore be three matrices for each of the 2 main elements of the list.

I'd appreciate it if you can help.

CodePudding user response:

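Because the list is nested, one option is `rrapply()` from the rrapply package, which applies a function to every data frame in the nested list: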

library(rrapply)
library(dplyr)
# Walk the nested list and, for every data frame found, keep everything
# except its first three columns; the nesting is returned unchanged.
out <- rrapply(
  my_list,
  classes = "data.frame",
  f = function(x) dplyr::select(x, -(1:3)),
  how = "list"
)

-output

out
[[1]]
[[1]][[1]]
[[1]][[1]]$ENSG0000014
# A tibble: 6 × 4
  `1_43222779_A_G_b37` `1_43222856_A_G_b37` `1_43223126_C_T_b37` `1_43223317_T_C_b37`
  <chr>                <chr>                <chr>                <chr>               
1 1                    0                    0                    1                   
2 1                    0                    1                    0                   
3 2                    0                    0                    0                   
4 1                    0                    0                    1                   
5 1                    0                    1                    0                   
6 2                    0                    0                    0                   

[[1]][[1]]$ENSG0000015
# A tibble: 6 × 2
  `1_43222779_A_G_b37` `1_43222856_A_G_b37`
  <chr>                <chr>               
1 1                    0                   
2 1                    0                   
3 2                    0                   
4 1                    0                   
5 1                    0                   
6 2                    0       
...
  • Related