There are two long nested lists that have the same element names. For each element, I want to find the number of sub-elements that the two lists have in common. The desired output is a data frame with two columns: the first column holds the list element names (ENSGs) and the second holds the number of sub-elements (chrXX_XXXX_X_X_b38) shared between the two lists.
list1:
# list1: one single-row table per ENSG id; every column is named after a
# variant id and holds one numeric value.
list1 <- local({
  # Wrap a named list of columns as a tibble-classed data frame that has
  # `n_rows` automatic (compact) row names.
  as_tbl <- function(columns, n_rows) {
    structure(
      columns,
      row.names = c(NA, -n_rows),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }

  list(
    ENSG00000040608 = as_tbl(
      list(
        chr22_20230714_G_A_b38 = 0.0000953181301665087,
        chr22_20230737_G_A_b38 = -0.00124036704551427,
        chr22_20231229_T_A_b38 = 0.000808061558738542,
        chr22_20231474_G_A_b38 = 0.000387528601423933,
        chr22_20231667_C_G_b38 = -0.000120624028990859
      ),
      n_rows = 1L
    ),
    ENSG00000020766 = as_tbl(
      list(
        chr22_47157062_G_A_b38 = 0.00000909931572319958,
        chr22_47157212_G_A_b38 = -0.000124084106569373,
        chr22_47157394_C_G_b38 = -0.0000752774417069946,
        chr22_47157559_G_A_b38 = 0.0000808446315377557,
        chr22_47157607_T_C_b38 = 0.000237979025556899
      ),
      n_rows = 1L
    )
  )
})
list2:
# list2: one table per ENSG id; every column is named after a variant id and
# holds five integer values, so each table has five rows.
list2 <- list(
  ENSG00000040608 = structure(
    list(
      chr22_20230714_G_A_b38 = c(1L, 1L, 1L, 2L, 1L),
      chr22_20230737_G_A_b38 = c(0L, 0L, 0L, 0L, 0L),
      chr22_20231229_T_A_b38 = c(1L, 0L, 1L, 0L, 1L),
      chr22_20231555_A_T_b38 = c(0L, 1L, 0L, 0L, 0L),
      chr22_20231667_C_G_b38 = c(1L, 1L, 1L, 2L, 1L)
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  ),
  ENSG00000020766 = structure(
    list(
      chr22_47157062_G_A_b38 = c(0L, 1L, 0L, 0L, 0L),
      chr22_47157212_G_A_b38 = c(0L, 0L, 1L, 1L, 2L),
      chr22_47157394_C_G_b38 = c(0L, 1L, 1L, 1L, 2L),
      chr22_47207559_T_C_b38 = c(0L, 1L, 0L, 0L, 0L)
    ),
    # The compact row-name count must equal the column length (5); a smaller
    # count yields a malformed data frame whose nrow() disagrees with its data.
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  )
)
CodePudding user response:
We may use `map2` to loop over the corresponding elements of the two lists, get the `length` of the `intersect`ing column names into a `tibble`, and create the `ENSG` column with `.id`.
library(purrr)
library(tibble)

# Walk list1 and list2 in parallel and, for each pair of tables, count the
# column names they have in common. Each pair yields a one-row tibble; the
# rows are bound together and `.id` stores the list1 element name in an
# `ENSG` column.
# Note: map2 pairs elements by position, not by name, so both lists must hold
# their elements in the same order.
map2_dfr(
  list1, list2,
  ~ tibble(common_elements = length(intersect(names(.x), names(.y)))),
  .id = 'ENSG'
)
-output
# A tibble: 2 × 2
ENSG common_elements
<chr> <int>
1 ENSG00000040608 4
2 ENSG00000020766 3