I have several lists, each one containing many data frames. I would like to calculate the element-wise median across the corresponding elements of each data frame in each list, i.e. the median of element `[[1]][1,1]` of `list1`, element `[[1]][1,1]` of `list2`, and element `[[1]][1,1]` of `list3`, and so on for all elements. The data frames have many columns each, but here is some sample data with only two columns:
# Reproducible sample data: three lists, each holding two data frames with
# four rows and two integer columns (`a`, `b`) drawn from 1..10.
# The draws happen in the same order as writing every data.frame() call out
# by hand, so the seed yields the same values.
set.seed(1)
list1 <- replicate(
  2,
  data.frame(a = sample.int(10, 4), b = sample.int(10, 4)),
  simplify = FALSE
)
list2 <- replicate(
  2,
  data.frame(a = sample.int(10, 4), b = sample.int(10, 4)),
  simplify = FALSE
)
list3 <- replicate(
  2,
  data.frame(a = sample.int(10, 4), b = sample.int(10, 4)),
  simplify = FALSE
)
This is the expected result:
[[1]]
a b
7 4
9 9
7 3
4 6
[[2]]
a b
5 7
8 6
2 6
5 2
Any tips?
CodePudding user response:
Using `purrr`:
library(purrr)

# All input lists gathered together; also read by the base R variant below.
lsts <- list(list1, list2, list3)

# transpose() regroups the data frames by their position within each list.
# A second transpose() then regroups each group by column, so every column
# name maps to one vector per input list. Those vectors are placed side by
# side and the median is taken across each row.
map(transpose(lsts), function(same_position) {
  map_dfc(transpose(same_position), function(same_column) {
    apply(list2DF(same_column), 1, median)
  })
})
[[1]]
# A tibble: 4 × 2
a b
<int> <int>
1 7 4
2 9 9
3 7 3
4 4 6
[[2]]
# A tibble: 4 × 2
a b
<int> <int>
1 5 7
2 8 6
3 2 6
4 5 2
In Base R, assuming they all have the same structure:
# Assumes every data frame has the same dimensions and only numeric columns.
# Stack all values into a 4-d array indexed as
#   rows x columns x data-frame position x list.
dims <- c(dim(list1[[1]]), length(list1), length(lsts))
# Collapse the last axis (the lists) with the median, keeping the other three.
d <- apply(array(unlist(lsts), dims), head(seq_along(dims), -1), median)
# Split along the third axis (data-frame position) so that each element is
# one rows x columns matrix. Permuting axes 2 and 3 first would split by
# column instead, which only looks right when both axes have the same length.
asplit(d, 3)
[[1]]
[,1] [,2]
[1,] 7 4
[2,] 9 9
[3,] 7 3
[4,] 4 6
[[2]]
[,1] [,2]
[1,] 5 7
[2,] 8 6
[3,] 2 6
[4,] 5 2
CodePudding user response:
Here's another (base R) option:
# Walk the three lists in parallel: each call receives the data frames that
# share a position (one per list) and returns a data frame of medians.
Map(function(...) {
  frames <- list(...)

  # Receives the same column from every data frame; binds those columns side
  # by side and takes the median across each row.
  row_medians <- function(...) {
    same_column <- list(...)
    apply(do.call(cbind, same_column), 1, median)
  }

  # mapply() iterates over the columns of the data frames in parallel and
  # simplifies the per-column results into a matrix.
  medians <- do.call(mapply, c(list(FUN = row_medians), frames))
  data.frame(medians)
}, list1, list2, list3)
# [[1]]
# a b
# 1 7 4
# 2 9 9
# 3 7 3
# 4 4 6
# [[2]]
# a b
# 1 5 7
# 2 8 6
# 3 2 6
# 4 5 2
Certainly not beautiful, but functional.
A related `dplyr` option:
library(dplyr)

list(list1, list2, list3) |>
  # id1 = position of the data frame within its list
  lapply(bind_rows, .id = "id1") |>
  # id2 = which list the rows came from
  bind_rows(.id = "id2") |>
  # bind_rows() stores the ids as character; convert id1 so that ten or more
  # data frames are ordered 1, 2, ..., 10 rather than "1", "10", "2", ...
  mutate(id1 = as.integer(id1)) |>
  # Number the rows within each original data frame
  group_by(id1, id2) |>
  mutate(rn = row_number()) |>
  # Median across lists for each (data-frame position, row) pair
  group_by(id1, rn) |>
  summarize(across(c(a, b), median), .groups = "drop") |>
  select(-rn) |>
  # One tibble per data-frame position, returned as a list
  group_nest(id1) |>
  pull(data)
# [[1]]
# # A tibble: 4 × 2
# a b
# <int> <int>
# 1 7 4
# 2 9 9
# 3 7 3
# 4 4 6
# [[2]]
# # A tibble: 4 × 2
# a b
# <int> <int>
# 1 5 7
# 2 8 6
# 3 2 6
# 4 5 2
CodePudding user response:
Certainly not the most efficient solution, but one option with `tidyverse` might be:
library(tidyverse)

# Collect every object in the current environment named list<digits>
# (list1, list2, ...). The pattern is anchored so that unrelated objects
# whose names merely contain "list" are not swept in.
# NOTE(review): the output printed after this snippet does not match the
# expected result given in the question -- re-run on the sample data to
# confirm.
map_dfr(mget(ls(pattern = "^list[0-9]+$")),
        function(list_of_lists) {
          # Tag every row with its row number and the position of its data
          # frame inside the list
          imap(list_of_lists,
               function(lists, lists_id) {
                 lists %>%
                   mutate(rowid = row_number(),
                          lists_id = lists_id)
               })
        }) %>%
  group_by(rowid, lists_id) %>%
  # Median across lists; drop the grouping so the result is a plain tibble
  summarise(across(c(a, b), median), .groups = "drop")
rowid lists_id a b
<int> <int> <int> <int>
1 1 1 10 3
2 1 2 8 1
3 2 1 5 4
4 2 2 9 8
5 3 1 6 6
6 3 2 6 3
7 4 1 3 2
8 4 2 4 6
If the goal is to return a list:
# Same pipeline as the long-format version, but split into one tibble per
# data-frame position at the end.
# The pattern is anchored so that only objects named list<digits> are used.
# NOTE(review): the output printed after this snippet does not match the
# expected result given in the question -- re-run on the sample data to
# confirm.
map_dfr(mget(ls(pattern = "^list[0-9]+$")),
        function(list_of_lists) {
          # Tag every row with its row number and the position of its data
          # frame inside the list
          imap(list_of_lists,
               function(lists, lists_id) {
                 lists %>%
                   mutate(rowid = row_number(),
                          lists_id = lists_id)
               })
        }) %>%
  group_by(rowid, lists_id) %>%
  summarise(across(c(a, b), median), .groups = "drop") %>%
  group_split(lists_id)
[[1]]
# A tibble: 4 × 4
rowid lists_id a b
<int> <int> <int> <int>
1 1 1 10 3
2 2 1 5 4
3 3 1 6 6
4 4 1 3 2
[[2]]
# A tibble: 4 × 4
rowid lists_id a b
<int> <int> <int> <int>
1 1 2 8 1
2 2 2 9 8
3 3 2 6 3
4 4 2 4 6
CodePudding user response:
Here is a first draft of a `tidyverse` solution (I am sure that it could be improved):
library(tidyverse)

# The constants below are tied to the sample data: 3 lists, 8 rows per list
# (2 data frames x 4 rows), 4 rows per data frame.
bind_rows(list1, list2, list3) %>%
  # x = index of the list each row came from
  mutate(x = rep(1:3, each = 8, length.out = n())) %>%
  # One list-column per (variable, list) pair: a_1, a_2, a_3, b_1, ...
  pivot_wider(names_from = x,
              values_from = c(a, b),
              values_fn = list) %>%
  # `cols` must be given explicitly; calling unnest() without it is deprecated
  unnest(cols = everything()) %>%
  rowwise() %>%
  transmute(a = median(c(a_1, a_2, a_3)),
            b = median(c(b_1, b_2, b_3))) %>%
  ungroup() %>%
  # Cut the stacked rows back into blocks of 4, one per data frame
  group_by(x = as.integer(gl(n(), 4, n()))) %>%
  group_split() %>%
  map(function(piece) select(piece, -x))
[[1]]
# A tibble: 4 × 2
a b
<int> <int>
1 7 4
2 9 9
3 7 3
4 4 6
[[2]]
# A tibble: 4 × 2
a b
<int> <int>
1 5 7
2 8 6
3 2 6
4 5 2