Apply same function to several data replicates in R-CodePudding

Consider the following data simulation mechanism:

set.seed(1)

# Simulate one data set of binary outcomes for G groups of two individuals.
#
# Args:
#   G: Number of groups (expected to be a single non-negative whole number).
#
# Returns:
#   A data frame with 2 * G rows and three columns:
#     id - individual index, 1, ..., 2 * G
#     i  - group index; each group occupies two consecutive rows
#     Y  - binary outcome, drawn independently with success probability 0.5
simulW <- function(G) {
  n <- 2 * G  # Assume 2 individuals per group

  # seq_len() instead of 1:G / 1:n: for G = 0 the colon operator yields the
  # backwards sequence c(1, 0), which made the original call fail rather
  # than return an empty data frame.
  i <- rep(seq_len(G), each = 2)  # Group index
  Y <- rbinom(n, 1, 0.5)  # Binary outcome

  data.frame(id = seq_len(n), i, Y)
}

# Number of simulated data sets (replicates) to generate.
r <- 5

# simplify = FALSE keeps the replicates as a plain list of data frames,
# one list element per replicate.
dat1 <- replicate(r, simulW(G = 10), simplify = FALSE)
# For example, the first data replicate is:
> dat1[[1]]
   id  i Y
1   1  1 0
2   2  1 1
3   3  2 0
4   4  2 0
5   5  3 0
6   6  3 0
7   7  4 0
8   8  4 1
9   9  5 1
10 10  5 0

The code below performs a group-wise sum of Y (i is the group index), but as written it only uses the first replicate, i.e. dat1[[1]].

# Group-wise sum of Y for the first replicate. Y and i are columns of
# dat1[[1]], not objects in the workspace, so they are evaluated inside that
# data frame with with(). The result has columns Group.1 (group) and x (sum).
Di <- with(dat1[[1]], aggregate(Y, by = list(i), FUN = sum))

# Total of Y over all groups for replicate 1 (column 2 holds the group sums).
e <- colSums(Di[2])
> e
x 
8 
# Group-wise sums for replicate 1, kept as a one-column data frame.
di <- Di[, 2, drop = FALSE]
> di
  x
1 2
2 2
3 2
4 0
5 2

How can I use the same function to perform the group-wise sum for the other replicates?

Maybe something like:

# Sketch only, not runnable: the right-hand sides of the assignments are
# left blank. Presumably the idea is to fill Di, e and di for every
# replicate m in 1..r -- the blanks are what the question asks for.
for (m in 1:r ) 
{
  Di[m]<-
    e[m]<-
  di[m]<-
}

CodePudding user response：

You may use aggregate in lapply -

# Apply the same group-wise sum of Y (grouped by i) to every replicate.
# The result is a list with one aggregated data frame per element of dat1.
result <- lapply(dat1, function(replicate_df) {
  aggregate(Y ~ i, data = replicate_df, FUN = sum)
})
result

#[[1]]
#    i Y
#1   1 1
#2   2 1
#3   3 0
#4   4 0
#5   5 1
#6   6 1
#7   7 0
#8   8 2
#9   9 1
#10 10 1

#[[2]]
#    i Y
#1   1 2
#2   2 2
#3   3 2
#4   4 0
#5   5 2
#6   6 1
#7   7 0
#8   8 0
#9   9 1
#10 10 1
#...
#...

CodePudding user response：

We may use tidyverse

library(purrr)
library(dplyr)
# For each replicate in dat1, sum Y within every group i. map() returns a
# list holding one summarised table per replicate.
map(dat1, function(replicate_df) {
  replicate_df %>%
    group_by(i) %>%
    summarise(Y = sum(Y))
})

-output

[[1]]
# A tibble: 10 × 2
       i     Y
   <int> <int>
 1     1     0
 2     2     2
 3     3     1
 4     4     2
 5     5     1
 6     6     0
 7     7     1
 8     8     1
 9     9     2
10    10     1

[[2]]
# A tibble: 10 × 2
       i     Y
   <int> <int>
 1     1     1
 2     2     1
 3     3     0
 4     4     0
 5     5     1
 6     6     1
 7     7     0
 8     8     2
 9     9     1
10    10     1

[[3]]
# A tibble: 10 × 2
       i     Y
   <int> <int>
 1     1     2
 2     2     2
 3     3     2
 4     4     0
 5     5     2
 6     6     1
 7     7     0
 8     8     0
 9     9     1
10    10     1

[[4]]
# A tibble: 10 × 2
       i     Y
   <int> <int>
 1     1     1
 2     2     0
 3     3     1
 4     4     1
 5     5     1
 6     6     1
 7     7     0
 8     8     1
 9     9     1
10    10     2

[[5]]
# A tibble: 10 × 2
       i     Y
   <int> <int>
 1     1     1
 2     2     0
 3     3     1
 4     4     1
 5     5     0
 6     6     0
 7     7     2
 8     8     2
 9     9     0
10    10     2