Consider the following data simulation mechanism:
set.seed(1) # Fix the RNG state so the simulated replicates are reproducible
# Simulate one data set of G groups with 2 individuals per group.
#
# Arguments:
#   G  Number of groups (a non-negative whole number).
# Returns:
#   A data.frame with 2 * G rows and the columns
#     id  individual identifier, 1..2G
#     i   group index, each group value repeated twice
#     Y   binary outcome drawn with rbinom(n, 1, 0.5)
simulW <- function(G) {
  n <- 2 * G # Assume 2 individuals per group
  # seq_len() keeps G = 0 well defined (an empty data set), whereas 1:G
  # would count down as c(1, 0).
  i <- rep(seq_len(G), each = 2) # Group index
  Y <- rbinom(n, 1, 0.5) # binary
  data.frame(id = seq_len(n), i, Y)
}
r <- 5 # 5 replicates
# simplify = FALSE keeps the replicates as a plain list of data frames
dat1 <- replicate(r, simulW(G = 10), simplify = FALSE)
#For example the first data replicate will be
> dat1[[1]]
id i Y
1 1 1 0
2 2 1 1
3 3 2 0
4 4 2 0
5 5 3 0
6 6 3 0
7 7 4 0
8 8 4 1
9 9 5 1
10 10 5 0
The code below computes the group-wise sum of Y (where i is the group index), but it only handles the first replicate, i.e. dat1[[1]].
# Group-wise sum of Y for the first replicate. Y and i are taken explicitly
# from dat1[[1]]: they are not defined at top level in this script, only
# inside simulW().
Di <- with(dat1[[1]], aggregate(Y, by = list(i), FUN = sum)) # Sum per group for the first dataset
e <- colSums(Di[2]) # Total sum of Y for all groups for dataset 1
> e
x
8
di <- Di[, 2, drop = FALSE] # Keep only the per-group sums (second column) for replicate 1
> di
x
1 2
2 2
3 2
4 0
5 2
How can I use the same function to compute the group-wise sum for each of the other replicates?
Maybe something like:
for (m in 1:r )
{
Di[m]<-
e[m]<-
di[m]<-
}
CodePudding user response:
You may use `aggregate` inside `lapply`:
# For every replicate in dat1, sum Y within each group i; the result is a
# list with one aggregated data frame per replicate.
result <- lapply(
  dat1,
  function(rep_df) {
    aggregate(Y ~ i, data = rep_df, FUN = sum)
  }
)
result
#[[1]]
# i Y
#1 1 1
#2 2 1
#3 3 0
#4 4 0
#5 5 1
#6 6 1
#7 7 0
#8 8 2
#9 9 1
#10 10 1
#[[2]]
# i Y
#1 1 2
#2 2 2
#3 3 2
#4 4 0
#5 5 2
#6 6 1
#7 7 0
#8 8 0
#9 9 1
#10 10 1
#...
#...
CodePudding user response:
We may use tidyverse
library(purrr)
library(dplyr)
# Apply the same grouped sum to each replicate in dat1: group by i, then
# collapse each group to the sum of its Y values.
map(dat1, function(rep_df) {
  rep_df %>%
    group_by(i) %>%
    summarise(Y = sum(Y))
})
Output:
[[1]]
# A tibble: 10 × 2
i Y
<int> <int>
1 1 0
2 2 2
3 3 1
4 4 2
5 5 1
6 6 0
7 7 1
8 8 1
9 9 2
10 10 1
[[2]]
# A tibble: 10 × 2
i Y
<int> <int>
1 1 1
2 2 1
3 3 0
4 4 0
5 5 1
6 6 1
7 7 0
8 8 2
9 9 1
10 10 1
[[3]]
# A tibble: 10 × 2
i Y
<int> <int>
1 1 2
2 2 2
3 3 2
4 4 0
5 5 2
6 6 1
7 7 0
8 8 0
9 9 1
10 10 1
[[4]]
# A tibble: 10 × 2
i Y
<int> <int>
1 1 1
2 2 0
3 3 1
4 4 1
5 5 1
6 6 1
7 7 0
8 8 1
9 9 1
10 10 2
[[5]]
# A tibble: 10 × 2
i Y
<int> <int>
1 1 1
2 2 0
3 3 1
4 4 1
5 5 0
6 6 0
7 7 2
8 8 2
9 9 0
10 10 2