I am trying to calculate the standard deviation of the mean of groups within a list of dataframes.
A small selection of my list of dataframes below
list(Fe = structure(list(Sample_No = c(80606, 80606, 80606, 80606,
80606, 80769, 80769, 80769, 80769, 80769, 80850, 80850, 80850,
80850, 80850, 80947, 80947, 80947, 80947, 80947, 81089, 81089,
81089, 81089, 81089, 81753, 81753, 81753, 81753, 81753, 80601,
80601, 80601, 80601, 80601, 81001, 81001, 81001, 81001, 81001,
81493, 81493, 81493, 81493, 81493, 81760, 81760, 81760, 81760,
81760), Rep_No = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4,
5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5), Result = c(53.83,
53.89, 53.87, 53.95, 54.04, 54.11, 54, 53.98, 54.02, 53.99, 54.08,
54.04, 54.07, 54.14, 54.13, 54.19, 54.05, 54.04, 54.04, 54.07,
54.14, 54.07, 54.08, 54.14, 54.09, 54.12, 54.07, 54.04, 54.03,
54.12, 54.06, 54.17, 54.06, 54.09, 54.11, 54.13, 54.02, 54.22,
54.08, 54.04, 54.05, 54.06, 54.17, 54, 54.06, 54.05, 54.03, 54.09,
54.14, 54.02)), class = "data.frame", row.names = c(NA, -50L)),
SiO2 = structure(list(Sample_No = c(80606, 80606, 80606,
80606, 80606, 80769, 80769, 80769, 80769, 80769, 80850, 80850,
80850, 80850, 80850, 80947, 80947, 80947, 80947, 80947, 81089,
81089, 81089, 81089, 81089, 81753, 81753, 81753, 81753, 81753,
80601, 80601, 80601, 80601, 80601, 81001, 81001, 81001, 81001,
81001, 81493, 81493, 81493, 81493, 81493, 81760, 81760, 81760,
81760, 81760), Rep_No = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1,
2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4,
5), Result = c(11.82, 11.76, 11.79, 11.71, 11.64, 11.63,
11.67, 11.71, 11.73, 11.78, 11.63, 11.62, 11.67, 11.66, 11.68,
11.65, 11.68, 11.68, 11.69, 11.65, 11.62, 11.61, 11.61, 11.57,
11.61, 11.63, 11.67, 11.67, 11.68, 11.6, 11.61, 11.57, 11.66,
11.64, 11.61, 11.58, 11.63, 11.53, 11.59, 11.64, 11.69, 11.68,
11.64, 11.72, 11.64, 11.68, 11.62, 11.69, 11.62, 11.69)), class = "data.frame", row.names = c(NA,
-50L)), Al2O3 = structure(list(Sample_No = c(80606, 80606,
80606, 80606, 80606, 80769, 80769, 80769, 80769, 80769, 80850,
80850, 80850, 80850, 80850, 80947, 80947, 80947, 80947, 80947,
81089, 81089, 81089, 81089, 81089, 81753, 81753, 81753, 81753,
81753, 80601, 80601, 80601, 80601, 80601, 81001, 81001, 81001,
81001, 81001, 81493, 81493, 81493, 81493, 81493, 81760, 81760,
81760, 81760, 81760), Rep_No = c(1, 2, 3, 4, 5, 1, 2, 3,
4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2,
3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1,
2, 3, 4, 5), Result = c(2.73, 2.74, 2.72, 2.71, 2.71, 2.7,
2.68, 2.71, 2.7, 2.69, 2.71, 2.7, 2.69, 2.71, 2.7, 2.68,
2.69, 2.69, 2.69, 2.68, 2.69, 2.69, 2.69, 2.69, 2.69, 2.69,
2.71, 2.7, 2.7, 2.7, 2.71, 2.69, 2.7, 2.72, 2.71, 2.71, 2.71,
2.68, 2.71, 2.69, 2.72, 2.71, 2.71, 2.71, 2.7, 2.7, 2.7,
2.7, 2.68, 2.71)), class = "data.frame", row.names = c(NA,
-50L)))
I have the following code that works on a single dataframe
df <- df_list[["Fe"]]
df_sd <- lapply(aggregate(df$Result, list(df$Sample_No), FUN=mean),sd)
which produces the desired outcome for a single dataframe
I have tried
df_list_sd <- lapply(aggregate(df_list$Result, list(df_list$Sample_No), FUN=mean),sd)
df_list_sd <- lapply(lapply(df_list, function(x) {aggregate(Result ~ Sample_No, data = x, mean) })),x sd)
But I get the following error message Error: unexpected ',' in "lapply(lapply(df_list, function(x) {aggregate(Result ~ Sample_No, data = x, mean) })),"
I am not quite sure how to fix the syntax/code
CodePudding user response:
We can use anonymous function
out <- lapply(df_list, function(dat)
aggregate(Result ~ Sample_No, FUN = mean, data = dat))
-output
out
$Fe
Sample_No Result
1 80601 54.098
2 80606 53.916
3 80769 54.020
4 80850 54.092
5 80947 54.078
6 81001 54.098
7 81089 54.104
8 81493 54.068
9 81753 54.076
10 81760 54.066
$SiO2
Sample_No Result
1 80601 11.618
2 80606 11.744
3 80769 11.704
4 80850 11.652
5 80947 11.670
6 81001 11.594
7 81089 11.604
8 81493 11.674
9 81753 11.650
10 81760 11.660
$Al2O3
Sample_No Result
1 80601 2.706
2 80606 2.722
3 80769 2.696
4 80850 2.702
5 80947 2.686
6 81001 2.700
7 81089 2.690
8 81493 2.710
9 81753 2.700
10 81760 2.698
To find the sd
of 'Result`
sapply(out, function(x) sd(x$Result))
Fe SiO2 Al2O3
0.05660624 0.04548260 0.01016530
Or using tidyverse
library(dplyr)
library(purrr)
map_dbl(df_list, ~ .x %>%
group_by(Sample_No) %>%
summarise(Result = mean(Result), .groups = 'drop') %>%
summarise(sd = sd(Result)) %>%
pull(sd))
Fe SiO2 Al2O3
0.05660624 0.04548260 0.01016530
CodePudding user response:
library(tidyverse)
df_list %>%
bind_rows(.id = 'group') %>%
group_by(group, Sample_No) %>%
summarise(res = mean(Result), .groups = 'drop_last') %>%
summarise(res = sd(res), .groups = 'drop')
# A tibble: 3 x 2
group res
<chr> <dbl>
1 Al2O3 0.0102
2 Fe 0.0566
3 SiO2 0.0455