Home > Net >  Aggregate function to a list of dataframes R
Aggregate function to a list of dataframes R

Time:09-21

I am trying to calculate the standard deviation of the mean of groups within a list of dataframes.

A small selection of my list of dataframes below

list(Fe = structure(list(Sample_No = c(80606, 80606, 80606, 80606, 
80606, 80769, 80769, 80769, 80769, 80769, 80850, 80850, 80850, 
80850, 80850, 80947, 80947, 80947, 80947, 80947, 81089, 81089, 
81089, 81089, 81089, 81753, 81753, 81753, 81753, 81753, 80601, 
80601, 80601, 80601, 80601, 81001, 81001, 81001, 81001, 81001, 
81493, 81493, 81493, 81493, 81493, 81760, 81760, 81760, 81760, 
81760), Rep_No = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 
5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 
1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5), Result = c(53.83, 
53.89, 53.87, 53.95, 54.04, 54.11, 54, 53.98, 54.02, 53.99, 54.08, 
54.04, 54.07, 54.14, 54.13, 54.19, 54.05, 54.04, 54.04, 54.07, 
54.14, 54.07, 54.08, 54.14, 54.09, 54.12, 54.07, 54.04, 54.03, 
54.12, 54.06, 54.17, 54.06, 54.09, 54.11, 54.13, 54.02, 54.22, 
54.08, 54.04, 54.05, 54.06, 54.17, 54, 54.06, 54.05, 54.03, 54.09, 
54.14, 54.02)), class = "data.frame", row.names = c(NA, -50L)), 
    SiO2 = structure(list(Sample_No = c(80606, 80606, 80606, 
    80606, 80606, 80769, 80769, 80769, 80769, 80769, 80850, 80850, 
    80850, 80850, 80850, 80947, 80947, 80947, 80947, 80947, 81089, 
    81089, 81089, 81089, 81089, 81753, 81753, 81753, 81753, 81753, 
    80601, 80601, 80601, 80601, 80601, 81001, 81001, 81001, 81001, 
    81001, 81493, 81493, 81493, 81493, 81493, 81760, 81760, 81760, 
    81760, 81760), Rep_No = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 
    2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 
    1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 
    5), Result = c(11.82, 11.76, 11.79, 11.71, 11.64, 11.63, 
    11.67, 11.71, 11.73, 11.78, 11.63, 11.62, 11.67, 11.66, 11.68, 
    11.65, 11.68, 11.68, 11.69, 11.65, 11.62, 11.61, 11.61, 11.57, 
    11.61, 11.63, 11.67, 11.67, 11.68, 11.6, 11.61, 11.57, 11.66, 
    11.64, 11.61, 11.58, 11.63, 11.53, 11.59, 11.64, 11.69, 11.68, 
    11.64, 11.72, 11.64, 11.68, 11.62, 11.69, 11.62, 11.69)), class = "data.frame", row.names = c(NA, 
    -50L)), Al2O3 = structure(list(Sample_No = c(80606, 80606, 
    80606, 80606, 80606, 80769, 80769, 80769, 80769, 80769, 80850, 
    80850, 80850, 80850, 80850, 80947, 80947, 80947, 80947, 80947, 
    81089, 81089, 81089, 81089, 81089, 81753, 81753, 81753, 81753, 
    81753, 80601, 80601, 80601, 80601, 80601, 81001, 81001, 81001, 
    81001, 81001, 81493, 81493, 81493, 81493, 81493, 81760, 81760, 
    81760, 81760, 81760), Rep_No = c(1, 2, 3, 4, 5, 1, 2, 3, 
    4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 
    3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 
    2, 3, 4, 5), Result = c(2.73, 2.74, 2.72, 2.71, 2.71, 2.7, 
    2.68, 2.71, 2.7, 2.69, 2.71, 2.7, 2.69, 2.71, 2.7, 2.68, 
    2.69, 2.69, 2.69, 2.68, 2.69, 2.69, 2.69, 2.69, 2.69, 2.69, 
    2.71, 2.7, 2.7, 2.7, 2.71, 2.69, 2.7, 2.72, 2.71, 2.71, 2.71, 
    2.68, 2.71, 2.69, 2.72, 2.71, 2.71, 2.71, 2.7, 2.7, 2.7, 
    2.7, 2.68, 2.71)), class = "data.frame", row.names = c(NA, 
    -50L)))

I have the following code that works on a single dataframe

df <- df_list[["Fe"]] 
df_sd <- lapply(aggregate(df$Result, list(df$Sample_No), FUN=mean),sd)


which produces the desired outcome for a single dataframe

I have tried

df_list_sd <- lapply(aggregate(df_list$Result, list(df_list$Sample_No), FUN=mean),sd)

df_list_sd <- lapply(lapply(df_list, function(x) {aggregate(Result ~ Sample_No, data = x, mean) })),x sd)

But I get the following error message Error: unexpected ',' in "lapply(lapply(df_list, function(x) {aggregate(Result ~ Sample_No, data = x, mean) })),"

I am not quite sure how to fix the syntax/code

CodePudding user response:

We can use anonymous function

out <- lapply(df_list, function(dat) 
     aggregate(Result ~ Sample_No, FUN = mean, data = dat))

-output

out
$Fe
   Sample_No Result
1      80601 54.098
2      80606 53.916
3      80769 54.020
4      80850 54.092
5      80947 54.078
6      81001 54.098
7      81089 54.104
8      81493 54.068
9      81753 54.076
10     81760 54.066

$SiO2
   Sample_No Result
1      80601 11.618
2      80606 11.744
3      80769 11.704
4      80850 11.652
5      80947 11.670
6      81001 11.594
7      81089 11.604
8      81493 11.674
9      81753 11.650
10     81760 11.660

$Al2O3
   Sample_No Result
1      80601  2.706
2      80606  2.722
3      80769  2.696
4      80850  2.702
5      80947  2.686
6      81001  2.700
7      81089  2.690
8      81493  2.710
9      81753  2.700
10     81760  2.698

To find the sd of 'Result`

sapply(out, function(x) sd(x$Result))
   Fe       SiO2      Al2O3 
0.05660624 0.04548260 0.01016530 

Or using tidyverse

library(dplyr)
library(purrr)
 map_dbl(df_list, ~ .x %>%
         group_by(Sample_No) %>%
         summarise(Result = mean(Result), .groups = 'drop') %>% 
         summarise(sd = sd(Result)) %>% 
         pull(sd))
        Fe       SiO2      Al2O3 
0.05660624 0.04548260 0.01016530 

CodePudding user response:

library(tidyverse)

df_list %>%
  bind_rows(.id = 'group') %>%
  group_by(group, Sample_No) %>%
  summarise(res = mean(Result), .groups = 'drop_last') %>%
  summarise(res = sd(res), .groups = 'drop')

# A tibble: 3 x 2
  group    res
  <chr>  <dbl>
1 Al2O3 0.0102
2 Fe    0.0566
3 SiO2  0.0455
  • Related