Home > other >  tidyr summarize and mutate by multiple groups - calculation
tidyr summarize and mutate by multiple groups - calculation

Time:06-17

I have some data, see subset below. For each Method I want to calculate the difference in the mean Cq between the 2uL and 4 uL and the 4 uL and the 8uL.

I have a function to calculate the mean for each Method, grouped by Volume. But I can't figure out how to add another column with the difference. I think I might have to summarize the summ table, but I"m getting confused. Any help appreciated. Thanks

dat_summ<-
  dat %>%
  group_by(Volume,Method) %>%
  summarise(mean_Cq = mean(Cq,na.rm=T), sd_Cq=sd(Cq,na.rm=T),
            CV=(sd(Cq,na.rm=T)/mean(Cq,na.rm=T))*100)

what I want but know if wrong:

dat_summ<-
  dat %>%
  group_by(Volume,Method) %>%
  summarise(mean_Cq = mean(Cq,na.rm=T), sd_Cq=sd(Cq,na.rm=T),
            CV=(sd(Cq,na.rm=T)/mean(Cq,na.rm=T))*100) 
  **mutate(delta_doub=mean_Cq_for2uL-meanCq_for4uL)**

current output:

> dat_summ
# A tibble: 12 × 5
# Groups:   Volume [3]
   Volume Method mean_Cq sd_Cq    CV
   <chr>  <fct>    <dbl> <dbl> <dbl>
 1 2ul    2ew       20.0 0.295 1.47 
 2 2ul    3ew       21.9 1.79  8.18 
 3 2ul    Manual    22.2 0.248 1.12 
 4 2ul    WN2ew     20.5 0.604 2.94 
 5 4ul    2ew       19.3 0.278 1.44 
 6 4ul    3ew       21.2 1.33  6.29 
 7 4ul    Manual    22.2 0.139 0.627
 8 4ul    WN2ew     19.9 0.493 2.48 
 9 8ul    2ew       18.8 0.270 1.43 
10 8ul    3ew       20.8 1.21  5.81 
11 8ul    Manual    23.7 1.50  6.35 
12 8ul    WN2ew     19.5 0.463 2.38 



subset of dat:
    sample Method Volume    Cq
1   Sample 1    2ew    2ul 20.11
2   Sample 2    2ew    2ul 20.12
3   Sample 3    2ew    2ul 19.76
17  Sample 1  WN2ew    2ul 19.89
18  Sample 2  WN2ew    2ul 20.62
19  Sample 3  WN2ew    2ul 21.07
20  Sample 4  WN2ew    2ul 20.08
52  Sample 1    2ew    4ul 19.30
53  Sample 2    2ew    4ul 19.33
54  Sample 3    2ew    4ul 19.16
68  Sample 1  WN2ew    4ul 19.49
69  Sample 2  WN2ew    4ul 19.46
70  Sample 3  WN2ew    4ul 20.42
103 Sample 1    2ew    8ul 18.91
104 Sample 2    2ew    8ul 18.60
105 Sample 3    2ew    8ul 18.42
119 Sample 1  WN2ew    8ul 18.66
120 Sample 2  WN2ew    8ul 19.13
121 Sample 3  WN2ew    8ul 19.52

> dput(dat)
structure(list(sample = c("Sample 1", "Sample 2", "Sample 3", 
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9", 
"Sample 10", "Sample 11", "Sample 12", "Sample 13", "Sample 14", 
"Sample 15", "Sample 16", "Sample 1", "Sample 2", "Sample 3", 
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9", 
"Sample 10", "Sample 11", "Sample 12", "Sample 13", "Sample 14", 
"Sample 15", "Sample 16", "Sample 1", "Sample 2", "Sample 3", 
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 10", 
"Sample 11", "Sample 12", "Sample 13", "Sample 14", "Sample 15", 
"Sample 16", "Sample 1", "Sample 2", "Sample 3", "Sample 4", 
"Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6", 
"Sample 7", "Sample 8", "Sample 9", "Sample 10", "Sample 11", 
"Sample 12", "Sample 13", "Sample 14", "Sample 15", "Sample 16", 
"Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6", 
"Sample 7", "Sample 8", "Sample 9", "Sample 10", "Sample 11", 
"Sample 12", "Sample 13", "Sample 14", "Sample 15", "Sample 16", 
"Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6", 
"Sample 7", "Sample 8", "Sample 10", "Sample 11", "Sample 12", 
"Sample 13", "Sample 14", "Sample 15", "Sample 16", "Sample 1", 
"Sample 2", "Sample 3", "Sample 4", "Sample 1", "Sample 2", "Sample 3", 
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9", 
"Sample 10", "Sample 11", "Sample 12", "Sample 13", "Sample 14", 
"Sample 15", "Sample 16", "Sample 1", "Sample 2", "Sample 3", 
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9", 
"Sample 10", "Sample 11", "Sample 12", "Sample 13", "Sample 14", 
"Sample 15", "Sample 16", "Sample 1", "Sample 2", "Sample 3", 
"Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 10", 
"Sample 11", "Sample 12", "Sample 13", "Sample 14", "Sample 15", 
"Sample 16", "Sample 1", "Sample 2", "Sample 3", "Sample 4"), 
    Method = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
    3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("2ew", "3ew", 
    "Manual", "WN2ew"), class = "factor"), Volume = c("2ul", 
    "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", 
    "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", 
    "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", 
    "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", 
    "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", "2ul", 
    "2ul", "2ul", "2ul", "2ul", "2ul", "4ul", "4ul", "4ul", "4ul", 
    "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", 
    "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", 
    "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", 
    "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", 
    "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", "4ul", 
    "4ul", "4ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", 
    "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", 
    "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", 
    "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", 
    "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", 
    "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul", "8ul"), 
    Cq = c(20.11, 20.12, 19.76, 20.07, 20.19, 19.87, 20.33, 19.81, 
    20.15, 19.79, 19.67, 20.23, 19.9, 20.9, 19.93, 19.96, 19.89, 
    20.62, 21.07, 20.08, 21.32, 21.15, 21.07, 20.85, 21.16, 21.03, 
    20.79, 19.39, 20.25, 19.6, 20.14, 20.32, 26.35, 21.36, 21.67, 
    21.13, 21.28, 21.27, 21.36, 21.08, 20.56, 26.18, 21.31, 21.35, 
    21.06, 21.15, 21.28, 22.2, 22.18, 21.96, 22.56, 19.3, 19.33, 
    19.16, 19.27, 19.42, 19.16, 19.53, 19.1, 19.38, 19.08, 19.2, 
    19.44, 19.18, 20.11, 19.43, 18.81, 19.49, 19.46, 20.42, 19.21, 
    20.69, 20.39, 20.19, 20.13, 20.29, 20.49, 20.09, 19.19, 19.63, 
    19.27, 19.82, 19.76, 25.57, 20.45, 20.83, 20.68, 20.72, 21.25, 
    21.14, 21.06, 20.47, 22.51, 20.49, 20.9, 20.47, 20.24, 20.71, 
    22.09, 22.07, 22.13, 22.37, 18.91, 18.6, 18.42, 18.64, 19.14, 
    18.77, 18.77, 18.71, 19.39, 18.7, 18.67, 19.18, 18.79, 19.22, 
    18.73, NA, 18.66, 19.13, 19.52, 19.02, 20.25, 19.66, 19.78, 
    19.71, 19.89, 20.25, 19.47, 19.06, 19.49, 18.84, 19.27, 19.22, 
    24.97, 20.05, 20.33, 20.05, 20.59, 20.39, 20.08, 20.73, 20.3, 
    20.76, 21.12, 20.81, 20.22, 20.32, 20.69, 22.15, 25.2, 24.69, 
    22.63)), row.names = c(NA, -153L), class = "data.frame")

CodePudding user response:

If the mean delta should be for each 'Method', then create the column first grouped by 'Method' (or if it is based across all the Method, then we do not need any grouping), get the mean difference of 'Cq' where 'Volume is '2ul' and '4ul' respectively, use that in grouping for calculating the rest of the summarised columns

library(dplyr)
dat %>%
   group_by(Method) %>%
   mutate(delta_doub =mean(Cq[Volume == '2ul'], na.rm = TRUE) - 
                      mean(Cq[Volume=='4ul'], na.rm = TRUE) ) %>% 
   group_by(Volume, Method, delta_doub) %>% 
   summarise(mean_Cq = mean(Cq,na.rm=TRUE), sd_Cq=sd(Cq,na.rm=TRUE),
            CV=(sd(Cq,na.rm=TRUE)/mean(Cq,na.rm=TRUE))*100, .groups = "drop")

-output

# A tibble: 12 × 6
   Volume Method delta_doub mean_Cq sd_Cq    CV
   <chr>  <fct>       <dbl>   <dbl> <dbl> <dbl>
 1 2ul    2ew        0.743     20.0 0.295 1.47 
 2 2ul    3ew        0.727     21.9 1.79  8.18 
 3 2ul    Manual     0.0600    22.2 0.248 1.12 
 4 2ul    WN2ew      0.638     20.5 0.604 2.94 
 5 4ul    2ew        0.743     19.3 0.278 1.44 
 6 4ul    3ew        0.727     21.2 1.33  6.29 
 7 4ul    Manual     0.0600    22.2 0.139 0.627
 8 4ul    WN2ew      0.638     19.9 0.493 2.48 
 9 8ul    2ew        0.743     18.8 0.270 1.43 
10 8ul    3ew        0.727     20.8 1.21  5.81 
11 8ul    Manual     0.0600    23.7 1.50  6.35 
12 8ul    WN2ew      0.638     19.5 0.463 2.38 

Or it can be

dat %>%
  group_by(Volume,Method) %>% 
  summarise(mean_Cq = mean(Cq,na.rm=TRUE), sd_Cq=sd(Cq,na.rm=TRUE),
             CV=(sd(Cq,na.rm=TRUE)/mean(Cq,na.rm=TRUE))*100,
   .groups = 'drop') %>% 
  mutate(delta_doub_2_4 = mean(mean_Cq[Volume == '2ul']) - 
      mean(mean_Cq[Volume == '4ul']), 
      delta_doub_4_8 = mean(mean_Cq[Volume == '4ul']) - 
              mean(mean_Cq[Volume == '8ul']))

-output

# A tibble: 12 × 7
   Volume Method mean_Cq sd_Cq    CV delta_doub_2_4 delta_doub_4_8
   <chr>  <fct>    <dbl> <dbl> <dbl>          <dbl>          <dbl>
 1 2ul    2ew       20.0 0.295 1.47           0.542        -0.0443
 2 2ul    3ew       21.9 1.79  8.18           0.542        -0.0443
 3 2ul    Manual    22.2 0.248 1.12           0.542        -0.0443
 4 2ul    WN2ew     20.5 0.604 2.94           0.542        -0.0443
 5 4ul    2ew       19.3 0.278 1.44           0.542        -0.0443
 6 4ul    3ew       21.2 1.33  6.29           0.542        -0.0443
 7 4ul    Manual    22.2 0.139 0.627          0.542        -0.0443
 8 4ul    WN2ew     19.9 0.493 2.48           0.542        -0.0443
 9 8ul    2ew       18.8 0.270 1.43           0.542        -0.0443
10 8ul    3ew       20.8 1.21  5.81           0.542        -0.0443
11 8ul    Manual    23.7 1.50  6.35           0.542        -0.0443
12 8ul    WN2ew     19.5 0.463 2.38           0.542        -0.0443
  • Related