How to add median and standard deviation for each condition-CodePudding

My data look like this

df<-structure(list(Data = c("P718", "Trans17", "Ham8Y"), Rep1_Condition = c(30100000, 
1.11e 10, 2.05e 09), Rep2_Condition = c(37700000, 1.3e 10, 3.52e 09
), Rep3_Condition = c(4.27e 10, 0, 1.75e 10), Rep1_H1 = c(4.49e 08, 
1.28e 11, 5.5e 10), Rep2_H1 = c(4.31e 08, 1.5e 11, 6.38e 10), 
    Rep3_H1 = c(3.89e 10, 0, 1.89e 10), Rep1_H2 = c(4.07e 08, 
    1.24e 11, 4.41e 10), Rep2_H2 = c(1.98e 08, 8.21e 10, 3.14e 10
    ), Rep3_H2 = c(1.75e 10, 0, 1.46e 09)), class = "data.frame", row.names = c(NA, 
-3L))

I want to add median and SD for each 3 replicate for example

P718  3.01e 07  3.77e 07  4.27e 10

The value can be next to it

Something like this structure

out<- structure(list(Data = c("P718", "Trans17", "Ham8Y"), Rep1_Condition = c(30100000, 
1.11e 10, 2.05e 09), Rep2_Condition = c(37700000, 1.3e 10, 3.52e 09
), Rep3_Condition = c(4.27e 10, 0, 1.75e 10), Condition_median = c(NA, 
NA, NA), Condition_SD = c(NA, NA, NA), Rep1_H1 = c(4.49e 08, 
1.28e 11, 5.5e 10), Rep2_H1 = c(4.31e 08, 1.5e 11, 6.38e 10), 
    Rep3_H1 = c(3.89e 10, 0, 1.89e 10), H1_Median = c(NA, NA, 
    NA), H1_SD = c(NA, NA, NA), Rep1_H2 = c(4.07e 08, 1.24e 11, 
    4.41e 10), Rep2_H2 = c(1.98e 08, 8.21e 10, 3.14e 10), Rep3_H2 = c(1.75e 10, 
    0, 1.46e 09), H2_Median = c(NA, NA, NA), H2_SD = c(NA, NA, 
    NA)), class = "data.frame", row.names = c(NA, -3L))

CodePudding user response：

One option is to reshape to 'long' format with pivot_longer excluding the 'Data' column, then grouped by 'Data', mutate across the numeric columns to create two additional columns median, SD (as there are multiple columns, this returns as suffix names when we apply the functions in a named list), finally, reshape back to 'wide' format with pivot_wider (

library(dplyr)
library(tidyr)
out2 <- df %>% 
   pivot_longer(cols = -Data, names_to = c("grp", ".value"), 
     names_sep = "_") %>% 
   group_by(Data) %>%
   mutate(across(where(is.numeric), 
     list(median= ~ median(.x), SD = ~ sd(.x)))) %>% 
   ungroup %>%
   pivot_wider(names_from = grp, values_from = Condition:H2)

-checking

> out2$Condition_median
[1] 3.77e 07 1.11e 10 3.52e 09
> matrixStats::rowMedians(as.matrix(df[2:4]))
[1] 3.77e 07 1.11e 10 3.52e 09

-output

> out2
# A tibble: 3 × 16
  Data    Condition_median Condition_SD    H1_median     H1_SD H2_median   H2_SD Condition_Rep1 Condition_Rep2 Condition_Rep3 H1_Rep1 H1_Rep2 H1_Rep3
  <chr>              <dbl>        <dbl>        <dbl>     <dbl>     <dbl>   <dbl>          <dbl>          <dbl>          <dbl>   <dbl>   <dbl>   <dbl>
1 P718            37700000 24633284613.    449000000   2.22e10   4.07e 8 9.93e 9       30100000       37700000    42700000000 4.49e 8 4.31e 8 3.89e10
2 Trans17      11100000000  7021633238. 128000000000   8.10e10   8.21e10 6.31e10    11100000000    13000000000              0 1.28e11 1.5 e11 0      
3 Ham8Y         3520000000  8527443931.  55000000000   2.38e10   3.14e10 2.19e10     2050000000     3520000000    17500000000 5.5 e10 6.38e10 1.89e10
# … with 3 more variables: H2_Rep1 <dbl>, H2_Rep2 <dbl>, H2_Rep3 <dbl>