My data look like this
df<-structure(list(Data = c("P718", "Trans17", "Ham8Y"), Rep1_Condition = c(30100000,
1.11e 10, 2.05e 09), Rep2_Condition = c(37700000, 1.3e 10, 3.52e 09
), Rep3_Condition = c(4.27e 10, 0, 1.75e 10), Rep1_H1 = c(4.49e 08,
1.28e 11, 5.5e 10), Rep2_H1 = c(4.31e 08, 1.5e 11, 6.38e 10),
Rep3_H1 = c(3.89e 10, 0, 1.89e 10), Rep1_H2 = c(4.07e 08,
1.24e 11, 4.41e 10), Rep2_H2 = c(1.98e 08, 8.21e 10, 3.14e 10
), Rep3_H2 = c(1.75e 10, 0, 1.46e 09)), class = "data.frame", row.names = c(NA,
-3L))
I want to add median and SD for each 3 replicate for example
P718 3.01e 07 3.77e 07 4.27e 10
The value can be next to it
Something like this structure
out<- structure(list(Data = c("P718", "Trans17", "Ham8Y"), Rep1_Condition = c(30100000,
1.11e 10, 2.05e 09), Rep2_Condition = c(37700000, 1.3e 10, 3.52e 09
), Rep3_Condition = c(4.27e 10, 0, 1.75e 10), Condition_median = c(NA,
NA, NA), Condition_SD = c(NA, NA, NA), Rep1_H1 = c(4.49e 08,
1.28e 11, 5.5e 10), Rep2_H1 = c(4.31e 08, 1.5e 11, 6.38e 10),
Rep3_H1 = c(3.89e 10, 0, 1.89e 10), H1_Median = c(NA, NA,
NA), H1_SD = c(NA, NA, NA), Rep1_H2 = c(4.07e 08, 1.24e 11,
4.41e 10), Rep2_H2 = c(1.98e 08, 8.21e 10, 3.14e 10), Rep3_H2 = c(1.75e 10,
0, 1.46e 09), H2_Median = c(NA, NA, NA), H2_SD = c(NA, NA,
NA)), class = "data.frame", row.names = c(NA, -3L))
CodePudding user response:
One option is to reshape to 'long' format with pivot_longer
excluding the 'Data' column, then grouped by 'Data', mutate
across the numeric
columns to create two additional columns median
, SD
(as there are multiple columns, this returns as suffix names when we apply the functions in a named
list), finally, reshape back to 'wide' format with pivot_wider
(
library(dplyr)
library(tidyr)
out2 <- df %>%
pivot_longer(cols = -Data, names_to = c("grp", ".value"),
names_sep = "_") %>%
group_by(Data) %>%
mutate(across(where(is.numeric),
list(median= ~ median(.x), SD = ~ sd(.x)))) %>%
ungroup %>%
pivot_wider(names_from = grp, values_from = Condition:H2)
-checking
> out2$Condition_median
[1] 3.77e 07 1.11e 10 3.52e 09
> matrixStats::rowMedians(as.matrix(df[2:4]))
[1] 3.77e 07 1.11e 10 3.52e 09
-output
> out2
# A tibble: 3 × 16
Data Condition_median Condition_SD H1_median H1_SD H2_median H2_SD Condition_Rep1 Condition_Rep2 Condition_Rep3 H1_Rep1 H1_Rep2 H1_Rep3
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 P718 37700000 24633284613. 449000000 2.22e10 4.07e 8 9.93e 9 30100000 37700000 42700000000 4.49e 8 4.31e 8 3.89e10
2 Trans17 11100000000 7021633238. 128000000000 8.10e10 8.21e10 6.31e10 11100000000 13000000000 0 1.28e11 1.5 e11 0
3 Ham8Y 3520000000 8527443931. 55000000000 2.38e10 3.14e10 2.19e10 2050000000 3520000000 17500000000 5.5 e10 6.38e10 1.89e10
# … with 3 more variables: H2_Rep1 <dbl>, H2_Rep2 <dbl>, H2_Rep3 <dbl>