Home > Back-end >  format table to have mean (sd) instead of separate columns R
format table to have mean (sd) instead of separate columns R

Time:11-17

I Have a data frame of several water quality measures. For each measure I have a calculated mean and SD. I have a value for 6 sites and 4 seasons. Currently my dataframe has the means in a column for examples 'Temp_1' and then a column for the standard deviation as 'Temp_2'. I want to export the file with one column for each water quality measure with the format mean (SD).

current output

This is an example for the first water measure, but I'd like to code it so it is also done to remaining factors as well.

enter image description here

desired output

enter image description here

Head of dataframe

structure(list(season = structure(c(1L, 1L, 1L, 1L, 1L, 1L), levels = c("Winter", 
"Spring", "Summer", "Autumn"), class = "factor"), Site = structure(1:6, levels = c("1", 
"2", "3", "4", "5", "6"), class = "factor"), Temp_1 = c(7.2, 
7.05, 6.3, 6.25, 6.2, 5.4), Temp_2 = c(1.55563491861041, 1.90918830920368, 
1.69705627484771, 2.33345237791561, 2.40416305603426, 2.40416305603426
), pH_1 = c(7.435, 7.38, 7.52, 7.525, 7.38, 7.565), pH_2 = c(0.289913780286484, 
0.282842712474619, 0.0989949493661164, 0.120208152801713, 0.0565685424949239, 
0.261629509039023), DO_1 = c(9, 9.1, 8.25, 8.85, 9.25, 9), DO_2 = c(0, 
0.424264068711928, 0.0707106781186558, 0.494974746830583, 0.636396103067892, 
0.42426406871193), EC_1 = c(337.5, 333, 321.5, 322, 309, 300.5
), EC_2 = c(55.8614357137373, 41.0121933088198, 51.618795026618, 
32.5269119345812, 25.4558441227157, 30.4055915910215), SS_1 = c(5.945, 
3.65, 5.025, 2.535, 10.22, 4.595), SS_2 = c(0.728319984622144, 
1.06066017177982, 2.93449314192417, 0.473761543394987, 8.23072293301141, 
0.67175144212722), TP_1 = c(73.5, 75, 61.5, 66.5, 83, 87), TP_2 = c(3.53553390593274, 
12.7279220613579, 9.19238815542512, 6.36396103067893, 26.8700576850888, 
24.0416305603426), SRP_1 = c(19, 19, 10, 14, 13.5, 23.5), SRP_2 = c(2.82842712474619, 
1.4142135623731, 2.82842712474619, 0, 0.707106781186548, 3.53553390593274
), PP_1 = c(54.5, 56, 51.5, 52.5, 69.5, 63.5), PP_2 = c(6.36396103067893, 
11.3137084989848, 6.36396103067893, 6.36396103067893, 26.1629509039023, 
20.5060966544099), DA_1 = c(0.083, 0.0775, 0.0775, 0.044, 0.059, 
0.051), DA_2 = c(0.00282842712474619, 0.0120208152801713, 0.00919238815542513, 
0.0014142135623731, 0.0127279220613579, 0.00848528137423857), 
    DNI_1 = c(0.048739437, 0.041015562, 0.0617723365, 0.0337441755, 
    0.041480944, 0.0143461675), DNI_2 = c(0.0345079125942686, 
    0.0223312453226695, 0.0187360224120165, 0.0162032493604065, 
    0.0258169069873252, 0.0202885446465761), DNA_1 = c(20.43507986, 
    20.438919615, 14.98692746, 19.953408625, 17.03060377, 8.5767502525
    ), DNA_2 = c(1.80288106961836, 1.2687128010491, 2.28839365291436, 
    1.03116172040732, 0.396528484042397, 1.72350828181138), DF_1 = c(0.0992379715, 
    0.0947268395, 0.094323125, 0.098064875, 0.0980304675, 0.085783911
    ), DF_2 = c(0.00372072305060515, 0.00724914346231915, 0.0142932471712976, 
    0.0116895470668939, 0.00255671780854136, 0.00830519117656529
    ), DC_1 = c(12.18685357, 12.73924378, 13.09550326, 13.417557825, 
    15.140975265, 21.429763715), DC_2 = c(0.57615880774946, 0.0430071960969884, 
    0.702539578486863, 0.134642528587041, 0.66786605299916, 0.17012889453292
    ), DS_1 = c(15.834380095, 15.69623116, 14.37636388, 15.444235935, 
    14.647596185, 11.9877372), DS_2 = c(1.67153135346354, 1.69978765863781, 
    2.47560570280853, 1.03831263471691, 1.24488755930594, 0.975483163720397
    ), DOC_1 = c(19.74, 20.08, 21.24, 20.34, 21.88, 24.92), DOC_2 = c(2.7435743110038, 
    1.69705627484772, 2.60215295476649, 1.04651803615609, 0.226274169979695, 
    0.452548339959388)), row.names = c(NA, 6L), class = "data.frame")

CodePudding user response:

Using mutate across with some tricks to organize paired data we can do it this way. Further adaptation is possible (for example just to keep the mean_sd columns (just use transmute instead of mutate):

Update:

library(dplyr)
library(stringr)

df %>%
  mutate(across(-c(season, Site), ~round(.,2))) %>% 
  mutate(across(ends_with('_1'), ~ paste0(.,
                                         "(",
                  get(str_replace(cur_column(), "_1$", "_2")),
                  ")"
                  ), .names = "mean_sd_{.col}")) %>%  
  rename_at(vars(starts_with('mean_sd')), ~ str_remove(., "\\_1"))
 season Site Temp_1 Temp_2 pH_1 pH_2 DO_1 DO_2  EC_1  EC_2  SS_1 SS_2 TP_1  TP_2 SRP_1 SRP_2 PP_1  PP_2 DA_1 DA_2 DNI_1 DNI_2 DNA_1 DNA_2 DF_1
1 Winter    1   7.20   1.56 7.43 0.29 9.00 0.00 337.5 55.86  5.94 0.73 73.5  3.54  19.0  2.83 54.5  6.36 0.08 0.00  0.05  0.03 20.44  1.80 0.10
2 Winter    2   7.05   1.91 7.38 0.28 9.10 0.42 333.0 41.01  3.65 1.06 75.0 12.73  19.0  1.41 56.0 11.31 0.08 0.01  0.04  0.02 20.44  1.27 0.09
3 Winter    3   6.30   1.70 7.52 0.10 8.25 0.07 321.5 51.62  5.03 2.93 61.5  9.19  10.0  2.83 51.5  6.36 0.08 0.01  0.06  0.02 14.99  2.29 0.09
4 Winter    4   6.25   2.33 7.53 0.12 8.85 0.49 322.0 32.53  2.54 0.47 66.5  6.36  14.0  0.00 52.5  6.36 0.04 0.00  0.03  0.02 19.95  1.03 0.10
5 Winter    5   6.20   2.40 7.38 0.06 9.25 0.64 309.0 25.46 10.22 8.23 83.0 26.87  13.5  0.71 69.5 26.16 0.06 0.01  0.04  0.03 17.03  0.40 0.10
6 Winter    6   5.40   2.40 7.57 0.26 9.00 0.42 300.5 30.41  4.60 0.67 87.0 24.04  23.5  3.54 63.5 20.51 0.05 0.01  0.01  0.02  8.58  1.72 0.09
  DF_2  DC_1 DC_2  DS_1 DS_2 DOC_1 DOC_2 mean_sd_Temp mean_sd_pH mean_sd_DO   mean_sd_EC  mean_sd_SS mean_sd_TP mean_sd_SRP  mean_sd_PP mean_sd_DA
1 0.00 12.19 0.58 15.83 1.67 19.74  2.74    7.2(1.56) 7.43(0.29)       9(0) 337.5(55.86)  5.94(0.73) 73.5(3.54)    19(2.83)  54.5(6.36)    0.08(0)
2 0.01 12.74 0.04 15.70 1.70 20.08  1.70   7.05(1.91) 7.38(0.28)  9.1(0.42)   333(41.01)  3.65(1.06)  75(12.73)    19(1.41)   56(11.31) 0.08(0.01)
3 0.01 13.10 0.70 14.38 2.48 21.24  2.60     6.3(1.7)  7.52(0.1) 8.25(0.07) 321.5(51.62)  5.03(2.93) 61.5(9.19)    10(2.83)  51.5(6.36) 0.08(0.01)
4 0.01 13.42 0.13 15.44 1.04 20.34  1.05   6.25(2.33) 7.53(0.12) 8.85(0.49)   322(32.53)  2.54(0.47) 66.5(6.36)       14(0)  52.5(6.36)    0.04(0)
5 0.00 15.14 0.67 14.65 1.24 21.88  0.23     6.2(2.4) 7.38(0.06) 9.25(0.64)   309(25.46) 10.22(8.23)  83(26.87)  13.5(0.71) 69.5(26.16) 0.06(0.01)
6 0.01 21.43 0.17 11.99 0.98 24.92  0.45     5.4(2.4) 7.57(0.26)    9(0.42) 300.5(30.41)   4.6(0.67)  87(24.04)  23.5(3.54) 63.5(20.51) 0.05(0.01)
  mean_sd_DNI mean_sd_DNA mean_sd_DF  mean_sd_DC  mean_sd_DS mean_sd_DOC
1  0.05(0.03)  20.44(1.8)     0.1(0) 12.19(0.58) 15.83(1.67) 19.74(2.74)
2  0.04(0.02) 20.44(1.27) 0.09(0.01) 12.74(0.04)   15.7(1.7)  20.08(1.7)
3  0.06(0.02) 14.99(2.29) 0.09(0.01)   13.1(0.7) 14.38(2.48)  21.24(2.6)
4  0.03(0.02) 19.95(1.03)  0.1(0.01) 13.42(0.13) 15.44(1.04) 20.34(1.05)
5  0.04(0.03)  17.03(0.4)     0.1(0) 15.14(0.67) 14.65(1.24) 21.88(0.23)
6  0.01(0.02)  8.58(1.72) 0.09(0.01) 21.43(0.17) 11.99(0.98) 24.92(0.45)

First answer: We could do this like so:

library(dplyr)

df %>% mutate(mean_sd = paste0(Temp_1, " (", round(Temp_2,2), ")"), .before=5)
  season Site Temp_1   Temp_2     mean_sd  pH_1       pH_2 DO_1       DO_2  EC_1     EC_2   SS_1      SS_2 TP_1      TP_2 SRP_1     SRP_2 PP_1
1 Winter    1   7.20 1.555635  7.2 (1.56) 7.435 0.28991378 9.00 0.00000000 337.5 55.86144  5.945 0.7283200 73.5  3.535534  19.0 2.8284271 54.5
2 Winter    2   7.05 1.909188 7.05 (1.91) 7.380 0.28284271 9.10 0.42426407 333.0 41.01219  3.650 1.0606602 75.0 12.727922  19.0 1.4142136 56.0
3 Winter    3   6.30 1.697056   6.3 (1.7) 7.520 0.09899495 8.25 0.07071068 321.5 51.61880  5.025 2.9344931 61.5  9.192388  10.0 2.8284271 51.5
4 Winter    4   6.25 2.333452 6.25 (2.33) 7.525 0.12020815 8.85 0.49497475 322.0 32.52691  2.535 0.4737615 66.5  6.363961  14.0 0.0000000 52.5
5 Winter    5   6.20 2.404163   6.2 (2.4) 7.380 0.05656854 9.25 0.63639610 309.0 25.45584 10.220 8.2307229 83.0 26.870058  13.5 0.7071068 69.5
6 Winter    6   5.40 2.404163   5.4 (2.4) 7.565 0.26162951 9.00 0.42426407 300.5 30.40559  4.595 0.6717514 87.0 24.041631  23.5 3.5355339 63.5
       PP_2   DA_1        DA_2      DNI_1      DNI_2    DNA_1     DNA_2       DF_1        DF_2     DC_1      DC_2     DS_1      DS_2 DOC_1
1  6.363961 0.0830 0.002828427 0.04873944 0.03450791 20.43508 1.8028811 0.09923797 0.003720723 12.18685 0.5761588 15.83438 1.6715314 19.74
2 11.313708 0.0775 0.012020815 0.04101556 0.02233125 20.43892 1.2687128 0.09472684 0.007249143 12.73924 0.0430072 15.69623 1.6997877 20.08
3  6.363961 0.0775 0.009192388 0.06177234 0.01873602 14.98693 2.2883937 0.09432312 0.014293247 13.09550 0.7025396 14.37636 2.4756057 21.24
4  6.363961 0.0440 0.001414214 0.03374418 0.01620325 19.95341 1.0311617 0.09806487 0.011689547 13.41756 0.1346425 15.44424 1.0383126 20.34
5 26.162951 0.0590 0.012727922 0.04148094 0.02581691 17.03060 0.3965285 0.09803047 0.002556718 15.14098 0.6678661 14.64760 1.2448876 21.88
6 20.506097 0.0510 0.008485281 0.01434617 0.02028854  8.57675 1.7235083 0.08578391 0.008305191 21.42976 0.1701289 11.98774 0.9754832 24.92
      DOC_2
1 2.7435743
2 1.6970563
3 2.6021530
4 1.0465180
5 0.2262742
6 0.4525483

CodePudding user response:

You can create a new column like this

df$Temp <- paste0(df$Temp_1, ' (', df$Temp_2, ')')

And select only the desired output columns

df[, c('season', 'Site', 'Temp')]

CodePudding user response:

library(tidyverse)

df %>%  
  pivot_longer(-c(season, Site)) %>%  
  mutate(name = name %>% str_remove_all("[^a-zA-Z]")) %>%  
  group_by(season, Site, name) %>%
  summarise(value = str_c(round(value, 2), collapse = ", ")) %>% 
  pivot_wider(names_from = name, 
              values_from = value) 

# A tibble: 6 x 17
# Groups:   season, Site [6]
  season Site  DA         DC          DF         DNA         DNI        DO       DOC   DS    EC    pH    PP    SRP   SS    Temp  TP   
  <fct>  <fct> <chr>      <chr>       <chr>      <chr>       <chr>      <chr>    <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Winter 1     0.08, 0    12.19, 0.58 0.1, 0     20.44, 1.8  0.05, 0.03 9, 0     19.7~ 15.8~ 337.~ 7.43~ 54.5~ 19, ~ 5.94~ 7.2,~ 73.5~
2 Winter 2     0.08, 0.01 12.74, 0.04 0.09, 0.01 20.44, 1.27 0.04, 0.02 9.1, 0.~ 20.0~ 15.7~ 333,~ 7.38~ 56, ~ 19, ~ 3.65~ 7.05~ 75, ~
3 Winter 3     0.08, 0.01 13.1, 0.7   0.09, 0.01 14.99, 2.29 0.06, 0.02 8.25, 0~ 21.2~ 14.3~ 321.~ 7.52~ 51.5~ 10, ~ 5.03~ 6.3,~ 61.5~
4 Winter 4     0.04, 0    13.42, 0.13 0.1, 0.01  19.95, 1.03 0.03, 0.02 8.85, 0~ 20.3~ 15.4~ 322,~ 7.53~ 52.5~ 14, 0 2.54~ 6.25~ 66.5~
5 Winter 5     0.06, 0.01 15.14, 0.67 0.1, 0     17.03, 0.4  0.04, 0.03 9.25, 0~ 21.8~ 14.6~ 309,~ 7.38~ 69.5~ 13.5~ 10.2~ 6.2,~ 83, ~
6 Winter 6     0.05, 0.01 21.43, 0.17 0.09, 0.01 8.58, 1.72  0.01, 0.02 9, 0.42  24.9~ 11.9~ 300.~ 7.57~ 63.5~ 23.5~ 4.6,~ 5.4,~ 87, ~
  • Related