I have fish count data and am trying to create a new dataframe using averages of the measurements based on conditions of two different columns. here is my data:
df <- structure(list(SITE = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L), .Label = c("1", "2", "3"), class = "factor"),
ZONE = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("CREST", "INNER_FLAT", "MID_FLAT",
"OUTER_FLAT", "SLOPE"), class = "factor"), C_TOTAL = c(44L,
7L, 20L, 14L, 0L, 4L, 2L, 3L, 1L, 8L, 28L, 24L, 31L, 12L,
33L, 6L, 16L, 33L, 75L, 21L, 60L, 81L, 37L, 89L, 21L, 35L,
71L, 5L, 2L, 0L, 0L, 10L, 23L, 0L, 5L, 11L, 3L, 1L, 5L, 0L,
0L, 8L, 7L, 6L, 42L), C_M2 = c(0.210465706, 0.029861994,
0.090324177, 0.066599319, 0, 0.022092452, 0.011750593, 0.015245519,
0.004710433, 0.033111594, 0.155094195, 0.110576495, 0.193659068,
0.059152822, 0.192379108, 0.047800772, 0.08917095, 0.141336411,
0.402538785, 0.130438337, 0.315206235, 0.460746849, 0.278643938,
0.467754275, 0.192830321, 0.119928472, 0.411502497, 0.015370489,
0.005150184, 0, 0, 0.034651441, 0.067824733, 0, 0.009805851,
0.034844309, 0.010614352, 0.004131048, 0.01850898, 0, 0,
0.029195413, 0.021409016, 0.030498145, 0.172406074), TRANS_A = c(209.0601875,
234.411677, 221.4246571, 210.2123593, 226.6158348, 181.0573136,
170.2041767, 196.7791332, 212.294701, 241.6072127, 180.5354478,
217.0443184, 160.0751279, 202.8643689, 171.536298, 125.5209863,
179.4306337, 233.485481, 186.3174499, 160.9956132, 190.3515643,
175.801528, 132.7859497, 190.2708425, 108.9040348, 291.8406241,
172.5384427, 325.2986863, 388.3356059, 303.1957479, 261.1574528,
288.5882879, 339.1093313, 239.1118021, 509.89965, 315.6899993,
282.6362022, 242.0693453, 270.1391425, 294.8864591, 321.2013381,
274.0156514, 326.9650539, 196.7332763, 243.6109069), SCARID_T = c(35L,
4L, 4L, 13L, 0L, 4L, 2L, 0L, 1L, 4L, 20L, 12L, 17L, 5L, 20L,
6L, 6L, 18L, 63L, 11L, 41L, 75L, 34L, 89L, 14L, 33L, 68L,
0L, 0L, 0L, 0L, 10L, 22L, 0L, 0L, 10L, 0L, 0L, 1L, 0L, 0L,
6L, 0L, 4L, 42L), ACAN_T = c(4L, 0L, 11L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 8L, 5L, 0L, 0L, 0L, 0L, 3L, 2L, 7L, 8L, 8L, 1L,
1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 5L, 0L, 0L,
0L, 1L, 0L, 0L, 2L, 0L, 0L, 0L), SIG_T = c(5L, 3L, 5L, 1L,
0L, 0L, 0L, 3L, 0L, 4L, 0L, 7L, 14L, 7L, 13L, 0L, 7L, 13L,
5L, 2L, 11L, 5L, 2L, 0L, 7L, 1L, 3L, 5L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 3L, 1L, 3L, 0L, 0L, 0L, 7L, 2L, 0L)), row.names = c(NA,
-45L), class = "data.frame")
I want to average all the measurements by each zone, but also according to site. So I want anew data frame where each site has one measurement for each zone. Can anyone help me? Thanks!
CodePudding user response:
library(dplyr)
df %>%
group_by(SITE, ZONE) %>%
summarise(
across(where(is.numeric), mean)
)
# A tibble: 15 x 8
# Groups: SITE [3]
SITE ZONE C_TOTAL C_M2 TRANS_A SCARID_T ACAN_T SIG_T
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 CREST 20 0.0996 213. 12 4.33 3.67
2 1 INNER_FLAT 3 0.0111 265. 0.333 0.333 2.33
3 1 MID_FLAT 2.33 0.00684 339. 0 0 2.33
4 1 OUTER_FLAT 52 0.283 179. 38.3 7.67 6
5 1 SLOPE 23.7 0.110 222. 14.3 5 4.33
6 2 CREST 25.3 0.148 178. 14 0 11.3
7 2 INNER_FLAT 2.67 0.00973 297. 2 0.667 0
8 2 MID_FLAT 11 0.0342 296. 10.7 0.333 0
9 2 OUTER_FLAT 69 0.402 166. 66 0.667 2.33
10 2 SLOPE 6 0.0296 206. 5.67 0 0.333
11 3 CREST 18.3 0.0928 179. 10 1.67 6.67
12 3 INNER_FLAT 18.3 0.0748 256. 15.3 0 3
13 3 MID_FLAT 5.33 0.0149 355. 3.33 1.67 0.333
14 3 OUTER_FLAT 42.3 0.241 191. 38.3 0.333 3.67
15 3 SLOPE 2 0.0106 193. 1 0 1