I have data that is organized by ID value. Each ID value has up to 1-6 rows of data. I want to calculate the total Volume and the average Volume of each ID value, and then force it all into one row.
Here's the data:
> dput(head(COMPLETED_DATASET_allsizes, n = 150))
structure(list(PVC = c("6,056,589", "6,056,589", "6,056,589",
"6,013,925", "6,013,925", "6,013,925", "6,034,050", "6,034,050",
"6,034,050", "6070852", "6070852", "6070852", "6070862", "6014535",
"6014326", "6013176", "6013176", "6013176", "AF6", "AF6", "AF6",
"6019049", "6019049", "6008710", "6008710", "6013198", "601321_",
"AF600", "AF600", "AF600", "6020617", "6020617", "6020617", "6008806",
"6008806", "6021296", "6021296", "6021296", "6014829", "6014829",
"6013907", "6013907", "6013907", "601404_", "601404_", "6013766",
"6013766", "6013766", "6034421", "6034421", "6034421", "20F",
"ZHD", "ZHD", "X11", "X11", "28R", "28R", "37V", "UVX", "37V",
"HY3", "HY3", "6,014,837", "ZWJ", "ZWJ", "ZWJ", "BX4", "BX4",
"BX4", "BHD", "BHD", "BHD", "ACX", "ACX", "ACX", "XXS", "XXS",
"XXS", "6,005,684", "6,005,684", "6,005,684", "BHX", "BHX", "BHX",
"SP3", "SP3", "SP3", "B1A", "B1A", "B1A", "B5F", "B5F", "B5F",
"BJN", "BJN", "BJN", "AW5", "AW5", "AW5", "HNU", "HNU", "HNU",
"BSP", "BSP", "BSP", "FW2", "FW2", "ANB", "ANB", "ANB", "SJM",
"SJM", "FSR", "FSR", "FSR", "UYF", "UYF", "2HZ", "2HZ", "F1X",
"F1X", "FFR", "FFR", "FFR", "HX1", "HX1", "SUL", "SUL", "SF0",
"SF0", "SF0", "SHJ", "SHJ", "SHJ", "HRJ", "HRJ", "AP8", "AP8",
"J4H", "J4H", "XLV", "XLV", "XLV", "H94", "H94", "S2Y", "S2Y",
"S2Y", "UU9"), Egg_order = c("1", "2", "3", "1", "2", "3", "1",
"2", "3", "9", "9", "9", "9", "9", "9", "1", "2", "3", "1", "2",
"3", "1", "2", "9", "9", "1", "1", "9", "9", "9", "1", "2", "3",
"9", "9", "9", "9", "9", "1", "2", "9", "9", "9", "1", "2", "9",
"9", "9", "9", "9", "9", "1", "9", "9", "9", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9", "9", "9", "1", "2", "3", "3", "2",
"1", "9", "9", "3", "9", "3", "9", "1", "2", "3", "9", "9", "9",
"9", "9", "3", "3", "9", "9", "9", "9", "9", "9", "9", "3", "1",
"2", "3", "3", "9", "9", "9", "9", "9", "2", "1", "9", "9", "9",
"1", "2", "9", "9", "3", "9", "9", "1", "2", "2", "1", "9", "9",
"9", "9", "9", "1", "2", "9", "9", "3", "2", "3", "1", "9", "9",
"1", "2", "2", "1", "1", "2", "3", "9", "9", "9", "9", "3", "3"
), Volume = c(62.00468911, 63.19034545, 64.16343369, 62.77290704,
62.64955122, 56.63460056, 49.49860059, 57.88686491, 55.4345925,
57.89358963, 58.12847589, 51.49938302, 58.49927302, 57.34171155,
54.93179064, 64.56208923, 62.27948266, 56.38218371, 60.06696521,
62.21206032, 62.90496759, 59.13580929, 57.13564917, 59.70944308,
55.19527717, 64.84888743, 67.34476853, 65.0132184, 59.66593229,
62.01691353, 55.27413286, 54.68206904, 54.3717803, 57.36582528,
60.15748095, 55.06099115, 59.07830469, 56.82857152, 62.76590186,
55.36801363, 71.70516332, 65.4342034, 59.83429871, 59.18484744,
57.34744503, 56.11336458, 65.73279116, 57.95890035, 55.09798577,
58.94565679, 59.97265877, 58.93118052, 67.62983812, 62.45908065,
61.78370028, 60.67968894, 61.49240694, 60.85435534, 57.82379732,
52.64023837, 53.7619782, 65.19952241, 62.84622159, 66.40863935,
57.62326609, 53.89348305, 47.2993185, 61.48633419, 66.30863861,
58.42476707, 53.26130145, 63.20292711, 62.49620272, 61.32741787,
62.72989825, 51.11998856, 62.26934629, 58.56945328, 60.3546269,
55.01982539, 62.3489285, 53.77593518, 56.49758306, 63.53920939,
64.94851437, 62.25274976, 64.39375777, 54.22321067, 51.28730416,
58.31060124, 55.25169993, 58.25732223, 57.18221296, 56.44838126,
53.77395184, 57.43319075, 52.95671938, 61.9705976, 57.10131146,
54.57651784, 59.66268577, 62.44376038, 59.27393481, 55.46520431,
54.98693888, 64.79370574, 55.48500646, 54.60260849, 64.98274004,
63.29243525, 61.18560498, 26.13699345, 23.31596147, 64.26879934,
65.30612915, 57.39204739, 57.32271224, 53.93674759, 60.36058271,
55.62742343, 57.91773772, 61.03729104, 54.03165992, 60.84743965,
59.86017015, 59.90175385, 56.49922666, 63.50802185, 65.16800489,
64.94391206, 67.2387067, 59.05588275, 57.91108456, 54.43705009,
59.39818285, 63.9228549, 56.78342653, 54.58236138, 60.47677836,
60.82957858, 60.51827256, 58.34245187, 57.28814624, 54.81098523,
54.26852328, 52.43400015, 57.57770424, 60.09658763, 56.22640525,
59.19068945), Clutch = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L,
3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L), Year = c(1996L,
1996L, 1996L, 1996L, 1996L, 1996L, 1996L, 1996L, 1996L, 1997L,
1997L, 1997L, 1998L, 1998L, 1998L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L), egg_status = c("complete",
"complete", "complete", "complete", "complete", "complete", "complete",
"complete", "complete", "none", "none", "none", "none", "none",
"none", "complete", "complete", "complete", "complete", "complete",
"complete", "complete", "complete", "none", "none", "complete",
"complete", "none", "none", "none", "complete", "complete", "complete",
"none", "none", "none", "none", "none", "complete", "complete",
"none", "none", "none", "complete", "complete", "none", "none",
"none", "none", "none", "none", "complete", "none", "none", "none",
"none", "none", "none", "none", "none", "none", "none", "none",
"none", "none", "none", "none", "complete", "complete", "complete",
"complete", "complete", "complete", "estimated", "estimated",
"estimated", "estimated", "estimated", "estimated", "complete",
"complete", "complete", "none", "none", "none", "estimated",
"estimated", "estimated", "estimated", "estimated", "estimated",
"none", "none", "none", "estimated", "estimated", "estimated",
"complete", "complete", "complete", "estimated", "estimated",
"estimated", "none", "none", "none", "complete", "complete",
"none", "none", "none", "complete", "complete", "estimated",
"estimated", "estimated", "none", "none", "complete", "complete",
"complete", "complete", "none", "none", "none", "none", "none",
"complete", "complete", "estimated", "estimated", "estimated",
"complete", "complete", "complete", "none", "none", "complete",
"complete", "complete", "complete", "complete", "complete", "complete",
"none", "none", "estimated", "estimated", "estimated", "complete"
), estimated_Egg_order = c("1", "2", "3", "1", "2", "3", "1",
"2", "3", "9", "9", "9", "9", "9", "9", "1", "2", "3", "1", "2",
"3", "1", "2", "9", "9", "1", "1", "9", "9", "9", "1", "2", "3",
"9", "9", "9", "9", "9", "1", "2", "9", "9", "9", "1", "2", "9",
"9", "9", "9", "9", "9", "1", "9", "9", "9", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9", "9", "9", "1", "2", "3", "3", "2",
"1", "1", "2", "3", "1", "3", "2", "1", "2", "3", "9", "9", "9",
"1", "2", "3", "3", "1", "2", "9", "9", "9", "1", "2", "3", "1",
"2", "3", "3", "1", "2", "9", "9", "9", "2", "1", "9", "9", "9",
"1", "2", "1", "2", "3", "9", "9", "1", "2", "2", "1", "9", "9",
"9", "9", "9", "1", "2", "1", "2", "3", "2", "3", "1", "9", "9",
"1", "2", "2", "1", "1", "2", "3", "9", "9", "1", "2", "3", "3"
), Edat = c(7L, 7L, 7L, 9L, 9L, 9L, 4L, 4L, 4L, 6L, 6L, 6L, 7L,
2L, 1L, 13L, 13L, 13L, 11L, 11L, 11L, 12L, 12L, 14L, 14L, 13L,
13L, 11L, 11L, 11L, 12L, 12L, 12L, 14L, 14L, 9L, 9L, 9L, 13L,
13L, 12L, 12L, 12L, 13L, 13L, 12L, 12L, 12L, 9L, 9L, 9L, 4L,
5L, 5L, 6L, 6L, 4L, 4L, 3L, 7L, 3L, 9L, 9L, 15L, 5L, 5L, 5L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 5L, 5L, 5L, 16L,
16L, 16L, 13L, 13L, 13L, 8L, 8L, 8L, 13L, 13L, 13L, 9L, 9L, 9L,
13L, 13L, 13L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L,
13L, 13L, 8L, 8L, 10L, 10L, 10L, 7L, 7L, 4L, 4L, 13L, 13L, 9L,
9L, 9L, 12L, 12L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 11L,
11L, 11L, 11L, 6L, 6L, 6L, 10L, 10L, 8L, 8L, 8L, 7L), ID = c(378L,
378L, 378L, 362L, 362L, 362L, 370L, 370L, 370L, 383L, 383L, 383L,
434L, 426L, 416L, 511L, 511L, 511L, 499L, 499L, 499L, 459L, 459L,
458L, 458L, 521L, 532L, 501L, 501L, 501L, 470L, 470L, 470L, 500L,
500L, 481L, 481L, 481L, 576L, 576L, 554L, 554L, 554L, 565L, 565L,
543L, 543L, 543L, 492L, 492L, 492L, 695L, 722L, 722L, 706L, 706L,
800L, 800L, 734L, 700L, 734L, 802L, 802L, 737L, 727L, 727L, 727L,
771L, 771L, 771L, 763L, 763L, 763L, 742L, 742L, 742L, 715L, 715L,
715L, 735L, 735L, 735L, 764L, 764L, 764L, 815L, 815L, 815L, 754L,
754L, 754L, 759L, 759L, 759L, 765L, 765L, 765L, 752L, 752L, 752L,
791L, 791L, 791L, 769L, 769L, 769L, 785L, 785L, 748L, 748L, 748L,
814L, 814L, 781L, 781L, 781L, 702L, 702L, 732L, 732L, 774L, 774L,
777L, 777L, 777L, 799L, 799L, 817L, 817L, 810L, 810L, 810L, 813L,
813L, 813L, 795L, 795L, 749L, 749L, 803L, 803L, 712L, 712L, 712L,
787L, 787L, 807L, 807L, 807L, 697L)), row.names = c(NA, 150L), class = "data.frame")
Here is a picture of what I hope it will look like. This photo doesn't have the ID value but image if each row was a PVC value.
Thanks for the help!
CodePudding user response:
A dplyr
solution
library(tidyverse)
df %>%
group_by(PVC, Year, ID) %>%
summarise(totalV = sum(Volume),
averageV = mean(Volume, na.rm = TRUE),
Clutch = last(Clutch),
Edat = last(Edat), .groups = "drop")
# A tibble: 63 x 7
PVC Year ID totalV averageV Clutch Edat
<chr> <int> <int> <dbl> <dbl> <int> <int>
1 20F 2001 695 58.9 58.9 1 4
2 28R 2001 800 122. 61.2 2 4
3 2HZ 2001 732 116. 58.0 2 4
4 37V 2001 734 112. 55.8 2 3
5 6,005,684 2001 735 171. 57.0 3 16
6 6,013,925 1996 362 182. 60.7 3 9
7 6,014,837 2001 737 66.4 66.4 1 15
8 6,034,050 1996 370 163. 54.3 3 4
9 6,056,589 1996 378 189. 63.1 3 7
10 6008710 1999 458 115. 57.5 2 14
# ... with 53 more rows
CodePudding user response:
We can do this in one line with data.table, grouping by PVC and getting either the sum of Volume or Average. I'm assuming for Year, Clutch, and Edat all of the same PVC rows have the same values so we can use max or min there.
library(data.table)
dt <- data.table(COMPLETED_DATASET_allsizes)
dt1 <- dt[,.(totalV = sum(Volume), averageV = mean(Volume), Year = max(Year), Clutch = max(Clutch), Edat = max(Edat)), by = PVC]
> head(dt1)
PVC totalV averageV Year Clutch Edat
1: 6,056,589 189.35847 63.11949 1996 3 7
2: 6,013,925 182.05706 60.68569 1996 3 9
3: 6,034,050 162.82006 54.27335 1996 3 4
4: 6070852 167.52145 55.84048 1997 3 6
5: 6070862 58.49927 58.49927 1998 1 7
6: 6014535 57.34171 57.34171 1998 1 2
CodePudding user response:
Dplyr
solution. I grouped by ID
, PVC
, Year
, Clutch
and Edat
Since those were in your new data.frame. I assume ID
is the identifier for this combination?
#group by and summarise
dt %>% group_by(ID, PVC, Year, Clutch, Edat) %>%
summarise(averageV = mean((Volume)),
totalV = sum(Volume)) %>% ungroup() |> head()
ID PVC Year Clutch Edat averageV totalV
<int> <chr> <int> <int> <int> <dbl> <dbl>
1 362 6,013,925 1996 3 9 60.7 182.
2 370 6,034,050 1996 3 4 54.3 163.
3 378 6,056,589 1996 3 7 63.1 189.
4 383 6070852 1997 3 6 55.8 168.
5 416 6014326 1998 1 1 54.9 54.9
6 426 6014535 1998 1 2 57.3 57.3