Im trying to aggregate the DAMT
column from my data below based on RC
by I cant because I get one row less than my initial dataframe
dt1<-structure(list(Population = c("p1", "p1", "p1", "p1", "p1", "p1",
"p1", "p1", "p1", "p1", "p2", "p2", "p2", "p2", "p2", "p2", "p3",
"p4", "p5", "p5", "p6", "p6", "p6", "p6", "p6", "p6", "p7", "p7",
"p7", "p7", "p7", "p7", "p7", "p7", "p7", "p8"), RC = c("p1_1_a",
"p1_1_b", "p1_2_a", "p1_2_b", "p1_3_a", "p1_3_b", "p1_4_a", "p1_4_b",
"p1_5_a", "p1_5_b", "p2_a", "p2_b", "p2_c", "p2_d", "p2_e", "p2_f",
"p3_a", "p4_a", "p5_a", "p5_a", "p6_a", "p6_b", "p6_c", "p6_d",
"p6_e", "p6_f", "p7_a", "p7_b", "p7_c", "p7_d", "p7_e", "p7_f",
"p7_g", "p7_h", "p7_i", "p8_1"), DAMT = c(25, 12, 14, 11, 3,
3, 2, 4, 5, 0, 0, 0, 0, 25, 12, 14, 11, 3, 3, 2, 4, 5, 0, 0,
0, 0, 25, 12, 14, 11, 3, 3, 2, 4, 5, 0), EU = c(10, 20, 30, 40,
50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180,
190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310,
320, 330, 340, 350, 360)), row.names = c(NA, -36L), class = c("tbl_df",
"tbl", "data.frame"))
dt1$CAT<-aggregate(dt1$DAMT, by=list(CAT=dt1$RC), FUN=sum)
CodePudding user response:
aggregate
summarises the data - it returns only a single row of sum
value for each group, we need ave
to create a new column
dt1$CAT <- with(dt1, ave(DAMT, RC, FUN = sum))
CodePudding user response:
You could do this also simply using dplyr
with a group_by
and mutate
:
dt1<-structure(list(Population = c("p1", "p1", "p1", "p1", "p1", "p1",
"p1", "p1", "p1", "p1", "p2", "p2", "p2", "p2", "p2", "p2", "p3",
"p4", "p5", "p5", "p6", "p6", "p6", "p6", "p6", "p6", "p7", "p7",
"p7", "p7", "p7", "p7", "p7", "p7", "p7", "p8"), RC = c("p1_1_a",
"p1_1_b", "p1_2_a", "p1_2_b", "p1_3_a", "p1_3_b", "p1_4_a", "p1_4_b",
"p1_5_a", "p1_5_b", "p2_a", "p2_b", "p2_c", "p2_d", "p2_e", "p2_f",
"p3_a", "p4_a", "p5_a", "p5_a", "p6_a", "p6_b", "p6_c", "p6_d",
"p6_e", "p6_f", "p7_a", "p7_b", "p7_c", "p7_d", "p7_e", "p7_f",
"p7_g", "p7_h", "p7_i", "p8_1"), DAMT = c(25, 12, 14, 11, 3,
3, 2, 4, 5, 0, 0, 0, 0, 25, 12, 14, 11, 3, 3, 2, 4, 5, 0, 0,
0, 0, 25, 12, 14, 11, 3, 3, 2, 4, 5, 0), EU = c(10, 20, 30, 40,
50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180,
190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310,
320, 330, 340, 350, 360)), row.names = c(NA, -36L), class = c("tbl_df",
"tbl", "data.frame"))
library(dplyr)
dt1 %>%
group_by(RC) %>%
mutate(CAT = sum(DAMT))
#> # A tibble: 36 × 5
#> # Groups: RC [35]
#> Population RC DAMT EU CAT
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 p1 p1_1_a 25 10 25
#> 2 p1 p1_1_b 12 20 12
#> 3 p1 p1_2_a 14 30 14
#> 4 p1 p1_2_b 11 40 11
#> 5 p1 p1_3_a 3 50 3
#> 6 p1 p1_3_b 3 60 3
#> 7 p1 p1_4_a 2 70 2
#> 8 p1 p1_4_b 4 80 4
#> 9 p1 p1_5_a 5 90 5
#> 10 p1 p1_5_b 0 100 0
#> # … with 26 more rows
#> # ℹ Use `print(n = ...)` to see more rows
Created on 2022-07-22 by the reprex package (v2.0.1)