Home > Net >  Cannot add an extra column with aggregated data to my original dataset due to number of rows
Cannot add an extra column with aggregated data to my original dataset due to number of rows

Time:07-23

Im trying to aggregate the DAMT column from my data below based on RC by I cant because I get one row less than my initial dataframe

dt1<-structure(list(Population = c("p1", "p1", "p1", "p1", "p1", "p1", 
                                   "p1", "p1", "p1", "p1", "p2", "p2", "p2", "p2", "p2", "p2", "p3", 
                                   "p4", "p5", "p5", "p6", "p6", "p6", "p6", "p6", "p6", "p7", "p7", 
                                   "p7", "p7", "p7", "p7", "p7", "p7", "p7", "p8"), RC = c("p1_1_a", 
                                                                                           "p1_1_b", "p1_2_a", "p1_2_b", "p1_3_a", "p1_3_b", "p1_4_a", "p1_4_b", 
                                                                                           "p1_5_a", "p1_5_b", "p2_a", "p2_b", "p2_c", "p2_d", "p2_e", "p2_f", 
                                                                                           "p3_a", "p4_a", "p5_a", "p5_a", "p6_a", "p6_b", "p6_c", "p6_d", 
                                                                                           "p6_e", "p6_f", "p7_a", "p7_b", "p7_c", "p7_d", "p7_e", "p7_f", 
                                                                                           "p7_g", "p7_h", "p7_i", "p8_1"), DAMT = c(25, 12, 14, 11, 3, 
                                                                                                                                     3, 2, 4, 5, 0, 0, 0, 0, 25, 12, 14, 11, 3, 3, 2, 4, 5, 0, 0, 
                                                                                                                                     0, 0, 25, 12, 14, 11, 3, 3, 2, 4, 5, 0), EU = c(10, 20, 30, 40, 
                                                                                                                                                                                     50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 
                                                                                                                                                                                     190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 
                                                                                                                                                                                     320, 330, 340, 350, 360)), row.names = c(NA, -36L), class = c("tbl_df", 
                                                                                                                                                                                                                                                   "tbl", "data.frame"))

dt1$CAT<-aggregate(dt1$DAMT, by=list(CAT=dt1$RC), FUN=sum)

CodePudding user response:

aggregate summarises the data - it returns only a single row of sum value for each group, we need ave to create a new column

dt1$CAT <- with(dt1, ave(DAMT, RC, FUN = sum))

CodePudding user response:

You could do this also simply using dplyr with a group_by and mutate:

dt1<-structure(list(Population = c("p1", "p1", "p1", "p1", "p1", "p1", 
                                   "p1", "p1", "p1", "p1", "p2", "p2", "p2", "p2", "p2", "p2", "p3", 
                                   "p4", "p5", "p5", "p6", "p6", "p6", "p6", "p6", "p6", "p7", "p7", 
                                   "p7", "p7", "p7", "p7", "p7", "p7", "p7", "p8"), RC = c("p1_1_a", 
                                                                                           "p1_1_b", "p1_2_a", "p1_2_b", "p1_3_a", "p1_3_b", "p1_4_a", "p1_4_b", 
                                                                                           "p1_5_a", "p1_5_b", "p2_a", "p2_b", "p2_c", "p2_d", "p2_e", "p2_f", 
                                                                                           "p3_a", "p4_a", "p5_a", "p5_a", "p6_a", "p6_b", "p6_c", "p6_d", 
                                                                                           "p6_e", "p6_f", "p7_a", "p7_b", "p7_c", "p7_d", "p7_e", "p7_f", 
                                                                                           "p7_g", "p7_h", "p7_i", "p8_1"), DAMT = c(25, 12, 14, 11, 3, 
                                                                                                                                     3, 2, 4, 5, 0, 0, 0, 0, 25, 12, 14, 11, 3, 3, 2, 4, 5, 0, 0, 
                                                                                                                                     0, 0, 25, 12, 14, 11, 3, 3, 2, 4, 5, 0), EU = c(10, 20, 30, 40, 
                                                                                                                                                                                     50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 
                                                                                                                                                                                     190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 
                                                                                                                                                                                     320, 330, 340, 350, 360)), row.names = c(NA, -36L), class = c("tbl_df", 
                                                                                                                                                                                                                                                   "tbl", "data.frame"))


library(dplyr)
dt1 %>% 
  group_by(RC) %>%
  mutate(CAT = sum(DAMT))
#> # A tibble: 36 × 5
#> # Groups:   RC [35]
#>    Population RC      DAMT    EU   CAT
#>    <chr>      <chr>  <dbl> <dbl> <dbl>
#>  1 p1         p1_1_a    25    10    25
#>  2 p1         p1_1_b    12    20    12
#>  3 p1         p1_2_a    14    30    14
#>  4 p1         p1_2_b    11    40    11
#>  5 p1         p1_3_a     3    50     3
#>  6 p1         p1_3_b     3    60     3
#>  7 p1         p1_4_a     2    70     2
#>  8 p1         p1_4_b     4    80     4
#>  9 p1         p1_5_a     5    90     5
#> 10 p1         p1_5_b     0   100     0
#> # … with 26 more rows
#> # ℹ Use `print(n = ...)` to see more rows

Created on 2022-07-22 by the reprex package (v2.0.1)

  •  Tags:  
  • r
  • Related