Home > Software engineering >  Sum monthly values to yearly values in R
Sum monthly values to yearly values in R

Time:09-22

I have a df like this with many groups in column y:

> head(example)
# A tibble: 6 x 3
   time y               z
  <int> <chr>       <dbl>
1     0 group_1        0 
2     1 group_1 32441317.
3     2 group_1 30567426.
4     3 group_1 30328724.
5     4 group_1 31669422.
6     5 group_1 32457023.

The column time represents the projected month. What I'm trying to achieve is to sum/aggregate the values in column z to yearly values, i.e. sum group_1 for time = 0,...,12, sum group_1 for time = 13,...24 and so on. I wrote a function that loops over time and does the summation, but I am sure, that there is an more R-like-way to do this.

Here is an example df:

structure(list(time = 0:48, y = c("group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1"), z = c(0, 32441317.442841, 
30567426.097856, 30328724.080608, 31669422.143856, 32457022.965129, 
43380100.665038, 38512706.430005, 28338868.386996, 30854161.208595, 
32276124.786534, 29341793.796199, 27226404.410877, 28778762.637601, 
26978821.108941, 26669524.593051, 28035336.885386, 29279340.742514, 
38215592.274059, 34013445.376372, 25065907.669508, 27591198.71768, 
28113667.649211, 25656922.065041, 24093750.174799, 25491713.777652, 
23825429.327112, 23505717.779449, 24846795.105652, 25821964.573916, 
34064353.303268, 29483101.216339, 21761967.166822, 23307488.432137, 
24904470.059888, 22768331.410693, 21266194.870375, 22622443.869567, 
20942198.648107, 20746900.437655, 21985508.249395, 22885605.457792, 
29941337.378827, 26079117.988682, 19196902.521694, 20521161.64971, 
22170801.329356, 19958077.873417, 18515973.202047)), row.names = c(NA, 
-49L), groups = structure(list(y = "group_1", .rows = structure(list(
    1:49), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", 
"list"))), row.names = c(NA, -1L), class = c("tbl_df", "tbl", 
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"))

CodePudding user response:

We may use

library(dplyr)
df1 %>%
    group_by(y, grp = cumsum(time %% 13 == 0)) %>% 
    summarise(z = sum(z, na.rm = TRUE), .groups = 'drop')

-output

# A tibble: 4 × 3
  y         grp          z
  <chr>   <int>      <dbl>
1 group_1     1 387394072.
2 group_1     2 367983984.
3 group_1     3 319120456.
4 group_1     4 222001386.

OP's grouping sum group_1 for time = 0,...,12

CodePudding user response:

df |> 
  group_by(y, year = 1   ((time-1) %/% 12)) |> 
  summarise(sum(z))
#   y        year   `sum(z)`
#   <chr>   <dbl>      <dbl>
# 1 group_1     0         0 
# 2 group_1     1 387394072.
# 3 group_1     2 342492270.
# 4 group_1     3 301047527.
# 5 group_1     4 265566029.

CodePudding user response:

 library(dplyr)

Create a function to generate time ranges

create_ranges <- function(interval, nrows){
  
    number_of_ranges <- floor(nrows/interval)
  
    range_vec <- rep(paste0("range_",1:number_of_ranges), each=interval)

    # file fill with the extra group
    if(length(range_vec) < nrows){
        length_to_fill <- nrows - length(range_vec)
        new_range <- number_of_ranges   1
        to_append <- rep(paste0("range_",new_range), times=length_to_fill)
        range_vec <- c(range_vec,to_append)
     }
 
     return(range_vec) 
  
} 

calculate sum based on the required interval for aggregation

interval <- 13 
df %>% 
    mutate(Ranges=create_ranges(interval, nrow(.))) %>% 
    select(Ranges, everything()) %>% 
    group_by(Ranges) %>%
    summarise(Range_sum=sum(z, na.rm=TRUE)) 

Output

# A tibble: 4 x 2
  Ranges   Range_sum
  <chr>        <dbl>
1 range_1 387394072.
2 range_2 367983984.
3 range_3 319120456.
4 range_4 222001386.
  •  Tags:  
  • r
  • Related