I have a df like this with many groups in column y
:
> head(example)
# A tibble: 6 x 3
time y z
<int> <chr> <dbl>
1 0 group_1 0
2 1 group_1 32441317.
3 2 group_1 30567426.
4 3 group_1 30328724.
5 4 group_1 31669422.
6 5 group_1 32457023.
The column time represents the projected month. What I'm trying to achieve is to sum/aggregate the values in column z to yearly values, i.e. sum group_1 for time = 0,...,12, sum group_1 for time = 13,...24 and so on. I wrote a function that loops over time and does the summation, but I am sure, that there is an more R-like-way to do this.
Here is an example df:
structure(list(time = 0:48, y = c("group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1"), z = c(0, 32441317.442841,
30567426.097856, 30328724.080608, 31669422.143856, 32457022.965129,
43380100.665038, 38512706.430005, 28338868.386996, 30854161.208595,
32276124.786534, 29341793.796199, 27226404.410877, 28778762.637601,
26978821.108941, 26669524.593051, 28035336.885386, 29279340.742514,
38215592.274059, 34013445.376372, 25065907.669508, 27591198.71768,
28113667.649211, 25656922.065041, 24093750.174799, 25491713.777652,
23825429.327112, 23505717.779449, 24846795.105652, 25821964.573916,
34064353.303268, 29483101.216339, 21761967.166822, 23307488.432137,
24904470.059888, 22768331.410693, 21266194.870375, 22622443.869567,
20942198.648107, 20746900.437655, 21985508.249395, 22885605.457792,
29941337.378827, 26079117.988682, 19196902.521694, 20521161.64971,
22170801.329356, 19958077.873417, 18515973.202047)), row.names = c(NA,
-49L), groups = structure(list(y = "group_1", .rows = structure(list(
1:49), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = c(NA, -1L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
CodePudding user response:
We may use
library(dplyr)
df1 %>%
group_by(y, grp = cumsum(time %% 13 == 0)) %>%
summarise(z = sum(z, na.rm = TRUE), .groups = 'drop')
-output
# A tibble: 4 × 3
y grp z
<chr> <int> <dbl>
1 group_1 1 387394072.
2 group_1 2 367983984.
3 group_1 3 319120456.
4 group_1 4 222001386.
OP's grouping sum group_1 for time = 0,...,12
CodePudding user response:
df |>
group_by(y, year = 1 ((time-1) %/% 12)) |>
summarise(sum(z))
# y year `sum(z)`
# <chr> <dbl> <dbl>
# 1 group_1 0 0
# 2 group_1 1 387394072.
# 3 group_1 2 342492270.
# 4 group_1 3 301047527.
# 5 group_1 4 265566029.
CodePudding user response:
library(dplyr)
Create a function to generate time ranges
create_ranges <- function(interval, nrows){
number_of_ranges <- floor(nrows/interval)
range_vec <- rep(paste0("range_",1:number_of_ranges), each=interval)
# file fill with the extra group
if(length(range_vec) < nrows){
length_to_fill <- nrows - length(range_vec)
new_range <- number_of_ranges 1
to_append <- rep(paste0("range_",new_range), times=length_to_fill)
range_vec <- c(range_vec,to_append)
}
return(range_vec)
}
calculate sum based on the required interval for aggregation
interval <- 13
df %>%
mutate(Ranges=create_ranges(interval, nrow(.))) %>%
select(Ranges, everything()) %>%
group_by(Ranges) %>%
summarise(Range_sum=sum(z, na.rm=TRUE))
Output
# A tibble: 4 x 2
Ranges Range_sum
<chr> <dbl>
1 range_1 387394072.
2 range_2 367983984.
3 range_3 319120456.
4 range_4 222001386.