I was wondering if there is a possibility to group data which is in weeks into months for simple consolidation. The data frame looks like:
structure(list(c("PCE", "PCE", "PCE", "PCE", "PCE", "PCE"), c("3",
"0", "0", "0", "0", "0"), c("2", "0", "0", "0", "0", "0"), c("4",
"0", "0", "0", "0", "0"), c("5", "0", "0", "0", "0", "0"), c("3",
"0", "2", "0", "0", "0"), c("3", "0", "0", "0", "0", "0"), c("2",
"0", "0", "0", "0", "0"), c("8", "0", "0", "0", "0", "0"), c("9",
"0", "0", "0", "0", "0"), c("6", "0", "0", "0", "0", "0"), c("5",
"0", "2", "0", "0", "0"), c("6", "0", "0", "0", "0", "0"), c("9",
"4", "1", "0", "0", "0"), c("4", "0", "0", "0", "0", "0"), c("3",
"1", "0", "0", "0", "0"), c("2", "0", "0", "0", "1", "0"), c("3",
"0", "0", "0", "1", "0"), c("3", "0", "0", "0", "2", "0"), c("2",
"0", "0", "0", "1", "0"), c("2", "0", "0", "0", "1", "0"), c("0",
"0", "0", "0", "2", "1"), c("0", "0", "0", "1", "1", "0"), c("0",
"0", "0", "1", "2", "0"), c("0", "0", "0", "1", "2", "0"), c("2",
"0", "0", "3", "1", "0"), c("3", "0", "0", "2", "1", "0"), c("3",
"0", "0", "0", "0", "0"), c("3", "0", "0", "0", "0", "0"), c("3",
"0", "0", "0", "1", "0"), c("3", "0", "0", "0", "1", "0"), c("3",
"0", "0", "0", "0", "0"), c("3", "0", "0", "0", "0", "0"), c("3",
"0", "0", "0", "0", "0"), c("3", "0", "0", "0", "1", "0"), c("3",
"0", "0", "0", "0", "0"), c("3", "0", "0", "0", "0", "0"), c("3",
"0", "0", "0", "0", "0"), c("1", "0", "0", "0", "2", "0"), c("3",
"0", "0", "0", "0", "0"), c("3", "0", "0", "0", "1", "0"), c("3",
"0", "0", "0", "0", "0"), c("1", "0", "0", "0", "0", "0"), c("3",
"0", "0", "0", "1", "0"), c("3", "0", "0", "0", "1", "0"), c("3",
"0", "0", "0", "0", "0"), c("3", "0", "0", "0", "0", "0"), c("2",
"0", "0", "0", "0", "0"), c("3", "0", "0", "0", "0", "0"), c("3",
"0", "0", "0", "0", "0"), c("3", "0", "0", "0", "1", "0"), c("156",
"5", "5", "8", "24", "1"), c(0, 0, 0, 0, 0, 0), c(0, 0, 0, 0,
0, 0), c(0, 0, 0, 0, 0, 0), c(0, 0, 0, 0, 0, 0), c(0, 0, 0, 0,
0, 0), c(0, 0, 0, 0, 0, 0), c(0, 0, 0, 0, 0, 0), c(0, 0, 0, 0,
0, 0), c(0, 0, 0, 0, 0, 0), c(0, 0, 0, 0, 0, 0), c(0, 0, 0, 0,
0, 0)), names = c("Calendar year / week", "2022-09-12", "2022-09-19",
"2022-09-26", "2022-10-03", "2022-10-10", "2022-10-17", "2022-10-24",
"2022-10-31", "2022-11-07", "2022-11-14", "2022-11-21", "2022-11-28",
"2022-12-05", "2022-12-12", "2022-12-19", "2022-12-26", "2023-01-02",
"2023-01-09", "2023-01-16", "2023-01-23", "2023-01-30", "2023-02-06",
"2023-02-13", "2023-02-20", "2023-02-27", "2023-03-06", "2023-03-13",
"2023-03-20", "2023-03-27", "2023-04-03", "2023-04-10", "2023-04-17",
"2023-04-24", "2023-05-01", "2023-05-08", "2023-05-15", "2023-05-22",
"2023-05-29", "2023-06-05", "2023-06-12", "2023-06-19", "2023-06-26",
"2023-07-03", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
The above mentioned dataframe look like this image below:
The Excepted Output is this:
I would like to know how to aggregate the weekly data together into monthly data as well the sum of the value under it.
CodePudding user response:
We may remove the columns with names that are NA
(complete.cases
), then reshape to long with pivot_longer
, conver the 'week' names to month
by using floor_date
(from lubridate
), and do a group by sum
on the 'value' and reshape back to wide with pivot_wider
library(dplyr)
library(tidyr)
library(lubridate)
# remove the columns with NA as column names
df[complete.cases(names(df))] %>%
# create a row sequence identifier
mutate(rn = row_number()) %>%
# reshape to long format by selecting columns other than the 1st and rn
pivot_longer(cols = -c(1, rn), names_to = 'week') %>%
# grouped by rn, 1st column and week dates floored to month
group_by(rn, `Calendar year / week`,
month = floor_date(ymd(week), 'month')) %>%
# get the sum of value
summarise(value = sum(as.numeric(value), na.rm = TRUE), .groups = 'drop') %>%
# reshape to wide format
pivot_wider(names_from = month, values_from = value) %>%
select(-rn)
-output
# A tibble: 6 × 12
`Calendar year /…` `2022-09-01` `2022-10-01` `2022-11-01` `2022-12-01` `2023-01-01` `2023-02-01` `2023-03-01` `2023-04-01` `2023-05-01`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 PCE 9 21 26 18 10 2 12 12 13
2 PCE 0 0 0 5 0 0 0 0 0
3 PCE 0 2 2 1 0 0 0 0 0
4 PCE 0 0 0 0 0 6 2 0 0
5 PCE 0 0 0 1 7 6 2 1 3
6 PCE 0 0 0 0 1 0 0 0 0
# … with 2 more variables: `2023-06-01` <dbl>, `2023-07-01` <dbl>