I have the following code:
library(zoo)
library(xts)
df1<-structure(list(Date = structure(c(13523, 13532, 13539, 13551,
13565, 13567, 13579, 13588, 13600, 13607, 13616, 13628, 13637,
13656, 13658, 13670, 13686, 13691, 13698, 13705, 13721, 13735,
13768, 13770, 13783, 13789, 13797, 13811, 13819, 13824, 13838,
13846, 13852, 13860), class = "Date"), Category = c("Type 1",
"Type 2", "Type 1", "Type 1", "Type 1", "Type 2", "Type 1", "Type 3",
"Type 1", "Type 1", "Type 2", "Type 1", "Type 1", "Type 1", "Type 2",
"Type 1", "Type 3", "Type 1", "Type 1", "Type 1", "Type 1", "Type 2",
"Type 1", "Type 3", "Type 1", "Type 1", "Type 1", "Type 1", "Type 2",
"Type 1", "Type 1", "Type 1", "Type 3", "Type 2"), Value = c(2250,
1200, 625, 2250, 1000, 2750, 2250, 2750, 950, 2000, 1100, 950,
2250, 1000, 2500, 2250, 2500, 1000, 2250, 1200, 700, 2500, 2000,
2500, 900, 2250, 1200, 925, 2500, 2250, 750, 2000, 2500, 950)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -34L), groups = structure(list(
Date = structure(c(13523, 13532, 13539, 13551, 13565, 13567,
13579, 13588, 13600, 13607, 13616, 13628, 13637, 13656, 13658,
13670, 13686, 13691, 13698, 13705, 13721, 13735, 13768, 13770,
13783, 13789, 13797, 13811, 13819, 13824, 13838, 13846, 13852,
13860), class = "Date"), .rows = structure(list(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -34L), .drop = TRUE))
I've created a rolling_sum by month for this particular dataset using:
df_month <- df1 %>%
group_by(Category, Month = format(Date, "%Y-%m-%d")) %>%
summarize(Rolling_Sum = sum(Value))
df_month$Month <- as.yearmon(df_month$Month)
In preparation for a conversion to an xts format I'd like to pivot-wider and replace all null/NAs values with 0. However the pivot-wider seems to break the dataset, making the null replacement and xts conversion impossible:
df_turned <- df_month %>% group_by(Category) %>% pivot_wider(names_from = Category, values_from = Rolling_Sum, id_cols = Month)
If that had worked, I would have done:
df_turned <- df_turned %>% replace(.=="NULL", 0)
Then:
df_turned <- xts(df_turned, order.by = df_turned$Month)
Any advice most appreciated.
CodePudding user response:
If we don't want duplicates, then use values_fn
library(tidyr)
library(dplyr)
df_turned <- df_month %>%
ungroup %>%
pivot_wider(names_from = Category, values_from = Rolling_Sum,
values_fn = sum, values_fill = 0)
-output
df_turned
# A tibble: 12 × 4
Month `Type 1` `Type 2` `Type 3`
<yearmon> <dbl> <dbl> <dbl>
1 Jan 2007 2875 1200 0
2 Feb 2007 3250 2750 0
3 Mar 2007 3200 0 2750
4 Apr 2007 2950 1100 0
5 May 2007 3250 2500 0
6 Jun 2007 3250 0 2500
7 Jul 2007 4150 0 0
8 Sep 2007 2900 0 2500
9 Oct 2007 4375 0 0
10 Nov 2007 5000 2500 0
11 Aug 2007 0 2500 0
12 Dec 2007 0 950 2500
Now, we can convert to xts
xts(df_turned[-1], order.by = df_turned$Month)
Type 1 Type 2 Type 3
Jan 2007 2875 1200 0
Feb 2007 3250 2750 0
Mar 2007 3200 0 2750
Apr 2007 2950 1100 0
May 2007 3250 2500 0
Jun 2007 3250 0 2500
Jul 2007 4150 0 0
Aug 2007 0 2500 0
Sep 2007 2900 0 2500
Oct 2007 4375 0 0
Nov 2007 5000 2500 0
Dec 2007 0 950 2500
CodePudding user response:
As indicated in my comment, your problem is that you create duplicates because as.yearmon
is called after the grouping by "Month". You are de facto grouping by "Date". We could do:
library(dplyr)
library(tidyr)
df1 |>
group_by(Category,
Month = as.yearmon(Date)) |>
pivot_wider(names_from = Category,
values_from = Value,
values_fn = sum,
values_fill = 0
) |>
select(-Date) # Or mutate "Date" above instead of creating "Month".
Then call xts
.
Month = as.yearmon(Date)
shouldn't cause a problem if Date is a date-type. However, if it is causing trouble as you indicate in your comment, as.yearmon(format(Date, "%Y-%m-%d"))
.
Output:
# A tibble: 12 × 4
Month `Type 1` `Type 2` `Type 3`
<yearmon> <dbl> <dbl> <dbl>
1 Jan 2007 2875 1200 0
2 Feb 2007 3250 2750 0
3 Mar 2007 3200 0 2750
4 Apr 2007 2950 1100 0
5 May 2007 3250 2500 0
6 Jun 2007 3250 0 2500
7 Jul 2007 4150 0 0
8 Sep 2007 2900 0 2500
9 Oct 2007 4375 0 0
10 Nov 2007 5000 2500 0
11 Aug 2007 0 2500 0
12 Dec 2007 0 950 2500
Update After @akrun updated answer with a similar solution, my solution seems more verbose. The reason is that my approach works directly on the df1
object and solves the problem there.
CodePudding user response:
Use read.zoo
like this:
library(zoo)
df_month |>
read.zoo(index = "Month", split = "Category", aggregate = sum) |>
na.fill(0)
giving this zoo object -- as.xts
can be used to convert that to xts if needed.
Type 1 Type 2 Type 3
Jan 2007 2875 1200 0
Feb 2007 3250 2750 0
Mar 2007 3200 0 2750
Apr 2007 2950 1100 0
May 2007 3250 2500 0
Jun 2007 3250 0 2500
Jul 2007 4150 0 0
Aug 2007 0 2500 0
Sep 2007 2900 0 2500
Oct 2007 4375 0 0
Nov 2007 5000 2500 0
Dec 2007 0 950 2500
or directly from df1
modified from the comment below
df1 |>
read.zoo(df1, FUN = as.yearmon, split = "Category", aggregate = sum) |>
na.fill(0)
Note
df_month
from question in immediately reproducible form
df_month <-
structure(list(Category = c("Type 1", "Type 1", "Type 1", "Type 1",
"Type 1", "Type 1", "Type 1", "Type 1", "Type 1", "Type 1", "Type 1",
"Type 1", "Type 1", "Type 1", "Type 1", "Type 1", "Type 1", "Type 1",
"Type 1", "Type 1", "Type 1", "Type 1", "Type 1", "Type 2", "Type 2",
"Type 2", "Type 2", "Type 2", "Type 2", "Type 2", "Type 3", "Type 3",
"Type 3", "Type 3"), Month = structure(c(2007, 2007, 2007.08333333333,
2007.08333333333, 2007.16666666667, 2007.16666666667, 2007.25,
2007.25, 2007.33333333333, 2007.33333333333, 2007.41666666667,
2007.41666666667, 2007.5, 2007.5, 2007.5, 2007.66666666667, 2007.66666666667,
2007.75, 2007.75, 2007.75, 2007.83333333333, 2007.83333333333,
2007.83333333333, 2007, 2007.08333333333, 2007.25, 2007.33333333333,
2007.58333333333, 2007.83333333333, 2007.91666666667, 2007.16666666667,
2007.41666666667, 2007.66666666667, 2007.91666666667), class = "yearmon"),
Rolling_Sum = c(2250, 625, 2250, 1000, 2250, 950, 2000, 950,
2250, 1000, 2250, 1000, 2250, 1200, 700, 2000, 900, 2250,
1200, 925, 2250, 750, 2000, 1200, 2750, 1100, 2500, 2500,
2500, 950, 2750, 2500, 2500, 2500)), row.names = c(NA, -34L
), groups = structure(list(Category = c("Type 1", "Type 2", "Type 3"
), .rows = structure(list(1:23, 24:30, 31:34), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))