Is there a way to convert dataframe to time series (same like mdeaths
)
df1 <- structure(list(Year = c(2021, 2022, 2020, 2021, 2022, 2020, 2021,
2020, 2021, 2020, 2021, 2020, 2021, 2020, 2021, 2020, 2021, 2020,
2021, 2020, 2021, 2020, 2021, 2020, 2021), mon_day = c("January",
"January", "February", "February", "February", "March", "March",
"April", "April", "May", "May", "June", "June", "July", "July",
"August", "August", "September", "September", "October", "October",
"November", "November", "December", "December"), `R` = c(-427,
-389, 18, -357, -230, -383, -259, -1233, -232, -875, -190, -489,
-299, -448, -577, -662, -762, -552, -621, -488, -409, -290, -76,
-155, -10)), row.names = c(NA, -25L), groups = structure(list(
Year = c(2020, 2021, 2022), .rows = structure(list(c(3L,
6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L, 22L, 24L), c(1L, 4L,
7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L), c(2L, 5L
)), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = c(NA, -3L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
expected output
df1_ts
Jan Feb Mar
2020 -- -- --
2021 -- -- --
2022 -- -- --
It should be a timeseries data?
CodePudding user response:
In base R
, we can do
dates <- with(df1, as.Date(paste(Year, mon_day, 1), "%Y %B %d"))
df2 <- df1[order(dates),]
ts1 <- ts(df2$R, start = c(df2$Year[1], match(df2$mon_day[1], month.name)),
frequency = 12)
ts1
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2020 18 -383 -1233 -875 -489 -448 -662 -552 -488 -290 -155
2021 -427 -357 -259 -232 -190 -299 -577 -762 -621 -409 -76 -10
2022 -389 -230
> str(ts1)
Time-Series [1:25] from 2020 to 2022: 18 -383 -1233 -875 -489 ...
CodePudding user response:
You can try this approach using dplyr
and tidyr
. Convert to wide format and sort by Year. Then convert to time_series.
df_ts = df1 %>%
pivot_wider(Year,names_from=mon_day,values_from=R) %>%
arrange(Year)
df_ts = ts(df_wide[,-1], start=df_wide[1,1], end=df_wide[nrow(df_wide),1])
Output:
Time Series:
Start = 2020
End = 2022
Frequency = 1
January February March April May June July August September October November December
2020 NA 18 -383 -1233 -875 -489 -448 -662 -552 -488 -290 -155
2021 -427 -357 -259 -232 -190 -299 -577 -762 -621 -409 -76 -10
2022 -389 -230 NA NA NA NA NA NA NA NA NA NA
Class:
class(df_ts)
[1] "mts" "ts" "matrix"
CodePudding user response:
Try this...
library(tidyr)
library(dplyr)
df1 %>%
mutate(mon_day = factor(mon_day,
levels =c("January", "February", "March",
"April", "May", "June", "July",
"August", "September", "October",
"November", "December")))%>%
spread(mon_day, R) %>%
replace(is.na(.), 0)