mydata <- data.frame(co = c(2.3,3.1,5.7), b = c(3,5,6), mydate = c(as.Date ("2015-10-01"), as.Date ("2015-11-01"), as.Date ("2015-12-01")))
I would like to add and fill columns based on a date. I want the final output to look something like this:
co b mydate m1 m2 m3 m4 m5 m6 m7 m8 m9 m10 m11 m12
1 2.3 3 2015-10-01 0 0 0 0 0 0 0 0 0 1 0 0
2 3.1 5 2015-11-01 0 0 0 0 0 0 0 0 0 0 1 0
3 5.7 6 2015-12-01 0 0 0 0 0 0 0 0 0 0 0 1
CodePudding user response:
For both, I add a for
loop that adds the missing months; if your real data is full-enough (all months are included), then you may not need it (or may not care even if a month is missing).
Note: (I missed this the first time) format(.., format="%m")
will zero-pad the months. I prefer zero-padded personally (for many reasons, including that it sorts correctly), but if you don't want that, then replace format(...)
with sub("^m0", "m", format(...))
.
base R (and reshape2)
out <- mydata |>
transform(m = format(mydate, format = "m%m"), v = 1L) |>
reshape2::dcast(formula = co b mydate ~ m, value.var = "v", fill = 0L)
for (col in paste0("m", 1:12)) if (!col %in% names(out)) out[[col]] <- 0L
out
# co b mydate m10 m11 m12 m1 m2 m3 m4 m5 m6 m7 m8 m9
# 1 2.3 3 2015-10-01 1 0 0 0 0 0 0 0 0 0 0 0
# 2 3.1 5 2015-11-01 0 1 0 0 0 0 0 0 0 0 0 0
# 3 5.7 6 2015-12-01 0 0 1 0 0 0 0 0 0 0 0 0
dplyr
library(dplyr)
out <- mydata %>%
mutate(m = format(mydate, format = "m%m"), v = 1L) %>%
tidyr::pivot_wider(c(co, b, mydate), names_from = "m", values_from = "v", values_fill = 0L)
for (col in paste0("m", 1:12)) if (!col %in% names(out)) out[[col]] <- 0L
out
# # A tibble: 3 x 15
# co b mydate m10 m11 m12 m1 m2 m3 m4 m5 m6 m7 m8 m9
# <dbl> <dbl> <date> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
# 1 2.3 3 2015-10-01 1 0 0 0 0 0 0 0 0 0 0 0
# 2 3.1 5 2015-11-01 0 1 0 0 0 0 0 0 0 0 0 0
# 3 5.7 6 2015-12-01 0 0 1 0 0 0 0 0 0 0 0 0