I have data like this:
library(lubridate)
# Sample data: the 1800 calendar days before today, each paired with a
# uniform random value.
df <- data.frame(
  date = today() - days(seq_len(1800)),
  x = runif(1800)
)
How can I split each month into two halves for each year, something like this:
date x Yrmo_half
2021-11-01 0.900592335 21-11-half_1
2021-11-02 0.719112418 21-11-half_1
2021-11-03 0.871992620 21-11-half_1
.......... ....
.......... ....
.......... ....
.......... ....
2021-11-16 0.105625816 21-11-half_2
2021-11-17 0.267352196 21-11-half_2
2021-11-18 0.072306432 21-11-half_2
.......... ....
.......... ....
.......... ....
.......... ....
2021-12-01 0.900592335 21-12-half_1
2021-12-02 0.719112418 21-12-half_1
2021-12-03 0.871992620 21-12-half_1
.......... ....
.......... ....
.......... ....
.......... ....
Any ideas, please?
CodePudding user response:
Using substr and substring:
# Build the label from the text form of the date: characters 1-8 are the
# "yyyy-mm-" prefix, characters 9 onward are the day of the month.
# Days 1-15 are labelled half_1, later days half_2.
df <- transform(
  df,
  Yrmo_half = paste0(
    substr(date, 1, 8),
    ifelse(as.numeric(substring(date, 9)) <= 15, 'half_1', 'half_2')
  )
)
# Show a slice of rows that straddles the mid-month boundary.
df[seq(8, 18), ]
# date x Yrmo_half
# 8 2021-12-20 0.1639289 2021-12-half_2
# 9 2021-12-19 0.4417741 2021-12-half_2
# 10 2021-12-18 0.5234077 2021-12-half_2
# 11 2021-12-17 0.4769192 2021-12-half_2
# 12 2021-12-16 0.5246433 2021-12-half_2
# 13 2021-12-15 0.6852282 2021-12-half_1
# 14 2021-12-14 0.3517920 2021-12-half_1
# 15 2021-12-13 0.8396313 2021-12-half_1
# 16 2021-12-12 0.9219137 2021-12-half_1
# 17 2021-12-11 0.1063096 2021-12-half_1
# 18 2021-12-10 0.2065289 2021-12-half_1
CodePudding user response:
One option would be to split based on whether a date is closer to the start or the end of the month:
library(lubridate)
# Fix the RNG seed so the random column is reproducible.
set.seed(123)
# The 1800 calendar days before today, each paired with a uniform random value.
df <- data.frame(
  date = today() - days(seq_len(1800)),
  x = runif(1800)
)
# Label each date as "yy-mm-half_1" or "yy-mm-half_2".
#
# A date is put in half_1 when it is strictly nearer to the start of its month
# than to the start of the following month; ties go to half_2. The cut-over
# day therefore depends on the length of the month.
#
# x: a vector of dates.
# Returns a character vector with one label per element of x.
half_month <- function(x) {
  month_start <- floor_date(x, unit = "month")
  next_month_start <- ceiling_date(x, unit = "month")
  nearer_start <- (x - month_start) < (next_month_start - x)
  # FALSE -> first entry, TRUE -> second entry.
  half <- c("half_2", "half_1")[nearer_start + 1L]
  paste(format(x, "%y"), format(x, "%m"), half, sep = "-")
}
# Attach the half-month label to every row.
df[["Yrmo_half"]] <- half_month(df[["date"]])
# Show 10 randomly chosen rows as a spot check.
df[sample(seq_len(nrow(df)), 10), ]
#> date x Yrmo_half
#> 138 2021-08-12 0.78628155 21-08-half_1
#> 1762 2017-03-02 0.01816313 17-03-half_1
#> 893 2019-07-19 0.66960355 19-07-half_2
#> 1574 2017-09-06 0.35866030 17-09-half_1
#> 1229 2018-08-17 0.85880034 18-08-half_2
#> 780 2019-11-09 0.80861204 19-11-half_1
#> 1315 2018-05-23 0.70487785 18-05-half_2
#> 1128 2018-11-26 0.56833801 18-11-half_2
#> 1211 2018-09-04 0.41654735 18-09-half_1
#> 1057 2019-02-05 0.65919793 19-02-half_1
CodePudding user response:
Here's a regex solution:
library(dplyr)
library(stringr)
# Regex-based labelling. The result of the pipeline is not assigned back to
# `df`.
df %>%
  mutate(
    # create temporary column `temp` with the day value
    # (the run of digits at the end of the date text):
    temp = as.numeric(str_extract(date, "\\d+$")),
    # convert `temp` value to "half_1" (days 1-15) or "half_2" (later days):
    temp = ifelse(temp <= 15, "half_1", "half_2"),
    # extract the year-month prefix (everything up to and including the last
    # hyphen) and collate it with the `temp` value:
    Yrmo_half = str_c(str_extract(date, "[0-9-]+-"), temp)) %>%
  # remove temporary column:
  select(-temp)
CodePudding user response:
One way would be the following:
# Days 1-15 are labelled half_1 and days 16 onward half_2. The comparison is
# `<= 15` so that the 15th itself belongs to the first half.
df$Yrmo_half <- ifelse(
  as.integer(format(df$date, '%d')) <= 15,
  paste0(format(df$date, '%y-%m'), '-half_1'),
  paste0(format(df$date, '%y-%m'), '-half_2')
)
which will yield:
# date x Yrmo_half
# 1 2021-12-27 0.1931067 21-12-half_2
# 2 2021-12-26 0.1684232 21-12-half_2
# 3 2021-12-25 0.1118781 21-12-half_2
# 4 2021-12-24 0.4864917 21-12-half_2
# 5 2021-12-23 0.9466433 21-12-half_2
# 6 2021-12-22 0.5007273 21-12-half_2