Home > Net >  R:how to split date and create new variable
R:how to split date and create new variable

Time:04-08

simple example

dat=structure(list(var = c(78L, 84L, 4L, 22L, 19L, 18L, 85L, 77L, 
    62L, 19L, 70L, 92L, 18L, 25L, 30L, 42L, 98L, 87L, 32L, 36L, 68L, 
    44L, 79L), dt1 = c("28.03.2022 10:00", "28.03.2022 12:00", 
    "28.03.2022 12:00", "29.03.2022 8:00", "29.03.2022 12:00", "29.03.2022 10:00", 
    "30.03.2022 10:00", "30.03.2022 12:00", "30.03.2022 12:00", "31.03.2022 10:00", 
    "31.03.2022 11:00", "31.03.2022 12:00", "01.04.2022 8:00", "01.04.2022 8:00", 
    "01.04.2022 10:00", "01.04.2022 12:00", "01.04.2022 12:00", "02.04.2022 10:00", 
    "02.04.2022 12:00", "02.04.2022 12:00", "03.04.2022 10:00", "03.04.2022 12:00", 
    "03.04.2022 12:00"), dt2 = c("28.03.2022 20:00", "28.03.2022 22:00", 
    "28.03.2022 22:00", "29.03.2022 10:00", "29.03.2022 22:00", "29.03.2022 20:00", 
    "30.03.2022 20:00", "30.03.2022 22:00", "30.03.2022 22:00", "31.03.2022 20:00", 
    "31.03.2022 21:00", "31.03.2022 22:00", "01.04.2022 10:00", "01.04.2022 10:00", 
    "01.04.2022 20:00", "01.04.2022 22:00", "01.04.2022 22:00", "02.04.2022 20:00", 
    "02.04.2022 22:00", "02.04.2022 22:00", "03.04.2022 20:00", "03.04.2022 22:00", 
    "03.04.2022 22:00")), class = "data.frame", row.names = c(NA, 
    -23L))

dt1 and dt2 has format 30.03.2022 12:00. How part with date separate from hours and minutes and select it into new colum date , but part with hh and mm left in their columns dt1 and dt2, but change date into yyyy-mm-dd? In this data output for me should be

date              dt1     dt2
2022.03.28  10:00   20:00
2022.03.28  12:00   22:00
2022.03.28  12:00   22:00
2022.03.29  8:00    10:00
2022.03.29  12:00   22:00
2022.03.29  10:00   20:00
2022.03.30  10:00   20:00
2022.03.30  12:00   22:00
2022.03.31  12:00   22:00
2022.03.31  10:00   20:00
2022.03.31  11:00   21:00
2022.04.01  12:00   22:00
2022.04.01  8:00    10:00
2022.04.01  8:00    10:00
2022.04.01  10:00   20:00
2022.04.01  12:00   22:00
2022.04.01  12:00   22:00
2022.04.02  10:00   20:00
2022.04.02  12:00   22:00
2022.04.02  12:00   22:00
2022.04.03  10:00   20:00
2022.04.03  12:00   22:00
2022.04.03  12:00   22:00

How can i do it simple? Thank you.

CodePudding user response:

dplyr

library(dplyr)
library(tidyr) # separate
dat %>%
  separate(dt1, into = c("date1", "dt1"), sep = " ") %>%
  separate(dt2, into = c("date2", "dt2"), sep = " ")
#    var      date1   dt1      date2   dt2
# 1   78 28.03.2022 10:00 28.03.2022 20:00
# 2   84 28.03.2022 12:00 28.03.2022 22:00
# 3    4 28.03.2022 12:00 28.03.2022 22:00
# 4   22 29.03.2022  8:00 29.03.2022 10:00
# 5   19 29.03.2022 12:00 29.03.2022 22:00
# 6   18 29.03.2022 10:00 29.03.2022 20:00
# 7   85 30.03.2022 10:00 30.03.2022 20:00
# 8   77 30.03.2022 12:00 30.03.2022 22:00
# 9   62 30.03.2022 12:00 30.03.2022 22:00
# 10  19 31.03.2022 10:00 31.03.2022 20:00
# 11  70 31.03.2022 11:00 31.03.2022 21:00
# 12  92 31.03.2022 12:00 31.03.2022 22:00
# 13  18 01.04.2022  8:00 01.04.2022 10:00
# 14  25 01.04.2022  8:00 01.04.2022 10:00
# 15  30 01.04.2022 10:00 01.04.2022 20:00
# 16  42 01.04.2022 12:00 01.04.2022 22:00
# 17  98 01.04.2022 12:00 01.04.2022 22:00
# 18  87 02.04.2022 10:00 02.04.2022 20:00
# 19  32 02.04.2022 12:00 02.04.2022 22:00
# 20  36 02.04.2022 12:00 02.04.2022 22:00
# 21  68 03.04.2022 10:00 03.04.2022 20:00
# 22  44 03.04.2022 12:00 03.04.2022 22:00
# 23  79 03.04.2022 12:00 03.04.2022 22:00

base R

dat <- cbind(
  dat[1],
  strcapture("(.*) (.*)", dat$dt1, list(date1="", dt1="")),
  strcapture("(.*) (.*)", dat$dt2, list(date2="", dt2=""))
)

data.table

library(data.table)
DT <- as.data.table(dat) # setDT(dat) is canonical/preferred
DT[, c("date1", "dt1") := tstrsplit(dt1, " ")
  ][, c("date2", "dt2") := tstrsplit(dt2, " ")]

CodePudding user response:

Using strptime and strftime if you can cope with the leading zero 08:00. The new date column has "Date" format. The stopifnot stops if dates are not equal.

tm <- lapply(dat[2:3], strptime, '%d.%m.%Y %R')
stopifnot(all.equal(dt <- as.Date(tm[[1]]), as.Date(tm[[2]])))  
cbind(dat[1], date=dt, dt1=strftime(tm[[1]], '%R'), dt2=strftime(tm[[2]], '%R'))
#    var       date   dt1   dt2
# 1   78 2022-03-28 10:00 20:00
# 2   84 2022-03-28 12:00 22:00
# 3    4 2022-03-28 12:00 22:00
# 4   22 2022-03-29 08:00 10:00
# 5   19 2022-03-29 12:00 22:00
# 6   18 2022-03-29 10:00 20:00
# 7   85 2022-03-30 10:00 20:00
# 8   77 2022-03-30 12:00 22:00
# 9   62 2022-03-30 12:00 22:00
# 10  19 2022-03-31 10:00 20:00
# 11  70 2022-03-31 11:00 21:00
# 12  92 2022-03-31 12:00 22:00
# 13  18 2022-04-01 08:00 10:00
# 14  25 2022-04-01 08:00 10:00
# 15  30 2022-04-01 10:00 20:00
# 16  42 2022-04-01 12:00 22:00
# 17  98 2022-04-01 12:00 22:00
# 18  87 2022-04-02 10:00 20:00
# 19  32 2022-04-02 12:00 22:00
# 20  36 2022-04-02 12:00 22:00
# 21  68 2022-04-03 10:00 20:00
# 22  44 2022-04-03 12:00 22:00
# 23  79 2022-04-03 12:00 22:00
  •  Tags:  
  • r
  • Related