How correct convert date format in integer format in R-CodePudding

df1=structure(list(date = c("22.04.2022", "22.04.2022", "22.04.2022", 
"22.04.2022", "23.04.2022", "23.04.2022", "23.04.2022", "24.04.2022", 
"24.04.2022", "24.04.2022"), d1 = c("8:00:00", "10:00:00", "12:00:00", 
"12:00:00", "10:00:00", "12:00:00", "12:00:00", "10:00:00", "12:00:00", 
"12:00:00"), d2 = c("10:00:00", "20:00:00", "22:00:00", "22:00:00", 
"20:00:00", "22:00:00", "22:00:00", "20:00:00", "22:00:00", "22:00:00"
)), class = "data.frame", row.names = c(NA, -10L))

here 3 columns with date format, all of them i need convert to integer, so that desired output will be

date         d1          d2         date1      d1_1 d2_1
22.04.2022  8:00:00     10:00:00    20220422    8   10
22.04.2022  10:00:00    20:00:00    20220422    10  20
22.04.2022  12:00:00    22:00:00    20220422    12  22
22.04.2022  12:00:00    22:00:00    20220422    12  22
23.04.2022  10:00:00    20:00:00    20220423    10  20
23.04.2022  12:00:00    22:00:00    20220423    12  22
23.04.2022  12:00:00    22:00:00    20220423    12  22
24.04.2022  10:00:00    20:00:00    20220424    10  20
24.04.2022  12:00:00    22:00:00    20220424    12  22
24.04.2022  12:00:00    22:00:00    20220424    12  22

where date1 d1_1 d2 are integer from date d1 d2

How can it can be done better? thank you.

CodePudding user response：

A possible solution:

library(tidyverse)
library(lubridate)

df1 %>% 
  mutate(date1 = dmy(date) %>% str_remove_all("-"),
         across(matches("d[1-2]"), ~ str_extract(.x, "^\\d (?=\\:)"),
                .names = "{.col}_1"))

#>          date       d1       d2    date1 d1_1 d2_1
#> 1  22.04.2022  8:00:00 10:00:00 20220422    8   10
#> 2  22.04.2022 10:00:00 20:00:00 20220422   10   20
#> 3  22.04.2022 12:00:00 22:00:00 20220422   12   22
#> 4  22.04.2022 12:00:00 22:00:00 20220422   12   22
#> 5  23.04.2022 10:00:00 20:00:00 20220423   10   20
#> 6  23.04.2022 12:00:00 22:00:00 20220423   12   22
#> 7  23.04.2022 12:00:00 22:00:00 20220423   12   22
#> 8  24.04.2022 10:00:00 20:00:00 20220424   10   20
#> 9  24.04.2022 12:00:00 22:00:00 20220424   12   22
#> 10 24.04.2022 12:00:00 22:00:00 20220424   12   22

CodePudding user response：

We may also convert to Date class and use format as well as use hour to extract the hour part

library(dplyr)
library(lubridate)
df1 %>%
   mutate(date1 = format(dmy(date), '%Y%m%d'),
     across(d1:d2, ~ hour(hms(.x)), .names = "{.col}_1"))

-output

         date       d1       d2    date1 d1_1 d2_1
1  22.04.2022  8:00:00 10:00:00 20220422    8   10
2  22.04.2022 10:00:00 20:00:00 20220422   10   20
3  22.04.2022 12:00:00 22:00:00 20220422   12   22
4  22.04.2022 12:00:00 22:00:00 20220422   12   22
5  23.04.2022 10:00:00 20:00:00 20220423   10   20
6  23.04.2022 12:00:00 22:00:00 20220423   12   22
7  23.04.2022 12:00:00 22:00:00 20220423   12   22
8  24.04.2022 10:00:00 20:00:00 20220424   10   20
9  24.04.2022 12:00:00 22:00:00 20220424   12   22
10 24.04.2022 12:00:00 22:00:00 20220424   12   22