df1=structure(list(date = c("22.04.2022", "22.04.2022", "22.04.2022",
"22.04.2022", "23.04.2022", "23.04.2022", "23.04.2022", "24.04.2022",
"24.04.2022", "24.04.2022"), d1 = c("8:00:00", "10:00:00", "12:00:00",
"12:00:00", "10:00:00", "12:00:00", "12:00:00", "10:00:00", "12:00:00",
"12:00:00"), d2 = c("10:00:00", "20:00:00", "22:00:00", "22:00:00",
"20:00:00", "22:00:00", "22:00:00", "20:00:00", "22:00:00", "22:00:00"
)), class = "data.frame", row.names = c(NA, -10L))
here 3 columns with date format, all of them i need convert to integer, so that desired output will be
date d1 d2 date1 d1_1 d2_1
22.04.2022 8:00:00 10:00:00 20220422 8 10
22.04.2022 10:00:00 20:00:00 20220422 10 20
22.04.2022 12:00:00 22:00:00 20220422 12 22
22.04.2022 12:00:00 22:00:00 20220422 12 22
23.04.2022 10:00:00 20:00:00 20220423 10 20
23.04.2022 12:00:00 22:00:00 20220423 12 22
23.04.2022 12:00:00 22:00:00 20220423 12 22
24.04.2022 10:00:00 20:00:00 20220424 10 20
24.04.2022 12:00:00 22:00:00 20220424 12 22
24.04.2022 12:00:00 22:00:00 20220424 12 22
where date1 d1_1 d2
are integer from date d1 d2
How can it can be done better? thank you.
CodePudding user response:
A possible solution:
library(tidyverse)
library(lubridate)
df1 %>%
mutate(date1 = dmy(date) %>% str_remove_all("-"),
across(matches("d[1-2]"), ~ str_extract(.x, "^\\d (?=\\:)"),
.names = "{.col}_1"))
#> date d1 d2 date1 d1_1 d2_1
#> 1 22.04.2022 8:00:00 10:00:00 20220422 8 10
#> 2 22.04.2022 10:00:00 20:00:00 20220422 10 20
#> 3 22.04.2022 12:00:00 22:00:00 20220422 12 22
#> 4 22.04.2022 12:00:00 22:00:00 20220422 12 22
#> 5 23.04.2022 10:00:00 20:00:00 20220423 10 20
#> 6 23.04.2022 12:00:00 22:00:00 20220423 12 22
#> 7 23.04.2022 12:00:00 22:00:00 20220423 12 22
#> 8 24.04.2022 10:00:00 20:00:00 20220424 10 20
#> 9 24.04.2022 12:00:00 22:00:00 20220424 12 22
#> 10 24.04.2022 12:00:00 22:00:00 20220424 12 22
CodePudding user response:
We may also convert to Date
class and use format
as well as use hour
to extract the hour part
library(dplyr)
library(lubridate)
df1 %>%
mutate(date1 = format(dmy(date), '%Y%m%d'),
across(d1:d2, ~ hour(hms(.x)), .names = "{.col}_1"))
-output
date d1 d2 date1 d1_1 d2_1
1 22.04.2022 8:00:00 10:00:00 20220422 8 10
2 22.04.2022 10:00:00 20:00:00 20220422 10 20
3 22.04.2022 12:00:00 22:00:00 20220422 12 22
4 22.04.2022 12:00:00 22:00:00 20220422 12 22
5 23.04.2022 10:00:00 20:00:00 20220423 10 20
6 23.04.2022 12:00:00 22:00:00 20220423 12 22
7 23.04.2022 12:00:00 22:00:00 20220423 12 22
8 24.04.2022 10:00:00 20:00:00 20220424 10 20
9 24.04.2022 12:00:00 22:00:00 20220424 12 22
10 24.04.2022 12:00:00 22:00:00 20220424 12 22