I have a dataset in which the info
column looks similar to the data below. How can I split it into year, month and day column?
Code:
df = structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8), info = c("PRISM_ppt_provisional_4kmD2_20220925_bil",
"PRISM_ppt_provisional_4kmD2_20220926_bil", "PRISM_ppt_provisional_4kmD2_20220927_bil",
"PRISM_ppt_provisional_4kmD2_20220928_bil", "PRISM_ppt_provisional_4kmD2_20220929_bil",
"PRISM_ppt_provisional_4kmD2_20220930_bil", "PRISM_ppt_provisional_4kmD2_20220925_bil",
"PRISM_ppt_provisional_4kmD2_20220926_bil")), class = "data.frame", row.names = c(NA,
-8L))
desired_df = structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8), info = c("PRISM_ppt_provisional_4kmD2_20220925_bil",
"PRISM_ppt_provisional_4kmD2_20220926_bil", "PRISM_ppt_provisional_4kmD2_20220927_bil",
"PRISM_ppt_provisional_4kmD2_20220928_bil", "PRISM_ppt_provisional_4kmD2_20220929_bil",
"PRISM_ppt_provisional_4kmD2_20220930_bil", "PRISM_ppt_provisional_4kmD2_20220925_bil",
"PRISM_ppt_provisional_4kmD2_20220926_bil"), year = c(2022, 2022,
2022, 2022, 2022, 2022, 2022, 2022), month = c(9, 9, 9, 9, 9,
9, 9, 9), day = c(25, 26, 27, 28, 29, 30, 25, 26)), class = "data.frame", row.names = c(NA,
-8L))
# Extract year, month and day from info column
df = separate(df, info, into = c("year", "month", "day"), sep = ?, convert = T)
CodePudding user response:
It's better to use extract
in this case:
library(tidyr)
df %>%
extract(info, "PRISM_ppt_provisional_4kmD2_(\\d{4})(\\d{2})(\\d{2})_bil",
into = c("year", "month", "day"), remove = F)
# id info year month day
# 1 1 PRISM_ppt_provisional_4kmD2_20220925_bil 2022 09 25
# 2 2 PRISM_ppt_provisional_4kmD2_20220926_bil 2022 09 26
# 3 3 PRISM_ppt_provisional_4kmD2_20220927_bil 2022 09 27
# 4 4 PRISM_ppt_provisional_4kmD2_20220928_bil 2022 09 28
# 5 5 PRISM_ppt_provisional_4kmD2_20220929_bil 2022 09 29
# 6 6 PRISM_ppt_provisional_4kmD2_20220930_bil 2022 09 30
# 7 7 PRISM_ppt_provisional_4kmD2_20220925_bil 2022 09 25
# 8 8 PRISM_ppt_provisional_4kmD2_20220926_bil 2022 09 26
If your end goal is to create a date
column, then this might be better:
library(tidyr)
library(lubridate)
df %>%
extract(info, "PRISM_ppt_provisional_4kmD2_(.*)_bil",
into = "date", remove = F) %>%
mutate(date = ymd(date),
year = year(date),
month = month(date),
day = day(date))