Home > Software engineering >  split string column into year month and day
split string column into year month and day

Time:10-14

I have a dataset in which the info column looks similar to the data below. How can I split it into year, month and day column?

Code:

  df = structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8), info = c("PRISM_ppt_provisional_4kmD2_20220925_bil", 
    "PRISM_ppt_provisional_4kmD2_20220926_bil", "PRISM_ppt_provisional_4kmD2_20220927_bil", 
    "PRISM_ppt_provisional_4kmD2_20220928_bil", "PRISM_ppt_provisional_4kmD2_20220929_bil", 
    "PRISM_ppt_provisional_4kmD2_20220930_bil", "PRISM_ppt_provisional_4kmD2_20220925_bil", 
    "PRISM_ppt_provisional_4kmD2_20220926_bil")), class = "data.frame", row.names = c(NA, 
    -8L))
    
desired_df = structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8), info = c("PRISM_ppt_provisional_4kmD2_20220925_bil", 
"PRISM_ppt_provisional_4kmD2_20220926_bil", "PRISM_ppt_provisional_4kmD2_20220927_bil", 
"PRISM_ppt_provisional_4kmD2_20220928_bil", "PRISM_ppt_provisional_4kmD2_20220929_bil", 
"PRISM_ppt_provisional_4kmD2_20220930_bil", "PRISM_ppt_provisional_4kmD2_20220925_bil", 
"PRISM_ppt_provisional_4kmD2_20220926_bil"), year = c(2022, 2022, 
2022, 2022, 2022, 2022, 2022, 2022), month = c(9, 9, 9, 9, 9, 
9, 9, 9), day = c(25, 26, 27, 28, 29, 30, 25, 26)), class = "data.frame", row.names = c(NA, 
-8L))

    # Extract year, month and day from info column
    df = separate(df, info, into = c("year", "month", "day"), sep = ?, convert = T)

CodePudding user response:

It's better to use extract in this case:

library(tidyr)
df %>% 
  extract(info, "PRISM_ppt_provisional_4kmD2_(\\d{4})(\\d{2})(\\d{2})_bil",
          into = c("year", "month", "day"), remove = F)

#   id                                     info year month day
# 1  1 PRISM_ppt_provisional_4kmD2_20220925_bil 2022    09  25
# 2  2 PRISM_ppt_provisional_4kmD2_20220926_bil 2022    09  26
# 3  3 PRISM_ppt_provisional_4kmD2_20220927_bil 2022    09  27
# 4  4 PRISM_ppt_provisional_4kmD2_20220928_bil 2022    09  28
# 5  5 PRISM_ppt_provisional_4kmD2_20220929_bil 2022    09  29
# 6  6 PRISM_ppt_provisional_4kmD2_20220930_bil 2022    09  30
# 7  7 PRISM_ppt_provisional_4kmD2_20220925_bil 2022    09  25
# 8  8 PRISM_ppt_provisional_4kmD2_20220926_bil 2022    09  26

If your end goal is to create a date column, then this might be better:

library(tidyr)
library(lubridate)
df %>% 
  extract(info, "PRISM_ppt_provisional_4kmD2_(.*)_bil",
          into = "date", remove = F) %>% 
  mutate(date = ymd(date),
         year = year(date),
         month = month(date),
         day = day(date))
  • Related