separate column into different columns-CodePudding

Some sample data:

df <- data.frame(xyz1_t1 = sample(1:5, size = 100, replace = TRUE), 
                 xyz1_t2 = sample(1:5, size = 100, replace = TRUE), 
                 xyz1_t3 = sample(1:5, size = 100, replace = TRUE),
                 xyz2_t1 = sample(1:5, size = 100, replace = TRUE), 
                 xyz2_t2 = sample(1:5, size = 100, replace = TRUE), 
                 xyz2_t3 = sample(1:5, size = 100, replace = TRUE),
                 ab1_t1 = sample(1:5, size = 100, replace = TRUE), 
                 ab1_t2 = sample(1:5, size = 100, replace = TRUE), 
                 ab1_t3 = sample(1:5, size = 100, replace = TRUE) 
)

> df %>% pivot_longer(cols = starts_with(c("xyz", "abc")))
# A tibble: 900 × 2
   name    value
   <chr>   <int>
 1 xyz1_t1     2
 2 xyz1_t2     4
 3 xyz1_t3     4
 4 xyz2_t1     2
 5 xyz2_t2     3
 6 xyz2_t3     3
 7 ab1_t1     2
 8 ab1_t2     2
 9 ab1_t3     5

How can I split the variable name into three variables scale (xyz, ab, ...), item (1, 2, ...) and time (1, 2, 3)?

Thanks for help!

CodePudding user response：

Since you're using tidyverse already you could use extract from tidyr

df |> tidyr::pivot_longer(cols = starts_with(c("xyz", "ab"))) |>
      tidyr::extract(name, 
               into=c("scale", "item", "time"), 
               regex="([a-z] )([0-9] )_t([0-9] )",
               convert=TRUE,
               remove=FALSE)

# A tibble: 900 × 5
   name    scale  item  time value
   <chr>   <chr> <int> <int> <int>
 1 xyz1_t1 xyz       1     1     1
 2 xyz1_t2 xyz       1     2     3
 3 xyz1_t3 xyz       1     3     3
 4 xyz2_t1 xyz       2     1     3
 5 xyz2_t2 xyz       2     2     2
 6 xyz2_t3 xyz       2     3     3
 7 ab1_t1  ab        1     1     2
 8 ab1_t2  ab        1     2     1
 9 ab1_t3  ab        1     3     1
10 xyz1_t1 xyz       1     1     1

CodePudding user response：

Please try

library(stringr)
df2 <- df %>% pivot_longer(cols = starts_with(c("xyz", "ab"))) %>% 
mutate(scale=str_extract_all(name,'^\\w[a-zA-Z]*(?=\\d\\_)'),
 item=str_extract_all(name,'(?<=\\w[a-zA-Z]{1,3})\\d*(?=\\_)'),
time=str_extract_all(name,'(?<=\\_\\w)\\d*'))