Some sample data:
df <- data.frame(xyz1_t1 = sample(1:5, size = 100, replace = TRUE),
xyz1_t2 = sample(1:5, size = 100, replace = TRUE),
xyz1_t3 = sample(1:5, size = 100, replace = TRUE),
xyz2_t1 = sample(1:5, size = 100, replace = TRUE),
xyz2_t2 = sample(1:5, size = 100, replace = TRUE),
xyz2_t3 = sample(1:5, size = 100, replace = TRUE),
ab1_t1 = sample(1:5, size = 100, replace = TRUE),
ab1_t2 = sample(1:5, size = 100, replace = TRUE),
ab1_t3 = sample(1:5, size = 100, replace = TRUE)
)
> df %>% pivot_longer(cols = starts_with(c("xyz", "abc")))
# A tibble: 900 × 2
name value
<chr> <int>
1 xyz1_t1 2
2 xyz1_t2 4
3 xyz1_t3 4
4 xyz2_t1 2
5 xyz2_t2 3
6 xyz2_t3 3
7 ab1_t1 2
8 ab1_t2 2
9 ab1_t3 5
How can I split the variable name
into three variables scale
(xyz, ab, ...), item
(1, 2, ...) and time
(1, 2, 3)?
Thanks for help!
CodePudding user response:
Since you're using tidyverse
already you could use extract
from tidyr
df |> tidyr::pivot_longer(cols = starts_with(c("xyz", "ab"))) |>
tidyr::extract(name,
into=c("scale", "item", "time"),
regex="([a-z] )([0-9] )_t([0-9] )",
convert=TRUE,
remove=FALSE)
# A tibble: 900 × 5
name scale item time value
<chr> <chr> <int> <int> <int>
1 xyz1_t1 xyz 1 1 1
2 xyz1_t2 xyz 1 2 3
3 xyz1_t3 xyz 1 3 3
4 xyz2_t1 xyz 2 1 3
5 xyz2_t2 xyz 2 2 2
6 xyz2_t3 xyz 2 3 3
7 ab1_t1 ab 1 1 2
8 ab1_t2 ab 1 2 1
9 ab1_t3 ab 1 3 1
10 xyz1_t1 xyz 1 1 1
CodePudding user response:
Please try
library(stringr)
df2 <- df %>% pivot_longer(cols = starts_with(c("xyz", "ab"))) %>%
mutate(scale=str_extract_all(name,'^\\w[a-zA-Z]*(?=\\d\\_)'),
item=str_extract_all(name,'(?<=\\w[a-zA-Z]{1,3})\\d*(?=\\_)'),
time=str_extract_all(name,'(?<=\\_\\w)\\d*'))