I have the following data frame:
A1_Q1 <- c(1, 2, 3)
A1_Q2 <- c(4, 5, 6)
A1_Q3 <- c(7, 8, 9)
A1_Q4 <- c(10, 11, 12)
A1_Q5 <- c(13, 14, 15)
A1_Q6 <- c(16, 17, 18)
A2_Q1 <- c(1, 2, 3)
A2_Q2 <- c(4, 5, 6)
A2_Q3 <- c(7, 8, 9)
A2_Q4 <- c(10, 11, 12)
A2_Q5 <- c(13, 14, 15)
A2_Q6 <- c(16, 17, 18)
df <- data.frame(A1_Q1, A1_Q2, A1_Q3, A1_Q4, A1_Q5, A1_Q6,
A2_Q1, A2_Q2, A2_Q3, A2_Q4, A2_Q5, A2_Q6)
I want to create additional variables called col1a, col1b, col1c
.
df <- df %>%
unite("col1a",
A1_Q1,
A1_Q2,
sep="-",
remove = FALSE) %>%
unite("col1b",
A1_Q3,
A1_Q4,
sep="-",
remove = FALSE) %>%
unite("col1c",
A1_Q5,
A1_Q6,
sep="-",
remove = FALSE)
I also want to do the same thing for the A2 variables.
Is there any way I can also create variables called col2a, col2b, col2c
in one go? I'm envisioning a for loop that looks something like this:
for (i in 1:2) {
df <- df %>%
unite("colia",
Ai_Q1,
Ai_Q2,
sep="-",
remove = FALSE) %>%
unite("colib",
Ai_Q3,
Ai_Q4,
sep="-",
remove = FALSE) %>%
unite("colic",
Ai_Q5,
Ai_Q6,
sep="-",
remove = FALSE)
}
CodePudding user response:
We can use the regex ^A1
and ^A2
to grep
the columns subset sb
; to create it, we loop using lapply
over the numbers 1:2
and use sprintf
(the advantage is, it's safe not to confuse A1
and A2
columns, and we can use the numbers later for the new column names). In the second step we basically paste0
the respective two columns together, e.g. sb[(1:2) x]
for the first two sb
columns together, and increment it by 2
until ncol(sb)
is reached. At the end of an iteration we paste0
col
, number x
and according element of letters
constant to a string to setNames
. Finally, we simply cbind
the resulting list to the data frame.
res <- lapply(1:2, \(x) {
sb <- df[grep(sprintf('^A%s', x), names(df))]
lapply((0:((ncol(sb) - 1)/2))*2, \(x) Reduce(\(y, z) paste0(y, '-', z), sb[(1:2) x])) |>
setNames(paste0('col', x, letters[seq_len((ncol(sb))/2)]))
}) |> cbind(df)
res
# col1a col1b col1c col2a col2b col2c A1_Q1 A1_Q2 A1_Q3 A1_Q4 A1_Q5 A1_Q6 A2_Q1
# 1 1-4 7-10 13-16 1-4 7-10 13-16 1 4 7 10 13 16 1
# 2 2-5 8-11 14-17 2-5 8-11 14-17 2 5 8 11 14 17 2
# 3 3-6 9-12 15-18 3-6 9-12 15-18 3 6 9 12 15 18 3
# A2_Q2 A2_Q3 A2_Q4 A2_Q5 A2_Q6
# 1 4 7 10 13 16
# 2 5 8 11 14 17
# 3 6 9 12 15 18
Note, that this might need a preprocessing step, if the A*
are not so nicely sorted by Q
as shown in your example.
CodePudding user response:
It may be easier with split.default
in base R
nm1 <- paste0("col", rep(unique( sub(".(\\d )_.*", "\\1",
names(df))), each = 3), letters[1:3])
df[nm1] <- lapply(split.default(df, as.integer(gl(ncol(df), 2, ncol(df)))),
\(x) do.call(paste, c(x, sep = '-')))
-output
> df
A1_Q1 A1_Q2 A1_Q3 A1_Q4 A1_Q5 A1_Q6 A2_Q1 A2_Q2 A2_Q3 A2_Q4 A2_Q5 A2_Q6 col1a col1b col1c col2a col2b col2c
1 1 4 7 10 13 16 1 4 7 10 13 16 1-4 7-10 13-16 1-4 7-10 13-16
2 2 5 8 11 14 17 2 5 8 11 14 17 2-5 8-11 14-17 2-5 8-11 14-17
3 3 6 9 12 15 18 3 6 9 12 15 18 3-6 9-12 15-18 3-6 9-12 15-18
Or using tidyverse
, we could reshape to 'long' format and then do the paste
library(dplyr)
library(tidyr)
library(stringr)
df %>%
mutate(rn = row_number()) %>%
pivot_longer(cols = -rn) %>%
separate(name, into = c("name1", "name2")) %>%
mutate( name2 = case_when(name2 %in% c("Q1", "Q2") ~ "a",
name2 %in% c("Q3", "Q4") ~ "b", TRUE ~ "c")) %>%
group_by(rn, name1, name2) %>%
summarise(value = str_c(value, collapse = '-'), .groups = 'drop') %>%
mutate(name1 = str_replace(name1, "\\D ", "col")) %>%
unite(name, name1, name2, sep = "") %>%
pivot_wider(names_from = name, values_from = value) %>%
select(-rn) %>%
bind_cols(df, .)
-output
A1_Q1 A1_Q2 A1_Q3 A1_Q4 A1_Q5 A1_Q6 A2_Q1 A2_Q2 A2_Q3 A2_Q4 A2_Q5 A2_Q6 col1a col1b col1c col2a col2b col2c
1 1 4 7 10 13 16 1 4 7 10 13 16 1-4 7-10 13-16 1-4 7-10 13-16
2 2 5 8 11 14 17 2 5 8 11 14 17 2-5 8-11 14-17 2-5 8-11 14-17
3 3 6 9 12 15 18 3 6 9 12 15 18 3-6 9-12 15-18 3-6 9-12 15-18