How do you loop through a range of numbers and apply it to every variable?-CodePudding

I have the following data frame:

A1_Q1 <- c(1, 2, 3)
A1_Q2 <- c(4, 5, 6)
A1_Q3 <- c(7, 8, 9)
A1_Q4 <- c(10, 11, 12)
A1_Q5 <- c(13, 14, 15)
A1_Q6 <- c(16, 17, 18)

A2_Q1 <- c(1, 2, 3)
A2_Q2 <- c(4, 5, 6)
A2_Q3 <- c(7, 8, 9)
A2_Q4 <- c(10, 11, 12)
A2_Q5 <- c(13, 14, 15)
A2_Q6 <- c(16, 17, 18)

df <- data.frame(A1_Q1, A1_Q2, A1_Q3, A1_Q4, A1_Q5, A1_Q6,
                 A2_Q1, A2_Q2, A2_Q3, A2_Q4, A2_Q5, A2_Q6)

I want to create additional variables called col1a, col1b, col1c.

df <- df %>%
  unite("col1a",
        A1_Q1,
        A1_Q2,
        sep="-",
        remove = FALSE) %>%
  unite("col1b",
        A1_Q3,
        A1_Q4,
        sep="-",
        remove = FALSE) %>%
  unite("col1c",
        A1_Q5,
        A1_Q6,
        sep="-",
        remove = FALSE)

I also want to do the same thing for the A2 variables.

Is there any way I can also create variables called col2a, col2b, col2c in one go? I'm envisioning a for loop that looks something like this:

for (i in 1:2) {
  df <- df %>%
    unite("colia",
          Ai_Q1,
          Ai_Q2,
          sep="-",
          remove = FALSE) %>%
    unite("colib",
          Ai_Q3,
          Ai_Q4,
          sep="-",
          remove = FALSE) %>%
    unite("colic",
          Ai_Q5,
          Ai_Q6,
          sep="-",
          remove = FALSE) 
}

CodePudding user response：

We can use the regex ^A1 and ^A2 to grep the columns subset sb; to create it, we loop using lapply over the numbers 1:2 and use sprintf (the advantage is, it's safe not to confuse A1 and A2 columns, and we can use the numbers later for the new column names). In the second step we basically paste0 the respective two columns together, e.g. sb[(1:2) x] for the first two sb columns together, and increment it by 2 until ncol(sb) is reached. At the end of an iteration we paste0 col, number x and according element of letters constant to a string to setNames. Finally, we simply cbind the resulting list to the data frame.

res <- lapply(1:2, \(x) {
  sb <- df[grep(sprintf('^A%s', x), names(df))]
  lapply((0:((ncol(sb) - 1)/2))*2, \(x) Reduce(\(y, z) paste0(y, '-', z), sb[(1:2)   x])) |>
    setNames(paste0('col', x, letters[seq_len((ncol(sb))/2)]))
}) |> cbind(df)

res
#   col1a col1b col1c col2a col2b col2c A1_Q1 A1_Q2 A1_Q3 A1_Q4 A1_Q5 A1_Q6 A2_Q1
# 1   1-4  7-10 13-16   1-4  7-10 13-16     1     4     7    10    13    16     1
# 2   2-5  8-11 14-17   2-5  8-11 14-17     2     5     8    11    14    17     2
# 3   3-6  9-12 15-18   3-6  9-12 15-18     3     6     9    12    15    18     3
#   A2_Q2 A2_Q3 A2_Q4 A2_Q5 A2_Q6
# 1     4     7    10    13    16
# 2     5     8    11    14    17
# 3     6     9    12    15    18

Note, that this might need a preprocessing step, if the A* are not so nicely sorted by Q as shown in your example.

CodePudding user response：

It may be easier with split.default in base R

nm1 <-  paste0("col", rep(unique( sub(".(\\d )_.*", "\\1", 
      names(df))), each = 3), letters[1:3])
df[nm1] <-  lapply(split.default(df, as.integer(gl(ncol(df), 2, ncol(df)))), 
      \(x) do.call(paste, c(x, sep = '-')))

-output

> df
  A1_Q1 A1_Q2 A1_Q3 A1_Q4 A1_Q5 A1_Q6 A2_Q1 A2_Q2 A2_Q3 A2_Q4 A2_Q5 A2_Q6 col1a col1b col1c col2a col2b col2c
1     1     4     7    10    13    16     1     4     7    10    13    16   1-4  7-10 13-16   1-4  7-10 13-16
2     2     5     8    11    14    17     2     5     8    11    14    17   2-5  8-11 14-17   2-5  8-11 14-17
3     3     6     9    12    15    18     3     6     9    12    15    18   3-6  9-12 15-18   3-6  9-12 15-18

Or using tidyverse, we could reshape to 'long' format and then do the paste

library(dplyr)
library(tidyr)
library(stringr)
df %>% 
  mutate(rn = row_number()) %>%
  pivot_longer(cols = -rn) %>%
  separate(name, into = c("name1", "name2")) %>%
  mutate( name2 = case_when(name2 %in% c("Q1", "Q2") ~ "a", 
     name2 %in% c("Q3", "Q4") ~ "b", TRUE ~ "c")) %>% 
  group_by(rn, name1, name2) %>% 
  summarise(value = str_c(value, collapse = '-'), .groups = 'drop') %>%
   mutate(name1 = str_replace(name1, "\\D ", "col")) %>%
   unite(name, name1, name2, sep = "") %>% 
  pivot_wider(names_from = name, values_from = value) %>% 
  select(-rn) %>% 
  bind_cols(df, .)

-output

 A1_Q1 A1_Q2 A1_Q3 A1_Q4 A1_Q5 A1_Q6 A2_Q1 A2_Q2 A2_Q3 A2_Q4 A2_Q5 A2_Q6 col1a col1b col1c col2a col2b col2c
1     1     4     7    10    13    16     1     4     7    10    13    16   1-4  7-10 13-16   1-4  7-10 13-16
2     2     5     8    11    14    17     2     5     8    11    14    17   2-5  8-11 14-17   2-5  8-11 14-17
3     3     6     9    12    15    18     3     6     9    12    15    18   3-6  9-12 15-18   3-6  9-12 15-18