Home > OS >  Writing a loop in R to mutate multiple times
Writing a loop in R to mutate multiple times

Time:07-21

I have an example data set where I would like the column "type" to be copied every 6 columns, i.e. repeated in front of each group of six columns.

Original data:

# Sample input: a `type` column followed by three groups of six question
# columns, prefixed "1_", "2_" and "3_". `check.names = FALSE` is passed so the
# column names, which start with a digit, are kept exactly as written.
capacity_ex <- data.frame(
  "type" = c("p", "t", "t", "r,"),
  # group 1
  "1_q1" = 1:4,
  "1_q2" = c("a", "b", "c", "d"),
  "1_q3" = 10:13,
  "1_q4" = 100:103,
  "1_q5" = 110:113,
  "1_q6" = 11:14,
  # group 2
  "2_q1" = 22:25,
  "2_q2" = c("i", "j", "k", "l"),
  "2_q3" = 20:23,
  "2_q4" = 200:203,
  "2_q5" = 210:213,
  "2_q6" = 21:24,
  # group 3
  "3_q1" = 90:93,
  "3_q2" = c("p", "q", "r", "s"),
  "3_q3" = 10:13,
  "3_q4" = 300:303,
  "3_q5" = 310:313,
  "3_q6" = 31:34,
  check.names = FALSE
)

How I would like the data to be formatted

# Desired layout, shown as an illustrative sketch: a copy of "type" in front of
# each group of six columns. It is not runnable R as written (there is no
# data.frame() call and the parentheses are unbalanced).
capacity_want <- ("type" = c("p", "t", "t", "r,"),
                         "1_q1" = 1:4,
                         "1_q2" = c("a", "b", "c", "d"),
                         '1_q3' = 10:13,
                         '1_q4' = 100:103,
                         '1_q5' = 110:113,
                         '1_q6' = 11:14,
                         ("type" = c("p", "t", "t", "r,"),
                         "2_q1" = 22:25,
                         "2_q2" = c("i", "j", "k", "l"),
                         '2_q3' = 20:23,
                         '2_q4' = 200:203,
                          '2_q5' = 210:213,
                          '2_q6' = 21:24,
                          ("type" = c("p", "t", "t", "r,"),
                          "3_q1" = 90:93,
                          "3_q2" = c("p", "q", "r", "s"), 
                           '3_q3' = 10:13,
                          '3_q4' = 300:303,
                           '3_q5' = 310:313,
                          '3_q6' = 31:34,check.names = FALSE)

Type could have any kind of additional name at the end to keep each name unique. This sample data is very abbreviated - my data set is much longer, so I don't want to do this manually.

I tried to write a loop to do this, but my looping skills are minimal. This is what I tried; it produced many errors that I couldn't work out:

# Attempt from the question, kept as posted. It does not parse as written
# (e.g. the dangling `%>%` before the closing brace and the `(i 6)` expression).
# NOTE(review): `(i 6)` is presumably `(i + 6)` with the `+` lost - confirm.
capacity_test_fix <- do.call(
   lapply(seq(2, (ncol(capacity_ex), 6), \(i){
    capacity_ex %>%
       mutate("type" = type, .after = (i 6)) %>%
  })))

Happy to provide better context if needed!

CodePudding user response:

This is a quick and easy way to achieve what you describe. Also, please check your original post: the sample output can be understood, but it is not runnable in R.

# Sample input reproduced from the question: `type` plus three groups of six
# question columns. `check.names = FALSE` is passed so the digit-leading column
# names are kept exactly as written.
capacity_ex <- data.frame(
  "type" = c("p", "t", "t", "r,"),
  # group 1
  "1_q1" = 1:4,
  "1_q2" = c("a", "b", "c", "d"),
  "1_q3" = 10:13,
  "1_q4" = 100:103,
  "1_q5" = 110:113,
  "1_q6" = 11:14,
  # group 2
  "2_q1" = 22:25,
  "2_q2" = c("i", "j", "k", "l"),
  "2_q3" = 20:23,
  "2_q4" = 200:203,
  "2_q5" = 210:213,
  "2_q6" = 21:24,
  # group 3
  "3_q1" = 90:93,
  "3_q2" = c("p", "q", "r", "s"),
  "3_q3" = 10:13,
  "3_q4" = 300:303,
  "3_q5" = 310:313,
  "3_q6" = 31:34,
  check.names = FALSE
)

library(purrr)
library(dplyr)
library(tidyr)

# Derive the group prefixes ("1", "2", "3", ...) from the column names, i.e.
# everything before the first underscore, so nothing has to be edited by hand
# when the data has more groups.
groups_index <- unique(
  sub("_.*$", "", setdiff(colnames(capacity_ex), "type"))
)

# Add one copy of `type` per group, named "{x}_q_type". reduce() threads the
# data frame through every step and the result is assigned once, instead of
# overwriting the global `capacity_ex` from inside the callback with `<<-`.
capacity_ex <- reduce(
  groups_index,
  function(df, x) {
    df %>% mutate("{x}_q_type" := type)
  },
  .init = capacity_ex
)

After this step, you just need to reorder the columns. Note that, to make the type column appear first within each group, I named the new columns {x}_q_type.

# Drop the original `type` column, then order the columns by group (in order of
# first appearance) with each group's "{x}_q_type" copy first. Ordering on the
# group prefix instead of the full names keeps "10_..." from sorting before
# "2_..." and does not depend on the locale's collation rules. Ties keep their
# original order, so the question columns stay in place within each group.
capacity_ex %>%
  select(-type) %>%
  select(order(
    match(sub("_.*$", "", colnames(.)), unique(sub("_.*$", "", colnames(.)))),
    !endsWith(colnames(.), "_q_type")
  ))
#>   1_q_type 1_q1 1_q2 1_q3 1_q4 1_q5 1_q6 2_q_type 2_q1 2_q2 2_q3 2_q4 2_q5 2_q6
#> 1        p    1    a   10  100  110   11        p   22    i   20  200  210   21
#> 2        t    2    b   11  101  111   12        t   23    j   21  201  211   22
#> 3        t    3    c   12  102  112   13        t   24    k   22  202  212   23
#> 4       r,    4    d   13  103  113   14       r,   25    l   23  203  213   24
#>   3_q_type 3_q1 3_q2 3_q3 3_q4 3_q5 3_q6
#> 1        p   90    p   10  300  310   31
#> 2        t   91    q   11  301  311   32
#> 3        t   92    r   12  302  312   33
#> 4       r,   93    s   13  303  313   34

Created on 2022-07-21 by the reprex package (v2.0.1)

CodePudding user response:

I think you need something like this (a base R solution)

# Sample input: `type` plus three groups of six question columns.
# `check.names = FALSE` is passed so the digit-leading column names are kept
# exactly as written.
capacity_ex <- data.frame(
  "type" = c("p", "t", "t", "r"),
  # group 1
  "1_q1" = 1:4,
  "1_q2" = c("a", "b", "c", "d"),
  "1_q3" = 10:13,
  "1_q4" = 100:103,
  "1_q5" = 110:113,
  "1_q6" = 11:14,
  # group 2
  "2_q1" = 22:25,
  "2_q2" = c("i", "j", "k", "l"),
  "2_q3" = 20:23,
  "2_q4" = 200:203,
  "2_q5" = 210:213,
  "2_q6" = 21:24,
  # group 3
  "3_q1" = 90:93,
  "3_q2" = c("p", "q", "r", "s"),
  "3_q3" = 10:13,
  "3_q4" = 300:303,
  "3_q5" = 310:313,
  "3_q6" = 31:34,
  check.names = FALSE
)

capacity_ex
#>   type 1_q1 1_q2 1_q3 1_q4 1_q5 1_q6 2_q1 2_q2 2_q3 2_q4 2_q5 2_q6 3_q1 3_q2
#> 1    p    1    a   10  100  110   11   22    i   20  200  210   21   90    p
#> 2    t    2    b   11  101  111   12   23    j   21  201  211   22   91    q
#> 3    t    3    c   12  102  112   13   24    k   22  202  212   23   92    r
#> 4    r    4    d   13  103  113   14   25    l   23  203  213   24   93    s
#>   3_q3 3_q4 3_q5 3_q6
#> 1   10  300  310   31
#> 2   11  301  311   32
#> 3   12  302  312   33
#> 4   13  303  313   34
# Split the non-`type` columns into consecutive groups of six, prepend a copy
# of `type` to each group, then bind the groups back together column-wise.
# `1 + (position - 1) %/% 6` maps column positions 1-6 to group 1, 7-12 to
# group 2, and so on. `drop = FALSE` keeps a data frame even if only one
# column remains after removing `type`.
split.default(
  capacity_ex[, -1, drop = FALSE],
  1 + ((seq_along(capacity_ex[, -1, drop = FALSE]) - 1) %/% 6)
) |>
  lapply(\(.x) cbind(type = capacity_ex[, 1], .x)) |>
  (\(.x) do.call(cbind, .x))()
#>   1.type 1.1_q1 1.1_q2 1.1_q3 1.1_q4 1.1_q5 1.1_q6 2.type 2.2_q1 2.2_q2 2.2_q3
#> 1      p      1      a     10    100    110     11      p     22      i     20
#> 2      t      2      b     11    101    111     12      t     23      j     21
#> 3      t      3      c     12    102    112     13      t     24      k     22
#> 4      r      4      d     13    103    113     14      r     25      l     23
#>   2.2_q4 2.2_q5 2.2_q6 3.type 3.3_q1 3.3_q2 3.3_q3 3.3_q4 3.3_q5 3.3_q6
#> 1    200    210     21      p     90      p     10    300    310     31
#> 2    201    211     22      t     91      q     11    301    311     32
#> 3    202    212     23      t     92      r     12    302    312     33
#> 4    203    213     24      r     93      s     13    303    313     34

Created on 2022-07-21 by the reprex package (v2.0.1)

  • Related