Home > OS >  Specifying multiple column names inside mutate
Specifying multiple column names inside mutate

Time:12-07

How do I specify column names inside mutate when multiple columns are generated?

In this example:

# Reproducible example of the naming problem: the last mutate() stores the
# differences as one data-frame column `zz`, which prints as zz.x1, zz.x2, ...
library(dplyr)

set.seed(5)
data.frame(x2 = sample(1:10, 10),
           x3 = sample(1:10, 10),
           x1 = sample(1:10, 10),
           y3 = sample(1:10, 10),
           y2 = sample(1:10, 10),
           y1 = sample(1:10, 10)) |>
  # Reference result, computed one column at a time.
  mutate(z1 = x1 - y1,
         z2 = x2 - y2,
         z3 = x3 - y3) |>
  # Same computation for all pairs at once; naming the result `zz` keeps the
  # whole data frame nested under that single name.
  mutate(zz = across(num_range(prefix = "x", range = 1:3)) -
           across(num_range(prefix = "y", range = 1:3)))

Resulting in:

   x2 x3 x1 y3 y2 y1 z1 z2 z3 zz.x1 zz.x2 zz.x3
1   2  3  9 10  9  6  3 -7 -7     3    -7    -7
2   9 10  6  6  4  5  1  5  4     1     5     4
3   7  6  4  8  8  3  1 -1 -2     1    -1    -2
4   3  2  3  4 10  8 -5 -7 -2    -5    -7    -2
5   1  5  2  5  7  7 -5 -6  0    -5    -6     0
6   6  4  5  3  6  2  3  0  1     3     0     1
7   5  8 10  2  1  4  6  4  6     6     4     6
8  10  7  8  7  3  1  7  7  0     7     7     0
9   4  1  1  9  2  9 -8  2 -8    -8     2    -8
10  8  9  7  1  5 10 -3  3  8    -3     3     8

I want zz.x1 to be named zz1, zz.x2 to be named zz2, and so on.

CodePudding user response:

Here's a dplyr-way:

The result takes its column name(s) from the first across(), and we can change them with that call's .names argument. It accepts a glue-style specification, which we can adapt to your needs using str_replace().

library(dplyr)
library(stringr)

# `df` is the data frame built in the "Data" section below.
df |>
  mutate(
    # The difference inherits its column names from the first across(), so
    # the renaming happens there. `.col` is the documented glue placeholder
    # for the current column name (x1 -> z1, x2 -> z2, x3 -> z3).
    across(num_range(prefix = "x", range = 1:3),
           .names = "{str_replace(.col, 'x', 'z')}") -
      across(num_range(prefix = "y", range = 1:3))
  )

Output:

   x2 x3 x1 y3 y2 y1 z1 z2 z3
1   2  3  9 10  9  6  3 -7 -7
2   9 10  6  6  4  5  1  5  4
3   7  6  4  8  8  3  1 -1 -2
4   3  2  3  4 10  8 -5 -7 -2
5   1  5  2  5  7  7 -5 -6  0
6   6  4  5  3  6  2  3  0  1
7   5  8 10  2  1  4  6  4  6
8  10  7  8  7  3  1  7  7  0
9   4  1  1  9  2  9 -8  2 -8
10  8  9  7  1  5 10 -3  3  8

Data:

# Example data: six independent shuffles of 1..10, made reproducible by the
# seed. Columns are deliberately not in suffix order.
set.seed(5)
df <- data.frame(
  x2 = sample.int(10),
  x3 = sample.int(10),
  x1 = sample.int(10),
  y3 = sample.int(10),
  y2 = sample.int(10),
  y1 = sample.int(10)
)

Update: Or, to get output closer to what the OP asked for (new columns named zz1, zz2, zz3):

# `df2` is the data frame built in the "Data" section below; it already
# contains z1..z3, so the new columns are added next to them as zz1..zz3.
df2 |>
  mutate(
    # `.col` is the documented glue placeholder for the current column name;
    # the first across() determines the names of the result (x1 -> zz1, ...).
    across(num_range(prefix = "x", range = 1:3),
           .names = "{str_replace(.col, 'x', 'zz')}") -
      across(num_range(prefix = "y", range = 1:3))
  )

Output:

   x2 x3 x1 y3 y2 y1 z1 z2 z3 zz1 zz2 zz3
1   2  3  9 10  9  6  3 -7 -7   3  -7  -7
2   9 10  6  6  4  5  1  5  4   1   5   4
3   7  6  4  8  8  3  1 -1 -2   1  -1  -2
4   3  2  3  4 10  8 -5 -7 -2  -5  -7  -2
5   1  5  2  5  7  7 -5 -6  0  -5  -6   0
6   6  4  5  3  6  2  3  0  1   3   0   1
7   5  8 10  2  1  4  6  4  6   6   4   6
8  10  7  8  7  3  1  7  7  0   7   7   0
9   4  1  1  9  2  9 -8  2 -8  -8   2  -8
10  8  9  7  1  5 10 -3  3  8  -3   3   8

Data

# Seeded example data (six shuffles of 1..10) extended with the pairwise
# differences z1..z3, computed one column at a time.
set.seed(5)
df2 <- data.frame(
  x2 = sample.int(10),
  x3 = sample.int(10),
  x1 = sample.int(10),
  y3 = sample.int(10),
  y2 = sample.int(10),
  y1 = sample.int(10)
) |>
  mutate(
    z1 = x1 - y1,
    z2 = x2 - y2,
    z3 = x3 - y3
  )

CodePudding user response:

I don't know how to do this with dplyr, but in base R it is pretty straightforward. This might also partly answer your previous question.

# Option 1: list the variable suffixes by hand.
suff <- 1:3
# Option 2: derive them from the data by stripping the leading lower-case
# letters from every column name and keeping the distinct remainders.
suff <- sub('[a-z]*', '', names(df))
suff <- sort(unique(suff))

# For each suffix, add a column zz<suffix> = x<suffix> - y<suffix>.
for (i in suff) {
  df[[sprintf("zz%s", i)]] <-
    df[[sprintf("x%s", i)]] - df[[sprintf("y%s", i)]]
}

df
#    x2 x3 x1 y3 y2 y1 zz1 zz2 zz3
# 1   2  3  9 10  9  6   3  -7  -7
# 2   9 10  6  6  4  5   1   5   4
# 3   7  6  4  8  8  3   1  -1  -2
# 4   3  2  3  4 10  8  -5  -7  -2
# 5   1  5  2  5  7  7  -5  -6   0
# 6   6  4  5  3  6  2   3   0   1
# 7   5  8 10  2  1  4   6   4   6
# 8  10  7  8  7  3  1   7   7   0
# 9   4  1  1  9  2  9  -8   2  -8
# 10  8  9  7  1  5 10  -3   3   8

A more efficient way which avoids the loop over suffixes would be like this:

# Subtract the y-columns from the x-columns as whole data frames (matched by
# position, hence the shared `suff` ordering), label the result zz<suffix>,
# and append it to `df`.
zz <- setNames(
  df[paste0("x", suff)] - df[paste0("y", suff)],
  paste0("zz", suff)
)
df <- cbind(df, zz)

Data:

# Seeded example data: each column is an independent permutation of 1..10.
set.seed(5)
df <- data.frame(
  x2 = sample.int(10),
  x3 = sample.int(10),
  x1 = sample.int(10),
  y3 = sample.int(10),
  y2 = sample.int(10),
  y1 = sample.int(10)
)
  • Related