Home > Net >  Find all combinations of n1 elements from vector1 and n2 elements from vector2 in R?
Find all combinations of n1 elements from vector1 and n2 elements from vector2 in R?

Time:12-03

I have two vectors and I am trying to find all unique combinations of 3 elements from vector1 and 2 elements from vector2. I have tried the following code.

# combn() returns one combination per column.
V1 <- combn(1:5, m = 3)   # 3 x 10 matrix: 10 combinations in total
V2 <- combn(6:11, m = 2)  # 2 x 15 matrix: 15 combinations in total

How can I combine V1 and V2 so that there are 10 * 15 = 150 combinations in total? Thanks.

CodePudding user response:

You can try expand.grid along with asplit, e.g.,

# Split each matrix into a list of its columns (MARGIN = 2), then pair every
# element of the first list with every element of the second.
expand.grid(asplit(V1, MARGIN = 2), asplit(V2, MARGIN = 2))

or

# Pair every column of V1 with every column of V2 (columns Var1 and Var2),
# then concatenate each pair into a single vector.
grid_pairs <- expand.grid(asplit(V1, 2), asplit(V2, 2))
mapply(c, grid_pairs[["Var1"]], grid_pairs[["Var2"]])

CodePudding user response:

The function comboGrid from RcppAlgos (I am the author) does just the trick:

library(RcppAlgos)

# Three pools drawn from 1:5 followed by two pools drawn from 6:11.
grid <- comboGrid(
  c(rep(list(1:5), times = 3), rep(list(6:11), times = 2)),
  repetition = FALSE
)

head(grid)
#>      Var1 Var2 Var3 Var4 Var5
#> [1,]    1    2    3    6    7
#> [2,]    1    2    3    6    8
#> [3,]    1    2    3    6    9
#> [4,]    1    2    3    6   10
#> [5,]    1    2    3    6   11
#> [6,]    1    2    3    7    8

tail(grid)
#>        Var1 Var2 Var3 Var4 Var5
#> [145,]    3    4    5    8    9
#> [146,]    3    4    5    8   10
#> [147,]    3    4    5    8   11
#> [148,]    3    4    5    9   10
#> [149,]    3    4    5    9   11
#> [150,]    3    4    5   10   11

It is quite efficient as well. It is written in C++ and pulls together many ideas from the excellent question: Picking unordered combinations from pools with overlap. The underlying algorithm avoids generating duplicates that would need to be filtered out.

Consider the following example, where the full Cartesian product would contain more than 10 billion results:

# Time the construction of a much larger grid: five pools from 1:20 and
# three pools from 21:35.
system.time(
  huge <- comboGrid(
    c(rep(list(1:20), times = 5), rep(list(21:35), times = 3)),
    repetition = FALSE
  )
)
#>    user  system elapsed 
#>   0.990   0.087   1.077

dim(huge)
#> [1] 7054320       8

CodePudding user response:

You can use expand.grid():

# Every (column index of V1, column index of V2) pair, one pair per row.
g <- expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2)))
# Select the paired columns from each matrix and stack them vertically.
V3 <- rbind(V1[, g[["Var1"]]], V2[, g[["Var2"]]])

The result is in a similar format as V1 and V2, i.e. a 5 × 150 matrix (here printed transposed):

head(t(V3))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    2    3    6    7
# [2,]    1    2    4    6    7
# [3,]    1    2    5    6    7
# [4,]    1    3    4    6    7
# [5,]    1    3    5    6    7
# [6,]    1    4    5    6    7

dim(unique(t(V3)))
# [1] 150   5

And a generalized approach that can handle more than two initial matrices of combinations, stored in a list V:

# V may hold any number of combination matrices (one combination per column).
V <- list(V1, V2)
# One column-index vector per matrix, crossed into every index tuple.
g <- do.call(expand.grid, lapply(V, \(x) seq_len(ncol(x))))
# For each matrix, select all rows (TRUE) and the columns listed in the
# matching column of g. SIMPLIFY = FALSE keeps the result a list even when
# all matrices share the same dimensions; with the default, mapply() would
# collapse it into a single matrix and do.call(rbind, ...) would fail.
V.comb <- do.call(rbind, mapply(`[`, V, TRUE, g, SIMPLIFY = FALSE))

identical(V.comb, V3)
# [1] TRUE

CodePudding user response:

After some helpful refactoring guidance from @onyambu, here is a shorter solution based on base::merge():

# Transpose so each combination is a row, then merge with no key columns
# (by.x and by.y both NULL) to pair every row of x with every row of y.
merge(
  x = t(combn(1:5, m = 3)),
  y = t(combn(6:11, m = 2)),
  by.x = NULL,
  by.y = NULL
)

...and the first 20 rows of output:

> merge(t(combn(1:5, 3)),t(combn(6:11, 2)),by.x=NULL,by.y = NULL)
    V1.x V2.x V3 V1.y V2.y
1      1    2  3    6    7
2      1    2  4    6    7
3      1    2  5    6    7
4      1    3  4    6    7
5      1    3  5    6    7
6      1    4  5    6    7
7      2    3  4    6    7
8      2    3  5    6    7
9      2    4  5    6    7
10     3    4  5    6    7
11     1    2  3    6    8
12     1    2  4    6    8
13     1    2  5    6    8
14     1    3  4    6    8
15     1    3  5    6    8
16     1    4  5    6    8
17     2    3  4    6    8
18     2    3  5    6    8
19     2    4  5    6    8
20     3    4  5    6    8

original solution

A base R solution to create a Cartesian product with merge() looks like this:

# Build one data frame per set of combinations (one combination per row).
df1 <- data.frame(t(combn(1:5, 3)))
df2 <- data.frame(t(combn(6:11, 2)))
# Rename df2's columns so they do not clash with df1's X1 and X2.
colnames(df2) <- paste0("y", 1:2)

# With no key columns (by.x and by.y both NULL), merge() returns the
# Cartesian product of the two data frames.
merge(df1, df2, by.x = NULL, by.y = NULL)

...and the first 25 rows of output:

> merge(df1,df2,by.x=NULL,by.y = NULL)
    X1 X2 X3 y1 y2
1    1  2  3  6  7
2    1  2  4  6  7
3    1  2  5  6  7
4    1  3  4  6  7
5    1  3  5  6  7
6    1  4  5  6  7
7    2  3  4  6  7
8    2  3  5  6  7
9    2  4  5  6  7
10   3  4  5  6  7
11   1  2  3  6  8
12   1  2  4  6  8
13   1  2  5  6  8
14   1  3  4  6  8
15   1  3  5  6  8
16   1  4  5  6  8
17   2  3  4  6  8
18   2  3  5  6  8
19   2  4  5  6  8
20   3  4  5  6  8
21   1  2  3  6  9
22   1  2  4  6  9
23   1  2  5  6  9
24   1  3  4  6  9
25   1  3  5  6  9

CodePudding user response:

Similar idea, using apply

# For every (column index of V1, column index of V2) pair, concatenate the
# two selected columns; apply() returns each result as a column of a matrix.
# seq_len() is used instead of seq() so a zero-column matrix gives an empty
# index set rather than the sequence c(1, 0).
apply(expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2))), 1, function(i) {
  c(V1[, i[1]], V2[, i[2]])
})
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> [1,]    1    1    1    1    1    1    2    2    2     3     1     1     1     1
#> [2,]    2    2    2    3    3    4    3    3    4     4     2     2     2     3
#> [3,]    3    4    5    4    5    5    4    5    5     5     3     4     5     4
#> [4,]    6    6    6    6    6    6    6    6    6     6     6     6     6     6
#> [5,]    7    7    7    7    7    7    7    7    7     7     8     8     8     8
#>      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
#> [1,]     1     1     2     2     2     3     1     1     1     1     1     1
#> [2,]     3     4     3     3     4     4     2     2     2     3     3     4
#> [3,]     5     5     4     5     5     5     3     4     5     4     5     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]     8     8     8     8     8     8     9     9     9     9     9     9
#>      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
#> [1,]     2     2     2     3     1     1     1     1     1     1     2     2
#> [2,]     3     3     4     4     2     2     2     3     3     4     3     3
#> [3,]     4     5     5     5     3     4     5     4     5     5     4     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]     9     9     9     9    10    10    10    10    10    10    10    10
#>      [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
#> [1,]     2     3     1     1     1     1     1     1     2     2     2     3
#> [2,]     4     4     2     2     2     3     3     4     3     3     4     4
#> [3,]     5     5     3     4     5     4     5     5     4     5     5     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]    10    10    11    11    11    11    11    11    11    11    11    11
#>      [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
#> [1,]     1     1     1     1     1     1     2     2     2     3     1     1
#> [2,]     2     2     2     3     3     4     3     3     4     4     2     2
#> [3,]     3     4     5     4     5     5     4     5     5     5     3     4
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]     8     8     8     8     8     8     8     8     8     8     9     9
#>      [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
#> [1,]     1     1     1     1     2     2     2     3     1     1     1     1
#> [2,]     2     3     3     4     3     3     4     4     2     2     2     3
#> [3,]     5     4     5     5     4     5     5     5     3     4     5     4
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]     9     9     9     9     9     9     9     9    10    10    10    10
#>      [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
#> [1,]     1     1     2     2     2     3     1     1     1     1     1     1
#> [2,]     3     4     3     3     4     4     2     2     2     3     3     4
#> [3,]     5     5     4     5     5     5     3     4     5     4     5     5
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]    10    10    10    10    10    10    11    11    11    11    11    11
#>      [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
#> [1,]     2     2     2     3     1     1     1     1     1     1     2     2
#> [2,]     3     3     4     4     2     2     2     3     3     4     3     3
#> [3,]     4     5     5     5     3     4     5     4     5     5     4     5
#> [4,]     7     7     7     7     8     8     8     8     8     8     8     8
#> [5,]    11    11    11    11     9     9     9     9     9     9     9     9
#>      [,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108]
#> [1,]     2      3      1      1      1      1      1      1      2      2
#> [2,]     4      4      2      2      2      3      3      4      3      3
#> [3,]     5      5      3      4      5      4      5      5      4      5
#> [4,]     8      8      8      8      8      8      8      8      8      8
#> [5,]     9      9     10     10     10     10     10     10     10     10
#>      [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      8      8      8      8      8      8      8      8      8      8
#> [5,]     10     10     11     11     11     11     11     11     11     11
#>      [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      8      8      9      9      9      9      9      9      9      9
#> [5,]     11     11     10     10     10     10     10     10     10     10
#>      [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      9      9      9      9      9      9      9      9      9      9
#> [5,]     10     10     11     11     11     11     11     11     11     11
#>      [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      9      9     10     10     10     10     10     10     10     10
#> [5,]     11     11     11     11     11     11     11     11     11     11
#>      [,149] [,150]
#> [1,]      2      3
#> [2,]      4      4
#> [3,]      5      5
#> [4,]     10     10
#> [5,]     11     11

Created on 2022-12-02 with reprex v2.0.2

  • Related