Consider the following simplified example, which generates all possible 2 x 2 matrices containing a single 1 and 0s everywhere else.
library(arrangements)

# define function
generate_matrices <- function(nrow, ncol, ones_count) {
  vectors <- permutations(c(
    rep(1, ones_count),
    rep(0, nrow * ncol - ones_count)
  ))
  # remove redundancies
  vectors <- vectors[!duplicated(vectors), ]
  # list of matrices
  out <- list()
  for (i in 1:ncol(vectors)) {
    out[[i]] <- matrix(
      data = vectors[, i],
      nrow = nrow,
      ncol = ncol,
      byrow = TRUE
    )
  }
  return(out)
}
Running the function to generate all 2 x 2 matrices with a single 1:
generate_matrices(nrow = 2, ncol = 2, ones_count = 1)
[[1]]
     [,1] [,2]
[1,]    1    0
[2,]    0    0

[[2]]
     [,1] [,2]
[1,]    0    1
[2,]    0    0

[[3]]
     [,1] [,2]
[1,]    0    0
[2,]    1    0

[[4]]
     [,1] [,2]
[1,]    0    0
[2,]    0    1
When I extend this to matrices with 5 rows, 4 columns and 4 ones, it throws an error:
generate_matrices(nrow = 5, ncol = 4, ones_count = 4)
# Error in permutations(c(rep(1, ones_count), rep(0, nrow * ncol - ones_count))) :
# too many results
My guess is that the call

vectors <- permutations(c(
  rep(1, ones_count),
  rep(0, nrow * ncol - ones_count)
))

takes too long to run and/or there is not enough memory on my laptop to store the results. Is there a more efficient way to implement this?
It is worth noting that I would eventually like to extend this to the 6 x 5 case with 4 ones and the 8 x 5 case with 8 ones.
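For scale, the number of distinct matrices in each case is just the binomial coefficient choose(nrow * ncol, ones_count), so the output sizes I am aiming for are:

choose(5 * 4, 4)  # 5 x 4 with 4 ones:  4845
choose(6 * 5, 4)  # 6 x 5 with 4 ones:  27405
choose(8 * 5, 8)  # 8 x 5 with 8 ones:  76904685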
CodePudding user response:
You can take combinations of the indices at which the 1s are placed:
# dimensions and number of ones (set k = 1 to reproduce the question's 2 x 2 example)
m <- 2
n <- 2
k <- 2

# build an m x n matrix of 0s and set the chosen cells to 1
createMatrix <- function(m, n, indices) {
  x <- matrix(0, m, n)
  x[indices] <- 1
  x
}

# one matrix per way of choosing k of the m * n cells
lapply(
  combn(seq_len(m * n), k, simplify = FALSE),
  function(x) createMatrix(m, n, x)
)
where m is the number of rows, n the number of columns, and k the number of ones.
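For example, a wrapper with the same argument names as the question's generate_matrices (the wrapper name below is just illustrative, and it reuses the createMatrix helper defined above) handles the failing 5 x 4 case directly:

generate_matrices_combn <- function(nrow, ncol, ones_count) {
  # one matrix per choice of ones_count cells out of nrow * ncol
  lapply(
    combn(seq_len(nrow * ncol), ones_count, simplify = FALSE),
    function(idx) createMatrix(nrow, ncol, idx)
  )
}

mats <- generate_matrices_combn(nrow = 5, ncol = 4, ones_count = 4)
length(mats)
# [1] 4845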
CodePudding user response:
You could use the function developed here to get all possible matrices of dimension 5 x 4, and then filter by the number of 1s using sum.
f = function(nrow, ncol) lapply(
  asplit(do.call(expand.grid, rep(list(0:1), nrow * ncol)), 1),
  matrix, nrow, ncol
)
mats = f(5, 4)
mats[sapply(mats, sum) == 4]  # keep only the matrices with exactly four 1s
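Note that this builds every possible 0/1 matrix before filtering, so the intermediate object grows as 2^(nrow * ncol) while only a small fraction survives the filter:

2^(5 * 4)         # 1048576 candidate matrices for the 5 x 4 case
choose(5 * 4, 4)  # only 4845 of them contain exactly four 1s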
CodePudding user response:
Using partitions::multiset and converting the result to an array of the appropriate dimensions seems more efficient.
f = function(nr, nc, n_1) {
  # columns of multiset() are the distinct arrangements of n_1 ones among nr * nc cells
  m = partitions::multiset(c(rep(0, nr * nc - n_1), rep(1, n_1)))
  # stack them into an nr x nc x (number of matrices) array
  array(m, dim = c(nr, nc, length(m) / (nr * nc)))
}
f(nr = 2, nc = 2, n_1 = 1)
# , , 1
#
#      [,1] [,2]
# [1,]    0    0
# [2,]    0    1
#
# , , 2
#
#      [,1] [,2]
# [1,]    0    1
# [2,]    0    0
#
# , , 3
#
#      [,1] [,2]
# [1,]    0    0
# [2,]    1    0
#
# , , 4
#
#      [,1] [,2]
# [1,]    1    0
# [2,]    0    0
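Applied to the question's 5 x 4 case with 4 ones, this yields one slice of the array per matrix; if a list of matrices (as returned by the original function) is preferred, the slices can be split out afterwards, for example with base R's asplit():

a = f(nr = 5, nc = 4, n_1 = 4)
dim(a)
# [1]    5    4 4845

# optional: convert to a list with one 5 x 4 matrix per element (R >= 3.6)
mats = asplit(a, 3)
length(mats)
# [1] 4845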
This is faster on larger data, e.g. when testing on 8 x 5 matrices with six 1s (resulting in 3,838,380 matrices):
system.time({a = f(nr = 8, nc = 5, n_1 = 6)})
#    user  system elapsed 
#    0.83    0.32    1.16

dim(a)
# [1]       8       5 3838380
m <- 8
n <- 5
k <- 6
system.time({
  l = lapply(
    combn(seq_len(m * n), k, simplify = FALSE),
    function(x) createMatrix(m, n, x)
  )
})
#    user  system elapsed 
#   19.55    0.53   20.08
length(l)
# [1] 3838380
With the input above, the code by Maël unfortunately errored on my PC ("cannot allocate vector of size..."), possibly due to the "expand.grid explosion": enumerating all 0/1 matrices of dimension 8 x 5 means 2^40 (roughly 1.1e12) candidates before any filtering.