I want to optimize my matrix computation.

My code generates an array (p x q x n x n).

Let x : n x p matrix, v: q x 1 vector

 
f1 <- function(i){
  sapply(seq_along(1:n), function(j) outer(x[i,]-x[j,], v, "*")^2, simplify = "array") 
}

sapply(seq_along(1:n), FUN = f1, simplify = "array")

Due to the large size of the array, I found this code should spend much memory space.

Then, improving its performance is limited by parallel computing or other apply methods.

How can I improve the computation performance?

CodePudding user response：

The following cut down execution time by 40% on my machine by:

avoiding redundant calculation (squared outer products are identical, regardless whether x[i,]-x[j,] or v. v. enters the equation)
supplying a zero matrix for squared outer products where i == j
pre-dimensioning the result array and ...
finally populating the result array at the correct positions

original function:

f_original <- function(){
  f1 <- function(i){
    sapply(seq_along(1:n), function(j) outer(x[i,]-x[j,], v, "*")^2,
           simplify = "array") 
  }
  sapply(seq_along(1:n), FUN = f1, simplify = "array")
}

alternative:

f_alternative <- function(){
  ## dimension the resulting array in advance:
  res = array(NA, c(p, q, n, n))
  ## these are the possible outcomes, because
  ## the order of i and j does not matter: 
  values <- combn(1:n,
                  m = 2,
                  FUN = function(ab){
                    a = ab[1]; b = ab[2]
                    outer(x[a,] - x[b,], v, '*')^2
                  }
                  )
  ## create all combinations of i and j, where i != j
  indices <- t(combn(1:n, m = 2))
  ## fill the result array at the positions ,,i,j and ,,j,i:
  for(r in 1:nrow(indices)){
    a = indices[r, 1]
    b = indices[r, 2]
    res[, , a, b] = res[ , , b , a] = values[ , , r]
  }
  ## fill the positions ,,i,j and ,,j,i with a zero matrix,
  ## (results from i == j):
  for(r in 1:n) res[, , r, r] = array(0, p * q)

  return(res)
}

benchmarking:

library(microbenchmark)

## initialize:
n = 30
p = 2
q = 5
x = matrix(rnorm(n * p), n, p)
v = rnorm(q)

microbenchmark(f_original(), times = 1e3)
## Unit: milliseconds
##          expr    min      lq     mean  median      uq    max neval
##  f_original() 7.4636 8.10685 9.274144 9.08785 9.97285 74.976  1000

microbenchmark(f_alternative(), times = 1e3)
## Unit: milliseconds
##             expr    min      lq     mean median     uq     max neval
## f_alternative() 4.7369 5.08235 5.941499 5.3563 6.7484 60.7656  1000

CodePudding user response：

Context

Before you even get to parallel computing with multiple cores and/or multiple machines, consider simplifying your calculation.

require( purrr )

# --- For illustration, let
p   <- 3
q   <- 4
n   <- 5
x   <- array( data = 1:( n * p ), dim = c( n, p ) )
V   <- matrix( q:1, nrow = q )

# --- The answer you seek is a multi-dimensional array, such as this
X <- array( data = NA, dim = c( p, q, n, n ) )

Simplifications

Do n calculations at once

Note that wherever i == j, the difference X[ i, ] - X[ j, ] will be a matrix of zeroes, e.g., matrix_of_zeroes <- matrix( data = 0, nrow = n, ncol = p )

and

outer( matrix_of_zeroes, V, "*" )^2

will be a vector of length q containing all zeros, e.g.,

zero_vector <- matrix( 0, ncol = q )

So you can take care of n calculations all at once as follows:

seq( n ) %>% walk( ~{ X[ , , .x, .x ] <<- zero_vector })

Calculate half the elements, re-use the results for the other half

Note that, since the last thing your function does is to square the result

f1( i, j ) == f1( j, i )

so if you calculate the upper triangle, you can use the results in the corresponding elements of the lower triangle, that is, set X[ , , j, i ] equal to element X[ , , i, j ].

Calculating elements of the upper triangle

index <- matrix( c( 1:n ), n, n, byrow = TRUE )

index
#     [,1] [,2] [,3] [,4] [,5]
#[1,]    1    2    3    4    5
#[2,]    1    2    3    4    5
#[3,]    1    2    3    4    5
#[4,]    1    2    3    4    5
#[5,]    1    2    3    4    5

# --- For index j, take everything above the diagonal
j <- index[ upper.tri( index, diag = FALSE ) ]

# j
# [1] 2 3 3 4 4 4 5 5 5 5

# --- For index i, build sequences 1, 12, 123, 1234, ...
2:( n - 1 ) %>%
  map( ~seq( from = 1, to = .x )) %>%
    unlist -> i
# --- Prepend the first i
i <- c( 1, i )

i
# [1] 1 1 2 1 2 3 1 2 3 4

elements <- data.frame( i = i, j = j )

elements
#   i j
#1  1 2
#2  1 3
#3  2 3
#4  1 4
#5  2 4
#6  3 4
#7  1 5
#8  2 5
#9  3 5
#10 4 5

# --- The function that does the work
fij <- function( i, j ) ( ( x[ i, ] - x[ j, ] ) %o%  V )^2

# --- Show what will happen when we walk along these elements.

walk2( elements$i, elements$j, ~cat( sprintf( "fij( %d, %d )\n", .x, .y ) ))
#fij( 1, 2 )
#fij( 1, 3 )
#fij( 2, 3 )
#fij( 1, 4 )
#fij( 2, 4 )
#fij( 3, 4 )
#fij( 1, 5 )
#fij( 2, 5 )
#fij( 3, 5 )
#fij( 4, 5 )

# --- Now actually walk along and calculate, updating the result outside of the function
#     by using the parent environment assignment operator, `<<-`

# --- To calculate the results and store them in the upper triangle:
# walk2( elements$i, elements$j, ~( X[ , , .x, .y ] <<- fij( .x, .y ) ))

# --- To calculate the results and store them in both upper and lower triangles:
walk2(
    elements$i                        # Take one index i
  , elements$j                        #  and one index j
  , ~{
          fij( .x, .y ) ->>           # Calculate the results for i,j
        X[ , , .x, .y ] ->>           # Assign the result for upper-triangle
        X[ , , .y, .x ]               # Assign the result for lower-triangle
     }
)

Putting it all together

require( purrr )
# --- For illustration, let

p   <- 3
q   <- 4
n   <- 5
x   <- array( data = 1:( n * p ), dim = c( n, p ) )
V   <- matrix( q:1, nrow = q )

# --- The answer you seek is a multi-dimensional array, such as this
X <- array( data = NA, dim = c( p, q, n, n ) )

# --- Take care of cases where i == j
zero_vector <- matrix( 0, ncol = q )
seq( n ) %>% walk( ~{ X[ , , .x, .x ] <<- zero_vector })

index <- matrix( c( 1:n ), n, n, byrow = TRUE )

# --- For index j, take everything above the diagonal
j <- index[ upper.tri( index, diag = FALSE ) ]

# --- For index i, build sequences 1, 12, 123, 1234, ...
2:( n - 1 ) %>%
  map( ~seq( from = 1, to = .x )) %>%
    unlist -> i

# --- Prepend the first i
i <- c( 1, i )

elements <- data.frame( i = i, j = j )

# --- The function that does the work
fij <- function( i, j ) ( ( x[ i, ] - x[ j, ] ) %o%  V )^2

# --- To calculate the results and store them in both upper and lower triangles:
walk2(
    elements$i                        # Take one index i
  , elements$j                        #  and one index j
  , ~{
          fij( .x, .y ) ->>           # Calculate the results for i,j
        X[ , , .x, .y ] ->>           # Assign the result for upper-triangle
        X[ , , .y, .x ]               # Assign the result for lower-triangle
     }
)