I've been studying the idea of using Julia as a replacement for C as a secondary language to speed up R scripts, so I'm writing a short script to compare the performance of the three languages. The main R file is reproduced below:
# Loading packages
library(Rcpp)
library(RcppArmadillo)
library(JuliaConnectoR)
library(microbenchmark)
# Generating data
set.seed(9392927)
N <- 1e3L
x1 <- matrix(rpois(N ^ 2L, lambda = 10L), N)
x2 <- matrix(rpois(N ^ 2L, lambda = 10L), N)
# Defining functions
mySolveR <- function(m) {
  return(solve(m))
}
myProdR <- function(m1, m2) {
  return(m1 %*% m2)
}
cppFunction("
arma::mat mySolveCpp(arma::mat m) {
return(inv(m));
}",
depends = "RcppArmadillo"
)
cppFunction("
arma::mat myProdCpp(arma::mat m1, arma::mat m2) {
return(m1 * m2);
}",
depends = "RcppArmadillo"
)
mySolveJulia <- juliaEval("
  function mySolveJulia(m)
    return(inv(m))
  end"
)
#> Starting Julia ...
myProdJulia <- juliaEval("
  function myProdJulia(m1, m2)
    return(m1 * m2)
  end"
)
# Benchmarking
reps <- 10L
print(
  microbenchmark(
    mySolveR(x1), mySolveCpp(x1), mySolveJulia(x1),
    times = reps
  )
)
#> Unit: milliseconds
#>              expr        min         lq      mean     median        uq       max neval cld
#>      mySolveR(x1)   41.53371   44.03872  186.4527   59.43063  465.0983  528.8838    10  a
#>    mySolveCpp(x1)   49.42867   72.63049  240.0839   81.16933  440.1176  576.2336    10  a
#>  mySolveJulia(x1) 3856.43147 3897.36013 5112.4448 5376.94133 6026.1369 6220.1093    10   b
print(
  microbenchmark(
    myProdR(x1, x2), myProdCpp(x1, x2), myProdJulia(x1, x2),
    times = reps
  )
)
#> Unit: milliseconds
#>                 expr        min         lq       mean     median         uq        max neval cld
#>      myProdR(x1, x2)   20.10827   23.50031   27.69398   25.18961   32.51614   39.89406    10  a
#>    myProdCpp(x1, x2)   25.59671   29.59619   38.32624   31.83016   49.33401   57.82850    10  a
#>  myProdJulia(x1, x2) 5698.10212 6135.62034 6353.71632 6412.39110 6487.12516 7243.81362    10   b
Created on 2022-12-14 with reprex v2.0.2
At this stage I am not worried about which language performs best; I would even expect such simple functions to perform within the same order of magnitude. But I am surprised that Julia is performing so poorly.
My only guesses involve unnecessary recompilation of the code or repeated launches of the Julia instance, but using startJuliaServer()/stopJulia() doesn't help, and neither does isolating the functions in their own source files. What am I missing?
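For clarity, this is roughly the server variant I tried (a minimal sketch reusing the data, functions and benchmark call from above):
startJuliaServer()  # keep a single Julia session alive across the calls
mySolveJulia <- juliaEval("
  function mySolveJulia(m)
    return(inv(m))
  end"
)
print(
  microbenchmark(
    mySolveR(x1), mySolveCpp(x1), mySolveJulia(x1),
    times = reps
  )
)
stopJulia()  # shut the Julia session down afterwards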
CodePudding user response:
I have not used JuliaConnectoR, but I tested JuliaCall.
Setup (maybe things can be written in a cleaner way; I just checked the JuliaCall manual, and you can check the sources of the example packages listed here to see more carefully designed code):
library(JuliaCall)
library(microbenchmark)
julia <- julia_setup()
set.seed(9392927)
N <- 1e3L
x1 <- matrix(rpois(N ^ 2L, lambda = 10L), N)
x2 <- matrix(rpois(N ^ 2L, lambda = 10L), N)
mySolveR <- function(m) {
  return(solve(m))
}
myProdR <- function(m1, m2) {
  return(m1 %*% m2)
}
julia_eval("
  function mySolveJulia(m)
    return(inv(m))
  end")
mySolveJulia <- function(m) {
  julia_call("mySolveJulia", m)
}
julia_eval("
  function myProdJulia(m1, m2)
    return(m1 * m2)
  end")
myProdJulia <- function(m1, m2) {
  julia_call("myProdJulia", m1, m2)
}
And the benchmarks:
> reps <- 10L
> print(
    microbenchmark(
      mySolveR(x1), mySolveJulia(x1),
      times = reps
    )
  )
Unit: milliseconds
expr min lq mean median uq max neval
mySolveR(x1) 516.1157 517.4391 520.98363 520.60125 522.0257 531.6278 10
mySolveJulia(x1) 50.8057 52.9256 73.29882 55.88265 67.5247 167.2614 10
> print(
    microbenchmark(
      myProdR(x1, x2), myProdJulia(x1, x2),
      times = reps
    )
  )
Unit: milliseconds
expr min lq mean median uq max neval
myProdR(x1, x2) 499.3371 509.4347 526.256 523.8693 532.5146 562.5851 10
myProdJulia(x1, x2) 331.7332 332.5554 354.807 337.0199 351.8239 492.5586 10
Surprisingly, I get significantly higher timings for R than you do (but maybe this is due to a difference between the machines on which the tests were run).
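One thing that could be worth checking (an assumption on my part, not something I verified for these runs): solve() and %*% in R delegate to whatever BLAS/LAPACK libraries the R installation is linked against, and timings differ a lot between the reference BLAS and an optimized one. You can see which libraries are in use with:
sessionInfo()$BLAS    # path of the BLAS library R is linked against
sessionInfo()$LAPACK  # path of the LAPACK library R is linked against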
Also, if you want to see how much time is spent on data transfer, here are the benchmarks run directly in Julia:
julia> using Distributions
julia> using BenchmarkTools
julia> x1 = rand(Poisson(10), 1000, 1000);
julia> x2 = rand(Poisson(10), 1000, 1000);
julia> @benchmark inv($x1)
BenchmarkTools.Trial: 168 samples with 1 evaluation.
Range (min … max): 25.273 ms … 55.200 ms ┊ GC (min … max): 0.00% … 9.74%
Time (median): 28.539 ms ┊ GC (median): 0.00%
Time (mean ± σ): 29.815 ms ± 4.172 ms ┊ GC (mean ± σ): 2.40% ± 3.90%
▃█▂▃▁▂ ▄▆▁▄ ▂
▃▁███████▇████▇▄█▅▅█▃▃▁▁▅▄▄▁▁▁▄▃▄▅▃▁▃▁▁▃▃▇▁▁▁▃▁▃▁▄▁▁▁▁▃▁▃▁▃ ▃
25.3 ms Histogram: frequency by time 42.1 ms <
Memory estimate: 15.76 MiB, allocs estimate: 7.
julia> @benchmark $x1 * $x2
BenchmarkTools.Trial: 17 samples with 1 evaluation.
Range (min … max): 293.104 ms … 321.683 ms ┊ GC (min … max): 0.00% … 0.00%
Time (median): 309.639 ms ┊ GC (median): 0.00%
Time (mean ± σ): 309.618 ms ± 8.177 ms ┊ GC (mean ± σ): 0.11% ± 0.31%
▁ ▁ ▁ ▁▁ ▁ ▁ ▁█ ▁ ▁ ▁ ▁ ▁▁ ▁
█▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁█▁▁██▁▁▁▁▁█▁▁▁▁█▁██▁█▁▁▁▁█▁▁▁▁▁▁▁▁█▁█▁▁▁██▁█ ▁
293 ms Histogram: frequency by time 322 ms <
Memory estimate: 7.66 MiB, allocs estimate: 5.
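As a rough back-of-envelope estimate based on the mean timings (and assuming both sets of benchmarks ran on the same machine): mySolveJulia(x1) called from R via JuliaCall averaged about 73 ms, while inv on the same matrix directly in Julia averaged about 30 ms, so roughly 40 ms per call goes to transferring and converting the 1000×1000 input and result matrices (about 8 MB each as Float64) between R and Julia.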
CodePudding user response:
With your code you restart Julia at each call. Below is how I would do it. But I don't get better timings with Julia, contrary to @BogumilKaminski; I don't know why.
library(JuliaConnectoR)
juliaModule <- "
module myJuliaModule
export mySolveJulia
export myProdJulia
function mySolveJulia(m)
return inv(m)
end
function myProdJulia(m1, m2)
return m1 * m2
end
end
"
. <- juliaEval(juliaModule)
myJuliaModule <- juliaImport(".myJuliaModule", all = FALSE)
mySolveJulia <- myJuliaModule$mySolveJulia
mySolveR <- function(x) solve(x)
set.seed(9392927)
N <- 1e3L
x1 <- matrix(rpois(N ^ 2L, lambda = 10L), N)
x2 <- matrix(rpois(N ^ 2L, lambda = 10L), N)
reps <- 10L
microbenchmark::microbenchmark(
  mySolveR(x1),
  mySolveJulia(x1),
  times = reps
)
stopJulia()
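For completeness, the product functions can be benchmarked the same way before calling stopJulia(), since myProdJulia is also exported by the module (same pattern as above):
myProdJulia <- myJuliaModule$myProdJulia
myProdR <- function(m1, m2) m1 %*% m2
microbenchmark::microbenchmark(
  myProdR(x1, x2),
  myProdJulia(x1, x2),
  times = reps
)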