Home > OS >  How can I improve the speed of this for loop code?
How can I improve the speed of this for loop code?

Time:11-27

How can I improve the speed of this for loop code?

y<-data.frame(f1=round(runif(100, 1,5)),
              f2=round(runif(100, 1,5)),
              f3=round(runif(100, 1,5)))
m <- nrow(y)
nr <- rownames(y) 
response <- matrix(NA, m, m, dimnames=list(nr, nr))
for(a in 1:m) for(b in 1:m) 
  response[a, b] <- all(y[a,]<=y[b,])
response

CodePudding user response:

Convert y to matrix and you can use apply functions to improve speed.

dat = as.matrix(y)
response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))

Benchmarking

y<-data.frame(f1=round(runif(20, 1,5)),
              f2=round(runif(20, 1,5)),
              f3=round(runif(20, 1,5)))
m <- nrow(y)
nr <- rownames(y) 

microbenchmark(f1 = {
    response <- matrix(NA, m, m, dimnames=list(nr, nr))
    for(a in 1:m) for(b in 1:m) 
        response[a, b] <- all(y[a,]<=y[b,])
},
f2 = {
    dat = as.matrix(y)
    response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))
})
# Unit: milliseconds
#  expr     min        lq      mean    median        uq     max neval
#    f1 49.1385 51.931652 53.443814 53.298102 54.684251 59.6634   100
#    f2  1.3325  1.455101  1.593198  1.524851  1.617151  4.3286   100

sum(response == response2)
# [1] 400

CodePudding user response:

You can try outer like below

lst <- asplit(y, 1)
outer(lst, lst, FUN = Vectorize(function(x, y) all(x <= y)))

CodePudding user response:

I agree with @d.b that converting the data to a matrix is the best first step, but using the matrixStats package you can also avoid the nested loops.

y<-data.frame(f1=round(runif(100, 1,5)),
              f2=round(runif(100, 1,5)),
              f3=round(runif(100, 1,5)))
m <- nrow(y)
nr <- rownames(y) 

microbenchmark::microbenchmark(f1 = {
  response <- matrix(NA, m, m, dimnames=list(nr, nr))
  for(a in 1:m) for(b in 1:m) 
    response[a, b] <- all(y[a,]<=y[b,])
},
f2 = {
  dat = as.matrix(y)
  response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))
},
f3 = {
  response3 <- matrix(NA, m, m, dimnames=list(nr, nr))
  z <- as.matrix(y)
  for (i in 1:m) {
    response3[i,] <- matrixStats::rowAlls(matrix(rep(z[i, ], m), nrow = m, ncol = 3, byrow = TRUE) <= z)  
  }
  
})
Unit: milliseconds
 expr       min         lq        mean     median         uq       max neval
   f1 1154.2246 1164.34475 1178.059824 1170.31180 1177.58840 1272.3578   100
   f2   24.4325   25.09795   27.147773   26.13300   29.82185   32.7160   100
   f3    3.6844    3.94145    4.104324    4.04175    4.13685    9.7601   100
identical(response, response3)
[1] TRUE
  •  Tags:  
  • r
  • Related