Home > Software engineering >  How to compute cosine for loop with function from *apply family?
How to compute cosine for loop with function from *apply family?

Time:09-20

I need to rewrite this for loop because it takes too long for 50k accounts, and I want to improve performance by using a function from the *apply family.

# Example data: `a` holds the rows to score, `b` is the single reference row.
a <- data.frame(a=c(1, 2, 2, 3), b=c(2, 3, 4, 5))
b <- data.frame(a=c(1), b=c(2))

# This is the code I need to convert to use an *apply-family function.
library('lsa')
# Start with an empty result; it is grown by one row on every iteration below.
res <- c()
for (i in 1:nrow(a)) {
  # Cosine between row i of `a` and `b`; [1] keeps the first element of the result.
  cosinevalue <- cosine(as.numeric(a[i, ]), as.numeric(b))[1]
  # rbind() appends the new value as a further row of `res`.
  res <- rbind(res, as.numeric(cosinevalue))
}

# Attach the collected values to `a` as a new column.
a$cosinevalue <- res

CodePudding user response:

Using vapply.

# One cosine value per row of `a`; numeric(1) makes vapply() insist that each
# call returns exactly one number.
vapply(
  seq_len(nrow(a)),
  function(i) lsa::cosine(unlist(a[i, ]), unlist(b)),
  numeric(1)
)
# [1] 1.0000000 0.9922779 1.0000000 0.9970545

Or by using `Vectorize()`, which is three times faster.

# Vectorize() makes lsa::cosine map over its `x` argument. The columns of
# t(a) are the rows of `a`, so each row is compared with `b` in turn.
# unname() strips the names attached to the result.
unname(
  Vectorize(lsa::cosine, vectorize.args = "x")(as.data.frame(t(a)), unlist(b))
)
# [1] 1.0000000 0.9922779 1.0000000 0.9970545

CodePudding user response:

We can also use `apply()`:

# Example data: four rows to score and a single reference row.
a <- data.frame(a = c(1, 2, 2, 3), b = c(2, 3, 4, 5))
b <- data.frame(a = 1, b = 2)

library(lsa)

# apply() with MARGIN = 1 hands each row of `a` to the function as a vector;
# the cosine against `b` is stored as a new column.
a$cosinevalue <- apply(
  a,
  MARGIN = 1,
  FUN = \(row_vals) cosine(as.numeric(row_vals), as.numeric(b))[1]
)

a
#>   a b cosinevalue
#> 1 1 2   1.0000000
#> 2 2 3   0.9922779
#> 3 2 4   1.0000000
#> 4 3 5   0.9970545

You can alternatively rewrite your loop and predefine an output vector:

library(lsa)

# Reference vector, converted once instead of on every iteration.
b_vec <- as.numeric(b)

# Preallocate the result so the loop fills it in place instead of growing it.
out <- numeric(nrow(a))

# seq_len() yields an empty sequence for a zero-row `a`, whereas 1:nrow(a)
# would iterate over c(1, 0) in that case.
for (i in seq_len(nrow(a))) {
  # Use only the columns that `b` has, so that a `cosinevalue` column already
  # present in `a` (e.g. from the apply() call above) is not passed to cosine().
  out[i] <- cosine(as.numeric(a[i, names(b)]), b_vec)[1]
}
a$cosinevalue <- out

a
#>   a b cosinevalue
#> 1 1 2   1.0000000
#> 2 2 3   0.9922779
#> 3 2 4   1.0000000
#> 4 3 5   0.9970545

Below is a benchmark between my two approaches and the two approaches by @jay.sf together with the original old loop. We can see that apply is fastest and the loops are slowest.

# Benchmark the five approaches on the same `a` and `b`.
# check = FALSE: the expressions do not all evaluate to the same value (the
# two loop blocks end in a `for`, whose value is NULL), so bench::mark() must
# not compare their results.
bench::mark(
  iterations = 100,
  check = FALSE,

  "apply" = apply(a, MARGIN = 1, FUN = function(x) cosine(as.numeric(x), as.numeric(b))[1]),

  "vapply" = vapply(seq_len(nrow(a)), \(i) cosine(unlist(a[i, ]), unlist(b)), numeric(1)),

  "vectorize" = Vectorize(cosine, vectorize.args = "x")(as.data.frame(t(a)), unlist(b)) |> unname(),

  "for_loop_new" = {
    out <- numeric(nrow(a))
    # Iterate over the rows of `a`. seq_along(a) would iterate over its
    # columns instead, so this loop would do a different amount of work than
    # the other expressions.
    # NOTE(review): the timings printed below were presumably recorded with
    # the earlier seq_along(a) loop; re-run the benchmark to refresh them.
    for (i in seq_len(nrow(a))) {
      out[i] <- cosine(as.numeric(a[i, ]), as.numeric(b))[1]
    }
  },
  # Deliberately left exactly as in the question: this is the baseline.
  "for_loop_old" = {
    res <- c()
    for (i in 1:nrow(a)) {
      cosinevalue <- cosine(as.numeric(a[i, ]), as.numeric(b))[1]
      res <- rbind(res, as.numeric(cosinevalue))
    }
  }
)
#> # A tibble: 5 x 6
#>   expression        min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 apply         116.7us  120.7us     8157.   125.7KB      0  
#> 2 vapply        194.6us 199.85us     4923.    39.2KB      0  
#> 3 vectorize     293.7us  302.5us     3258.    48.8KB     32.9
#> 4 for_loop_new   3.48ms   3.56ms      263.    45.4KB     11.0
#> 5 for_loop_old   4.57ms   4.67ms      204.    29.6KB     13.0

Created on 2022-09-20 by the reprex package (v2.0.1)

  • Related