I need to convert this for loop into something faster, because it takes too long for 50k accounts. I would like to improve performance by using a function from the *apply family.
# Example data: `a` holds the rows to score, `b` is the single reference row.
a <- data.frame(
  a = c(1, 2, 2, 3),
  b = c(2, 3, 4, 5)
)
b <- data.frame(a = 1, b = 2)
##### **I need to convert this code to use an apply function**
# Baseline from the question: cosine of every row of `a` against `b`,
# computed one row at a time.
library('lsa')
# `res` starts empty and is grown with rbind() on every iteration, so it is
# reallocated each time and ends up as a one-column matrix.
res <- c()
for (i in 1:nrow(a)) {
# Both inputs are converted to plain numeric vectors; [1] keeps the first
# element of whatever cosine() returns.
cosinevalue <- cosine(as.numeric(a[i, ]), as.numeric(b))[1]
res <- rbind(res, as.numeric(cosinevalue))
}
# Attach the accumulated results to `a` as a new column.
a$cosinevalue <- res
CodePudding user response:
Using `vapply`:
# Cosine of each row of `a` against `b`; numeric(1) makes vapply() enforce
# exactly one double per row.
vapply(
  seq_len(nrow(a)),
  function(i) lsa::cosine(unlist(a[i, ]), unlist(b)),
  numeric(1)
)
# [1] 1.0000000 0.9922779 1.0000000 0.9970545
Or by `Vectorize`ing it, which is three times faster.
# t(a) turns each row of `a` into a column; the vectorized cosine() is then
# applied to every column of the resulting data frame against `b`.
unname(
  Vectorize(lsa::cosine, vectorize.args = "x")(as.data.frame(t(a)), unlist(b))
)
# [1] 1.0000000 0.9922779 1.0000000 0.9970545
CodePudding user response:
We can also use `apply()`:
library(lsa)

# Example data: rows to score (`a`) and the single reference row (`b`).
a <- data.frame(a = c(1, 2, 2, 3), b = c(2, 3, 4, 5))
b <- data.frame(a = 1, b = 2)

# MARGIN = 1 walks over the rows of `a`; each row is compared against `b`
# and [1] keeps the first element of the cosine() result.
a$cosinevalue <- apply(
  a,
  MARGIN = 1,
  FUN = function(row_vals) cosine(as.numeric(row_vals), as.numeric(b))[1]
)
a
#> a b cosinevalue
#> 1 1 2 1.0000000
#> 2 2 3 0.9922779
#> 3 2 4 1.0000000
#> 4 3 5 0.9970545
You can alternatively rewrite your loop and predefine an output vector:
library(lsa)

# Compare only the columns that `b` provides, so the loop still works when
# `a` already carries a `cosinevalue` column from an earlier run.
feature_cols <- names(b)
# Loop-invariant: convert the reference row to a numeric vector once.
reference <- as.numeric(b)

# Preallocate the result instead of growing it inside the loop.
out <- numeric(nrow(a))
# seq_len() gives an empty sequence for a zero-row `a`, whereas 1:nrow(a)
# would iterate over c(1, 0).
for (i in seq_len(nrow(a))) {
  # [1] keeps the first element of the cosine() result.
  out[i] <- cosine(as.numeric(a[i, feature_cols]), reference)[1]
}
a$cosinevalue <- out
a
#> a b cosinevalue
#> 1 1 2 1.0000000
#> 2 2 3 0.9922779
#> 3 2 4 1.0000000
#> 4 3 5 0.9970545
Below is a benchmark comparing my two approaches, the two approaches by @jay.sf, and the original loop. We can see that `apply` is fastest and the loops are slowest.
# Benchmark of the five ways to compute the row-wise cosine against `b`.
# NOTE: assumes `a` contains only the columns that are also in `b`; a leftover
# `cosinevalue` column would make the vectors passed to cosine() differ in
# length.
bench::mark(
  iterations = 100,
  # The expressions do not return identical objects (the loop blocks evaluate
  # to NULL), so result comparison is switched off.
  check = FALSE,
  "apply" = apply(
    a,
    MARGIN = 1,
    FUN = function(x) cosine(as.numeric(x), as.numeric(b))[1]
  ),
  "vapply" = vapply(
    seq_len(nrow(a)),
    \(i) cosine(unlist(a[i, ]), unlist(b)),
    numeric(1)
  ),
  "vectorize" = Vectorize(cosine, vectorize.args = "x")(
    as.data.frame(t(a)),
    unlist(b)
  ) |> unname(),
  "for_loop_new" = {
    out <- numeric(nrow(a))
    # Iterate over the rows of `a`; seq_along(a) would iterate over its
    # columns and benchmark a different amount of work.
    for (i in seq_len(nrow(a))) {
      out[i] <- cosine(as.numeric(a[i, ]), as.numeric(b))[1]
    }
  },
  # Kept exactly as posted in the question to serve as the baseline.
  "for_loop_old" = {
    res <- c()
    for (i in 1:nrow(a)) {
      cosinevalue <- cosine(as.numeric(a[i, ]), as.numeric(b))[1]
      res <- rbind(res, as.numeric(cosinevalue))
    }
  }
)
#> # A tibble: 5 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 apply 116.7us 120.7us 8157. 125.7KB 0
#> 2 vapply 194.6us 199.85us 4923. 39.2KB 0
#> 3 vectorize 293.7us 302.5us 3258. 48.8KB 32.9
#> 4 for_loop_new 3.48ms 3.56ms 263. 45.4KB 11.0
#> 5 for_loop_old 4.57ms 4.67ms 204. 29.6KB 13.0
Created on 2022-09-20 by the reprex package (v2.0.1)