I tried to code `k` as a cumulatively increasing variable until it reaches 15. `j` is supposed to store the point when `k` exceeds 10 for the first time. However, obviously, `j` is going to store all the occurrences instead, which is not desirable for my purpose.
# Baseline from the question: k accumulates |N(0, 1)| draws and the running
# total restarts from 0 once it has reached 15.
# NOTE: j records EVERY iteration where k >= 10, not only the first one per
# cycle -- this is the behaviour the question wants to get rid of.
set.seed(324)
j.ind <- 1
j <- numeric()
k <- numeric()
init.k <- 0
for (i in 1:50) {
  k[i] <- init.k + abs(rnorm(1, 0, 1))
  init.k <- k[i]
  if (k[i] >= 10) {
    j[j.ind] <- i
    j.ind <- j.ind + 1
  }
  # Restart the running total once it has reached 15 (k[i] keeps the
  # pre-reset value).
  if (init.k >= 15) {
    init.k <- 0
  }
}
Can you suggest a way to do this which will not slow down the loop significantly, as the original problem is rather huge? For this example with the given seed, `j` is supposed to be 17 and 36.
CodePudding user response:
This is my own solution but I am expecting a better one.
# Flag-based solution: record in j only the FIRST iteration of each cycle at
# which the running total k reaches 10; a cycle ends (total restarts from 0,
# flag is cleared) once k reaches 15.
set.seed(324)
j.ind <- 1
j <- numeric()
k <- numeric()
init.k <- 0
j.flag <- FALSE  # TRUE once the current cycle's crossing has been recorded
for (i in 1:50) {
  k[i] <- init.k + abs(rnorm(1, 0, 1))
  init.k <- k[i]
  if (k[i] >= 10 && !j.flag) {
    j[j.ind] <- i
    j.ind <- j.ind + 1
    j.flag <- TRUE
  }
  if (k[i] >= 15) {
    # Start a new cycle: reset the running total and re-arm the flag.
    init.k <- 0
    j.flag <- FALSE
  }
}
CodePudding user response:
Here is how I would do the search:
# Set seed
set.seed(324)
# Number of simulated values
n <- 50
# Thresholds on the cumulative sum: the first target is v1, and every
# further target lies v2 above the previous one.
v1 <- 10
v2 <- 15
# Generate all the increments for k
k <- abs(rnorm(n, 0, 1))
# Cumulative sum of k vector's values
k.cum <- cumsum(k)
# How many further targets are reached? [Excluding the first one]
# Clamped at 0 so that a total below v1 does not yield a negative count.
n.times <- max(0, floor((k.cum[n] - v1) / v2))
# Values to look for at k.cum: v1, v1 + v2, v1 + 2 * v2, ...
v <- v1 + v2 * (0:n.times)
# Index of the first element of k.cum reaching each target (NA if a target is
# never reached). k.cum is non-decreasing, so the first match is the minimum.
# NOTE(review): this searches fixed targets on one uninterrupted cumulative
# sum; the loop-based variants restart from 0 and drop any overshoot above
# 15, so later indices may differ from theirs -- confirm which is intended.
ind <- vapply(
  v,
  function(target) which(k.cum >= target)[1L],
  integer(1)
)
CodePudding user response:
Check `k` of the previous iteration -- of course only if it exists, i.e. `length(k) > 1`. Also be careful about whether the values are integer or double.
# Record in j the iterations at which the running total k reaches 10 while
# the previous stored value was still below 10, i.e. the first crossing of
# each cycle. The total restarts from 0 once it has reached 15.
set.seed(324)
j.ind <- 1L
j <- integer()
k <- numeric()
init.k <- 0
for (i in 1:50) {
  k[i] <- init.k + abs(rnorm(1, 0, 1))
  init.k <- k[i]
  # length(k) > 1L guards the look-back at k[i - 1L] on the first iteration.
  if (k[i] >= 10 && length(k) > 1L && !(k[i - 1L] >= 10)) {
    j[j.ind] <- i
    j.ind <- j.ind + 1L
  }
  if (init.k >= 15) {
    # k[i] keeps the pre-reset value; only the carried-over total restarts.
    init.k <- 0
  }
}
j
# [1] 17 36
You could also check this `repeat` loop, in which I've implemented your flag idea.
# repeat-based variant of the flag idea: k is a single running total (no
# history vector); j collects the first iteration of each cycle at which k
# reaches 10. The total restarts from 0 once it has reached 15.
set.seed(324)
k <- 0
i <- 1L
j <- numeric()
flag <- FALSE  # TRUE while the current cycle's crossing is already recorded
max.it <- 50L
repeat {
  # Stop only after max.it draws have been processed. The original test
  # (i >= max.it after the increment) stopped one iteration early.
  if (i > max.it) break
  k <- k + abs(rnorm(1, 0, 1))
  if (k >= 10 && !flag) {
    j <- c(j, i)
    flag <- TRUE
  }
  if (k < 10) {
    flag <- FALSE
  }
  if (k >= 15) k <- 0
  i <- i + 1L
}
j
# [1] 17 36
Benchmark
According to the benchmark with `max.it = 1e4`, it turns out that the `repeat` loop is the fastest.
Unit: milliseconds
expr min lq mean median uq max neval cld
op_loop(max_it) 38.00183 38.48694 42.96273 40.08470 45.02735 56.96728 10 b
for_loop(max_it) 37.93006 38.29678 42.24728 41.85526 43.72171 50.32624 10 ab
repeat_loop(max_it) 33.89751 34.60217 37.20940 37.18443 38.80353 43.27860 10 a
cumsum_rle(max_it) 44.37103 45.63304 47.55966 46.58231 49.92746 54.36341 10 b
Code:
# Flag-based for loop wrapped as a function for benchmarking.
#
# max.it: number of iterations (one |N(0, 1)| draw each); defaults to the
#         global max_it.
# Returns a numeric vector with the first iteration of each cycle at which
# the running total reaches 10; a cycle ends (total restarts from 0) once the
# total reaches 15. The seed is fixed inside, so results are reproducible.
op_loop <- \(max.it = max_it) {
  set.seed(324)
  j.ind <- 1
  j <- numeric()
  k <- numeric()
  init.k <- 0
  j.flag <- FALSE
  # seq_len() rather than 1:max.it so that max.it = 0 runs zero iterations.
  for (i in seq_len(max.it)) {
    k[i] <- init.k + abs(rnorm(1, 0, 1))
    init.k <- k[i]
    if (k[i] >= 10 && !j.flag) {
      j[j.ind] <- i
      j.ind <- j.ind + 1
      j.flag <- TRUE
    }
    if (k[i] >= 15) {
      init.k <- 0
      j.flag <- FALSE
    }
  }
  j
}
# repeat-based variant wrapped as a function for benchmarking.
#
# max.it: number of iterations (one |N(0, 1)| draw each); defaults to the
#         global max_it.
# Returns a numeric vector with the first iteration of each cycle at which
# the running total k reaches 10; k restarts from 0 once it reaches 15.
repeat_loop <- \(max.it = max_it) {
  set.seed(324)
  k <- 0
  i <- 1L
  j <- numeric()
  flag <- FALSE
  repeat {
    # Process exactly max.it draws. The original check (i >= max.it after the
    # increment) skipped the last iteration, unlike the for-loop variants.
    if (i > max.it) break
    k <- k + abs(rnorm(1, 0, 1))
    if (k >= 10 && !flag) {
      j <- c(j, i)
      flag <- TRUE
    }
    if (k < 10) {
      flag <- FALSE
    }
    if (k >= 15) k <- 0
    i <- i + 1L
  }
  j
}
# Reduce/rle variant wrapped as a function for benchmarking.
#
# max.it: number of |N(0, 1)| draws; defaults to the global max_it.
# Returns the iteration indices at which a run of "running total >= 10"
# starts.
cumsum_rle <- \(max.it = max_it) {
  set.seed(324)
  k.vectorized <- abs(rnorm(max.it, 0, 1))
  # Running total that restarts at 0 as soon as it would reach 15. Because
  # init = 0 is prepended, position p of k.cumsum belongs to iteration p - 1.
  # NOTE: a step that reaches 15 is stored as 0, so a jump from below 10
  # straight to >= 15 is not reported here (the loop variants would report
  # it).
  k.cumsum <- Reduce(
    f = function(x, y) {
      total <- x + y
      if (total >= 15) 0 else total
    },
    x = k.vectorized,
    init = 0,
    accumulate = TRUE
  )
  r <- rle(k.cumsum >= 10)
  # The end position of the FALSE run preceding each TRUE run equals, thanks
  # to the one-position init offset, the iteration where the TRUE run starts.
  cumsum(r$lengths)[which(r$values) - 1L]
}
# Sanity check: all variants must agree before they are timed.
# all.equal() returns a character description (not FALSE) on mismatch, so
# each comparison is wrapped in isTRUE() and passed as its own condition.
# NOTE(review): for_loop() is not defined in this excerpt -- presumably the
# previous-value for loop wrapped as a function; confirm it is in scope.
stopifnot(
  isTRUE(all.equal(op_loop(100), for_loop(100))),
  isTRUE(all.equal(op_loop(100), repeat_loop(100))),
  isTRUE(all.equal(op_loop(100), cumsum_rle(100)))
)
max_it <- 1e4
microbenchmark::microbenchmark(
  op_loop(max_it), for_loop(max_it), repeat_loop(max_it), cumsum_rle(max_it),
  times = 10L
)
CodePudding user response:
You could use `Reduce` with the option `accumulate = TRUE`, in combination with `rle`:
set.seed(324)
k.vectorized <- abs(rnorm(50, 0, 1))
# cumsum with reinit for k >= 15; init = 0 is prepended, so position p of
# k.cumsum belongs to iteration p - 1
k.cumsum <- Reduce(
  f = function(x, y) {
    total <- x + y
    if (total >= 15) 0 else total
  },
  x = k.vectorized,
  init = 0,
  accumulate = TRUE
)
# extract intervals above 10
r <- rle(k.cumsum >= 10)
# output result: end position of the FALSE run before each TRUE run, which
# (given the init offset) is the iteration where the total first reaches 10
cumsum(r$lengths)[which(r$values) - 1L]
#> [1] 17 36