Home > OS >  How to store only the first occurrence in a for loop?
How to store only the first occurrence in a for loop?

Time:07-11

I tried to code k as a cumulatively increasing variable until it reaches 15. j is supposed to store the point when k exceeds 10 for the first time. However, obviously, j is going to store all the occurrences instead which is not desirable for my purpose.

# Question's original attempt. A running total of |N(0, 1)| draws is stored
# in `k`; the total restarts from 0 once it has reached 15. `j` records EVERY
# iteration at which the running total is at least 10 (not only the first one
# per cycle) -- that is the behaviour the question wants to get rid of.
set.seed(324)
j.ind <- 1
j <- numeric()
k <- numeric()
init.k <- 0
for (i in 1:50) {
  k[i] <- init.k + abs(rnorm(1, 0, 1))
  init.k <- k[i]
  if (k[i] >= 10) {
    j[j.ind] <- i
    j.ind <- j.ind + 1
  }
  # Restart the running total once it has reached 15 (plain `if`, because the
  # condition is a scalar and ifelse() is meant for vectors).
  if (init.k >= 15) {
    init.k <- 0
  }
}

Can you suggest a way to do this which will not slow down the loop significantly as the original problem is rather huge? For this example with the given seed, j is supposed to be 17 and 36

CodePudding user response:

This is my own solution but I am expecting a better one.

# Flag-based variant: `j` receives only the FIRST iteration of each cycle at
# which the running total is at least 10. `j.flag` is raised when that
# iteration has been recorded and lowered again when the total restarts.
set.seed(324)
j.ind <- 1
j <- numeric()
k <- numeric()
init.k <- 0
j.flag <- FALSE
for (i in 1:50) {
  k[i] <- init.k + abs(rnorm(1, 0, 1))
  init.k <- k[i]
  if (k[i] >= 10 && !j.flag) {
    j[j.ind] <- i
    j.ind <- j.ind + 1
    j.flag <- TRUE
  }
  # Reaching 15 ends the cycle: restart the total and re-arm the flag.
  if (k[i] >= 15) {
    init.k <- 0
    j.flag <- FALSE
  }
}

CodePudding user response:

Here is how I would do the search:

# Vectorised search: draw all increments at once, take their cumulative sum
# and look up the first index at which it reaches each threshold
# v1, v1 + v2, v1 + 2 * v2, ...
#
# NOTE(review): the thresholds are fixed multiples of v2 on the un-reset
# cumulative sum, whereas the loop versions restart the total from 0 and so
# discard whatever it overshot v2 by. The two approaches can therefore
# disagree from the second index onwards -- confirm before relying on it.

# Set seed
set.seed(324)
# Number of simulated values
n <- 50
# Specific values (cumulative sum)
v1 <- 10
v2 <- 15

# Generate all the values for k
k <- abs(rnorm(n, 0, 1))
# Cumulative sum of k vector's values
k.cum <- cumsum(k)

# How many indices are we looking for? [Excluding the first one]
n.times <- floor((k.cum[n] - v1) / v2)
# Values to look for at k.cum. Built from 0:n.times so that n.times == 0
# yields just v1 (1:0 would count down and add bogus thresholds); a negative
# n.times means even v1 is never reached, so there is nothing to look for.
v <- if (n.times >= 0) v1 + v2 * (0:n.times) else numeric(0)
# Indices: first position at which the cumulative sum reaches each threshold
ind <- vapply(v, function(th) which(k.cum >= th)[1L], integer(1))

CodePudding user response:

Check the value of k from the previous iteration — but only if it exists, i.e. when length(k) > 1. Also pay attention to whether you are working with integers or doubles.

# Record iteration i in `j` only when the running total is at least 10 now
# but was below 10 on the previous iteration, i.e. at the first crossing of
# each cycle. The total restarts from 0 once it has reached 15.
set.seed(324)
j.ind <- 1L
j <- integer()
k <- numeric()
init.k <- 0

for (i in 1:50) {
  k[i] <- init.k + abs(rnorm(1, 0, 1))
  init.k <- k[i]
  # length(k) > 1L guards the first iteration, where k[i - 1L] does not exist.
  if (k[i] >= 10 && length(k) > 1L && k[i - 1L] < 10) {
    j[j.ind] <- i
    j.ind <- j.ind + 1L
  }
  if (init.k >= 15) {
    init.k <- 0
  }
}

j
# [1] 17 36

You could also check this repeat loop, I've implemented your flag idea.

# Repeat-loop variant with a flag: `k` is the running total (a scalar), `i`
# the iteration counter, and `j` collects the first iteration of each cycle
# at which the total is at least 10.
set.seed(324)
k <- 0
i <- 1L
j <- numeric()
flag <- FALSE
max.it <- 50L

repeat {
  k <- k + abs(rnorm(1, 0, 1))
  if (k >= 10 && !flag) {
    j <- c(j, i)
    flag <- TRUE
  }
  # Back below 10 means a new cycle has started: re-arm the flag.
  if (k < 10) {
    flag <- FALSE
  }
  i <- i + 1L
  if (k >= 15) k <- 0
  # `i` has already been incremented, so stop only once it has passed max.it;
  # `>=` would skip the last iteration and run just max.it - 1 of them.
  if (i > max.it) break
}

j
# [1] 17 36

Benchmark

According to the benchmark with max.it=1e4 it turns out that the repeat loop is fastest.

Unit: milliseconds
                expr      min       lq     mean   median       uq      max neval cld
     op_loop(max_it) 38.00183 38.48694 42.96273 40.08470 45.02735 56.96728    10   b
    for_loop(max_it) 37.93006 38.29678 42.24728 41.85526 43.72171 50.32624    10  ab
 repeat_loop(max_it) 33.89751 34.60217 37.20940 37.18443 38.80353 43.27860    10  a 
  cumsum_rle(max_it) 44.37103 45.63304 47.55966 46.58231 49.92746 54.36341    10   b

Code:

# Benchmark candidate: flag-based for loop.
#
# Accumulates |N(0, 1)| draws (seed fixed at 324) in a running total that
# restarts from 0 once it has reached 15, and returns the first iteration of
# each cycle at which the total is at least 10.
#
# max.it: number of iterations to simulate (defaults to the global `max_it`).
# Returns a numeric vector of iteration indices; empty if 10 is never reached.
op_loop <- \(max.it=max_it) {
  set.seed(324)
  j.ind <- 1
  j <- numeric()
  # Preallocated so the vector is not regrown on every iteration.
  k <- numeric(max.it)
  init.k <- 0
  j.flag <- FALSE
  # seq_len() iterates zero times for max.it == 0, where 1:max.it would
  # count down over c(1, 0).
  for (i in seq_len(max.it)) {
    k[i] <- init.k + abs(rnorm(1, 0, 1))
    init.k <- k[i]
    if (k[i] >= 10 && !j.flag) {
      j[j.ind] <- i
      j.ind <- j.ind + 1
      j.flag <- TRUE
    }
    # Reaching 15 ends the cycle: restart the total and re-arm the flag.
    if (k[i] >= 15) {
      init.k <- 0
      j.flag <- FALSE
    }
  }
  j
}

# Benchmark candidate: repeat loop with a scalar running total and a flag.
#
# Accumulates |N(0, 1)| draws (seed fixed at 324) in a running total that
# restarts from 0 once it has reached 15, and returns the first iteration of
# each cycle at which the total is at least 10.
#
# max.it: number of iterations to simulate (defaults to the global `max_it`).
# Returns a numeric vector of iteration indices; empty if 10 is never reached.
repeat_loop <- \(max.it=max_it) {
  set.seed(324)
  k <- 0
  i <- 1L
  j <- numeric()
  flag <- FALSE
  repeat {
    # Checked before doing any work so that exactly max.it iterations run
    # (testing `i >= max.it` after the increment ran only max.it - 1).
    if (i > max.it) break
    k <- k + abs(rnorm(1, 0, 1))
    if (k >= 10 && !flag) {
      j <- c(j, i)
      flag <- TRUE
    }
    # Back below 10 means a new cycle has started: re-arm the flag.
    if (k < 10) {
      flag <- FALSE
    }
    if (k >= 15) k <- 0
    i <- i + 1L
  }
  j
}

# Benchmark candidate: Reduce() + rle().
#
# Draws all |N(0, 1)| increments at once (seed fixed at 324), builds the
# running total with Reduce() -- storing 0 whenever the total would reach
# 15 -- and uses run-length encoding of `total >= 10` to locate the first
# iteration of each run at or above 10.
#
# max.it: number of iterations to simulate (defaults to the global `max_it`).
# Returns an integer vector of iteration indices; empty if 10 is never
# reached.
cumsum_rle <- \(max.it=max_it) {
  set.seed(324)
  k.vectorized <- abs(rnorm(max.it, 0, 1))
  k.cumsum <- Reduce(
    f = function(x, y) {
      total <- x + y
      if (total >= 15) 0 else total
    },
    x = k.vectorized,
    init = 0,
    accumulate = TRUE
  )
  r <- rle(k.cumsum >= 10)
  # k.cumsum carries the initial 0 in position 1, so the end position of the
  # run just before each TRUE run equals the iteration number at which the
  # TRUE run starts.
  cumsum(r$lengths)[which(r$values) - 1]
}


# Sanity check: every candidate must return the same indices as op_loop().
# all.equal() returns a character vector (not FALSE) on mismatch, so each
# comparison is wrapped in isTRUE() and passed to stopifnot() separately.
#
# NOTE(review): for_loop() is not defined in the code shown here; presumably
# it wraps the "check k of the previous iteration" for loop in a function
# taking max.it -- define it before running this.
stopifnot(
  isTRUE(all.equal(op_loop(100), for_loop(100))),
  isTRUE(all.equal(op_loop(100), repeat_loop(100))),
  isTRUE(all.equal(op_loop(100), cumsum_rle(100)))
)

max_it <- 1e4
microbenchmark::microbenchmark(
  op_loop(max_it), for_loop(max_it), repeat_loop(max_it), cumsum_rle(max_it),
  times = 10L
)

CodePudding user response:

You could use Reduce with option accumulate=TRUE, in combination with rle:

set.seed(324)
k.vectorized <- abs(rnorm(50, 0, 1))

# cumsum with reinit for k>=15: the running total is stored as 0 whenever it
# would reach 15 (plain if/else, because x and y are scalars here)
k.cumsum <- Reduce(
  f = function(x, y) {
    total <- x + y
    if (total >= 15) 0 else total
  },
  x = k.vectorized,
  init = 0,
  accumulate = TRUE
)

# extract intervals above 10
r <- rle(k.cumsum >= 10)

# output result: k.cumsum carries the initial 0 in position 1, so the end
# position of the run just before each TRUE run is the iteration at which
# that TRUE run starts
cumsum(r$lengths)[which(r$values) - 1]
#> [1] 17 36
  • Related