Home > Software engineering >  generating 1000 random samples using for loop
generating 1000 random samples using for loop

Time:11-09

I am trying to generate 1000 random samples and need to put if the p-value of the test for each sample is larger than alpha in reject_collect object and if the true mean of 10 is in the CI of each sample generated. My objects currently only have 1 value. Not sure how to fix it.

reject_collect <- NULL
 CI_include_collect <- NULL

 for ( i in c(1:1000)) { 
   random_vector_index <- rnorm( 50, mean = 10, sd = 2)
   alpha <- 0.05
   mean(random_vector_index)
   test_results_index <- t.test(random_vector_index, mu=10, 
                                alternative = "two.sided", 
                                conf.level = 0.95)
   test_results_index$p.value

   reject_collect <- test_results_index$p.value < alpha

   CI_include_collect <- between(10, 
                                 test_results_index$conf.int[1], 
                                 test_results_index$conf.int[2])
}

CodePudding user response:

@margusl showed you how to index correctly in the loop. Here is alternative with no loop:

library(tidyverse)
 
 tibble(replicate = 1:1000) |>
   mutate(random_vector_index = map(replicate, \(x) {
     set.seed(x)
     rnorm( 50, mean = 10, sd = 2)
     }),
     test_results_index = map(random_vector_index, 
                                   \(x) t.test(x,
                                               mu=10,
                                               alternative = "two.sided",
                                               conf.level = 0.95)),
     p.value = map_dbl(test_results_index, \(x) x$p.value),
     reject_collect = map_lgl(p.value, \(x) x < 0.05),
     CI_include_collect = map_lgl(test_results_index, 
                              \(x) between(10,
                                           x$conf.int[1],
                                           x$conf.int[2]))) |>
   select(replicate, p.value, reject_collect, reject_collect, CI_include_collect)
#> # A tibble: 1,000 x 4
#>    replicate p.value reject_collect CI_include_collect
#>        <int>   <dbl> <lgl>          <lgl>             
#>  1         1 0.397   FALSE          TRUE              
#>  2         2 0.667   FALSE          TRUE              
#>  3         3 0.614   FALSE          TRUE              
#>  4         4 0.0767  FALSE          TRUE              
#>  5         5 0.669   FALSE          TRUE              
#>  6         6 0.562   FALSE          TRUE              
#>  7         7 0.101   FALSE          TRUE              
#>  8         8 0.678   FALSE          TRUE              
#>  9         9 0.503   FALSE          TRUE              
#> 10        10 0.00758 TRUE           FALSE             
#> # ... with 990 more rows

CodePudding user response:

Your loop is overwriting those same objects (reject_collect & CI_include_collect ) on each iteration. For this approach, you should first create vectors (or lists) with the desired lengths and on each iteration store your results at a specific index:

runs <- 1000
reject_collect <- vector(length = runs)
CI_include_collect <- vector(length = runs)

for (i in c(1:runs)) {
  random_vector_index <- rnorm(50, mean = 10, sd = 2)
  alpha <- 0.05
  mean(random_vector_index)
  test_results_index <- t.test(random_vector_index,
    mu = 10,
    alternative = "two.sided",
    conf.level = 0.95
  )
  test_results_index$p.value

  reject_collect[i] <- test_results_index$p.value < alpha

  CI_include_collect[i] <- between(
    10,
    test_results_index$conf.int[1],
    test_results_index$conf.int[2]
  )
}

Alternatively you could consider something like this where apply collects all results into a list that can be later converted into dataframe, for example:

alpha <- 0.05
result <- lapply(1:1000, function(i){
  random_vector_index <- rnorm( 50, mean = 10, sd = 2)
  test_results_index <- t.test(random_vector_index, mu=10, alternative = "two.sided", conf.level = 0.95)
  return(list(
    "mean" = mean(random_vector_index),
    "p" = test_results_index$p.value,
    "reject" = test_results_index$p.value < alpha,
    "CI_include" =  dplyr::between(10, test_results_index$conf.int[1], test_results_index$conf.int[2])))
})

do.call(rbind.data.frame, result)
#>           mean            p reject CI_include
#> 1    10.311684 0.2765610597  FALSE       TRUE
#> 2    10.183697 0.4474821536  FALSE       TRUE
#> 3    10.041988 0.8781323640  FALSE       TRUE
#> 4     9.700637 0.3162029238  FALSE       TRUE
#> 5    10.335990 0.2313467109  FALSE       TRUE
#> ...
#> 39   10.203395 0.4525939583  FALSE       TRUE
#> 40    9.299052 0.0437908734   TRUE      FALSE
#> 41    9.994436 0.9817983485  FALSE       TRUE
#> ...
  • Related