I am trying to generate 1000 random samples and need to put if the p-value of the test for each sample is larger than alpha in reject_collect object and if the true mean of 10 is in the CI of each sample generated. My objects currently only have 1 value. Not sure how to fix it.
reject_collect <- NULL
CI_include_collect <- NULL
for ( i in c(1:1000)) {
random_vector_index <- rnorm( 50, mean = 10, sd = 2)
alpha <- 0.05
mean(random_vector_index)
test_results_index <- t.test(random_vector_index, mu=10,
alternative = "two.sided",
conf.level = 0.95)
test_results_index$p.value
reject_collect <- test_results_index$p.value < alpha
CI_include_collect <- between(10,
test_results_index$conf.int[1],
test_results_index$conf.int[2])
}
CodePudding user response:
@margusl showed you how to index correctly in the loop. Here is alternative with no loop:
library(tidyverse)
tibble(replicate = 1:1000) |>
mutate(random_vector_index = map(replicate, \(x) {
set.seed(x)
rnorm( 50, mean = 10, sd = 2)
}),
test_results_index = map(random_vector_index,
\(x) t.test(x,
mu=10,
alternative = "two.sided",
conf.level = 0.95)),
p.value = map_dbl(test_results_index, \(x) x$p.value),
reject_collect = map_lgl(p.value, \(x) x < 0.05),
CI_include_collect = map_lgl(test_results_index,
\(x) between(10,
x$conf.int[1],
x$conf.int[2]))) |>
select(replicate, p.value, reject_collect, reject_collect, CI_include_collect)
#> # A tibble: 1,000 x 4
#> replicate p.value reject_collect CI_include_collect
#> <int> <dbl> <lgl> <lgl>
#> 1 1 0.397 FALSE TRUE
#> 2 2 0.667 FALSE TRUE
#> 3 3 0.614 FALSE TRUE
#> 4 4 0.0767 FALSE TRUE
#> 5 5 0.669 FALSE TRUE
#> 6 6 0.562 FALSE TRUE
#> 7 7 0.101 FALSE TRUE
#> 8 8 0.678 FALSE TRUE
#> 9 9 0.503 FALSE TRUE
#> 10 10 0.00758 TRUE FALSE
#> # ... with 990 more rows
CodePudding user response:
Your loop is overwriting those same objects (reject_collect
& CI_include_collect
) on each iteration. For this approach, you should first create vectors (or lists) with the desired lengths and on each iteration store your results at a specific index:
runs <- 1000
reject_collect <- vector(length = runs)
CI_include_collect <- vector(length = runs)
for (i in c(1:runs)) {
random_vector_index <- rnorm(50, mean = 10, sd = 2)
alpha <- 0.05
mean(random_vector_index)
test_results_index <- t.test(random_vector_index,
mu = 10,
alternative = "two.sided",
conf.level = 0.95
)
test_results_index$p.value
reject_collect[i] <- test_results_index$p.value < alpha
CI_include_collect[i] <- between(
10,
test_results_index$conf.int[1],
test_results_index$conf.int[2]
)
}
Alternatively you could consider something like this where apply collects all results into a list that can be later converted into dataframe, for example:
alpha <- 0.05
result <- lapply(1:1000, function(i){
random_vector_index <- rnorm( 50, mean = 10, sd = 2)
test_results_index <- t.test(random_vector_index, mu=10, alternative = "two.sided", conf.level = 0.95)
return(list(
"mean" = mean(random_vector_index),
"p" = test_results_index$p.value,
"reject" = test_results_index$p.value < alpha,
"CI_include" = dplyr::between(10, test_results_index$conf.int[1], test_results_index$conf.int[2])))
})
do.call(rbind.data.frame, result)
#> mean p reject CI_include
#> 1 10.311684 0.2765610597 FALSE TRUE
#> 2 10.183697 0.4474821536 FALSE TRUE
#> 3 10.041988 0.8781323640 FALSE TRUE
#> 4 9.700637 0.3162029238 FALSE TRUE
#> 5 10.335990 0.2313467109 FALSE TRUE
#> ...
#> 39 10.203395 0.4525939583 FALSE TRUE
#> 40 9.299052 0.0437908734 TRUE FALSE
#> 41 9.994436 0.9817983485 FALSE TRUE
#> ...