For Loop with multiple outputs-CodePudding

I have a data frame roughly like this:

dput(df)
structure(list(a = 1:9000, b = 1:9000, c = 1:9000, d = 1:9000, 
    e = 1:9000, f = 1:9000, g = 1:9000, h = 1:9000, i = 1:9000), class = "data.frame", row.names = c(NA, 
-9000L))

Edit: These are not the exact values; this was just to show the rough dimensions. There are far more columns as well, and the values don't just run 1:9000.

To sample randomly & add in a column of row averages I have been using the following:

# Draw 200 distinct row indices (sampling without replacement). The upper
# bound is taken from df itself so the snippet keeps working when the real
# data does not have exactly 9000 rows.
sample_1 <- sample(nrow(df), 200, replace = FALSE)
# Keep only the sampled rows; every column is retained.
sampled_df_1 <- df[sample_1, ]
# Append the mean across all columns of each sampled row.
sampled_df_1$Means_1 <- rowMeans(sampled_df_1)

I need to do this 100 times over, and then create a data frame of the means. I think I need to use a for loop for this as in:

# Non-working attempt, kept verbatim. In R, `sample_[i]` means "element i of
# an object called `sample_`"; it does not build a variable name such as
# `sample_1`, `sample_2`, ... from the loop index.
for(i in 1:100){
sample_[i] <- sample(1:9000, 200, replace=F)
sampled_df_[i] <- df[c(sample_[i]),]
sampled_df_[i]$Means_[i] <- rowMeans(sampled_df_[i])}

but the [i] doesn't append the vector number. I have also tried {i} and ' i'. Is this possible to do? I think assign(paste()) may be the key here, but I am struggling with it. And once I am past it, is there an easy way to create a data frame of just the means columns without typing out all their names?

CodePudding user response：

It’ll be simpler to use sapply() which will do both the iteration and also combine the results.

# Reproducible toy data: six rows, three standard-normal columns.
set.seed(42)

df <- data.frame(a = rnorm(6), b = rnorm(6), c = rnorm(6))
df
#>            a           b          c
#> 1  1.3709584  1.51152200 -1.3888607
#> 2 -0.5646982 -0.09465904 -0.2787888
#> 3  0.3631284  2.01842371 -0.1333213
#> 4  0.6328626 -0.06271410  0.6359504
#> 5  0.4042683  1.30486965 -0.2842529
#> 6 -0.1061245  2.28664539 -2.6564554

# Re-seed so the sampling below is reproducible on its own.
set.seed(43)

N_REPEATS <- 4
N_ROWS_SAMPLED <- 3

# One repetition: pick N_ROWS_SAMPLED rows at random and average each picked
# row across its columns. The argument is the repetition index; it is unused.
sample_row_means <- function(i) {
  picked_rows <- sample(nrow(df), N_ROWS_SAMPLED)
  rowMeans(df[picked_rows, ])
}

# sapply() binds the per-repetition vectors column-wise into a matrix. The row
# labels shown are those of the first repetition's sampled rows only.
sapply(seq_len(N_REPEATS), sample_row_means)
#>         [,1]       [,2]       [,3]       [,4]
#> 4  0.4020330 -0.3127153  0.4749617 -0.1586448
#> 6 -0.1586448  0.4749617 -0.3127153  0.4978732
#> 1  0.4978732 -0.1586448  0.7494103  0.4020330

If you want to keep the sampled subsets, split this into two parts:

set.seed(43)

# One repetition: return N_ROWS_SAMPLED randomly chosen rows of df as a data
# frame. The argument is the repetition index; it is unused.
draw_subset <- function(i) {
  picked_rows <- sample(nrow(df), N_ROWS_SAMPLED)
  df[picked_rows, ]
}

# Keep every sampled subset in a list so it can be reused afterwards.
samples <- lapply(seq_len(N_REPEATS), draw_subset)

# Row means of each stored subset, combined column-wise into one matrix.
sapply(samples, rowMeans)
#>         [,1]       [,2]       [,3]       [,4]
#> 4  0.4020330 -0.3127153  0.4749617 -0.1586448
#> 6 -0.1586448  0.4749617 -0.3127153  0.4978732
#> 1  0.4978732 -0.1586448  0.7494103  0.4020330

CodePudding user response：

With a for() loop it can be done like this:

# Example data: 9000 rows and nine identical integer columns (a..i).
df <- structure(
  list(
    a = 1:9000, b = 1:9000, c = 1:9000, d = 1:9000, e = 1:9000,
    f = 1:9000, g = 1:9000, h = 1:9000, i = 1:9000
  ),
  class = "data.frame",
  row.names = c(NA, -9000L)
)

nsims <- 100           # number of repeated samples
sample_df_rows <- 200  # rows drawn in each sample
Row <- seq_len(sample_df_rows)

# Preallocate one column per simulation instead of growing a data frame with
# cbind() on every iteration. Column names follow the Means_<i> pattern.
means <- matrix(
  NA_real_,
  nrow = sample_df_rows,
  ncol = nsims,
  dimnames = list(NULL, paste0("Means_", seq_len(nsims)))
)

for (i in seq_len(nsims)) {
  # Sample row positions from df itself rather than a hard-coded 1:9000, and
  # avoid naming the result `sample`, which would shadow the base function.
  sampled_rows <- sample(nrow(df), sample_df_rows, replace = FALSE)
  # Assigning into the matrix drops the row labels of the sampled rows; they
  # differ between simulations, so no single set of labels would be correct.
  means[, i] <- rowMeans(df[sampled_rows, ])
}

# Final result: the Row index followed by Means_1 ... Means_<nsims>.
output <- data.frame(Row = Row, means, check.names = FALSE)

output