How to add both rows and columns to a for loop in R?-CodePudding

I am trying to create a null bootstrap in R. The dataset has four species, each with associated sample values. For each species I want to repeatedly draw 5 values from that species' distribution and calculate their mean and a special Poisson function. Here is my attempt at the code: I sample 5 values and use summarise() to calculate the mean and the Poisson function from them. I get an error that says "! New rows can't add columns." Any suggestions on how to correct this to get the desired output (attached)?

library(truncnorm)

# Simulated data: 1440 draws from a normal(mean 5, sd 2) truncated to [0, 10],
# labelled in consecutive runs of 360 as species A-D.
set.seed(111)
sample <- rtruncnorm(n = 1440, a = 0, b = 10, mean = 5, sd = 2)
sp <- rep(LETTERS[1:4], each = 360)

df <- data.frame(sample = sample, sp = sp)

# Empty result table that every loop iteration appends to.
# NOTE(review): the closing ")" of this tibble() call is missing in the
# snippet as posted -- presumably lost when pasting; confirm against the
# asker's actual script.
output <- tibble(mean.set = numeric(), 
                 poisson.set = numeric(), 
                 sp = character(),
                 set = numeric()

set.seed(42)                    
for(i in 1:1440){
  # For each species: draw 5 rows with replacement, then collapse them to a
  # single row holding the mean and var/mean^2 - 1/mean, tagged with the
  # iteration number i.
  # The second summary column is spelled `possion.set` here, whereas the table
  # above declares `poisson.set`; no `sp` column is produced by summarise().
  samp1 <- df %>% filter(sp == 'A') %>% sample_n(5, replace = TRUE) %>% summarise(mean.set = mean(sample, na.rm=TRUE), possion.set = ((var(sample, na.rm=TRUE)/ mean(sample, na.rm=TRUE)^2) - (1/mean(sample, na.rm=TRUE)))) %>% mutate(set = i)
  samp2 <- df %>% filter(sp == 'B') %>% sample_n(5, replace = TRUE) %>% summarise(mean.set = mean(sample, na.rm=TRUE), possion.set = ((var(sample, na.rm=TRUE)/ mean(sample, na.rm=TRUE)^2) - (1/mean(sample, na.rm=TRUE))))%>% mutate(set = i)
  samp3 <- df %>% filter(sp == 'C') %>% sample_n(5, replace = TRUE) %>% summarise(mean.set = mean(sample, na.rm=TRUE), possion.set = ((var(sample, na.rm=TRUE)/ mean(sample, na.rm=TRUE)^2) - (1/mean(sample, na.rm=TRUE))))%>% mutate(set = i)
  samp4 <- df %>% filter(sp == 'D') %>% sample_n(5, replace = TRUE) %>% summarise(mean.set = mean(sample, na.rm=TRUE), possion.set = ((var(sample, na.rm=TRUE)/ mean(sample, na.rm=TRUE)^2) - (1/mean(sample, na.rm=TRUE))))%>% mutate(set = i)
  # Stack the four one-row summaries and append them to the running table.
  output %>% add_row(bind_rows(samp1, samp2, samp3, samp4))  -> output
}


Error:
! New rows can't add columns.
✖ Can't find columns `possion.set` and `set` in `.data`.
Run `rlang::last_error()` to see where the error occurred

#Expected output
 set mean.set poisson.set sp
  1    5          2        A
  2    4          9        A
  ....
  48   12          0        A 
  1    5          2        B
  2    4          9        B
  ....
  48   22          0        B  
   .....

CodePudding user response：

The error comes from a mismatch in column names: 'output' is created with poisson.set, while the for loop creates possion.set. Correcting either one so that the two match makes the code work. (In addition, sp was not created in the summarise() call, which resulted in NA in that column.)

library(dplyr)  # pipe and data verbs; also makes tibble() and add_row() available

# Empty result table. Its column names must match, letter for letter, the
# columns built inside the loop: add_row() refuses rows that would introduce
# a column the table does not already have.
output <- tibble(
  mean.set = numeric(),
  possion.set = numeric(),
  sp = character(),
  set = numeric()
)

# Draw 5 rows (with replacement) of one species from `df` and collapse them to
# a single summary row.
#   species: species label to sample from ("A", "B", "C" or "D")
#   set_id:  iteration number stored in the `set` column
# Returns a one-row data frame with columns sp, mean.set, possion.set, set.
draw_set <- function(species, set_id) {
  df %>%
    filter(sp == species) %>%
    sample_n(5, replace = TRUE) %>%
    summarise(
      sp = species,  # carry the label through; otherwise `sp` ends up NA
      mean.set = mean(sample, na.rm = TRUE),
      possion.set = var(sample, na.rm = TRUE) / mean(sample, na.rm = TRUE)^2 -
        1 / mean(sample, na.rm = TRUE)
    ) %>%
    mutate(set = set_id)
}

set.seed(42)
for (i in 1:1440) {
  # Same draw order as before (A, B, C, D within each iteration).
  samp1 <- draw_set("A", i)
  samp2 <- draw_set("B", i)
  samp3 <- draw_set("C", i)
  samp4 <- draw_set("D", i)
  output <- output %>% add_row(bind_rows(samp1, samp2, samp3, samp4))
}

In addition, we could do this without a for loop: group the data by the 'sp' column with group_by(), and repeat the sampling step with replicate()/rerun() to get multiple iterations.

library(dplyr)
library(purrr)

# Loop-free version: each iteration samples 5 rows per species (with
# replacement) and summarises them, giving one row per species; the 1440
# per-iteration results are then stacked, with the iteration recorded in `set`.
# rerun() was deprecated in purrr 1.0.0, so the repetition is written as a
# map() over the iteration index, which is the replacement purrr recommends.
output2 <- map(seq_len(1440), function(i) {
  df %>%
    group_by(sp) %>%
    slice_sample(n = 5, replace = TRUE) %>%
    summarise(
      mean.set = mean(sample, na.rm = TRUE),
      possion.set = var(sample, na.rm = TRUE) / mean(sample, na.rm = TRUE)^2 -
        1 / mean(sample, na.rm = TRUE)
    )
}) %>%
  bind_rows(.id = "set")