Home > Net >  Creating multiple boxplots from for loop output
Creating multiple boxplots from for loop output

Time:11-18

This dataframe contains a list of sample sites and the resulting population parameter estimates for each species that was caught within each site. I am trying to run a for loop that randomly selects 5 sample sites 100 times, stores those sites in an object, and then produces boxplots that consider all the values of one species across all 100 iterations.

So in the final output there would be one boxplot each for Species 1, 2, 3, 4, and 5, with the values that make up each of those boxplots coming from all 100 draws of sample sites.

Dataframe:

# Example data, written as a structure() literal: 26 rows, one per sample_site
# (1-26), with columns species_1 ... species_5; NA marks a missing value for
# that species at that site.
# The object carries class "grouped_df" and a `groups` attribute listing
# waterbody and transect_number, neither of which is a column of the data
# itself.
# NOTE(review): the grouped_df/tbl_df classes presumably need dplyr/tibble
# loaded to print and subset as intended -- confirm.
homer_cpue_wide<- structure(list(sample_site = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
), species_1= c(75, 38.4204909284952, 77.634011090573, 82.1917808219178, 
119.341563786008, 22.5422667501565, 155.275381552754, 81.1332904056665, 
37.037037037037, 73.2824427480916, 71.608040201005, 208.806818181818, 
116.504854368932, 119.775421085465, 104.408352668213, 117.391304347826, 
12.0603015075377, 93.5593220338983, 166.795366795367, 20, 91.566265060241, 
70.8860759493671, NA, 44.8765893792072, NA, 3.96563119629874), 
    species_2= c(NA, 6.4034151547492, 11.090573012939, 16.4383561643836, 
    4.11522633744856, NA, NA, NA, NA, 4.58015267175573, NA, 21.3068181818182, 
    NA, NA, 6.96055684454756, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), species_3= c(51.3157894736842, 83.2443970117396, 
    73.9371534195933, 71.2328767123288, 28.8065843621399, 37.5704445835942, 
    59.721300597213, 38.6349001931745, 66.6666666666667, 77.8625954198473, 
    71.608040201005, 63.9204545454545, 46.6019417475728, 22.4578914535246, 
    6.96055684454756, 13.0434782608696, 24.1206030150754, 40.6779661016949, 
    60.2316602316602, 56, 28.9156626506024, 55.6962025316456, 
    20.2360876897133, 31.413612565445, NA, 31.7250495703899), 
    species_4= c(NA, NA, 14.7874306839187, NA, 4.11522633744856, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.63320463320463, 
    NA, NA, NA, NA, NA, NA, NA), species_5= c(7.89473684210526, 12.8068303094984, 
    7.39371534195933, 10.958904109589, NA, NA, 3.9814200398142, 
    NA, 3.7037037037037, 13.7404580152672, 11.3065326633166, 
    12.7840909090909, 3.88349514563107, 3.74298190892077, NA, 
    NA, NA, 4.06779661016949, NA, 16, 9.63855421686747, 5.06329113924051, 
    20.2360876897133, 22.4382946896036, NA, 7.93126239259749)), row.names = c(NA, 
-26L), groups = structure(list(waterbody = c("Homer", "Homer", 
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer", 
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer", 
"Homer", "Homer", "Homer", "Homer", "Homer", "Homer", "Homer", 
"Homer", "Homer", "Homer"), transect_number = c(1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
23, 24, 25, 26), .rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L, 
    7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
    19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -26L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Current Code:

# Asker's attempt: draw 5 random sample sites 100 times, keep every draw in
# `list_new`, and boxplot the species columns of each draw.
list_new<- list()

counter<- 1
for (i in 1:100) {
  # 5 rows (sample sites) drawn without replacement
  tempsample<- homer_cpue_wide[sample(1:nrow(homer_cpue_wide), 5, replace=F), ]

  #store results of sampling into a list
  list_new[[length(list_new) + 1]]= tempsample


  # create a box plot for each individual species CPUE estimates
  # NOTE(review): this plots only the current draw (`tempsample`), not the
  # draws accumulated in `list_new`.
  # NOTE(review): columns 2:5 are species_1..species_4; species_5 is column 6.
  # NOTE(review): `x` is not created anywhere in the visible code, and
  # boxplot() is base graphics while theme() is ggplot2, so this `+`
  # presumably cannot style the plot -- confirm.
  x[i]<- boxplot(as.list(tempsample[,2:5])) +
    theme(axis.text.x = element_text(angle=45,hjust=1))

  counter<- counter + 1
  print(counter)
}

Running this I do get a boxplot with the correct information (apart from the theme), but it only includes values from the one singular grab of 5 sample sites (here that is "tempsample"), as opposed to the 100 grabs. Additionally, the [,2:5] condition for the boxplot is to exclude other variables such as sample site- which works, but I'm assuming there is a better way to do this.

Any help would be greatly appreciated. Thanks in advance

CodePudding user response:

I'm not sure if this is what you are asking for, but here is my take.

# Draw 5 random sample sites (rows) without replacement, 100 times, keeping
# each draw as one element of `list_new`. No plotting happens here.
list_new <- vector("list", 100)  # preallocated: one slot per draw

counter <- 0
for (i in seq_len(100)) {
  # row indices of the 5 sites selected in this iteration
  picked_rows <- sample(seq_len(nrow(homer_cpue_wide)), 5, replace = FALSE)
  tempsample <- homer_cpue_wide[picked_rows, ]

  # store results of sampling into a list
  list_new[[i]] <- tempsample

  # progress indicator: prints 1..100
  counter <- counter + 1
  print(counter)
}

You only need the for loop to select the 5 sample sites 100 times. Outside the loop, you can rbind the results of the 100 iterations and then plot them (I used tidyverse and ggplot2 for this).

# Stack the 100 draws (5 rows each) into a single data frame.
list_new <- do.call(rbind, list_new)

library(tidyverse)
# Reshape to long format: columns 2:6 (species_1 ... species_5) become a
# `species` name column and a `value` column, one row per site x species.
list_new <- pivot_longer(
  list_new,
  cols = 2:6,
  names_to = "species",
  values_to = "value"
)

library(ggplot2)
# One boxplot per species, built from the values of all draws; the jittered
# points show the individual observations on top of each box.
ggplot(list_new, aes(x = species, y = value)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.5, width = 0.2)

This is the result I get:

enter image description here

  • Related