Home > Back-end >  Sample in R with a maximum condition
Sample in R with a maximum condition

Time:06-04

To evaluate my petrography students, I need to prepare boxes that each contain 2 different rocks, which the students then have to identify. I want to write an R script that assigns rocks to each box.

This is how I did it:

# Randomly assign two different rocks to each of the 24 student boxes.
# `box` ends up as a 24 x 2 character matrix: one row per box, one column per
# rock slot. No limit is placed on how often a given rock is used.
rocks <- c("granite", "pumice", "gneiss", "marl", "chalk")
n_boxes <- 24

# sample() draws without replacement by default, so the two rocks in a box
# always differ. vapply() collects the draws as a 2 x n_boxes matrix (one
# column per box) and t() flips it to one row per box. Building the matrix in
# one go avoids re-copying it with rbind() on every iteration and leaves no
# stray row names behind.
box <- t(vapply(
  seq_len(n_boxes),
  function(i) sample(rocks, 2),
  character(2)
))

This works perfectly:

box
[,1]      [,2]     
"granite" "gneiss" 
"marl"    "chalk"  
"granite" "gneiss" 
"pumice"  "marl"
"chalk"   "granite"
"gneiss"  "granite"
"pumice"  "chalk"  
"gneiss"  "marl"   
"gneiss"  "pumice" 
"gneiss"  "pumice" 
"pumice"  "gneiss" 
"pumice"  "marl"
"marl"    "chalk"
"granite" "gneiss" 
"granite" "chalk" 
"gneiss"  "granite"
"pumice"  "granite"
"pumice"  "chalk"
"pumice"  "granite"
"gneiss"  "pumice" 
"pumice"  "granite"
"granite" "marl" 
"marl"    "gneiss" 
"pumice"  "gneiss" 

However, I only have 3 samples each of the rocks "chalk" and "marl". The matrix box generated above is therefore not feasible, because both rocks are assigned to more than 3 boxes.

How can I get my script to only generate 3 samples of "chalk" and "marl"?

Thanks in advance,

CodePudding user response:

This is not the most elegant way, but it should do the job:

# Assign two different rocks to each of the 24 boxes while using "chalk" and
# "marl" in at most 3 boxes each (only 3 samples of each rock are available).
# `box` ends up as a 24 x 2 character matrix, one row per box.
rocks <- c("granite", "pumice", "gneiss", "marl", "chalk")
n_boxes <- 24
max_scarce <- 3

# Preallocate the result instead of growing it with rbind() in the loop.
box <- matrix(NA_character_, nrow = n_boxes, ncol = 2)
nb_chalk <- 0
nb_marl <- 0

for (i in seq_len(n_boxes)) {
  # Rejection sampling: redraw while the pair contains a rock whose stock is
  # already used up. This always terminates because granite, pumice and gneiss
  # are never restricted, so an acceptable pair always exists.
  repeat {
    samp <- sample(rocks, 2)
    bad_sample <- (nb_chalk >= max_scarce && "chalk" %in% samp) ||
      (nb_marl >= max_scarce && "marl" %in% samp)
    if (!bad_sample) {
      break
    }
  }

  # Count how many chalk and marl samples have been handed out so far.
  if ("chalk" %in% samp) {
    nb_chalk <- nb_chalk + 1
  }
  if ("marl" %in% samp) {
    nb_marl <- nb_marl + 1
  }

  box[i, ] <- samp
}

CodePudding user response:

Here's a tidyverse solution without a for loop:

# Build a pool of rock specimens and draw two columns of 24 rocks from it.
library(tidyverse)

# `rox` is the specimen pool: one row per available specimen.
rox <- bind_rows(
# Scarce rocks: each one-row group is repeated 3 times (3 marl, 3 chalk).
tibble(rox = c("marl","chalk")) %>% 
  split(.$rox) %>% 
  map_df(~.x %>% slice(rep(row_number(), 3))),

# Other rocks: each one-row group is repeated 10 times.
tibble(rox = c("granite","pumice","gneiss")) %>% 
  split(.$rox) %>% 
  map_df(~.x %>% slice(rep(row_number(), 10)))
)

# Draw 24 specimens without replacement for the first column, then draw a
# second, independent set of 24 from the same pool and bind it alongside.
# Both inputs have a column called `rox`, so the columns are renamed by
# position to box_1 / box_2.
# NOTE(review): the two draws are independent, so the 3-specimen limit holds
# per column only; chalk (or marl) can appear up to 3 times in each column,
# i.e. up to 6 times overall.
# NOTE(review): nothing prevents box_1 and box_2 from holding the same rock in
# a given row, although each box is meant to hold 2 different rocks.
# NOTE(review): `F` is a reassignable alias of FALSE, and dplyr documents
# sample_n() as superseded by slice_sample(); confirm before reusing.
box <- rox %>% 
  sample_n(24, replace = F) %>% 
  bind_cols(rox %>% 
              sample_n(24, replace = F)) %>% 
  rename(box_1 = 1, box_2 = 2)

# Tabulate how often each rock was drawn in each column (output of one run).
table(box$box_1)
#> 
#>   chalk  gneiss granite  pumice 
#>       1       9       7       7
table(box$box_2)
#> 
#>   chalk  gneiss granite    marl  pumice 
#>       1       8       8       1       6
  • Related