Home > Enterprise >  Running consecutive runs of ggplot for the same data and getting different results
Running consecutive runs of ggplot for the same data and getting different results

Time:08-04

I wrote a simple get_random_data_for_box_plot() function.

I called ggplot twice with the same data, but the points are drawn in different positions in the two plots. Why? The generated images are attached.

# Save a plot as "<file_path><iter>.jpg".
#
# file_path: prefix of the output file (the caller includes any trailing "/").
# iter:      label appended to the prefix before the ".jpg" extension.
#
# Prints the resulting file name, then passes it to ggsave(). No plot object is
# supplied, so which plot gets written is left to ggsave()'s default.
export_image_to_file_now <- function(file_path, iter){
      destination <- paste0(file_path, iter, ".jpg")
      print(paste0("file_name: ", destination))
      ggsave(filename = destination)
    }
    
    # Build a string made of `num_words` random words.
    #
    # num_words:          number of words to generate; 0 yields "".
    # max_num_characters: upper bound given to runif() when drawing a word
    #                     length (the lower bound is fixed at 2).
    #
    # Returns a single character string. Every word is appended with paste(),
    # whose default separator is a space, so a non-empty result starts with a
    # space.
    get_random_string <-function(num_words = 5, max_num_characters = 6 ){
      string_total <- ""

      # seq_len() is empty for num_words = 0, whereas seq(1, 0) counts down
      # (1, 0) and would run the loop twice.
      for (i in seq_len(num_words)) {
        # runif() returns a fractional length that is passed on unchanged.
        # NOTE(review): confirm how stri_rand_strings() coerces it.
        word_len <- runif(n = 1, min = 2, max = max_num_characters)
        string_total <- paste(string_total, stri_rand_strings(n = 1, length = word_len))
      }

      string_total
    }
    
    # Generate a random data set for a box plot.
    #
    # Takes no arguments. Draws a random number of boxes and, for every box, a
    # mean, a standard deviation, a one-word random label and a small sample of
    # normally distributed points.
    #
    # Returns a data.frame with one row per point:
    #   name  - label of the box the point belongs to
    #   value - the sampled value
    get_random_data_for_box_plot <-function(){
      box_count <- round(runif(n = 1, min = 2, max = 9))
      box_means <- runif(n = box_count, min = -50, max = 150)

      # One spread per box: uniform between 0 and half of the box mean
      # (rounded, sign dropped). Drawn one at a time, box by box.
      box_sds <- vapply(
        seq_len(box_count),
        function(k) runif(n = 1, min = 0, max = abs(round(box_means[k] / 2))),
        numeric(1)
      )

      # Per box: label first, then the point count, then the points themselves.
      per_box <- lapply(seq_len(box_count), function(k) {
        label <- get_random_string(num_words = 1)
        point_count <- round(runif(n = 1, min = 5, max = 7))
        list(
          name = rep(label, point_count),
          value = rnorm(n = point_count, mean = box_means[k], sd = box_sds[k])
        )
      })

      data.frame(
        name = unlist(lapply(per_box, `[[`, "name")),
        value = unlist(lapply(per_box, `[[`, "value"))
      )
    }
    
    
    data <- get_random_data_for_box_plot()

    # ggplot layers must be chained with `+`; without it only the bare ggplot()
    # call belongs to the plot and the geoms, theme and title are evaluated as
    # separate statements. geom_jitter() uses random sampling, so the two
    # otherwise identical plots below place their points differently.
    data %>%
      ggplot(aes(x=name, y=value, fill=name)) +
      geom_boxplot() +
      geom_jitter(color="black", size=0.4, alpha=0.9) +
      theme_ipsum() +
      ggtitle("just a title")

    export_image_to_file_now(paste0(getwd(),"/"), "example1")

    # Same data, same layers, second export.
    data %>%
      ggplot(aes(x=name, y=value, fill=name)) +
      geom_boxplot() +
      geom_jitter(color="black", size=0.4, alpha=0.9) +
      theme_ipsum() +
      ggtitle("just a title")

    export_image_to_file_now(paste0(getwd(),"/"), "example2")

image1 image2

CodePudding user response:

Indeed, geom_jitter() uses random sampling, so I had to use set.seed() (thank you, Roland).

CodePudding user response:

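You are taking multiple random draws with geom_jitter() and runif(), and the random number generator will (almost certainly) produce different random values every time you run the code. You can stop this by calling set.seed() with an arbitrary seed number in the parentheses. This will produce the same values every time. Note that the generator's state advances with every draw, so to make the two plots identical you need to call set.seed() with the same seed immediately before each plot (or pass a fixed seed to the jitter, e.g. geom_jitter(position = position_jitter(seed = 1))).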

  • Related