I wrote a simple get_random_data_for_box_plot()
function.
I called ggplot twice with the same data, yet the plotted points differ between the two images. Why is that? The generated images are attached.
# Save a plot to "<file_path><iter>.jpg" via ggsave().
#
# Args:
#   file_path: prefix of the output path; it is concatenated directly with
#     `iter`, so include a trailing separator if one is needed.
#   iter: label appended to `file_path` before the ".jpg" extension.
#
# The resulting path is printed before saving. ggsave() is called with the
# file name only, so it falls back to its default plot (the last plot).
export_image_to_file_now <- function(file_path, iter) {
  target <- paste0(file_path, iter, ".jpg")
  print(paste0("file_name: ", target))
  ggsave(target)
}
# Build a string of `num_words` random words.
#
# Args:
#   num_words: number of words to generate; 0 yields the empty string.
#   max_num_characters: upper bound handed to runif() when drawing the length
#     of each word (the lower bound is 2). The drawn length is truncated to a
#     whole number.
#
# Returns:
#   A length-one character vector. Each word is preceded by a single space,
#   so a non-empty result starts with a leading space.
get_random_string <- function(num_words = 5, max_num_characters = 6) {
  # seq_len() gives an empty sequence for num_words == 0, whereas
  # seq(1, 0) would iterate over c(1, 0) and generate two words.
  words <- vapply(
    seq_len(num_words),
    function(i) {
      # Truncate explicitly instead of relying on implicit coercion of a
      # fractional length inside stri_rand_strings().
      word_len <- as.integer(runif(n = 1, min = 2, max = max_num_characters))
      stri_rand_strings(n = 1, length = word_len)
    },
    character(1)
  )
  # The leading "" keeps the space in front of the first word.
  paste(c("", words), collapse = " ")
}
# Generate a random long-format data set suitable for a box plot.
#
# A random number of boxes (2 to 9) is drawn. Each box gets a mean drawn
# uniformly from [-50, 150], a standard deviation drawn uniformly from
# [0, |round(mean / 2)|], a random one-word name, and 5 to 7 normally
# distributed values.
#
# Returns:
#   A data.frame with one row per point and two columns:
#     name  - the name of the box the point belongs to
#     value - the sampled value
get_random_data_for_box_plot <- function() {
  num_boxes <- round(runif(n = 1, min = 2, max = 9))
  mean_vec <- runif(n = num_boxes, min = -50, max = 150)
  # runif() is vectorised over `max`, so one call draws every box's sd.
  sd_vec <- runif(n = num_boxes, min = 0, max = abs(round(mean_vec / 2)))

  # Build each box separately and combine once at the end instead of
  # growing vectors with c() inside a loop.
  boxes <- lapply(seq_len(num_boxes), function(idx) {
    box_name <- get_random_string(num_words = 1)
    num_points_per_box <- round(runif(n = 1, min = 5, max = 7))
    list(
      name = rep(box_name, num_points_per_box),
      value = rnorm(
        n = num_points_per_box,
        mean = mean_vec[idx],
        sd = sd_vec[idx]
      )
    )
  })

  data.frame(
    name = unlist(lapply(boxes, `[[`, "name")),
    value = unlist(lapply(boxes, `[[`, "value"))
  )
}
# Generate one random data set and plot it twice, saving each plot to the
# current working directory as example1.jpg / example2.jpg.
# NOTE(review): %>% and theme_ipsum() are not defined in this file; they
# presumably come from magrittr/dplyr and hrbrthemes - confirm they are attached.
data <- get_random_data_for_box_plot()

# ggplot layers have to be joined with `+`; without it the geoms, theme and
# title are evaluated as separate statements and never reach the plot.
#
# geom_jitter() adds random noise to the point positions when the plot is
# rendered, so the two images show different points even though `data` is
# identical. Calling set.seed() with the same value before each plot makes
# the jitter reproducible.
data %>%
  ggplot(aes(x = name, y = value, fill = name)) +
  geom_boxplot() +
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  theme_ipsum() +
  ggtitle("just a title")
export_image_to_file_now(paste0(getwd(), "/"), "example1")

data %>%
  ggplot(aes(x = name, y = value, fill = name)) +
  geom_boxplot() +
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  theme_ipsum() +
  ggtitle("just a title")
export_image_to_file_now(paste0(getwd(), "/"), "example2")
CodePudding user response:
Indeed, geom_jitter()
uses random sampling, so I had to call set.seed() with the same seed before each plot to get identical points
(thank you, Roland).
CodePudding user response:
You are taking multiple random draws with geom_jitter()
and runif(),
and the random number generator will (almost certainly) produce different random values every time you run the code. You can stop this by calling set.seed()
with an arbitrary seed number in the parentheses before the random draws. This will produce the same values every time.