Home > OS >  Automatic break after n-dots in geom_dotplot-function in ggplot in R
Automatic break after n-dots in geom_dotplot-function in ggplot in R

Time:01-23

I have got a problem in managing multiple observations in the geom_dotplot-function due to overlap between different groups:

# Load ggplot2 so the snippet runs on its own.
library(ggplot2)

# Example data: v1 is mapped to the x axis, v2 to the fill colour,
# and v3 is the measured value shown on the y axis.
v1 <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2)
v2 <- c(0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2)
v3 <- c(13,67,89,280,40,1,23,99,32,1,75,280,270,200,196,300,320,277,23,4,1,2,5,89,45,23,11,1,3,23,100,100,100,100,100,200,100,11,6,6,123,100,100,100,100,100,12,86,11,300,75,100,110,19,299,100,100,100,100,100,100,100,100,11,100,120,110,100,100,300,300,250,100,100,100,12,100,100,75,5,10,10,10,10,10)

summary <- data.frame(v1, v2, v3)

# Treat both grouping columns as discrete so they can drive x position and fill.
summary$v1 <- as.factor(summary$v1)
summary$v2 <- as.factor(summary$v2)

# ggplot2 layers are combined with `+`; without it each call is evaluated on
# its own and nothing is added to the plot.
# Both layers use the same dodge width so the dots line up with their boxes.
ggplot(summary, aes(x = v1, y = v3, fill = v2)) +
  geom_boxplot(width = 0.5, position = position_dodge(0.75)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    binwidth = 3.25,
    position = position_dodge(0.75)
  )

Example as image

I thought about manually changing the data so that no more than 5 observations share the same value (like v3 <- c(..., 100, 100, 100, 100, 100, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 130, ...)). However, that would also change the results for the boxplots (median, interquartile range).

I could not find any option to automatically break the stack after 5 dots so that the groups do not overlap. Maybe there is a simple and clever solution. All your help is appreciated. Thank you in advance!

CodePudding user response:

You can make a smaller dataset that keeps at most five observations for each combination of v1, v2 and v3 (summary2 below). You can use the original data to make the boxes and the smaller data to make the points.

library(dplyr)
library(ggplot2)

# Example data: v1 is mapped to the x axis, v2 to the fill colour,
# and v3 is the measured value shown on the y axis.
v1 <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2)
v2 <- c(0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2)
v3 <- c(13,67,89,280,40,1,23,99,32,1,75,280,270,200,196,300,320,277,23,4,1,2,5,89,45,23,11,1,3,23,100,100,100,100,100,200,100,11,6,6,123,100,100,100,100,100,12,86,11,300,75,100,110,19,299,100,100,100,100,100,100,100,100,11,100,120,110,100,100,300,300,250,100,100,100,12,100,100,75,5,10,10,10,10,10)

summary <- data.frame(v1, v2, v3)

# Treat both grouping columns as discrete so they can drive x position and fill.
summary$v1 <- as.factor(summary$v1)
summary$v2 <- as.factor(summary$v2)

# Keep at most five rows for every (v1, v2, v3) combination, so that no more
# than five identical values are drawn as dots. The full data set is left
# untouched for the boxplots.
summary2 <- summary %>%
  group_by(v1, v2, v3) %>%
  filter(row_number() <= 5) %>%
  ungroup()

# Layers are chained with `+`. Boxes use the full data, dots use the reduced
# data; both share the same dodge width so they stay aligned.
ggplot() +
  geom_boxplot(
    data = summary,
    aes(x = v1, y = v3, fill = v2),
    width = 0.5,
    position = position_dodge(0.75)
  ) +
  geom_dotplot(
    data = summary2,
    aes(x = v1, y = v3, fill = v2),
    binaxis = "y",
    stackdir = "center",
    binwidth = 3.25,
    position = position_dodge(0.75)
  )

Created on 2023-01-22 by the reprex package

enter image description here

Another option is to use geom_beeswarm rather than geom_dotplot, using the same approach with the data:

library(ggbeeswarm)

# Boxplots use the original data. The beeswarm layer receives a modified copy:
# `data = . %>% ...` is a function that ggplot2 applies to the plot data.
# Within each set of identical rows, if there are more than 6 of them, v3 is
# shifted by uniform noise in [-5, 5] so the points spread out instead of
# stacking on one value. The noise is random, so the exact point positions
# differ between runs.
ggplot(summary, aes(x = v1, y = v3, fill = v2)) +
  geom_boxplot(width = 0.5, position = position_dodge(0.75)) +
  geom_beeswarm(
    data = . %>%
      group_by_all() %>%
      mutate(v3 = if (n() > 6) v3 + runif(n(), -5, 5) else v3),
    shape = 21,
    dodge.width = 0.75,
    priority = "density"
  )

enter image description here

  • Related