Home > database >  Box plot representing the wrong aesthetics
Box plot representing the wrong aesthetics

Time:02-12

I have the following data

# Sample data, reconstructed from the dput() output of the data.table `dt`.
# Two data.table bookkeeping attributes were removed from the pasted text:
#   * `.internal.selfref = <pointer: ...>` is not valid R syntax, so the
#     original text could not be parsed at all;
#   * the secondary `index` attribute, whose deparsed name had literal
#     backticks embedded in it.
# NOTE(review): if data.table is loaded, calling data.table::setDT(dt)
# afterwards should restore the internal self-reference -- confirm locally.
dt <- structure(list(
    imaging_date = structure(c(19010, 19010, 19024,
    19024, 19010, 19024, 19010, 19024, 19010, 19024, 19010, 19024,
    19010, 19010, 19024, 19010, 19010, 19010, 19024, 19024, 19024,
    19010, 19010, 19010, 19024, 19024, 19010, 19010, 19010, 19010,
    19010, 19010, 19010, 19010, 19024, 19024), class = "Date"),
    diameter_on_mask = c(960L,
    960L, 960L, 960L, 480L, 480L, 480L, 480L, 480L, 480L, 480L, 480L,
    480L, 480L, 480L, 960L, 960L, 960L, 960L, 960L, 960L, 960L, 960L,
    960L, 960L, 960L, 480L, 480L, 480L, 480L, 480L, 480L, 480L, 480L,
    480L, 480L),
    diameter_measured = c(1020L, 1040L, 1210L, 1120L,
    532L, 626L, 541L, 595L, 519L, 602L, 515L, 638L, 519L, 518L, 593L,
    1030L, 989L, 999L, 1120L, 1140L, 1220L, 1000L, 1010L, 1010L,
    1370L, 1290L, 519L, 511L, 505L, 522L, 502L, 501L, 536L, 532L,
    686L, 754L),
    slideno_cno_region_id = c("3_1_1", "3_1_2", "3_1_1",
    "3_1_2", "3_1_3", "3_1_3", "3_1_4", "3_1_4", "3_1_5", "3_1_5",
    "3_1_6", "3_1_6", "", "", "", "", "", "", "", "", "", "3_1_7",
    "", "", "3_1_7", "", "", "", "", "", "", "", "", "", "", ""),
    region = c("inlet", "inlet", "inlet", "inlet", "inlet", "inlet",
    "inlet", "inlet", "inlet", "inlet", "inlet", "inlet", "inlet",
    "inlet", "inlet", "middle", "middle", "middle", "middle",
    "middle", "middle", "outlet", "outlet", "outlet", "outlet",
    "outlet", "outlet", "outlet", "outlet", "outlet", "outlet",
    "outlet", "outlet", "outlet", "outlet", "outlet"),
    norm_diameter = c(1.0625,
    1.08333333333333, 1.26041666666667, 1.16666666666667, 1.10833333333333,
    1.30416666666667, 1.12708333333333, 1.23958333333333, 1.08125,
    1.25416666666667, 1.07291666666667, 1.32916666666667, 1.08125,
    1.07916666666667, 1.23541666666667, 1.07291666666667, 1.03020833333333,
    1.040625, 1.16666666666667, 1.1875, 1.27083333333333, 1.04166666666667,
    1.05208333333333, 1.05208333333333, 1.42708333333333, 1.34375,
    1.08125, 1.06458333333333, 1.05208333333333, 1.0875, 1.04583333333333,
    1.04375, 1.11666666666667, 1.10833333333333, 1.42916666666667,
    1.57083333333333)),
    row.names = c(NA, -36L), class = c("data.table", "data.frame"))

I want to plot transparent boxplots under the actual data points. I did

# The asker's attempt, with the `+` layer operators restored (without them
# each line is a separate expression and no layer is added to the plot).
# The aesthetics are left exactly as posted: colour is mapped to region while
# x is the continuous imaging_date, which is the setup the question is about.
ggplot(dt, aes(x = imaging_date, y = norm_diameter, colour = factor(region))) +
  geom_point() +
  geom_boxplot(aes(fill = after_scale(alpha(colour, 0.4))))

and got

enter image description here

First of all, my data belong to only two different days. Second, the boxplots are located at neither of those dates. If I group the data by imaging_date inside the plotting call, these problems disappear, but then the boxplots lose their colors.

enter image description here

What is wrong with my data/code?

CodePudding user response:

If you want box-and-whisker plots by region, then region should be the x variable. Also add position = position_dodge() to the points layer.

Also, you don't need to explicitly coerce to factor when the variable (region) is of class character.

# One box per region: region is the x variable, the grouping variable and the
# colour. The points are dodged with the same width (0.75) passed to
# position_dodge(). The `+` operators chain the layers onto the base plot.
ggplot(dt, aes(x = region, y = norm_diameter, group = region, colour = region)) +
  geom_point(position = position_dodge(width = 0.75)) +
  # after_scale() derives the fill from the already-scaled colour, at 40% alpha
  geom_boxplot(aes(fill = after_scale(alpha(colour, 0.4))))

enter image description here

CodePudding user response:

You need the color and fill to be assigned according to imaging_date

# Colour (and the derived fill) follow imaging_date, so the discrete colour
# variable matches the x variable. The `+` operators chain the layers onto
# the base plot.
ggplot(
  dt,
  aes(x = imaging_date, y = norm_diameter, colour = factor(imaging_date))
) +
  geom_point() +
  # after_scale() derives the fill from the already-scaled colour, at 40% alpha
  geom_boxplot(aes(fill = after_scale(alpha(colour, 0.4))))

enter image description here

Or, with some aesthetics tweaks:

library(ggnewscale)

# Boxplots are coloured/filled by date; the points get their own, independent
# colour scale (by region) via ggnewscale::new_scale_colour(). Every layer and
# scale must be joined with `+`, otherwise each line is evaluated on its own
# and nothing is added to the plot.
ggplot(
  dt,
  aes(x = imaging_date, y = norm_diameter, colour = factor(imaging_date))
) +
  geom_boxplot(aes(fill = factor(imaging_date)), alpha = 0.4) +
  scale_color_manual(values = c("orange", "deepskyblue4"), name = "Date") +
  scale_fill_manual(values = c("orange", "deepskyblue4"), name = "Date") +
  # Start a fresh colour scale: layers added after this point use the
  # scale_color_manual() below instead of the date colours above.
  new_scale_colour() +
  geom_point(aes(x = imaging_date, y = norm_diameter, colour = region),
             position = position_jitter(width = 1), size = 2, alpha = 0.6,
             shape = 21, fill = "white") +
  scale_color_manual(values = c("red4", "blue4", "green4")) +
  theme_bw() +
  labs(x = "Date", y = "Diameter")

enter image description here

  • Related