I have the following dataset
# Example data (dput() output of a data.table), bound to `dt` so the plotting
# code can use it. The `.internal.selfref = <pointer: ...>` attribute that
# dput() prints for data.tables has been removed: it is not valid R syntax and
# prevents the snippet from being parsed.
dt <- structure(list(imaging_date = structure(c(19010, 19010, 19024,
19024, 19010, 19024, 19010, 19024, 19010, 19024, 19010, 19024,
19010, 19010, 19024, 19010, 19010, 19010, 19024, 19024, 19024,
19010, 19010, 19010, 19024, 19024, 19010, 19010, 19010, 19010,
19010, 19010, 19010, 19010, 19024, 19024), class = "Date"), diameter_on_mask = c(960L,
960L, 960L, 960L, 480L, 480L, 480L, 480L, 480L, 480L, 480L, 480L,
480L, 480L, 480L, 960L, 960L, 960L, 960L, 960L, 960L, 960L, 960L,
960L, 960L, 960L, 480L, 480L, 480L, 480L, 480L, 480L, 480L, 480L,
480L, 480L), diameter_measured = c(1020L, 1040L, 1210L, 1120L,
532L, 626L, 541L, 595L, 519L, 602L, 515L, 638L, 519L, 518L, 593L,
1030L, 989L, 999L, 1120L, 1140L, 1220L, 1000L, 1010L, 1010L,
1370L, 1290L, 519L, 511L, 505L, 522L, 502L, 501L, 536L, 532L,
686L, 754L), slideno_cno_region_id = c("3_1_1", "3_1_2", "3_1_1",
"3_1_2", "3_1_3", "3_1_3", "3_1_4", "3_1_4", "3_1_5", "3_1_5",
"3_1_6", "3_1_6", "", "", "", "", "", "", "", "", "", "3_1_7",
"", "", "3_1_7", "", "", "", "", "", "", "", "", "", "", ""),
    region = c("inlet", "inlet", "inlet", "inlet", "inlet", "inlet",
    "inlet", "inlet", "inlet", "inlet", "inlet", "inlet", "inlet",
    "inlet", "inlet", "middle", "middle", "middle", "middle",
    "middle", "middle", "outlet", "outlet", "outlet", "outlet",
    "outlet", "outlet", "outlet", "outlet", "outlet", "outlet",
    "outlet", "outlet", "outlet", "outlet", "outlet"), norm_diameter = c(1.0625,
    1.08333333333333, 1.26041666666667, 1.16666666666667, 1.10833333333333,
    1.30416666666667, 1.12708333333333, 1.23958333333333, 1.08125,
    1.25416666666667, 1.07291666666667, 1.32916666666667, 1.08125,
    1.07916666666667, 1.23541666666667, 1.07291666666667, 1.03020833333333,
    1.040625, 1.16666666666667, 1.1875, 1.27083333333333, 1.04166666666667,
    1.05208333333333, 1.05208333333333, 1.42708333333333, 1.34375,
    1.08125, 1.06458333333333, 1.05208333333333, 1.0875, 1.04583333333333,
    1.04375, 1.11666666666667, 1.10833333333333, 1.42916666666667,
    1.57083333333333)), row.names = c(NA, -36L), class = c("data.table",
"data.frame"), index = structure(integer(0), "`__imaging_date`" = c(1L,
2L, 5L, 7L, 9L, 11L, 13L, 14L, 16L, 17L, 18L, 22L, 23L, 24L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 3L, 4L, 6L, 8L, 10L,
12L, 15L, 19L, 20L, 21L, 25L, 26L, 35L, 36L)))
I would like to group the data points according to .(imaging_date, region)
and plot the individual data points along with the boxplots, similar to
# Attempt: one group per (imaging_date, region) pair, coloured by imaging date,
# drawing the raw points and a boxplot for each group.
ggplot(dt, aes(x = region, y = norm_diameter,
               group = interaction(imaging_date, region))) +
  geom_point(aes(colour = factor(imaging_date))) +
  geom_boxplot(aes(colour = factor(imaging_date),
                   fill = after_scale(alpha(colour, 0.4))))
However, as you can see above,
- not all data points align vertically (some are shifted to the right or left),
- the boxplots are placed next to the data points, whereas I want them to overlap.
What am I doing wrong in the above code?
CodePudding user response:
You need to use the position argument. I've defined a slightly different version for you that does what you need.
# Both layers use the same position_dodge(1), so each group's dots are shifted
# by the same amount as that group's boxplot.
ggplot(dt, aes(x = region, y = norm_diameter,
               group = interaction(imaging_date, region))) +
  geom_boxplot(
    aes(colour = factor(imaging_date),
        fill = after_scale(alpha(colour, 0.4))),
    position = position_dodge(1)
  ) +
  geom_dotplot(
    aes(colour = factor(imaging_date),
        fill = after_scale(alpha(colour, 0.4))),
    binaxis = "y", stackdir = "center",
    position = position_dodge(1), dotsize = 0.4
  )
I've basically used the same aes(...) for the geom_dotplot layer.
Otherwise, note the stackdir='center' and position=position_dodge(1) arguments,
which are available in the geom_dotplot function.
Reference used: http://www.sthda.com/english/wiki/ggplot2-box-plot-quick-start-guide-r-software-and-data-visualization