I have measurements from 9 locations that I want to visualize with grouped boxplots. For each location there are 3 groups ("combined", "No Rain", "Rain"), where "combined" is just the data of "No Rain" and "Rain" combined.
I first created grouped boxplots using `boxplot()`:
# Read the tab-separated measurement table; the first row holds the column
# names. Text columns stay character vectors instead of becoming factors.
mydata <- read.table(
  file = "mydata.txt",
  header = TRUE,  # spelled out: `head` only worked via partial matching
  sep = "\t",
  dec = ".",
  skip = 0,
  stringsAsFactors = FALSE
)
# Grouped boxplots built from three overlaid boxplot() calls, one per group.
# The groups are offset by two x units so that the three boxes belonging to
# one location end up next to each other.

# Rain: drawn first, so this call also sets up the axes and axis labels.
boxplot(
  Value ~ Location,
  data = mydata,
  subset = Variable == "Rain",
  at = seq(10, 90, by = 10),
  boxwex = 1,
  col = "deepskyblue",
  outline = FALSE,
  xlab = "Location",
  ylab = "Value",
  cex.axis = 2,
  cex.lab = 2
)

# combined: added two units to the left, without drawing axes again.
boxplot(
  Value ~ Location,
  data = mydata,
  subset = Variable == "combined",
  at = seq(8, 88, by = 10),
  boxwex = 1,
  col = "grey",
  outline = FALSE,
  add = TRUE,
  names = NA,
  xaxt = "n",
  yaxt = "n"
)

# No Rain: added two units to the right, without drawing axes again.
boxplot(
  Value ~ Location,
  data = mydata,
  subset = Variable == "No Rain",
  at = seq(12, 92, by = 10),
  boxwex = 1,
  col = "indianred1",
  outline = FALSE,
  add = TRUE,
  names = NA,
  xaxt = "n",
  yaxt = "n"
)
When I create grouped boxplots from the same data with ggplot2, the plot looks different; the values seem to be distributed differently.
library(ggplot2)

# Location is converted to character so that ggplot2 treats it as a discrete
# variable and draws one group of boxes per location.
mydata$Location <- as.character(mydata$Location)

# The layers and scales have to be chained with `+`; without it only the
# empty ggplot() base is drawn and the other calls are evaluated on their own.
# `na.rm` is an argument of geom_boxplot(), not an aesthetic, so it is not
# passed to aes(). The fill colours are named so that they do not depend on
# the (locale-specific) sort order of the group labels.
ggplot(mydata, aes(x = Location, y = Value, fill = Variable)) +
  geom_boxplot(outlier.shape = NA, na.rm = TRUE) +
  scale_fill_manual(
    values = c("combined" = "grey", "No Rain" = "red", "Rain" = "lightblue")
  ) +
  # y axis restricted to 0-3.7 through the limits of the scale
  scale_y_continuous(
    limits = c(0, 3.7),
    breaks = seq(0, 3.5, by = 0.5)
  )
Is there an explanation for this?
data:
structure(list(Location = c("1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "5", "5", "5", "5", "6", "6",
"6", "6", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "8",
"8", "8", "8", "8", "8", "8", "8", "8", "8", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5", "5", "6", "6", "6", "6", "6", "6", "6",
"6", "6", "6", "6", "6", "6", "7", "7", "7", "7", "7", "7", "7",
"7", "7", "7", "7", "7", "7", "8", "8", "8", "8", "8", "8", "8",
"8", "8", "8", "8", "8", "8", "9", "9", "9", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "5", "5", "5", "5", "6", "6",
"6", "6", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "8",
"8", "8", "8", "8", "8", "8", "8", "8", "8", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5", "5", "6", "6", "6", "6", "6", "6", "6",
"6", "6", "6", "6", "6", "6", "7", "7", "7", "7", "7", "7", "7",
"7", "7", "7", "7", "7", "7", "8", "8", "8", "8", "8", "8", "8",
"8", "8", "8", "8", "8", "8", "9", "9", "9", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9"), Value = c(0.04, 0.02, 0.02, 0.01,
0, 0.02, 0.01, 0, 0.07, 0, 0, 0, 0.05, 0.01, 0.01, 0.03, 0, 0,
0.04, 0, 0.04, 0.01, 0.03, 0.05, 0.07, 0.16, 0.02, 0.04, 0.33,
0.58, 0.04, 0.03, 0.02, 0.01, 0.03, 0.08, 0.05, 0.12, 0.33, 0.05,
0, 0, 0.04, 0.05, 0.01, 0.01, 0, 0.05, 0.02, 0.01, 0.01, 0.02,
0.01, 0.09, 0.01, 0.02, 0.07, 0.25, 0.02, 0.02, 0.01, 0.03, 0.01,
0.05, 0, 0.03, 0, 0.08, 0, 0, 0, 0, 0.01, 0, 0, 0, 0.11, 0.05,
0, 2.6, 0.1, 0, 1, 0, 0.29, NA, NA, 0.29, 0.2, 0, 0, 0, 1.4,
0.14, 0, 0.3, 0.14, 0.29, NA, NA, 1, 0.52, 0, 0.02, 0.2, 2.6,
0.1, 0.25, 0.2, 0.23, 5, NA, NA, 2.14, 0.92, 0.01, 0.04, 0.09,
4.6, 0.34, 1, 1.2, 0.55, 1.71, NA, NA, 1.14, 0.48, 0.02, 0.02,
0.09, 8.6, 0.46, 0.16, 0.7, 2.36, 3.57, NA, NA, 3.14, 0.4, 0.02,
0.04, 0.03, 1.4, 0.06, 0.09, 0, 0.23, 0.71, NA, NA, 1.14, 0.28,
0, 0, 0.1, 5.4, 0.16, 0.25, 1.2, 0.82, 4, NA, NA, 1.86, 0.4,
0.01, 0.02, 0.17, 1, 0.72, 0.63, 0.5, 0.59, 2.14, NA, NA, 0.71,
0.4, 0.01, 0.02, 0.06, 3.6, 0.06, 0.63, 1.3, 0.68, 14.57, NA,
NA, 0.71, 0.12, 0, 0.01, 0.04, 0.02, 0.02, 0.01, 0, 0.02, 0.01,
0, 0.07, 0, 0, 0, 0.05, 0.01, 0.01, 0.03, 0, 0, 0.04, 0, 0.04,
0.01, 0.03, 0.05, 0.07, 0.16, 0.02, 0.04, 0.33, 0.58, 0.04, 0.03,
0.02, 0.01, 0.03, 0.08, 0.05, 0.12, 0.33, 0.05, 0, 0, 0.04, 0.05,
0.01, 0.01, 0, 0.05, 0.02, 0.01, 0.01, 0.02, 0.01, 0.09, 0.01,
0.02, 0.07, 0.25, 0.02, 0.02, 0.01, 0.03, 0.01, 0.05, 0, 0.03,
0, 0.08, 0, 0, 0, 0, 0.01, 0, 0, 0, 0.11, 0.05, 0, 2.6, 0.1,
0, 1, 0, 0.29, NA, NA, 0.29, 0.2, 0, 0, 0, 1.4, 0.14, 0, 0.3,
0.14, 0.29, NA, NA, 1, 0.52, 0, 0.02, 0.2, 2.6, 0.1, 0.25, 0.2,
0.23, 5, NA, NA, 2.14, 0.92, 0.01, 0.04, 0.09, 4.6, 0.34, 1,
1.2, 0.55, 1.71, NA, NA, 1.14, 0.48, 0.02, 0.02, 0.09, 8.6, 0.46,
0.16, 0.7, 2.36, 3.57, NA, NA, 3.14, 0.4, 0.02, 0.04, 0.03, 1.4,
0.06, 0.09, 0, 0.23, 0.71, NA, NA, 1.14, 0.28, 0, 0, 0.1, 5.4,
0.16, 0.25, 1.2, 0.82, 4, NA, NA, 1.86, 0.4, 0.01, 0.02, 0.17,
1, 0.72, 0.63, 0.5, 0.59, 2.14, NA, NA, 0.71, 0.4, 0.01, 0.02,
0.06, 3.6, 0.06, 0.63, 1.3, 0.68, 14.57, NA, NA, 0.71, 0.12,
0, 0.01), Variable = c("No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "No Rain", "No Rain", "No Rain", "No Rain",
"No Rain", "No Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain", "Rain",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined", "combined", "combined", "combined",
"combined", "combined", "combined")), row.names = c(NA, -390L
), class = "data.frame")
CodePudding user response:
I think your problem is caused by the use of `limits` in your call to `scale_y_continuous()`. Setting limits on the scale appears to filter the data (values outside the limits are dropped) before the statistics used for the box-and-whisker plots are calculated.
The solution is to use `coord_cartesian()`. This allows `ggplot` to use the whole data frame to calculate the statistics and then "zooms" the plot to the required size and location:
# Same plot as in the question, but the visible y range is set with
# coord_cartesian() instead of scale limits: every value enters the boxplot
# statistics and the panel is only zoomed to 0-3.7 afterwards.
# Uses the question's `mydata` data frame (`d` is not defined in this post);
# the layers are chained with `+`, and `na.rm` is passed to geom_boxplot()
# only, because it is not an aesthetic.
ggplot(mydata, aes(x = Location, y = Value, fill = Variable)) +
  geom_boxplot(outlier.shape = NA, na.rm = TRUE) +
  scale_fill_manual(
    values = c("combined" = "grey", "No Rain" = "red", "Rain" = "lightblue")
  ) +
  scale_y_continuous(breaks = seq(0, 3.5, by = 0.5)) +
  coord_cartesian(ylim = c(0, 3.7))
See the ggplot2 reference page for `coord_cartesian()` for more details.