I want to place a boxplot, a scatter plot and a linear regression line for the scatter points onto one chart using ggplot2. I am able to get two of the three onto one chart but have trouble combining the regression line with a boxplot.
A sample of my data below
# Sample data: 24 rows covering samples 2113-2124, each measured twice
# (Rep_No "A" and "B"). Fe is numeric; SiO2 and Al2O3 are stored as character
# strings in this dump.
df <- structure(list(Sample = c(2113, 2113, 2114, 2114, 2115, 2115,
2116, 2116, 2117, 2117, 2118, 2118, 2119, 2119, 2120, 2120, 2121,
2121, 2122, 2122, 2123, 2123, 2124, 2124), Rep_No = c("A", "B",
"A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A",
"B", "A", "B", "A", "B", "A", "B", "A", "B"), Fe = c(57.24, 57.12,
57.2, 57.13, 57.21, 57.14, 57.16, 57.31, 57.11, 57.18, 57.21,
57.12, 57.14, 57.17, 57.1, 57.18, 57, 57.06, 57.13, 57.09, 57.17,
57.23, 57.09, 57.1), SiO2 = c("6.85", "6.83", "6.7", "6.69",
"6.83", "6.8", "6.76", "6.79", "6.82", "6.82", "6.8", "6.86",
"6.9", "6.82", "6.81", "6.83", "6.79", "6.76", "6.8", "6.88",
"6.83", "6.79", "6.8", "6.83"), Al2O3 = c("2.9", "2.88", "2.88",
"2.88", "2.92", "2.9", "2.89", "2.87", "2.9", "2.89", "2.9",
"2.89", "2.89", "2.88", "2.89", "2.91", "2.91", "2.91", "2.9",
"2.9", "2.91", "2.91", "2.88", "2.86")), row.names = c(NA, -24L
), class = "data.frame")
My code thus far
# Response (Fe) and predictor (sample number) pulled out of df as
# free-standing vectors.
y <- df[["Fe"]]
x <- df[["Sample"]]
#' Build a plotmath label with the fitted line equation and R^2
#'
#' Fits `lm(y ~ x, df)` and formats the intercept, slope and R-squared into a
#' plotmath expression of the form `y = a + b . x, r^2 = r2`.
#'
#' The model variables are named `y` and `x`; `lm()` looks them up in `df`
#' first and falls back to the environment of the formula, so either `df`
#' needs columns called `x` and `y` or those vectors must exist in the
#' calling scope.
#'
#' @param df Data frame passed to `lm()` as its `data` argument.
#' @param ... Currently unused.
#' @return A length-one character vector holding the deparsed plotmath
#'   expression, suitable for `geom_text(..., parse = TRUE)`.
lm_eqn <- function(df, ...) {
  m <- lm(y ~ x, df)
  coefs <- unname(coef(m))
  # The `+` between intercept and slope is required: without it the
  # expression does not parse.
  eq <- substitute(
    italic(y) == a + b %.% italic(x) * "," ~~italic(r)^2 ~ "=" ~ r2,
    list(
      a = format(coefs[1], digits = 2),
      b = format(coefs[2], digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.character(as.expression(eq))
}
# Plotmath label (equation and R^2) for the regression of Fe on Sample.
a <- lm_eqn(df)

# Boxplot of all Fe values plus the individual points coloured by sample.
# Sample is converted to a factor so it shares a discrete x axis with the
# "All Data" boxplot category. Every layer must be joined with `+`; without
# it only the empty ggplot() is stored in p.
p <- df %>%
  mutate(Sample = factor(Sample)) %>%
  ggplot() +
  geom_boxplot(mapping = aes(x = "All Data", y = Fe)) +
  geom_point(mapping = aes(x = Sample, y = Fe, color = Sample)) +
  ggtitle("Lab Test Order Fe") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(legend.position = "none") +
  xlab(label = "Sample No") +
  ylab("Homogeneity Test Fe %")
p
And my code to get the linear trend line:
# Scatter plot of Fe against the numeric sample number with a fitted
# least-squares line and the equation label `a` placed at fixed coordinates.
# Layers are chained with `+`; TRUE is spelled out because `T` can be
# reassigned, and "none" is the documented legend.position value.
p2 <- df %>%
  ggplot(aes(Sample, y = Fe)) +
  geom_point(mapping = aes(x = Sample, y = Fe)) +
  geom_smooth(method = lm, se = FALSE) +
  theme(legend.position = "none") +
  geom_text(
    x = 2115, y = 57.05, check_overlap = TRUE, label = a, parse = TRUE
  )
p2
How can I get all three onto the same chart? I would also like to put the boxplot first, maintain the colours for the points, and have the text for the regression line placed in an optimal position automatically rather than setting the coordinates for placement by hand.
Any help appreciated.
CodePudding user response:
I suggest two options. First, with the help of the `scales` and `ggpmisc` packages, to get everything into a single plot/frame. This is what you asked, literally.
Then, with the help of `patchwork`, to get two aligned plots: one with the boxplot, another with the scatter plot and regression line.
Option 1. All bundled together.
library(tidyverse)
library(scales) # To get nice looking x-axis breaks
library(ggpmisc) # To help with optimal position for the regression formula
# Option 1: everything in one panel on a continuous x axis.
# The boxplot is drawn one unit to the left of the smallest sample number and
# that position is relabelled "All Data" on the axis.
# `a` is the regression label string computed earlier with lm_eqn(df).
ggplot(data = df, aes(x = Sample, y = Fe)) +
  geom_point(
    mapping = aes(x = Sample, y = Fe, color = as.factor(Sample))
  ) +
  stat_poly_eq(
    formula = y ~ x, mapping = aes(label = a), parse = TRUE,
    method = "lm", hjust = -0.35
  ) +
  geom_smooth(method = lm, se = FALSE) +
  geom_boxplot(mapping = aes(x = min(Sample) - 1, y = Fe)) +
  theme(legend.position = "none") +
  labs(
    title = "Lab Test Order Fe",
    x = "Sample No",
    y = "Homogeneity Test Fe %"
  ) +
  # unique() avoids duplicated breaks: every sample appears twice in df.
  scale_x_continuous(
    labels = c("All Data", as.integer(unique(df$Sample))),
    breaks = c(min(df$Sample) - 1, unique(df$Sample))
  )
Option 2. Assembled plot through `patchwork`.
library(tidyverse)
library(scales) # To get nice looking x-axis breaks
library(ggpmisc) # To help with optimal position for the regression formula
library(patchwork) # To assemble a composite plot
# Left panel: a single boxplot of all Fe values under the "All Data" category.
# Layers are chained with `+` so that p_boxplot holds the complete plot.
p_boxplot <-
  ggplot(data = df, aes(x = Sample, y = Fe)) +
  geom_boxplot(data = df, mapping = aes(x = "All Data", y = Fe)) +
  labs(
    subtitle = "Box Plot",
    x = "",
    y = "Homogeneity Test Fe %"
  )
# Right panel: points coloured by sample, the fitted regression line and the
# equation label `a` (computed earlier with lm_eqn(df)), positioned by
# stat_poly_eq().
p_scatter <-
  ggplot(data = df, aes(x = Sample, y = Fe)) +
  geom_point(
    mapping = aes(x = Sample, y = Fe, color = as.factor(Sample))
  ) +
  # The dangling empty argument after method = "lm" has been removed.
  stat_poly_eq(
    formula = y ~ x, mapping = aes(label = a), parse = TRUE, method = "lm"
  ) +
  geom_smooth(method = lm, se = FALSE) +
  theme(legend.position = "none") +
  labs(
    subtitle = "Scatter Plot",
    x = "Sample No", y = ""
  ) +
  # unique() avoids duplicated breaks: every sample appears twice in df.
  scale_x_continuous(
    labels = as.integer(unique(df$Sample)),
    breaks = unique(df$Sample)
  )
# Place the two panels side by side with patchwork: the boxplot takes 1 part
# of the width, the scatter plot 5 parts, under a shared title.
p_boxplot + p_scatter +
  plot_layout(widths = c(1, 5)) +
  plot_annotation(title = "Lab Test Order Fe")