Home > front end >  Combine scatter, boxplot and linear regression line on one chart ggplot R
Combine scatter, boxplot and linear regression line on one chart ggplot R

Time:10-02

I want to place a boxplot, a scatter plot and a linear regression line for the scatter points on one chart using ggplot2. I am able to get two of the three onto one chart, but I have trouble combining the regression line with the boxplot.

A sample of my data below

# Sample data: samples 2113-2124, each with two replicates ("A" and "B").
# SiO2 and Al2O3 are kept as character strings, exactly as in the original dump.
df <- data.frame(
  Sample = as.numeric(rep(2113:2124, each = 2)),
  Rep_No = rep(c("A", "B"), times = 12),
  Fe = c(
    57.24, 57.12, 57.2, 57.13, 57.21, 57.14, 57.16, 57.31,
    57.11, 57.18, 57.21, 57.12, 57.14, 57.17, 57.1, 57.18,
    57, 57.06, 57.13, 57.09, 57.17, 57.23, 57.09, 57.1
  ),
  SiO2 = c(
    "6.85", "6.83", "6.7", "6.69", "6.83", "6.8", "6.76", "6.79",
    "6.82", "6.82", "6.8", "6.86", "6.9", "6.82", "6.81", "6.83",
    "6.79", "6.76", "6.8", "6.88", "6.83", "6.79", "6.8", "6.83"
  ),
  Al2O3 = c(
    "2.9", "2.88", "2.88", "2.88", "2.92", "2.9", "2.89", "2.87",
    "2.9", "2.89", "2.9", "2.89", "2.89", "2.88", "2.89", "2.91",
    "2.91", "2.91", "2.9", "2.9", "2.91", "2.91", "2.88", "2.86"
  ),
  stringsAsFactors = FALSE
)

My code thus far

# Convenience copies of the predictor (Sample) and response (Fe) columns.
x <- df[["Sample"]]
y <- df[["Fe"]]

#' Build a plotmath label for the linear fit of Fe on Sample
#'
#' Fits `lm(Fe ~ Sample)` on the supplied data frame and formats the
#' intercept, slope and R-squared as a deparsed plotmath expression, suitable
#' for `geom_text(..., parse = TRUE)`.
#'
#' @param df A data frame with numeric columns `Sample` and `Fe`.
#' @param ... Ignored; kept so existing calls with extra arguments still work.
#' @return A length-one character vector holding the plotmath expression.
lm_eqn <- function(df, ...) {
  # Fit on the columns of `df` itself instead of global `x`/`y` vectors, so
  # the label always describes the data frame that was actually passed in.
  m <- lm(Fe ~ Sample, data = df)
  # The `+` between intercept and slope term is required: without it the
  # expression does not parse.
  eq <- substitute(
    italic(y) == a + b %.% italic(x) * "," ~ ~italic(r)^2 ~ "=" ~ r2,
    list(
      a = format(unname(coef(m)[1]), digits = 2),
      b = format(unname(coef(m)[2]), digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.character(as.expression(eq))
}

# Build the regression label once; the plotting code below passes it as `label = a`.
a <- lm_eqn(df)


# Box plot of all Fe values next to the per-sample points on one discrete
# x-axis. The layers must be chained with `+`; without it only the empty
# `ggplot()` canvas is assigned to `p`.
p <- df %>%
  mutate(Sample = factor(Sample)) %>%
  ggplot() +
  geom_boxplot(mapping = aes(x = "All Data", y = Fe)) +
  geom_point(mapping = aes(x = Sample, y = Fe, color = Sample)) +
  ggtitle("Lab Test Order Fe") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(legend.position = "none") +
  xlab(label = "Sample No") +
  ylab("Homogeneity Test Fe %")
p

enter image description here

and my code to get linear trend line

# Scatter plot of Fe against Sample with a least-squares trend line and the
# regression label `a` placed at fixed coordinates. Layers are chained with
# `+`.
p2 <- df %>%
  ggplot(aes(x = Sample, y = Fe)) +
  geom_point(mapping = aes(x = Sample, y = Fe)) +
  geom_smooth(method = lm, se = FALSE) +
  # "none" is the documented spelling for hiding the legend.
  theme(legend.position = "none") +
  geom_text(
    x = 2115, y = 57.05,
    check_overlap = TRUE, # TRUE rather than the reassignable shorthand T
    label = a, parse = TRUE
  )

p2

enter image description here

How can I get all three onto the same chart? I would also like to put the boxplot first, maintain the colours for the points, and have the text for the regression line placed in an optimal position automatically rather than setting the coordinates for placement by hand.

Any help appreciated.

CodePudding user response:

I suggest two options. The first, with the help of the scales and ggpmisc packages, gets everything into a single plot/frame; this is literally what you asked for. The second, with the help of patchwork, produces two aligned plots: one with the boxplot, the other with the scatter plot and regression line.

Option 1. All bundled together.

library(tidyverse)
library(scales)  # To get nice looking x-axis breaks
library(ggpmisc) # To help with optimal position for the regression formula

  # Single-panel version: points, regression label, trend line and a box plot
  # drawn one unit to the left of the smallest Sample value, which the x scale
  # then labels "All Data". Layers are chained with `+`.
  ggplot(data = df, aes(x = Sample, y = Fe)) +
    geom_point(mapping = aes(x = Sample, y = Fe, color = as.factor(Sample))) +
    stat_poly_eq(
      formula = y ~ x, mapping = aes(label = a), parse = TRUE,
      method = "lm", hjust = -0.35
    ) +
    geom_smooth(method = lm, se = FALSE) +
    geom_boxplot(mapping = aes(x = min(Sample) - 1, y = Fe)) +
    # "none" is the documented spelling for hiding the legend.
    theme(legend.position = "none") +
    labs(
      title = "Lab Test Order Fe",
      x = "Sample No",
      y = "Homogeneity Test Fe %"
    ) +
    scale_x_continuous(
      labels = c("All Data", as.integer(df$Sample)),
      breaks = c(min(df$Sample) - 1, df$Sample)
    )

enter image description here

Option 2. Assembled plot through patchwork.

library(tidyverse)
library(scales)    # To get nice looking x-axis breaks
library(ggpmisc)   # To help with optimal position for the regression formula
library(patchwork) # To assemble a composite plot

# Left panel: one box plot of all Fe values under the label "All Data".
# Layers are chained with `+`.
p_boxplot <-
  ggplot(data = df, aes(x = Sample, y = Fe)) +
  geom_boxplot(data = df, mapping = aes(x = "All Data", y = Fe)) +
  labs(
    subtitle = "Box Plot",
    x = "",
    y = "Homogeneity Test Fe %"
  )

# Right panel: per-sample points coloured by sample, the regression label and
# the least-squares trend line. Layers are chained with `+`.
p_scatter <-
  ggplot(data = df, aes(x = Sample, y = Fe)) +
  geom_point(mapping = aes(x = Sample, y = Fe, color = as.factor(Sample))) +
  # The trailing empty argument after `method = "lm"` has been dropped.
  stat_poly_eq(
    formula = y ~ x, mapping = aes(label = a), parse = TRUE, method = "lm"
  ) +
  geom_smooth(method = lm, se = FALSE) +
  # "none" is the documented spelling for hiding the legend.
  theme(legend.position = "none") +
  labs(
    subtitle = "Scatter Plot",
    x = "Sample No", y = ""
  ) +
  scale_x_continuous(
    labels = as.integer(df$Sample),
    breaks = df$Sample
  )


# Place the two panels side by side (patchwork `+`), giving the scatter panel
# five times the width of the box plot, under a shared title.
p_boxplot + p_scatter +
  plot_layout(widths = c(1, 5)) +
  plot_annotation(title = "Lab Test Order Fe")

enter image description here

  • Related