Home > Back-end >  Multiple boxplots from a list of dataframes R
Multiple boxplots from a list of dataframes R

Time:09-22

I have a dataframe and I want to make a boxplot of each column except for the first two, which will be used for other purposes.

My df below

# Example data: ten samples, five repeat measurements each, and four assay
# columns. Numeric values are laid out one sample (five repeats) per row.
df <- data.frame(
  SAMPLE_NO = factor(rep(
    c(
      "137380", "137796", "137926", "138180", "138217",
      "138233", "138569", "138978", "139128", "139137"
    ),
    each = 5
  )),
  Repeat_No = rep(1:5, times = 10),
  Fe = c(
    59.78, 59.59, 59.76, 59.49, 59.72,
    59.74, 59.52, 59.85, 59.73, 59.61,
    59.71, 59.76, 59.73, 59.56, 59.82,
    59.71, 59.6, 59.86, 59.78, 59.84,
    59.49, 59.75, 59.71, 59.73, 59.51,
    59.75, 59.77, 59.83, 59.55, 59.6,
    59.68, 59.57, 59.73, 59.76, 59.58,
    59.48, 59.79, 59.73, 59.64, 59.85,
    59.75, 59.68, 59.63, 59.73, 59.78,
    59.59, 59.69, 59.79, 59.69, 59.7
  ),
  SiO2 = c(
    5.1, 5.14, 5.11, 5.14, 5.14,
    5.12, 5.13, 5.11, 5.09, 5.09,
    5.12, 5.15, 5.13, 5.11, 5.14,
    5.11, 5.1, 5.12, 5.1, 5.11,
    5.12, 5.15, 5.16, 5.11, 5.12,
    5.17, 5.11, 5.15, 5.12, 5.15,
    5.12, 5.17, 5.12, 5.13, 5.12,
    5.12, 5.1, 5.1, 5.14, 5.1,
    5.11, 5.1, 5.14, 5.15, 5.07,
    5.1, 5.1, 5.11, 5.12, 5.11
  ),
  Al2O3 = c(
    2.08, 2.09, 2.09, 2.1, 2.06,
    2.08, 2.07, 2.09, 2.06, 2.1,
    2.09, 2.11, 2.07, 2.07, 2.09,
    2.06, 2.07, 2.09, 2.1, 2.08,
    2.08, 2.09, 2.08, 2.07, 2.07,
    2.07, 2.1, 2.07, 2.07, 2.1,
    2.07, 2.11, 2.09, 2.07, 2.11,
    2.06, 2.09, 2.09, 2.08, 2.05,
    2.08, 2.07, 2.08, 2.1, 2.08,
    2.08, 2.1, 2.05, 2.07, 2.08
  ),
  # Every TiO2 reading is identical.
  TiO2 = rep(0.14, 50)
)

I also have a list of dataframes with some values that I want to use to draw lines on each boxplots, namely the mean and sd.

my list of dataframes

# Summary statistics per analyte: each element is a 1 x 2 matrix whose
# columns are "hm" (mean) and "hsd" (standard deviation).
df2 <- list(
  Fe = matrix(
    c(59.6938, 0.103625897007515),
    nrow = 1, dimnames = list(NULL, c("hm", "hsd"))
  ),
  SiO2 = matrix(
    c(5.121, 0.0214998813475606),
    nrow = 1, dimnames = list(NULL, c("hm", "hsd"))
  ),
  Al2O3 = matrix(
    c(2.0812, 0.015069905136275),
    nrow = 1, dimnames = list(NULL, c("hm", "hsd"))
  ),
  TiO2 = matrix(
    c(0.14, 0),
    nrow = 1, dimnames = list(NULL, c("hm", "hsd"))
  )
)

and my code to make the box plots

plotorder <- sort(unique(df2$SAMPLE_NO))

# imap() calls the lambda once per element of df2:
#   .x is the element (a 1 x 2 matrix with columns hm, hsd), .y is its name.
imap(df2, ~{
  ggplot(df, outlier.shape = NA, 
         mapping = aes(x = SAMPLE_NO, y = .data[[.y]], color = SAMPLE_NO)) +
    stat_boxplot(geom = 'errorbar') +
    coord_cartesian(ylim = as.numeric(c(min(.y),max(.y)))) +
    geom_boxplot(outlier.shape = NA) +
    geom_hline(linetype = 'dashed', color = 'blue', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]))) + #mean
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) #mean 
                             - (as.numeric(.x[[2]])) * 3)) + # subtract SD
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[2]]) #mean value 
                             + (as.numeric(.x[[2]])) * 3)) + # Add SD
    ggtitle(paste0(.y, " Homogeniety Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Sample No") +
    ylab(paste0(.y, ' values %'))
}) -> list_plot_box
list_plot_box

[![enter image description here][1]][1]


which produces a boxplot for three of the columns but not the first one, and the lines are in the wrong location on each chart. Things are being mapped incorrectly when the values are looked up, but I am not sure how to fix it.

CodePudding user response:

The c(min(.y), max(.y)) is incorrect as the .y refers to the names of the 'df2' i.e. "Fe", ...

# One boxplot per analyte. For each element of df2, .x is its 1 x 2 matrix
# (hm = mean, hsd = standard deviation) and .y is its name, which is also
# the name of the matching column in df.
list_plot_box <- imap(df2, ~{
  ggplot(df, mapping = aes(x = SAMPLE_NO, y = .data[[.y]],
                           color = SAMPLE_NO)) +
    stat_boxplot(geom = 'errorbar') +
    # Limits come from the data column named by .y, not from the name itself.
    coord_cartesian(ylim = as.numeric(c(min(df[[.y]]), max(df[[.y]])))) +
    geom_boxplot(outlier.shape = NA) +
    geom_hline(linetype = 'dashed', color = 'blue', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]))) + # mean
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) # mean
                             - (as.numeric(.x[[2]])) * 3)) + # subtract 3 SD
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) # mean
                             + (as.numeric(.x[[2]])) * 3)) + # add 3 SD
    ggtitle(paste0(.y, " Homogeniety Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Sample No") +
    ylab(paste0(.y, ' values %'))
})

-checking the output of first list element

> list_plot_box[[1]]

enter image description here


Regarding the lines that are not shown: the y-limits are set to the range of the data, and the lines at mean ± 3 SD fall just outside that range, so they are not drawn. If we remove the `* 3` (so the lines sit at mean ± 1 SD), they are shown:

# Same plots as before, but the firebrick lines are drawn at mean +/- 1 SD so
# that they fall inside the data-range y-limits.
# .x is the 1 x 2 (hm, hsd) matrix for one analyte; .y is the analyte's name.
list_plot_box <- imap(df2, ~{
  ggplot(df, mapping = aes(x = SAMPLE_NO, y = .data[[.y]],
                           color = SAMPLE_NO)) +
    stat_boxplot(geom = 'errorbar') +
    coord_cartesian(ylim = as.numeric(c(min(df[[.y]]), max(df[[.y]])))) +
    geom_boxplot(outlier.shape = NA) +
    geom_hline(linetype = 'dashed', color = 'blue', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]))) + # mean
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) # mean
                             - (as.numeric(.x[[2]])))) + # subtract SD
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) # mean
                             + (as.numeric(.x[[2]])))) + # add SD
    ggtitle(paste0(.y, " Homogeniety Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Sample No") +
    ylab(paste0(.y, ' values %'))
})

-checking

list_plot_box[[1]]

enter image description here


We can get all the plots in single page

library(ggpubr)
# Lay the plots in list_plot_box out on a single 2 x 2 page.
ggarrange(plotlist = list_plot_box, nrow = 2, ncol = 2)

CodePudding user response:

In the end I created a dataframe with the plotting parameters I needed and then mapped them in ggplot to make each chart.


> df2
$Fe
          hm       hsd     hmin     hmax
[1,] 59.6938 0.1036259 59.38292 60.00468

$SiO2
        hm        hsd   hmin   hmax
[1,] 5.121 0.02149988 5.0565 5.1855

$Al2O3
         hm        hsd    hmin    hmax
[1,] 2.0812 0.01506991 2.03599 2.12641

$TiO2
       hm hsd hmin hmax
[1,] 0.14   0 0.14 0.14

then changed my main bit of code as


# One boxplot per analyte, driven by the precomputed plotting parameters.
# For each element of df2, .x is a 1 x 4 matrix with columns
# hm (mean), hsd (SD), hmin (mean - 3 SD) and hmax (mean + 3 SD);
# .y is the element's name, which is also the matching column name in df.
list_plot_box <- imap(df2, ~{
  ggplot(df, mapping = aes(x = SAMPLE_NO, y = .data[[.y]],
                           color = SAMPLE_NO)) +
    stat_boxplot(geom = 'errorbar') +
    # y-axis spans hmin .. hmax so both limit lines are always in view
    coord_cartesian(ylim = as.numeric(c(.x[3], .x[4]))) +
    geom_boxplot(outlier.shape = NA) +
    geom_hline(linetype = 'dashed', color = 'blue', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]))) + # mean
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = .x[4])) + # hmax: mean + 3 SD
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = .x[3])) + # hmin: mean - 3 SD
    ggtitle(paste0(.y, " Homogeniety Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Sample No") +
    ylab(paste0(.y, ' values %'))
})

list_plot_box[[1]]


enter image description here

  • Related