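I have a data frame and I want to make a boxplot of each column except the first two. The first two columns are not plotted as values, but they are still needed for other purposes (for example, SAMPLE_NO is the grouping on the x-axis).
My data frame is below: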
# Example data: 10 samples with 5 repeat measurements each (50 rows).
# SAMPLE_NO is a factor, Repeat_No an integer, and the remaining four
# columns hold the measured values. Values are laid out one sample per line.
df <- data.frame(
  SAMPLE_NO = factor(
    rep(1:10, each = 5L),
    labels = c("137380", "137796", "137926", "138180", "138217",
               "138233", "138569", "138978", "139128", "139137")
  ),
  Repeat_No = rep(1:5, times = 10L),
  Fe = c(
    59.78, 59.59, 59.76, 59.49, 59.72,
    59.74, 59.52, 59.85, 59.73, 59.61,
    59.71, 59.76, 59.73, 59.56, 59.82,
    59.71, 59.6, 59.86, 59.78, 59.84,
    59.49, 59.75, 59.71, 59.73, 59.51,
    59.75, 59.77, 59.83, 59.55, 59.6,
    59.68, 59.57, 59.73, 59.76, 59.58,
    59.48, 59.79, 59.73, 59.64, 59.85,
    59.75, 59.68, 59.63, 59.73, 59.78,
    59.59, 59.69, 59.79, 59.69, 59.7
  ),
  SiO2 = c(
    5.1, 5.14, 5.11, 5.14, 5.14,
    5.12, 5.13, 5.11, 5.09, 5.09,
    5.12, 5.15, 5.13, 5.11, 5.14,
    5.11, 5.1, 5.12, 5.1, 5.11,
    5.12, 5.15, 5.16, 5.11, 5.12,
    5.17, 5.11, 5.15, 5.12, 5.15,
    5.12, 5.17, 5.12, 5.13, 5.12,
    5.12, 5.1, 5.1, 5.14, 5.1,
    5.11, 5.1, 5.14, 5.15, 5.07,
    5.1, 5.1, 5.11, 5.12, 5.11
  ),
  Al2O3 = c(
    2.08, 2.09, 2.09, 2.1, 2.06,
    2.08, 2.07, 2.09, 2.06, 2.1,
    2.09, 2.11, 2.07, 2.07, 2.09,
    2.06, 2.07, 2.09, 2.1, 2.08,
    2.08, 2.09, 2.08, 2.07, 2.07,
    2.07, 2.1, 2.07, 2.07, 2.1,
    2.07, 2.11, 2.09, 2.07, 2.11,
    2.06, 2.09, 2.09, 2.08, 2.05,
    2.08, 2.07, 2.08, 2.1, 2.08,
    2.08, 2.1, 2.05, 2.07, 2.08
  ),
  TiO2 = rep(0.14, times = 50L)
)
I also have a list of dataframes (one element per measured column) holding the values that I want to draw as horizontal lines on each boxplot, namely the mean (hm) and the standard deviation (hsd).
My list of dataframes:
# Per-column summary statistics: one 1 x 2 numeric matrix per measured
# column, with columns "hm" and "hsd".
df2 <- lapply(
  list(
    Fe    = c(59.6938, 0.103625897007515),
    SiO2  = c(5.121, 0.0214998813475606),
    Al2O3 = c(2.0812, 0.015069905136275),
    TiO2  = c(0.14, 0)
  ),
  function(stats) {
    matrix(stats, nrow = 1L, dimnames = list(NULL, c("hm", "hsd")))
  }
)
and my code to make the box plots
# Sample numbers in sorted order (SAMPLE_NO is a column of `df`).
plotorder <- sort(unique(df$SAMPLE_NO))

# One boxplot per element of `df2`. Inside the lambda, `.x` is the element
# (its hm / hsd values) and `.y` is the element's name ("Fe", "SiO2", ...).
# This is the code being asked about: the y-limits and the upper line are
# computed the way the question describes, which is what puts things in the
# wrong place.
list_plot_box <- imap(df2, ~{
  ggplot(df,
         mapping = aes(x = SAMPLE_NO, y = .data[[.y]], color = SAMPLE_NO)) +
    stat_boxplot(geom = 'errorbar') +
    coord_cartesian(ylim = as.numeric(c(min(.y), max(.y)))) +
    geom_boxplot(outlier.shape = NA) +
    geom_hline(linetype = 'dashed', color = 'blue', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]))) + #mean
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) - #mean
                               (as.numeric(.x[[2]])) * 3)) + # subtract SD
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[2]]) + #mean value
                               (as.numeric(.x[[2]])) * 3)) + # Add SD
    ggtitle(paste0(.y, " Homogeniety Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Sample No") +
    ylab(paste0(.y, ' values %'))
})
list_plot_box
[![enter image description here][1]][1]
This produces a boxplot for three of the columns but not the first one, and the lines are in the wrong location on each chart. Something is being mapped incorrectly when the values are looked up, but I am not sure how to fix it.
CodePudding user response:
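The `c(min(.y), max(.y))` is incorrect because `.y` refers to the names of `df2`, i.e. "Fe", ..., not to the data values. Use `df[[.y]]` to get the column itself. In addition, the upper line should start from the mean, `.x[[1]]`, rather than from `.x[[2]]`: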
# One boxplot per element of `df2`. Inside the lambda, `.x` is the element
# (`.x[[1]]` = hm, `.x[[2]]` = hsd) and `.y` is its name, which is also the
# name of the column of `df` to plot.
list_plot_box <- imap(df2, ~{
  ggplot(df,
         mapping = aes(x = SAMPLE_NO, y = .data[[.y]], color = SAMPLE_NO)) +
    stat_boxplot(geom = 'errorbar') +
    # y-limits are the range of the plotted column.
    coord_cartesian(ylim = as.numeric(c(min(df[[.y]]), max(df[[.y]])))) +
    geom_boxplot(outlier.shape = NA) +
    geom_hline(linetype = 'dashed', color = 'blue', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]))) + # mean
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) - # mean
                               (as.numeric(.x[[2]])) * 3)) + # subtract 3 SD
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) + # mean
                               (as.numeric(.x[[2]])) * 3)) + # add 3 SD
    ggtitle(paste0(.y, " Homogeniety Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Sample No") +
    ylab(paste0(.y, ' values %'))
})
Checking the output of the first list element:
> list_plot_box[[1]]
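Regarding the lines that are not shown: the y-limits are now the range of the data, and mean ± 3 SD falls outside that range (for Fe, about 59.38 and 60.00, against data limits of 59.48 and 59.86), so those lines are cut off. If we remove the `* 3`, the lines fall inside the limits and are shown: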
# Same plots as before, but with the red lines at mean +/- 1 SD so that
# they fall inside the y-limits taken from the data.
# `.x[[1]]` = hm, `.x[[2]]` = hsd, `.y` = element / column name.
list_plot_box <- imap(df2, ~{
  ggplot(df,
         mapping = aes(x = SAMPLE_NO, y = .data[[.y]], color = SAMPLE_NO)) +
    stat_boxplot(geom = 'errorbar') +
    coord_cartesian(ylim = as.numeric(c(min(df[[.y]]), max(df[[.y]])))) +
    geom_boxplot(outlier.shape = NA) +
    geom_hline(linetype = 'dashed', color = 'blue', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]))) + # mean
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) - # mean
                               (as.numeric(.x[[2]])))) + # subtract SD
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]) + # mean
                               (as.numeric(.x[[2]])))) + # add SD
    ggtitle(paste0(.y, " Homogeniety Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Sample No") +
    ylab(paste0(.y, ' values %'))
})
Checking the first plot again:
list_plot_box[[1]]
We can put all the plots on a single page:
library(ggpubr)
# Lay the plots in `list_plot_box` out on a 2 x 2 grid.
ggarrange(
  plotlist = list_plot_box,
  nrow = 2,
  ncol = 2
)
CodePudding user response:
In the end I created a dataframe with the plotting parameters I needed and then mapped them into ggplot to make each chart.
`df2` now prints as:
$Fe
hm hsd hmin hmax
[1,] 59.6938 0.1036259 59.38292 60.00468
$SiO2
hm hsd hmin hmax
[1,] 5.121 0.02149988 5.0565 5.1855
$Al2O3
hm hsd hmin hmax
[1,] 2.0812 0.01506991 2.03599 2.12641
$TiO2
hm hsd hmin hmax
[1,] 0.14 0 0.14 0.14
I then changed my main bit of code to:
# One boxplot per element of `df2`, using the precomputed plotting
# parameters. Each element is indexed by position here:
# `.x[[1]]` = hm, `.x[3]` = hmin, `.x[4]` = hmax (column order as printed
# above); `.y` is the element name and the column of `df` to plot.
list_plot_box <- imap(df2, ~{
  ggplot(df,
         mapping = aes(x = SAMPLE_NO, y = .data[[.y]], color = SAMPLE_NO)) +
    stat_boxplot(geom = 'errorbar') +
    # y-limits run from hmin to hmax.
    coord_cartesian(ylim = as.numeric(c(.x[3], .x[4]))) +
    geom_boxplot(outlier.shape = NA) +
    geom_hline(linetype = 'dashed', color = 'blue', size = 0.75,
               mapping = aes(yintercept = as.numeric(.x[[1]]))) + # mean
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = .x[4])) + # upper line (hmax)
    geom_hline(linetype = 'dashed', color = 'firebrick', size = 0.75,
               mapping = aes(yintercept = .x[3])) + # lower line (hmin)
    ggtitle(paste0(.y, " Homogeniety Box Plot")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab(label = "Sample No") +
    ylab(paste0(.y, ' values %'))
})
list_plot_box[[1]]