Home > Back-end >  Make a separate output containing 4 boxplots for each column in a dataframe
Make a separate output containing 4 boxplots for each column in a dataframe

Time:06-28

I have a dataframe, df, with 15 different data columns. There are 1162 rows and each row is part of one of four groups. Here is a snippet of df:

   var1 var2 var3      var4  group
1   2.4   68  310 0.6192056 group1
2   2.7   66  305 0.7287633 group1
3   7.1   73  297 0.5105544 group1
4   2.4   69  295 0.6198897 group1
5   3.4   74  283 0.6399971 group1
6   3.7   73  310 0.7949742 group1
7   3.0   65  281 0.7783598 group1
8   4.6   76  304 0.6377989 group1
9   3.0   64  311 0.7020734 group1
10  3.0   62  307 0.7437677 group1
11  2.5   72  306 0.6953473 group1
12  2.5   74  309 0.5672044 group1
13  2.7   34  285 0.7941913 group1
14  3.2   74  335 0.6661719 group2
15  2.5   72  305 0.5500999 group2
16  2.5   69  336 0.8476085 group2
17  2.8   63  289 0.6646900 group2
18  1.3   52  270 0.6448988 group2
19  2.9   70  334 0.7123679 group2
20  2.5   69  308 0.5418768 group2
21  2.4   73  307 0.5108490 group2
22  2.3   45  290 0.8393499 group2
23  2.3   66  283 0.5413923 group2
24  2.5   31  285 0.7542300 group2
25  2.6   68  332 0.7872316 group2
26  3.8   49  292 0.7775340 group2
27  2.4   66  294 0.6445523 group3
28  2.5   76  314 0.7265084 group3
29  7.5   80  314 0.6255964 group3
30  2.3   70  303 0.4487150 group3
31  2.2   53  426 0.8706240 group3
32  2.5   42  295 0.2243240 group3
33  2.4   66  320 0.5563342 group3
34  2.9   59  289 0.6899643 group3
35  2.5   24  280 0.7351417 group3
36  2.5   59  281 0.6295490 group3
37  2.9   68  305 0.6660455 group3
38  2.6   64  267 0.5632927 group3
39  2.6   58  283 0.6810814 group3
40  2.4   71  290 0.6878466 group4
41  2.5   59  302 0.6488055 group4
42  2.5   69  306 0.6815277 group4
43  2.6   56  297 0.5262509 group4
44  2.9   65  302 0.6239796 group4
45  2.5   63  302 0.7206896 group4
46  2.4   66  306 0.5208803 group4
47  2.8   59  293 0.7122809 group4
48  2.6   72  306 0.7217113 group4
49  2.7   56  293 0.7916376 group4
50  2.5   52  292 0.7499101 group4
51  2.7   76  313 0.6795473 group4
52  3.2   80  299 0.2475061 group4

This is the code I'm using to generate a boxplot for each group for just one of the columns (var1 in this example):

# Boxplot of var1 for each group, with the individual observations jittered
# on top. ggplot2 layers must be chained with `+`; without it each call is a
# separate statement and only the empty base plot is produced.
df %>%
  ggplot(aes(x = group, y = var1)) +
  # outlier.size = 0 shrinks the boxplot's own outlier markers, since
  # geom_jitter() below already draws every observation.
  geom_boxplot(outlier.size = 0) +
  geom_jitter() +
  ggtitle("Var1") +
  xlab("Group") +
  ylab("Var1") +
  # Centre the title and slant the group labels on the x axis.
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

I want to create a separate output (either a separate pdf or just a separate page in a pdf) for each column in the dataframe. Each output will contain a boxplot for the 4 separate groups. I know that I could just copy and paste the above code and replace y=var1 with the other column names or I could use a for loop but is there an easier/more efficient way to do this?

CodePudding user response:

We can reshape the data to 'long' format and draw all the boxplots at once on a single page with facet_wrap:

library(dplyr)
library(tidyr)
library(ggplot2)
# Stack every `var*` column into name/value pairs (column name in `var_grp`,
# measurement in `value`), then draw one panel per original column.
# The ggplot2 layers are chained with `+`; the pipe only feeds the reshaped
# data into ggplot().
df %>%
  pivot_longer(cols = starts_with("var"), names_to = "var_grp") %>%
  ggplot(aes(x = group, y = value, fill = var_grp)) +
  geom_boxplot(outlier.size = 0) +
  geom_jitter() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # One facet per original column.
  facet_wrap(~ var_grp)

Or, if we want to save each plot as a separate 'pdf' file, we can loop over the column names:

# Write one PDF per data column, each holding the per-group boxplots for that
# column. Every column except the grouping column is plotted, so this works
# for any number of data columns rather than only the first four.
for (nm in setdiff(names(df), "group")) {
  # .data[[nm]] looks the column up by its name held in the string `nm`.
  # The layers must be joined with `+`, otherwise `p` is just the empty base
  # plot and the saved file contains no boxplots.
  p <- ggplot(df, aes(x = group, y = .data[[nm]])) +
    geom_boxplot(outlier.size = 0) +
    geom_jitter() +
    ggtitle(nm) +
    xlab("Group") +
    ylab(nm) +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1)
    )

  # Files are named after the column (e.g. var1.pdf) in the working directory.
  ggsave(file.path(getwd(), paste0(nm, ".pdf")), p)
}

-output

enter image description here

data

# Reproducible sample data: 52 rows, four measurement columns and a `group`
# label. Values are laid out one group (13 rows) per line; the repetitive
# group labels and row names are generated rather than spelled out.
df <- structure(
  list(
    var1 = c(
      2.4, 2.7, 7.1, 2.4, 3.4, 3.7, 3, 4.6, 3, 3, 2.5, 2.5, 2.7,
      3.2, 2.5, 2.5, 2.8, 1.3, 2.9, 2.5, 2.4, 2.3, 2.3, 2.5, 2.6, 3.8,
      2.4, 2.5, 7.5, 2.3, 2.2, 2.5, 2.4, 2.9, 2.5, 2.5, 2.9, 2.6, 2.6,
      2.4, 2.5, 2.5, 2.6, 2.9, 2.5, 2.4, 2.8, 2.6, 2.7, 2.5, 2.7, 3.2
    ),
    var2 = c(
      68L, 66L, 73L, 69L, 74L, 73L, 65L, 76L, 64L, 62L, 72L, 74L, 34L,
      74L, 72L, 69L, 63L, 52L, 70L, 69L, 73L, 45L, 66L, 31L, 68L, 49L,
      66L, 76L, 80L, 70L, 53L, 42L, 66L, 59L, 24L, 59L, 68L, 64L, 58L,
      71L, 59L, 69L, 56L, 65L, 63L, 66L, 59L, 72L, 56L, 52L, 76L, 80L
    ),
    var3 = c(
      310L, 305L, 297L, 295L, 283L, 310L, 281L, 304L, 311L, 307L, 306L,
      309L, 285L,
      335L, 305L, 336L, 289L, 270L, 334L, 308L, 307L, 290L, 283L, 285L,
      332L, 292L,
      294L, 314L, 314L, 303L, 426L, 295L, 320L, 289L, 280L, 281L, 305L,
      267L, 283L,
      290L, 302L, 306L, 297L, 302L, 302L, 306L, 293L, 306L, 293L, 292L,
      313L, 299L
    ),
    var4 = c(
      0.6192056, 0.7287633, 0.5105544, 0.6198897, 0.6399971, 0.7949742,
      0.7783598, 0.6377989, 0.7020734, 0.7437677, 0.6953473, 0.5672044,
      0.7941913,
      0.6661719, 0.5500999, 0.8476085, 0.66469, 0.6448988, 0.7123679,
      0.5418768, 0.510849, 0.8393499, 0.5413923, 0.75423, 0.7872316,
      0.777534,
      0.6445523, 0.7265084, 0.6255964, 0.448715, 0.870624, 0.224324,
      0.5563342, 0.6899643, 0.7351417, 0.629549, 0.6660455, 0.5632927,
      0.6810814,
      0.6878466, 0.6488055, 0.6815277, 0.5262509, 0.6239796, 0.7206896,
      0.5208803, 0.7122809, 0.7217113, 0.7916376, 0.7499101, 0.6795473,
      0.2475061
    ),
    # "group1" x 13, then "group2" x 13, and so on through "group4".
    group = rep(c("group1", "group2", "group3", "group4"), each = 13L)
  ),
  class = "data.frame",
  # Character row names "1" through "52".
  row.names = as.character(1:52)
)
  • Related