I want to make boxplots for 80 csv files. The filenames look something like this: NY_two.csv, CA_three.csv, FL_three.csv, ..., NY_ten.csv.
Desirables include
(I) boxplot (export as pdf, 2 graphs per page)
See below for the 3 out of the 80 csv files
# Every one of the 80 files shares the same columns: state, dept, year, revenue.
# Run the code below to build 3 sample data frames that stand in for 3 of the
# 80 csv files.

# Dataset 1: NY, year "two"
state <- rep("NY", times = 20)
dept <- rep(c("energy", "works", "fin", "parks", "trans"), each = 4)
year <- rep("two", times = 20)
revenue <- c(
  1212.9, 1253, 1244.4, 5123.5,
  1312, 3134, 515.8, 2449.9,
  3221.6, 3132.5, 2235.09, 2239.01,
  3235.01, 5223.01, 4235.6, 2204.5,
  2315.5, 6114, 4512, 3514.2
)
NY_two <- data.frame(state = state, dept = dept, year = year, revenue = revenue)

# Dataset 2: FL, year "three"
state <- rep("FL", times = 20)
dept <- rep(c("energy", "works", "fin", "parks", "trans"), each = 4)
year <- rep("three", times = 20)
revenue <- c(
  112.9, 123, 124, 523.5,
  112, 334, 55, 449,
  221.6, 332, 235, 239,
  235, 223, 235.6, 204,
  315.5, 614, 512, 514.2
)
FL_three <- data.frame(state = state, dept = dept, year = year, revenue = revenue)

# Dataset 3: CA, year "three"
state <- rep("CA", times = 20)
dept <- rep(c("energy", "works", "fin", "parks", "trans"), each = 4)
year <- rep("three", times = 20)
revenue <- c(
  1102.9, 1023, 1024, 5203.5,
  1012, 3034, 505, 4049,
  2021.6, 3032, 2035, 2039,
  2035, 2023, 2035.6, 2004,
  3015.5, 6014, 5012, 5014.2
)
CA_three <- data.frame(state = state, dept = dept, year = year, revenue = revenue)
# Export the datasets above as csv files (imagine them as 3 of the 80 files).
# Replace the directory in each write.csv() path below with the folder that
# should collect the datasets. row.names = FALSE keeps the row index out of
# the file.
write.csv(NY_two,"C:\\Path to export the DataFrame\\NY_two.csv", row.names = FALSE)
write.csv(FL_three,"C:\\Path to export the DataFrame\\FL_three.csv", row.names = FALSE)
write.csv(CA_three,"C:\\Path to export the DataFrame\\CA_three.csv", row.names = FALSE)
My attempt
# Desirables include
#(I) plot the boxplot & export as pdf file (2 graphs per page)
######################################################################################
library(ggplot2)
# import all csv files in the folder
files <- list.files("C:\\path to the files\\", pattern="*.csv", full.names = T)
files
# set the pdf file path, I want two plots per page
pdf(file = "/Users/Desktop/boxplot_anova.pdf")
#specify to save plots in 2x2 grid
par(mfrow = c(2,2))
out <- lapply(1:length(files), function(idx) {
  # read the file
  this_data <- read.csv(files[idx], header = TRUE) # choose TRUE/FALSE accordingly
  # boxplot using ggplot (layers are chained with `+`)
  p <- ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) +
    stat_boxplot(geom = "errorbar", width = 0.15) +
    geom_boxplot(alpha = 0.8, # Fill transparency
                 colour = "#474747", # Border color
                 outlier.colour = 1) +
    theme(panel.background = element_blank()) +
    ggtitle("Title using each file name ")
  p
  dev.off()
})
out
Kindly share your code, thanx in advance
CodePudding user response:
There are a few separate issues that might cause problems in your code:
- Plots generated inside a function might not be drawn on the device automatically (use `plot(p)` or `print(p)` instead of `p`).
- You have to open the pdf device before your loop and close it after, not within the loop. E.g. this would work in principle:
# Open the pdf device once, before any plot is drawn.
pdf(file = "boxplot_anova.pdf")
#specify to save plots in 2x2 grid
par(mfrow = c(2,2))
# Draw one boxplot per csv file; each explicit plot() call writes a page to
# the open pdf device. seq_along() is used so an empty `files` vector yields
# zero iterations instead of iterating over c(1, 0).
out <- lapply(seq_along(files), function(idx) {
  # read the file
  this_data <- read.csv(files[idx], header = TRUE) # choose TRUE/FALSE accordingly
  # boxplot using ggplot (layers are chained with `+`)
  p <- ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) +
    stat_boxplot(geom = "errorbar", width = 0.15) +
    geom_boxplot(alpha = 0.8, # Fill transparency
                 colour = "#474747", # Border color
                 outlier.colour = 1) +
    theme(panel.background = element_blank()) +
    ggtitle("Title using each file name ")
  # A bare `p` inside a function is not auto-printed; draw it explicitly.
  plot(p)
})
# `out` is deliberately not printed here: auto-printing a list of ggplot
# objects would draw every plot a second time into the open device.
# Close the device only after all plots have been drawn.
dev.off()
- The code above will not put several plots on the same page (up to 4, as you would expect from `par(mfrow = c(2,2))`), since `ggplot2` does not use base graphics. Use e.g. the `plot_grid` function from the `cowplot` package to achieve this. To generate multiple pages, split the plot list into groups of the matching number of elements, e.g. for 4 plots per page:
# Build one ggplot boxplot per csv file and collect them in a list.
res <- lapply(files, function(x) {
  this_data <- read.csv(x, header = TRUE) # choose TRUE/FALSE accordingly
  # boxplot using ggplot (layers are chained with `+`)
  ggplot(this_data, aes(x = dept, y = revenue, fill = dept)) +
    stat_boxplot(geom = "errorbar", width = 0.15) +
    geom_boxplot(alpha = 0.8, # Fill transparency
                 colour = "#474747", # Border color
                 outlier.colour = 1) +
    theme(panel.background = element_blank()) +
    # Title is the file name without directory or extension, e.g. "NY_two".
    ggtitle(tools::file_path_sans_ext(basename(x)))
})
# set the pdf file path; this example puts 4 plots (2 x 2) on each page
pdf(file = "boxplot_anova.pdf")
# Split the plot list into consecutive groups of 4, one group per page.
pages <- split(res, ceiling(seq_along(res) / 4))
for (page in pages) {
  # print() is required: a ggplot/cowplot object created inside a loop is
  # never auto-printed, so without it nothing is written to the pdf device.
  print(cowplot::plot_grid(plotlist = page, ncol = 2, nrow = 2))
}
dev.off()