Home > Net >  How to avoid repeating code in a function to draw a density histogram and a simple histogram with gg
How to avoid repeating code in a function to draw a density histogram and a simple histogram with gg

Time:08-12

I am trying to create a function that draws either a simple histogram or a density histogram, depending on the caller's choice.

This is the function that I have:

library(ggplot2)
library(dplyr)

#' Draw a histogram, optionally scaled to density with a density curve on top
#'
#' @param DF1 Data frame piped into `ggplot()`.
#' @param x Values mapped to the x axis; also passed to `pretty()` to compute
#'   the axis breaks. The default `Variable1` is only a placeholder that is not
#'   defined inside the function, so `x` should be supplied by the caller.
#' @param fill_col Values mapped to the fill aesthetic of the bars. The default
#'   `Variable2` is likewise a placeholder.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (in that order) to form the title.
#' @param bins Number of histogram bins.
#' @param col_border Colour of the bar outlines.
#' @param alpha Transparency of the bars.
#' @param Density_Curve The string "TRUE" draws a density-scaled histogram with
#'   a density curve; any other value draws a plain count histogram.
#' @param colour_curve,lwd Colour and line width of the density curve.
#' @param legend_title Title of the fill legend.
#' @return The assembled ggplot object.
hist_function <- function(DF1, x = Variable1, fill_col = Variable2, x_axis = "x_axis", 
                          y_axis = "ylab", title_plot, var_name, bins=5, col_border="black", alpha=0.2, 
                          Density_Curve="TRUE", colour_curve="red", legend_title="title", lwd=1.2){
  
  # ggplot layers must be chained with `+`; without it only the first
  # expression is assigned to `p` and the remaining layers are discarded.
  if(Density_Curve == "TRUE"){
    p <- DF1 %>% 
      ggplot(aes(x)) +
      geom_histogram(aes(y=..density.., fill = fill_col), bins=bins, col=col_border, alpha=alpha) +
      geom_density(lwd = lwd, colour=colour_curve, show.legend = FALSE) +
      scale_x_continuous(breaks=pretty(x, n=10)) +
      xlab(x_axis) +
      ylab(y_axis) +
      ggtitle(paste0(title_plot, var_name)) +
      guides(fill=guide_legend(title=legend_title)) +
      theme(strip.text.y = element_blank())
    
   }else{
    p <- DF1 %>%
      ggplot(aes(x)) +
      geom_histogram(aes(fill=fill_col), bins=bins, col=col_border, alpha=alpha) +
      scale_x_continuous(breaks=pretty(x, n=10)) +
      xlab(x_axis) +
      ylab(y_axis) +
      ggtitle(paste0(title_plot, var_name)) +
      guides(fill=guide_legend(title=legend_title)) +
      theme(strip.text.y = element_blank())
  }
  return(p)
}

# Example data used by the calls below.
mydf <- iris

Using the function:

# Draw a simple histogram

hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "FALSE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Frequency",
  title_plot = "Histogram of "
)

image 1

# Draw a density histogram

hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "TRUE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Density",
  title_plot = "Distribution of "
)

image 2

The function works perfectly fine, although I think that I am repeating a lot of code since most of the code to draw the plot is shared for both. The only difference is here:

# density histogram: bars scaled to density plus a density curve
geom_histogram(aes(y=..density.., fill = fill_col), bins=bins, col=col_border, alpha=alpha) +
  geom_density(lwd = lwd, colour=colour_curve, show.legend = FALSE) 

VS

# simple histogram: bars show counts, no density curve
geom_histogram(aes(fill=fill_col), bins=bins, col=col_border, alpha=alpha) 

Does anybody have an idea about how to reduce the code?

Thanks in advance

CodePudding user response:

Here is the solution. Thanks for the idea, @Limey!

library(ggplot2)
library(dplyr)

#' Build the part of the plot shared by both histogram variants
#'
#' Returns a ggplot object with scales, labels, title and legend settings but
#' no geom layer; the caller adds `geom_histogram()` / `geom_density()` with `+`.
#'
#' @param DF1 Data frame piped into `ggplot()`.
#' @param x Values mapped to the x axis; also passed to `pretty()` to compute
#'   the axis breaks. The default `Variable1` is only a placeholder that is not
#'   defined inside the function, so `x` should be supplied by the caller.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (in that order) to form the title.
#' @param Density_Curve Not used in the body; kept so existing calls that pass
#'   it keep working.
#' @param legend_title Title of the fill legend.
#' @return A ggplot object without geom layers.
hist_function <- function(DF1, x = Variable1, x_axis = "x_axis", 
                          y_axis = "ylab", title_plot, var_name,
                          Density_Curve="TRUE", legend_title="title"){
  # Components must be chained with `+`; otherwise only the last expression
  # (the theme) would be returned instead of the plot.
  DF1 %>% 
      ggplot(aes(x)) +
      scale_x_continuous(breaks=pretty(x, n=10)) +
      xlab(x_axis) +
      ylab(y_axis) +
      ggtitle(paste0(title_plot, var_name)) +
      guides(fill=guide_legend(title=legend_title)) +
      theme(strip.text.y = element_blank())
  
}

# Example data used by the calls below.
mydf <- iris

Density histogram (bars scaled to density, with a density curve)

# Build the base plot, then add the density-scaled histogram and the curve.
p <- hist_function(DF1=mydf, x=mydf$Sepal.Length, var_name = "Sepal.Length", x_axis = "Length", 
                   legend_title ="Species", y_axis ="Density",title_plot = "Distribution of ")
p <- p + geom_histogram(aes(y=..density.., fill = Species), bins=5, col="black", alpha=0.2) 
p <- p + geom_density(lwd = 1.2, colour="red", show.legend = FALSE) 
p

Simple histogram (bars show counts)

# Build the base plot, then add a plain count histogram.
p2 <- hist_function(DF1=mydf, x=mydf$Sepal.Length, var_name = "Sepal.Length", x_axis = "Length", 
                    legend_title ="Species", y_axis ="Frequency", title_plot = "Histogram of ")
p2 <- p2 + geom_histogram(aes(fill=Species), bins=5, col="black", alpha=0.2) 
p2

CodePudding user response:

You can define the geom layer conditionally so that the rest of the plot code does not have to be repeated. Note that when Density_Curve is TRUE the layers must be wrapped in a list, because more than one layer is added at once.

library(ggplot2)
library(dplyr)
    
    #' Draw a histogram, optionally scaled to density with a density curve on top
    #'
    #' The geom layer(s) are chosen first and then added to a single shared
    #' plot definition, so the common plot code appears only once.
    #'
    #' @param DF1 Data frame piped into `ggplot()`.
    #' @param x Values mapped to the x axis; also passed to `pretty()` to compute
    #'   the axis breaks. The default `Variable1` is only a placeholder that is
    #'   not defined inside the function, so `x` should be supplied by the caller.
    #' @param fill_col Values mapped to the fill aesthetic of the bars. The
    #'   default `Variable2` is likewise a placeholder.
    #' @param x_axis,y_axis Axis labels.
    #' @param title_plot,var_name Pasted together (in that order) to form the title.
    #' @param bins Number of histogram bins.
    #' @param col_border Colour of the bar outlines.
    #' @param alpha Transparency of the bars.
    #' @param Density_Curve Logical, or a string such as "TRUE"/"FALSE" that
    #'   `as.logical()` can interpret; selects the density variant when true.
    #' @param colour_curve,lwd Colour and line width of the density curve.
    #' @param legend_title Title of the fill legend.
    #' @return The assembled ggplot object.
    hist_function <- function(DF1, x = Variable1, fill_col = Variable2, x_axis = "x_axis", 
                              y_axis = "ylab", title_plot, var_name, bins=5, col_border="black", alpha=0.2, 
                              Density_Curve="TRUE", colour_curve="red", legend_title="title", lwd=1.2){
        
        # The flag is passed as a string in the examples; make the coercion that
        # `if` would otherwise perform implicitly explicit.
        if(as.logical(Density_Curve)){
            # Several layers can be added in one `+` step when wrapped in a list.
            geom_layer <- list(
                geom_histogram(aes(y=..density.., fill = fill_col), bins=bins, col=col_border, alpha=alpha),
                geom_density(lwd = lwd, colour=colour_curve, show.legend = FALSE)
            )
        } else {
            geom_layer <- geom_histogram(aes(fill=fill_col), bins=bins, col=col_border, alpha=alpha) 
        }
        
        # Components must be chained with `+`; otherwise only the last
        # expression would be returned instead of the plot.
        DF1 %>% 
            ggplot(aes(x)) +
            geom_layer +
            scale_x_continuous(breaks=pretty(x, n=10)) +
            xlab(x_axis) +
            ylab(y_axis) +
            ggtitle(paste0(title_plot, var_name)) +
            guides(fill=guide_legend(title=legend_title)) 
        
    }
    
    # Example data used by the calls below.
    mydf <- iris
    
# Simple histogram: bars show counts.
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "FALSE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Frequency",
  title_plot = "Histogram of "
)
# Density histogram: bars scaled to density, with a density curve.
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "TRUE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Density",
  title_plot = "Distribution of "
)

Created on 2022-08-12 by the reprex package (v2.0.1)

  • Related