Home > Net >  Label a looped plot in R
Label a looped plot in R

Time:04-12

I am using a loop for plotting the histogram, group by different values of column_a at once which works perfectly fine. Here's the code:

par(ask=F)

for (i in unique(Data$column_a)) {
  dat <- Data[Data$column_a== i, ]
  plotdist(dat$count,histo = TRUE, demp = TRUE, discrete = T,
           pch = 16, col = "dodgerblue1") 
}

The only problem is that I cannot label each figure relative to column_a value to differentiate the figures from on another.

Thanks in advance for the help.

my data consists of number of losses with the column name of "count" with 3 distinct value in column_a(R,I,F)). and I want to plot the histogram of number of losses for these three values.

CodePudding user response:

A somewhat hacky solution would be to alter the function itself.

Below is the alteret function, which uncludes the title argument (and only works for the configuration you had in your question!)

plotdist_alt <- function (data, distr, para, histo = TRUE, breaks = "default", 
                      demp = FALSE, discrete, title = "default", ...) 
{
  def.par <- par(no.readonly = TRUE)
  if (missing(data) || !is.vector(data, mode = "numeric")) 
    stop("data must be a numeric vector")
  if ((missing(distr) & !missing(para)) || (missing(distr) & 
                                            !missing(para))) 
    stop("distr and para must defined")
  if (!histo & !demp) 
    stop("one the arguments histo and demp must be put to TRUE")
  xlim <- c(min(data), max(data))
  s <- sort(data)
  n <- length(data)
  if (missing(distr)) {
    par(mfrow = c(1, 2))
    if (missing(discrete)) 
      discrete <- FALSE
    if (!discrete) {
      obsp <- ppoints(s)
      if (histo) {
        if (demp) {
          if (breaks == "default") 
            h <- hist(data, freq = FALSE, xlab = "Data", 
                      main = "Empirical density", ...)
          else h <- hist(data, freq = FALSE, xlab = "Data", 
                         main = "Empirical density", breaks = breaks, 
                         ...)
          lines(density(data)$x, density(data)$y, lty = 2, 
                col = "black")
        }
        else {
          if (breaks == "default") 
            h <- hist(data, freq = FALSE, xlab = "Data", 
                      main = "Histogram", ...)
          else h <- hist(data, freq = FALSE, xlab = "Data", 
                         main = "Histogram", breaks = breaks, 
                         ...)
        }
      }
      else {
        h <- hist(data, freq = FALSE, xlab = "Data", 
                  main = "Histogram", plot = FALSE, ...)
        plot(density(data)$x, density(data)$y, lty = 1, 
             col = "black", type = "l", xlab = "Data", 
             main = paste("Empirical density"), ylab = "Density", 
             ...)
      }
      plot(s, obsp, main = paste("Cumulative distribution"), 
           xlab = "Data", xlim = c(h$breaks[1], h$breaks[length(h$breaks)]), 
           ylab = "CDF", ...)
    }
    else {
      if (breaks != "default") 
        warning("Breaks are\tnot taken into account for discrete data")
      t <- table(data)
      xval <- as.numeric(names(t))
      ydobs <- as.vector(t)/n
      ydmax <- max(ydobs)
      plot(xval, ydobs, type = "h", xlim = xlim, 
           ylim = c(0, ydmax), main = paste0("Empirical distribution ", title), 
           xlab = "Data", ylab = "Density", 
           ...)
      ycdfobs <- cumsum(ydobs)
      plot(xval, ycdfobs, type = "p", xlim = xlim, 
           ylim = c(0, 1), main = paste0("Empirical CDFs ", title), 
           xlab = "Data", ylab = "CDF", ...)
    }
  }
  else {
    if (!is.character(distr)) 
      distname <- substring(as.character(match.call()$distr), 
                            2)
    else distname <- distr
    if (!is.list(para)) 
      stop("'para' must be a named list")
    ddistname <- paste("d", distname, sep = "")
    if (!exists(ddistname, mode = "function")) 
      stop(paste("The ", ddistname, " function must be defined"))
    pdistname <- paste("p", distname, sep = "")
    if (!exists(pdistname, mode = "function")) 
      stop(paste("The ", pdistname, " function must be defined"))
    qdistname <- paste("q", distname, sep = "")
    if (!exists(qdistname, mode = "function")) 
      stop(paste("The ", qdistname, " function must be defined"))
    densfun <- get(ddistname, mode = "function")
    nm <- names(para)
    f <- formals(densfun)
    args <- names(f)
    m <- match(nm, args)
    if (any(is.na(m))) 
      stop(paste("'para' specifies names which are not arguments to ", 
                 ddistname))
    if (missing(discrete)) {
      if (is.element(distname, c("binom", "nbinom", 
                                 "geom", "hyper", "pois"))) 
        discrete <- TRUE
      else discrete <- FALSE
    }
    if (!discrete) {
      par(mfrow = c(2, 2))
      obsp <- ppoints(s)
      if (breaks == "default") 
        h <- hist(data, plot = FALSE)
      else h <- hist(data, breaks = breaks, plot = FALSE, 
                     ...)
      xhist <- seq(min(h$breaks), max(h$breaks), length = 1000)
      yhist <- do.call(ddistname, c(list(xhist), as.list(para)))
      if (length(yhist) != length(xhist)) 
        stop("problem when computing densities.")
      ymax <- ifelse(is.finite(max(yhist)), max(max(h$density), 
                                                max(yhist)), max(h$density))
      if (histo) {
        hist(data, freq = FALSE, xlab = "Data", 
             ylim = c(0, ymax), breaks = h$breaks, main = paste("Empirical and theoretical dens."), 
             ...)
        if (demp) {
          lines(density(data)$x, density(data)$y, lty = 2, 
                col = "black")
        }
      }
      else plot(density(data)$x, density(data)$y, lty = 2, 
                col = "black", type = "l", xlab = "Data", 
                main = paste("Empirical and theoretical dens."), 
                ylab = "Density", xlim = c(min(h$breaks), 
                                           max(h$breaks)), ...)
      if (demp) 
        legend("topright", bty = "n", lty = c(2, 
                                              1), col = c("black", "red"), legend = c("empirical", 
                                                                                      "theoretical"), bg = "white", cex = 0.7)
      lines(xhist, yhist, lty = 1, col = "red")
      theoq <- do.call(qdistname, c(list(obsp), as.list(para)))
      if (length(theoq) != length(obsp)) 
        stop("problem when computing quantities.")
      plot(theoq, s, main = " Q-Q plot", xlab = "Theoretical quantiles", 
           ylab = "Empirical quantiles", ...)
      abline(0, 1)
      xmin <- h$breaks[1]
      xmax <- h$breaks[length(h$breaks)]
      if (length(s) != length(obsp)) 
        stop("problem when computing probabilities.")
      plot(s, obsp, main = paste("Empirical and theoretical CDFs"), 
           xlab = "Data", ylab = "CDF", xlim = c(xmin, 
                                                 xmax), ...)
      sfin <- seq(xmin, xmax, by = (xmax - xmin)/100)
      theopfin <- do.call(pdistname, c(list(sfin), as.list(para)))
      lines(sfin, theopfin, lty = 1, col = "red")
      theop <- do.call(pdistname, c(list(s), as.list(para)))
      if (length(theop) != length(obsp)) 
        stop("problem when computing probabilities.")
      plot(theop, obsp, main = "P-P plot", xlab = "Theoretical probabilities", 
           ylab = "Empirical probabilities", ...)
      abline(0, 1)
    }
    else {
      par(mfrow = c(1, 2))
      if (breaks != "default") 
        warning("Breaks are not taken into account for discrete distributions")
      t <- table(data)
      xval <- as.numeric(names(t))
      xvalfin <- seq(min(xval), max(xval), by = 1)
      xlinesdec <- min((max(xval) - min(xval))/30, 0.4)
      yd <- do.call(ddistname, c(list(xvalfin), as.list(para)))
      if (length(yd) != length(xvalfin)) 
        stop("problem when computing density points.")
      ydobs <- as.vector(t)/n
      ydmax <- max(yd, ydobs)
      plot(xvalfin   xlinesdec, yd, type = "h", xlim = c(min(xval), 
                                                         max(xval)   xlinesdec), ylim = c(0, ydmax), lty = 1, 
           col = "red", main = "Emp. and theo. distr.", 
           xlab = "Data", ylab = "Density", 
           ...)
      points(xval, ydobs, type = "h", lty = 1, col = "black", 
             ...)
      legend("topright", lty = c(1, 1), col = c("black", 
                                                "red"), legend = c("empirical", paste("theoretical")), 
             bty = "o", bg = "white", cex = 0.6, 
             ...)
      ycdf <- do.call(pdistname, c(list(xvalfin), as.list(para)))
      if (length(ycdf) != length(xvalfin)) 
        stop("problem when computing probabilities.")
      plot(xvalfin, ycdf, type = "s", xlim = c(min(xval), 
                                               max(xval)   xlinesdec), ylim = c(0, 1), lty = 1, 
           col = "red", main = "Emp. and theo. CDFs", 
           xlab = "Data", ylab = "CDF", ...)
      ycdfobs <- cumsum(ydobs)
      points(xval, ycdfobs, type = "p", col = "black", 
             ...)
      legend("bottomright", lty = c(1, 1), col = c("black", 
                                                   "red"), legend = c("empirical", paste("theoretical")), 
             bty = "o", bg = "white", cex = 0.6, 
             ...)
    }
  }
  par(def.par)
  invisible()
}

To now add a title to your plot, simply use this:

par(ask=F)

for (i in unique(Data$column_a)) {
  dat <- Data[Data$column_a== i, ]
  plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
           pch = 16, col = "dodgerblue1", title = i) 
}

Edit: Added dummy data to test the provided loop.

df <- data.frame(column_a = rep(c("a", "b"), each = 50),
                 count = sample(1:1000, 100, replace = T))
par(ask=F)

for (i in unique(df$column_a)) {
  dat <- df[df$column_a== i, ]
  plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
               pch = 16, col = "dodgerblue1", title = i) 
}
  • Related