Adding 95% confidence interval of prediction using ggplot2-CodePudding

I am using the facet_grid2 function from ggh4x to make both the x- and y-axis scales free in every panel, like this:

# Scatter plot of predicted vs. measured values, one panel per
# Station (rows) x Method (columns). facet_grid2() comes from ggh4x and is
# used here so that both the x and y scales are free in every panel.
# geom_smooth(method = "lm") overlays a per-panel linear fit; its shaded band
# is a confidence interval, not a prediction interval.
ggplot(data_calibration, aes(Observed, Predicted)) +
  geom_point(color = "black", alpha = 1 / 3) +
  facet_grid2(Station ~ Method, scales = "free", independent = "all") +
  xlab("Measured") +
  ylab("Predicted") +
  theme_bw() +
  geom_smooth(method = "lm") +
  theme(panel.grid.minor = element_blank())

Now, how can I add the 95% prediction interval (the confidence interval of prediction) to this plot, as in the following plot?

Data

# Calibration data: 270 rows = 6 methods x 3 stations x 15 observations.
# The same 45 observed values (15 per station) are reused for every method,
# so the repeated runs are generated with rep() instead of being spelled out.
data_calibration <- structure(
  list(
    # One 45-value run (stations in the order 1, 3, 2), repeated per method.
    Observed = rep(
      c(
        17229L, 15964L, 13373L, 17749L, 12457L,
        7166L, 7842L, 8675L, 11718L, 6049L,
        4232L, 4126L, 7197L, 7220L, 7284L,
        16410L, 15772L, 12166L, 11997L, 7827L,
        13034L, 11465L, 11409L, 10165L, 9702L,
        2942L, 2940L, 4361L, 6197L, 6144L,
        10759L, 9720L, 8631L, 7354L, 7640L,
        6653L, 7551L, 6791L, 9093L, 3183L,
        9078L, 8688L, 11023L, 9000L, 9001L
      ),
      times = 6L
    ),
    # Factor codes: 15 x level 1, 15 x level 3, 15 x level 2, per method.
    # Note the trailing space in "Thane " -- it is part of the data as given.
    Station = structure(
      rep(rep(c(1L, 3L, 2L), each = 15L), times = 6L),
      levels = c("Raigad", "Ratnagiri", "Thane "),
      class = "factor"
    ),
    # Each method covers one contiguous run of 45 rows.
    Method = structure(
      rep(c(6L, 5L, 3L, 2L, 1L, 4L), each = 45L),
      levels = c("ANN", "ELNET", "LASSO", "PCA-ANN", "PCA-MLR", "SMLR"),
      class = "factor"
    ),
    Predicted = c(
      14463L, 14285L, 14452L, 12765L, 11917L, 8143L,
      11251L, 8611L, 6789L, 2059L, 2787L, 2201L, 3062L, 4508L,
      4975L, 15357L, 15605L, 12326L, 10377L, 9113L, 13926L, 13142L,
      11407L, 8711L, 7801L, 2064L, 4563L, 4725L, 6247L, 7170L,
      9492L, 8857L, 10323L, 7389L, 6776L, 7842L, 8261L, 6156L,
      8627L, 4326L, 8094L, 8897L, 10370L, 10214L, 8548L, 16043L,
      16671L, 15831L, 13463L, 11921L, 10239L, 9110L, 8090L, 10794L,
      5826L, 3621L, 5639L, 7364L, 8152L, 5515L, 15182L, 14370L,
      13559L, 12748L, 11936L, 11125L, 10313L, 9502L, 8691L, 7879L,
      7068L, 6257L, 5445L, 4634L, 3822L, 10045L, 9911L, 11038L,
      9255L, 8736L, 8848L, 8063L, 7847L, 8538L, 6744L, 9583L, 10474L,
      8343L, 10353L, 8791L, 13185L, 13331L, 13099L, 12557L, 11898L,
      10474L, 11199L, 10255L, 9251L, 6148L, 6795L, 6166L, 7775L,
      8157L, 7990L, 14843L, 15086L, 12585L, 10987L, 10193L, 13663L,
      11317L, 11071L, 9392L, 6991L, 4484L, 4667L, 4846L, 5830L,
      6577L, 9085L, 8802L, 9570L, 7770L, 7652L, 8006L, 7995L, 6599L,
      9050L, 4876L, 8360L, 8981L, 9931L, 9479L, 8009L, 13775L,
      13890L, 13416L, 12851L, 12141L, 10693L, 10834L, 10372L, 9585L,
      5914L, 5930L, 5922L, 7854L, 7407L, 7697L, 14941L, 15174L,
      12572L, 10817L, 10412L, 13705L, 11154L, 10886L, 9448L, 7215L,
      4389L, 4875L, 4809L, 5747L, 6385L, 9034L, 8749L, 9410L, 7820L,
      7798L, 7940L, 7957L, 6803L, 8844L, 5227L, 8369L, 8972L, 9789L,
      9514L, 7940L, 15309L, 14477L, 14219L, 18581L, 12084L, 10550L,
      8666L, 8812L, 11415L, 5566L, 3928L, 4592L, 7861L, 7489L,
      6903L, 12509L, 13366L, 11956L, 11880L, 8711L, 12768L, 11690L,
      10922L, 4101L, 10106L, 2811L, 2979L, 4785L, 5944L, 5901L,
      10007L, 8710L, 8688L, 7383L, 7575L, 8047L, 7938L, 6585L,
      9517L, 3729L, 8816L, 8704L, 10847L, 8812L, 8493L, 18115L,
      15670L, 15931L, 16804L, 12450L, 7701L, 7588L, 8450L, 9205L,
      5477L, 4666L, 4948L, 8262L, 7095L, 6798L, 12902L, 12883L,
      12864L, 12788L, 12690L, 12896L, 12491L, 12199L, 11982L, 5213L,
      5357L, 5053L, 5013L, 5321L, 5596L, 9467L, 8931L, 9305L, 7867L,
      8427L, 8282L, 7291L, 6396L, 9725L, 5509L, 8545L, 8997L, 10171L,
      10389L, 8700L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -270L)
)

CodePudding user response：

In short, geom_smooth only calculates confidence intervals. To get prediction intervals as well, they have to be calculated outside of ggplot and passed in. This is a rather long-winded way of coding it, but hopefully you can see that predict is called twice on each lm model: once to produce the two columns of confidence limits, and once to produce the two columns of prediction limits. These are then passed on to geom_ribbon layers:

library(ggh4x)
library(tidyverse)

# Fit one linear model per Station x Method panel, then attach both the
# confidence interval (for the fitted mean) and the prediction interval (for a
# new observation) to every row so they can be drawn as ribbon outlines.
# predict.lm() uses level = 0.95 by default, i.e. 95% intervals.
data_calibration |>
  group_by(Station, Method) |>
  nest() |>
  ungroup() |>
  mutate(
    model = map(data, \(d) lm(Predicted ~ Observed, data = d)),
    # Predict at the group's own x values: each interval table then has exactly
    # one row per data row, which unnest() below needs to line the columns up.
    # Passing newdata explicitly also avoids predict.lm()'s "predictions on
    # current data refer to future responses" warning.
    fit = map2(model, data, \(m, d) {
      as_tibble(predict(m, newdata = d, interval = "confidence"))
    }),
    pred = map2(model, data, \(m, d) {
      as_tibble(predict(m, newdata = d, interval = "prediction"))
    })
  ) |>
  # names_sep = "_" prefixes each column with its source list-column, giving
  # data_Observed, data_Predicted, fit_lwr, fit_upr, pred_lwr, pred_upr, ...
  unnest(c(data, fit, pred), names_sep = "_") |>
  ggplot(aes(data_Observed, data_Predicted)) +
  geom_point(color = "black", alpha = 1 / 3) +
  facet_grid2(Station ~ Method, scales = "free", independent = "all") +
  xlab("Measured") +
  ylab("Predicted") +
  theme_bw() +
  # se = FALSE: the intervals are drawn by the two ribbons below instead.
  geom_smooth(method = "lm", se = FALSE) +
  # Confidence interval outline (green) and prediction interval outline (red);
  # fill = NA leaves the ribbons hollow so the points stay visible.
  geom_ribbon(
    aes(ymax = fit_upr, ymin = fit_lwr),
    colour = "green",
    fill = NA
  ) +
  geom_ribbon(
    aes(ymax = pred_upr, ymin = pred_lwr),
    colour = "red",
    fill = NA
  ) +
  theme(panel.grid.minor = element_blank())

I would welcome a tidier answer! One would be to create a new stat_predict layer function, which is a little tricky but not impossible.

Edit - that thing I said was perhaps a good idea, maybe it is!

Out of curiosity, I thought it worth making a stat_predict function. Source the code from this gist, and then the following simpler code will work with the data above:

# To source the new function, either...
# NOTE(review): source() on a URL downloads and executes remote code. The path
# appears to be pinned to one specific gist revision; review the script before
# running it.
source("https://gist.githubusercontent.com/andrewbaxter439/b508a60786f8af3c0be7b381a667ae07/raw/f7f4672222f0b1024cf6bf536ed7f6059867b4f2/stat_predict.R")

# or devtools::source_gist("b508a60786f8af3c0be7b381a667ae07")

# Same faceted scatter plot as before, with the intervals computed by stats
# instead of by hand. stat_predict() is not part of ggplot2; it is expected to
# be defined by the script sourced above.
ggplot(data_calibration, aes(Observed, Predicted)) +
  geom_point(color = "black", alpha = 1 / 3) +
  facet_grid2(Station ~ Method, scales = "free", independent = "all") +
  xlab("Measured") +
  ylab("Predicted") +
  theme_bw() +
  # Fitted line only; the interval outlines come from the two layers below.
  geom_smooth(method = "lm", se = FALSE) +
  # Confidence interval outline (green).
  stat_smooth(method = "lm", geom = "ribbon", fill = NA, colour = "green") +
  # Prediction interval outline (red).
  stat_predict(method = "lm", geom = "ribbon", fill = NA, colour = "red") +
  theme(panel.grid.minor = element_blank())

Footnote: here's an old discussion on whether a prediction interval function should be a part of ggplot2 or not