I am using your `facet_grid2()` function from ggh4x to make both the x- and y-axis scales free, like this:
# Scatter plot of predicted vs. measured values, one panel per
# Station x Method combination. ggplot2 layers must be chained with `+`;
# without it only the bare `ggplot()` call would be treated as the plot.
ggplot(data_calibration, aes(Observed, Predicted)) +
  geom_point(color = "black", alpha = 1 / 3) +
  # Free x and y scales for every panel (ggh4x).
  facet_grid2(Station ~ Method, scales = "free", independent = "all") +
  xlab("Measured") +
  ylab("Predicted") +
  theme_bw() +
  # Linear fit per panel.
  geom_smooth(method = "lm") +
  theme(panel.grid.minor = element_blank())
Now, how can I add the 95% confidence interval of the prediction (i.e. the prediction interval) to this plot, as in the following plot?
Data
# Calibration data set used by the plots: a 270-row data.frame with integer
# columns `Observed` and `Predicted` and factor columns `Station`
# (levels "Raigad", "Ratnagiri", "Thane " -- note the trailing space in
# "Thane ") and `Method` (levels "ANN", "ELNET", "LASSO", "PCA-ANN",
# "PCA-MLR", "SMLR").
data_calibration = structure(list(Observed = c(17229L, 15964L, 13373L, 17749L, 12457L,
7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L, 7197L, 7220L,
7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L, 11465L,
11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L, 10759L,
9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L, 3183L,
9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L, 17749L,
12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L, 7197L,
7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L), Station = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Raigad",
"Ratnagiri", "Thane "), class = "factor"), Method = structure(c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("ANN",
"ELNET", "LASSO", "PCA-ANN", "PCA-MLR", "SMLR"), class = "factor"),
Predicted = c(14463L, 14285L, 14452L, 12765L, 11917L, 8143L,
11251L, 8611L, 6789L, 2059L, 2787L, 2201L, 3062L, 4508L,
4975L, 15357L, 15605L, 12326L, 10377L, 9113L, 13926L, 13142L,
11407L, 8711L, 7801L, 2064L, 4563L, 4725L, 6247L, 7170L,
9492L, 8857L, 10323L, 7389L, 6776L, 7842L, 8261L, 6156L,
8627L, 4326L, 8094L, 8897L, 10370L, 10214L, 8548L, 16043L,
16671L, 15831L, 13463L, 11921L, 10239L, 9110L, 8090L, 10794L,
5826L, 3621L, 5639L, 7364L, 8152L, 5515L, 15182L, 14370L,
13559L, 12748L, 11936L, 11125L, 10313L, 9502L, 8691L, 7879L,
7068L, 6257L, 5445L, 4634L, 3822L, 10045L, 9911L, 11038L,
9255L, 8736L, 8848L, 8063L, 7847L, 8538L, 6744L, 9583L, 10474L,
8343L, 10353L, 8791L, 13185L, 13331L, 13099L, 12557L, 11898L,
10474L, 11199L, 10255L, 9251L, 6148L, 6795L, 6166L, 7775L,
8157L, 7990L, 14843L, 15086L, 12585L, 10987L, 10193L, 13663L,
11317L, 11071L, 9392L, 6991L, 4484L, 4667L, 4846L, 5830L,
6577L, 9085L, 8802L, 9570L, 7770L, 7652L, 8006L, 7995L, 6599L,
9050L, 4876L, 8360L, 8981L, 9931L, 9479L, 8009L, 13775L,
13890L, 13416L, 12851L, 12141L, 10693L, 10834L, 10372L, 9585L,
5914L, 5930L, 5922L, 7854L, 7407L, 7697L, 14941L, 15174L,
12572L, 10817L, 10412L, 13705L, 11154L, 10886L, 9448L, 7215L,
4389L, 4875L, 4809L, 5747L, 6385L, 9034L, 8749L, 9410L, 7820L,
7798L, 7940L, 7957L, 6803L, 8844L, 5227L, 8369L, 8972L, 9789L,
9514L, 7940L, 15309L, 14477L, 14219L, 18581L, 12084L, 10550L,
8666L, 8812L, 11415L, 5566L, 3928L, 4592L, 7861L, 7489L,
6903L, 12509L, 13366L, 11956L, 11880L, 8711L, 12768L, 11690L,
10922L, 4101L, 10106L, 2811L, 2979L, 4785L, 5944L, 5901L,
10007L, 8710L, 8688L, 7383L, 7575L, 8047L, 7938L, 6585L,
9517L, 3729L, 8816L, 8704L, 10847L, 8812L, 8493L, 18115L,
15670L, 15931L, 16804L, 12450L, 7701L, 7588L, 8450L, 9205L,
5477L, 4666L, 4948L, 8262L, 7095L, 6798L, 12902L, 12883L,
12864L, 12788L, 12690L, 12896L, 12491L, 12199L, 11982L, 5213L,
5357L, 5053L, 5013L, 5321L, 5596L, 9467L, 8931L, 9305L, 7867L,
8427L, 8282L, 7291L, 6396L, 9725L, 5509L, 8545L, 8997L, 10171L,
10389L, 8700L)), class = "data.frame", row.names = c(NA,
-270L))
CodePudding user response:
In short, the `geom_smooth()` function only calculates confidence intervals. To get prediction intervals as well, these should be calculated outside of ggplot and passed in. This is a bit of a long-winded way of coding it, but hopefully you can see that `predict()` is called twice on the `lm` model: once to produce the two columns of confidence limits, and once to produce the two columns of prediction limits. These are passed on to `geom_ribbon()`s:
library(ggh4x)
library(tidyverse)
# Fit one linear model per Station x Method panel, compute the confidence
# and the prediction interval for every observed row, and draw both as
# outlined ribbons on top of the faceted scatter plot.
data_calibration |>
  group_by(Station, Method) |>
  nest() |>
  mutate(
    model = map(data, \(d) lm(Predicted ~ Observed, data = d)),
    # predict() is called without `newdata`, so it returns one row per fitted
    # observation (columns fit, lwr, upr). The rows therefore line up with
    # `data`, which is what lets the three list-columns be unnested together.
    fit = map(model, \(m) as_tibble(predict(m, interval = "confidence"))),
    # On the fitted data, interval = "prediction" warns that the intervals
    # refer to future responses; that is expected here.
    pred = map(model, \(m) as_tibble(predict(m, interval = "prediction")))
  ) |>
  ungroup() |>
  # names_sep = "_" yields data_Observed, data_Predicted, fit_lwr, fit_upr,
  # pred_lwr, pred_upr, ... which the aesthetics below refer to.
  unnest(c(data, fit, pred), names_sep = "_") |>
  ggplot(aes(data_Observed, data_Predicted)) +
  geom_point(color = "black", alpha = 1 / 3) +
  facet_grid2(Station ~ Method, scales = "free", independent = "all") +
  xlab("Measured") +
  ylab("Predicted") +
  theme_bw() +
  geom_smooth(method = "lm", se = FALSE) +
  # Confidence interval of the fitted line (green outline, no fill).
  geom_ribbon(
    aes(ymax = fit_upr, ymin = fit_lwr),
    colour = "green",
    fill = NA
  ) +
  # Prediction interval (red outline, no fill).
  geom_ribbon(
    aes(ymax = pred_upr, ymin = pred_lwr),
    colour = "red",
    fill = NA
  ) +
  theme(panel.grid.minor = element_blank())
I would welcome a tidier answer! One option would be to create a new `stat_predict()` layer function, which is a little tricky but not impossible.
Edit: that thing I said was perhaps a good idea. Maybe it is!
Out of curiosity, I thought it worth making a `stat_predict()` function. Source the code from this gist and then the following simpler code will work with the data above:
# To source new function, either...
# NOTE(review): this executes code fetched from the network. The raw URL
# appears to pin one specific gist revision; review the gist before running.
source("https://gist.githubusercontent.com/andrewbaxter439/b508a60786f8af3c0be7b381a667ae07/raw/f7f4672222f0b1024cf6bf536ed7f6059867b4f2/stat_predict.R")
# or devtools::source_gist("b508a60786f8af3c0be7b381a667ae07")

# Same faceted scatter plot, with the interval outlines drawn by stat layers
# instead of pre-computed columns. Layers must be chained with `+`.
ggplot(data_calibration, aes(Observed, Predicted)) +
  geom_point(color = "black", alpha = 1 / 3) +
  facet_grid2(Station ~ Method, scales = "free", independent = "all") +
  xlab("Measured") +
  ylab("Predicted") +
  theme_bw() +
  geom_smooth(method = "lm", se = FALSE) +
  # Confidence band of the linear fit, drawn as a green outline.
  stat_smooth(method = "lm", geom = "ribbon", fill = NA, colour = "green") +
  # stat_predict() comes from the sourced gist; presumably it draws the
  # prediction interval (red outline) -- confirm against the gist.
  stat_predict(method = "lm", geom = "ribbon", fill = NA, colour = "red") +
  theme(panel.grid.minor = element_blank())