I have built an ARIMA model and want to visualise the actual vs. predicted values. I wrote a custom function that plots both series so I can see how the actual and predicted values are distributed over the data, but it does not work and gives an error.
My data:
# Data frame used by the code below: one row per trading day with the actual
# closing value (`close`) and the model's prediction (`predicted`).
# Bound to `arima_results` because that is the name the plotting code reads.
arima_results <- structure(list(tradingDay = c("2010-09-20", "2010-09-21", "2010-09-22",
"2010-09-23", "2010-09-24", "2010-09-27", "2010-09-28", "2010-09-29",
"2010-09-30", "2010-10-01", "2010-10-04", "2010-10-05", "2010-10-06",
"2010-10-07", "2010-10-08", "2010-10-11", "2010-10-12", "2010-10-13",
"2010-10-14", "2010-10-15", "2010-10-18", "2010-10-19", "2010-10-20",
"2010-10-21", "2010-10-22", "2010-10-25", "2010-10-26", "2010-10-27",
"2010-10-28", "2010-10-29", "2010-11-01", "2010-11-02", "2010-11-03",
"2010-11-04", "2010-11-05", "2010-11-08", "2010-11-09", "2010-11-10",
"2010-11-11", "2010-11-12", "2010-11-15", "2010-11-16", "2010-11-17",
"2010-11-18", "2010-11-19", "2010-11-22", "2010-11-23", "2010-11-24",
"2010-11-26", "2010-11-29", "2010-11-30"), close = c(1084.5,
1080, 1088.5, 1093.5, 1126, 1128.5, 1110, 1099, 1106.75, 1057,
1054, 1071.75, 1062, 1065, 1135, 1152.5, 1178.5, 1176.5, 1188.5,
1185, 1184, 1180, 1212, 1201.5, 1199.5, 1217.75, 1219, 1223.75,
1225, 1226, 1225.25, 1223.75, 1227.5, 1264.75, 1273.5, 1264.5,
1319.25, 1309.5, 1330.25, 1269, 1286.5, 1219.75, 1205, 1242,
1201.5, 1221.5, 1239, 1255, 1238.5, 1235, 1243), predicted = c(1069,
1084.5, 1080, 1088.5, 1093.5, 1126, 1129.78301886793, 1110, 1099,
1107.4347071939, 1057, 1054, 1071.75, 1062, 1065, 1141.41447156232,
1152.5, 1178.5, 1176.5, 1188.5, 1185, 1177.77921027082, 1181.55660377359,
1213.74371069182, 1203.14150943396, 1201.12264150944, 1220.83567988792,
1220.84591194969, 1225.62578616352, 1226.85534591195, 1227.76729559748,
1226.92452830189, 1225.41194968553, 1229.1713836478, 1266.61320754717,
1275.4213836478, 1266.41509433962, 1321.61949685535, 1298.22159153934,
1336.44233899511, 1271.52597482938, 1288.67295597484, 1238.92349066464,
1205, 1243.8710691824, 1201.5, 1221.5, 1239, 1255, 1238.5, 1235
)), row.names = c(NA, -51L), class = "data.frame")
My code:
# Plot the actual and predicted series of `r_df` as coloured lines and points.
# The ggplot layers are chained with `+`; written as separate statements they
# would be evaluated on their own and never attached to the plot.
# NOTE(review): the `arima_results$` prefixes inside select(), gather() and
# aes() are kept exactly as posted -- they are what this question is about and
# what raises the error quoted below.
vis_results <- function(r_df) {
  r_df %>%
    select(arima_results$tradingDay, Actual = arima_results$close, Predicted = arima_results$predicted) %>%
    gather(a, b, -arima_results$tradingDay) %>%
    ggplot(aes(arima_results$tradingDay, b, color = a)) +
    geom_line() +
    geom_point() +
    scale_color_manual(values = c("purple", "orange"), name = "") +
    my_theme()
}
# Build the plot and drop both axis titles. labs() has to be joined to the
# plot with `+`; on a line of its own it is evaluated separately and never
# reaches the plot.
arima_results %>%
  vis_results() +
  labs(x = NULL, y = NULL)
Error:
Can't subset columns that don't exist.
CodePudding user response:
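You should remove every `arima_results$` prefix from inside your function: `select()`, `gather()` and `aes()` take the bare column names of the data frame that is piped in.
Then it works.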
library(dplyr); library(tidyr); library(ggplot2)
# Plot the actual and predicted series of `r_df` against the trading day.
#
# `r_df` needs the columns `tradingDay`, `close` and `predicted`.
# Returns a ggplot object, so callers can append further layers with `+`.
vis_results <- function(r_df) {
  r_df %>%
    # Bare column names: they are looked up in the piped data frame.
    select(tradingDay, Actual = close, Predicted = predicted) %>%
    # The posted data stores the day as character. A character x is discrete,
    # which puts every point in its own group, so geom_line() would draw
    # nothing. as.Date() leaves a column that is already a Date unchanged.
    mutate(tradingDay = as.Date(tradingDay)) %>%
    # Long format: `a` holds the series name, `b` the value.
    gather(a, b, -tradingDay) %>%
    ggplot(aes(tradingDay, b, color = a)) +
    geom_line() +
    geom_point() +
    scale_color_manual(values = c("purple", "orange"), name = "")
  # my_theme() is not defined in this post, so it is left out here;
  # append `+ my_theme()` to the chain above if you have it.
}
# Build the plot and drop both axis titles. `%>%` binds tighter than `+`, so
# this adds labs() to the ggplot returned by vis_results().
arima_results %>%
  vis_results() +
  labs(x = NULL, y = NULL)