I am working on a data frame where I have the data on different states across the UK. I have applied the ARIMA model to the data frame and am interested in getting the RMSE and Error for each column/state. How can I do this using for loop for all the 14 columns so that I won't have to do it manually?
Data:
structure(list(Date = structure(c(289094400, 297043200, 304992000,
312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct",
"POSIXt")), NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305,
4.75015422578655, 4.59363957597172, 3.15315315315317), YORKSANDTHEHUMBER = c(4.0121120363361,
5.45851528384282, 9.52380952380951, 6.04914933837431, 3.03030303030299,
5.42099192618225), NORTHWEST = c(6.57894736842105, 6.95256660168939,
6.50060753341436, 5.5904164289789, 4.59211237169096, 4.70041322314051
), EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127,
5.22388059701494, 5.61465721040189, 4.64465584778958), WESTMIDS = c(4.65838509316771,
4.74777448071216, 8.66855524079319, 6.56934306569344, 3.22896281800389,
3.17535545023698), EASTANGLIA = c(6.74525212835624, 8.58895705521476,
8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835
), OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589,
8.56512141280351, 3.82269215128102, 2.11515863689776), OUTERMET = c(4.54545454545458,
6.58505698607005, 7.36633663366336, 7.08225746956843, 4.3747847054771,
1.68316831683168), LONDON = c(8.11719500480309, 10.3065304309196,
6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978
), SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407,
9.45489628557649, 2.46804759806079, 2.19354838709679), WALES = c(6.09418282548476,
8.35509138381203, 7.40963855421687, 7.01065619742007, 1.15303983228513,
3.47150259067357), SCOTLAND = c(5.15222482435597, 4.12026726057908,
5.40106951871658, 8.67579908675796, -0.280112044817908, 2.94943820224719
), NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145,
2.96397628818967, 6.06731620903454, 0.0835073068893502), UK = c(5.76890543055322,
7.20302836425676, 7.39543442582184, 7.22885986848197, 3.23472252213347,
2.95766398929048)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Code:
in_sample <- pc %>%
dplyr::filter(Date < '2020-03-01')
st(in_sample)
out_sample <-pc %>%
dplyr::filter(Date >= '2020-03-01')
st(out_sample)
ar_data = in_sample
ar_data %<>% dplyr::select(-Date)
ar_model4=apply(ar_data,2,function(x){
return(
list(
summary(arima(x, order = c(1,0,0))) %>%
forecast::forecast(h = 4, level = 0.95)
))
} )
ar_model4
names(ar_data)
error <- out_sample$NORTH[1:4]-ar_model4[["NORTH"]][[1]][["mean"]]
sqrt(mean(error^2))
CodePudding user response:
There is no need for summary
or to compute the models' residuals by hand, there is a resid
method for objects of class "Arima"
.
Note that tryCatch
is needed because with the posted data there will be an error for NIRELAND.
pc <-
structure(list(
Date = structure(c(289094400, 297043200, 304992000,
312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct",
"POSIXt")),
NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305,
4.75015422578655, 4.59363957597172, 3.15315315315317),
YORKSANDTHEHUMBER = c(4.0121120363361, 5.45851528384282, 9.52380952380951,
6.04914933837431, 3.03030303030299, 5.42099192618225),
NORTHWEST = c(6.57894736842105, 6.95256660168939, 6.50060753341436,
5.5904164289789, 4.59211237169096, 4.70041322314051),
EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127,
5.22388059701494, 5.61465721040189, 4.64465584778958),
WESTMIDS = c(4.65838509316771, 4.74777448071216, 8.66855524079319,
6.56934306569344, 3.22896281800389, 3.17535545023698),
EASTANGLIA = c(6.74525212835624, 8.58895705521476,
8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835),
OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589,
8.56512141280351, 3.82269215128102, 2.11515863689776),
OUTERMET = c(4.54545454545458, 6.58505698607005, 7.36633663366336,
7.08225746956843, 4.3747847054771, 1.68316831683168),
LONDON = c(8.11719500480309, 10.3065304309196,
6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978),
SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407,
9.45489628557649, 2.46804759806079, 2.19354838709679),
WALES = c(6.09418282548476, 8.35509138381203, 7.40963855421687, 7.01065619742007,
1.15303983228513, 3.47150259067357),
SCOTLAND = c(5.15222482435597, 4.12026726057908, 5.40106951871658,
8.67579908675796, -0.280112044817908, 2.94943820224719),
NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145,
2.96397628818967, 6.06731620903454, 0.0835073068893502),
UK = c(5.76890543055322, 7.20302836425676, 7.39543442582184, 7.22885986848197,
3.23472252213347, 2.95766398929048)),
row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"))
suppressPackageStartupMessages({
library(dplyr)
library(magrittr)
})
in_sample <- pc %>%
dplyr::filter(Date < '2020-03-01')
ar_data <- in_sample
ar_data %<>% dplyr::select(-Date)
mse <- function(x, na.rm = FALSE) mean(resid(x)^2, na.rm = na.rm)
rmse <- function(x, na.rm = FALSE) sqrt(mse(x, na.rm = na.rm))
ar_resid <- apply(ar_data, 2, function(x){
tryCatch(
arima(x, order = c(1,0,0)) %>% resid(),
error = function(e) e
)
})
ar_resid
#> $NORTH
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] -0.7285983 0.8359489 2.3471268 -0.4897442 -0.2056680 -1.6188957
#>
#> $YORKSANDTHEHUMBER
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] -1.56097644 -0.06906896 3.95344669 0.35855202 -2.55752830 -0.07755450
#>
#> $NORTHWEST
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] 0.5624819 0.5865668 -0.1453380 -0.7168848 -1.0332002 -0.1768894
#>
#> $EASTMIDS
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] -0.9447732 2.2744683 0.9787282 -0.7086071 -0.3129965 -1.2841150
#>
#> $WESTMIDS
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] -0.3927584 -0.2219119 3.6784580 0.6839904 -2.1770641 -1.4679424
#>
#> $EASTANGLIA
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] -0.5710381 1.3238904 1.0367394 3.3020482 -2.7936130 -2.2417548
#>
#> $OUTERSEAST
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] 0.7716909 1.2299292 2.6972779 0.6876048 -3.5383368 -2.5484469
#>
#> $OUTERMET
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] -0.08399587 1.99441267 1.65175670 0.93714852 -1.61378065 -2.81342732
#>
#> $LONDON
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] 2.103406 3.567926 -1.289222 1.628852 -5.250883 -1.865562
#>
#> $SOUTHWEST
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] 0.4202758 1.8571201 1.3375759 3.1841848 -4.3152846 -2.6245668
#>
#> $WALES
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] 0.5866757 2.6842648 1.0262762 0.9252577 -4.8066176 -0.6420999
#>
#> $SCOTLAND
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] 0.78420411 -0.07735357 1.00311288 4.52648693 -3.79369498 -2.30277210
#>
#> $NIRELAND
#> <simpleError in arima(x, order = c(1, 0, 0)): non-stationary AR part from CSS>
#>
#> $UK
#> Time Series:
#> Start = 1
#> End = 6
#> Frequency = 1
#> [1] 0.3930068 1.6613513 1.1382402 0.8756698 -3.0353596 -1.3196505
ar_rmse <- apply(ar_data, 2, function(x){
tryCatch(
arima(x, order = c(1,0,0)) %>% rmse(),
error = function(e) e
)
})
ar_rmse
#> $NORTH
#> [1] 1.267652
#>
#> $YORKSANDTHEHUMBER
#> [1] 2.030874
#>
#> $NORTHWEST
#> [1] 0.6183697
#>
#> $EASTMIDS
#> [1] 1.243165
#>
#> $WESTMIDS
#> [1] 1.875138
#>
#> $EASTANGLIA
#> [1] 2.116871
#>
#> $OUTERSEAST
#> [1] 2.19358
#>
#> $OUTERMET
#> [1] 1.737381
#>
#> $LONDON
#> [1] 2.958653
#>
#> $SOUTHWEST
#> [1] 2.616094
#>
#> $WALES
#> [1] 2.344308
#>
#> $SCOTLAND
#> [1] 2.639797
#>
#> $NIRELAND
#> <simpleError in arima(x, order = c(1, 0, 0)): non-stationary AR part from CSS>
#>
#> $UK
#> [1] 1.62951
Created on 2022-08-18 by the reprex package (v2.0.1)
Now the same but using forecast::auto.arima
. There is no need to specify the models' orders.
ar_resid <- apply(ar_data, 2, function(x){
tryCatch(
forecast::auto.arima(x) %>% resid(),
error = function(e) e
)
})
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
ar_resid
#> NORTH YORKSANDTHEHUMBER NORTHWEST EASTMIDS WESTMIDS EASTANGLIA OUTERSEAST OUTERMET LONDON SOUTHWEST WALES SCOTLAND NIRELAND UK
#> [1,] -0.8241361 -1.5703682 0.006578944 -0.9458425 -0.5163443 -0.6260734 0.3404426 -0.7273886 0.008117191 0.235308 0.5118309 0.8157770 0.7060625 0.1374697
#> [2,] 0.6228562 -0.1239649 0.373619233 2.2707021 -0.4269549 1.2176315 1.1657880 1.3122139 2.189335426 1.777657 2.7727395 -0.2161805 1.1081342 1.5715926
#> [3,] 2.3860435 3.9413293 -0.451959068 0.9841569 3.4938259 1.1032508 3.1025809 2.0934935 -3.983534056 1.691935 1.8272867 1.0646217 0.5891794 1.7639987
#> [4,] -0.1437493 0.4666691 -0.910191104 -0.7068561 1.3946137 3.3578411 2.1945269 1.8094144 1.328518776 3.514432 1.4283043 4.3393513 -0.8754157 1.5974241
#> [5,] -0.3002640 -2.5521772 -0.998304057 -0.3160795 -1.9457665 -2.5265466 -2.5479024 -0.8980584 -6.349615081 -3.472416 -4.4293121 -4.6165599 2.2279242 -2.3967132
#> [6,] -1.7407504 -0.1614883 0.108300851 -1.2860809 -1.9993739 -2.5261034 -4.2554359 -3.5896748 0.851625459 -3.746916 -2.1108493 -1.3870096 -3.7558847 -2.6737718
ar_rmse <- apply(ar_data, 2, function(x){
tryCatch(
forecast::auto.arima(x) %>% class(),
error = function(e) e
)
})
ar_rmse
#> NORTH YORKSANDTHEHUMBER NORTHWEST EASTMIDS WESTMIDS EASTANGLIA OUTERSEAST OUTERMET LONDON SOUTHWEST WALES
#> [1,] "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA"
#> [2,] "ARIMA" "ARIMA" "ARIMA" "ARIMA" "ARIMA" "ARIMA" "ARIMA" "ARIMA" "ARIMA" "ARIMA" "ARIMA"
#> [3,] "Arima" "Arima" "Arima" "Arima" "Arima" "Arima" "Arima" "Arima" "Arima" "Arima" "Arima"
#> SCOTLAND NIRELAND UK
#> [1,] "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA"
#> [2,] "ARIMA" "ARIMA" "ARIMA"
#> [3,] "Arima" "Arima" "Arima"
Created on 2022-08-18 by the reprex package (v2.0.1)