Home > database >  For Loop for RMSE in R
For Loop for RMSE in R

Time:08-19

I am working on a data frame where I have the data on different states across the UK. I have applied the ARIMA model to the data frame and am interested in getting the RMSE and Error for each column/state. How can I do this using for loop for all the 14 columns so that I won't have to do it manually?

Data:

structure(list(Date = structure(c(289094400, 297043200, 304992000, 
312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305, 
4.75015422578655, 4.59363957597172, 3.15315315315317), YORKSANDTHEHUMBER = c(4.0121120363361, 
5.45851528384282, 9.52380952380951, 6.04914933837431, 3.03030303030299, 
5.42099192618225), NORTHWEST = c(6.57894736842105, 6.95256660168939, 
6.50060753341436, 5.5904164289789, 4.59211237169096, 4.70041322314051
), EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127, 
5.22388059701494, 5.61465721040189, 4.64465584778958), WESTMIDS = c(4.65838509316771, 
4.74777448071216, 8.66855524079319, 6.56934306569344, 3.22896281800389, 
3.17535545023698), EASTANGLIA = c(6.74525212835624, 8.58895705521476, 
8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835
), OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589, 
8.56512141280351, 3.82269215128102, 2.11515863689776), OUTERMET = c(4.54545454545458, 
6.58505698607005, 7.36633663366336, 7.08225746956843, 4.3747847054771, 
1.68316831683168), LONDON = c(8.11719500480309, 10.3065304309196, 
6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978
), SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407, 
9.45489628557649, 2.46804759806079, 2.19354838709679), WALES = c(6.09418282548476, 
8.35509138381203, 7.40963855421687, 7.01065619742007, 1.15303983228513, 
3.47150259067357), SCOTLAND = c(5.15222482435597, 4.12026726057908, 
5.40106951871658, 8.67579908675796, -0.280112044817908, 2.94943820224719
), NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145, 
2.96397628818967, 6.06731620903454, 0.0835073068893502), UK = c(5.76890543055322, 
7.20302836425676, 7.39543442582184, 7.22885986848197, 3.23472252213347, 
2.95766398929048)), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))

Code:

in_sample <- pc %>% 
  dplyr::filter(Date < '2020-03-01')
st(in_sample)

out_sample <-pc %>% 
  dplyr::filter(Date >= '2020-03-01')

st(out_sample)

ar_data = in_sample
ar_data %<>% dplyr::select(-Date)
ar_model4=apply(ar_data,2,function(x){
  return(
    list(
      
      summary(arima(x, order = c(1,0,0))) %>% 
        forecast::forecast(h = 4, level = 0.95)
      
    ))   
  
} )
ar_model4

names(ar_data)

error <- out_sample$NORTH[1:4]-ar_model4[["NORTH"]][[1]][["mean"]]
sqrt(mean(error^2))

CodePudding user response:

There is no need for summary or to compute the models' residuals by hand, there is a resid method for objects of class "Arima".
Note that tryCatch is needed because with the posted data there will be an error for NIRELAND.

pc <-
  structure(list(
    Date = structure(c(289094400, 297043200, 304992000, 
                       312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct", 
                                                                                  "POSIXt")), 
    NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305, 
              4.75015422578655, 4.59363957597172, 3.15315315315317), 
    YORKSANDTHEHUMBER = c(4.0121120363361, 5.45851528384282, 9.52380952380951, 
                          6.04914933837431, 3.03030303030299, 5.42099192618225), 
    NORTHWEST = c(6.57894736842105, 6.95256660168939, 6.50060753341436, 
                  5.5904164289789, 4.59211237169096, 4.70041322314051), 
    EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127, 
                 5.22388059701494, 5.61465721040189, 4.64465584778958), 
    WESTMIDS = c(4.65838509316771, 4.74777448071216, 8.66855524079319, 
                 6.56934306569344, 3.22896281800389, 3.17535545023698), 
    EASTANGLIA = c(6.74525212835624, 8.58895705521476, 
                   8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835), 
    OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589, 
                   8.56512141280351, 3.82269215128102, 2.11515863689776), 
    OUTERMET = c(4.54545454545458, 6.58505698607005, 7.36633663366336, 
                 7.08225746956843, 4.3747847054771, 1.68316831683168), 
    LONDON = c(8.11719500480309, 10.3065304309196, 
               6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978), 
    SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407, 
                  9.45489628557649, 2.46804759806079, 2.19354838709679), 
    WALES = c(6.09418282548476, 8.35509138381203, 7.40963855421687, 7.01065619742007, 
              1.15303983228513, 3.47150259067357), 
    SCOTLAND = c(5.15222482435597, 4.12026726057908, 5.40106951871658, 
                 8.67579908675796, -0.280112044817908, 2.94943820224719), 
    NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145, 
                 2.96397628818967, 6.06731620903454, 0.0835073068893502), 
    UK = c(5.76890543055322, 7.20302836425676, 7.39543442582184, 7.22885986848197, 
           3.23472252213347, 2.95766398929048)), 
    row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"))

suppressPackageStartupMessages({
  library(dplyr)
  library(magrittr)
})

in_sample <- pc %>% 
  dplyr::filter(Date < '2020-03-01')

ar_data <- in_sample
ar_data %<>% dplyr::select(-Date)

mse <- function(x, na.rm = FALSE) mean(resid(x)^2, na.rm = na.rm)
rmse <- function(x, na.rm = FALSE) sqrt(mse(x, na.rm = na.rm))

ar_resid <- apply(ar_data, 2, function(x){
  tryCatch(
    arima(x, order = c(1,0,0)) %>% resid(),
    error = function(e) e
  )
})
ar_resid
#> $NORTH
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1] -0.7285983  0.8359489  2.3471268 -0.4897442 -0.2056680 -1.6188957
#> 
#> $YORKSANDTHEHUMBER
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1] -1.56097644 -0.06906896  3.95344669  0.35855202 -2.55752830 -0.07755450
#> 
#> $NORTHWEST
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1]  0.5624819  0.5865668 -0.1453380 -0.7168848 -1.0332002 -0.1768894
#> 
#> $EASTMIDS
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1] -0.9447732  2.2744683  0.9787282 -0.7086071 -0.3129965 -1.2841150
#> 
#> $WESTMIDS
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1] -0.3927584 -0.2219119  3.6784580  0.6839904 -2.1770641 -1.4679424
#> 
#> $EASTANGLIA
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1] -0.5710381  1.3238904  1.0367394  3.3020482 -2.7936130 -2.2417548
#> 
#> $OUTERSEAST
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1]  0.7716909  1.2299292  2.6972779  0.6876048 -3.5383368 -2.5484469
#> 
#> $OUTERMET
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1] -0.08399587  1.99441267  1.65175670  0.93714852 -1.61378065 -2.81342732
#> 
#> $LONDON
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1]  2.103406  3.567926 -1.289222  1.628852 -5.250883 -1.865562
#> 
#> $SOUTHWEST
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1]  0.4202758  1.8571201  1.3375759  3.1841848 -4.3152846 -2.6245668
#> 
#> $WALES
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1]  0.5866757  2.6842648  1.0262762  0.9252577 -4.8066176 -0.6420999
#> 
#> $SCOTLAND
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1]  0.78420411 -0.07735357  1.00311288  4.52648693 -3.79369498 -2.30277210
#> 
#> $NIRELAND
#> <simpleError in arima(x, order = c(1, 0, 0)): non-stationary AR part from CSS>
#> 
#> $UK
#> Time Series:
#> Start = 1 
#> End = 6 
#> Frequency = 1 
#> [1]  0.3930068  1.6613513  1.1382402  0.8756698 -3.0353596 -1.3196505

ar_rmse <- apply(ar_data, 2, function(x){
  tryCatch(
    arima(x, order = c(1,0,0)) %>% rmse(),
    error = function(e) e
  )
})
ar_rmse
#> $NORTH
#> [1] 1.267652
#> 
#> $YORKSANDTHEHUMBER
#> [1] 2.030874
#> 
#> $NORTHWEST
#> [1] 0.6183697
#> 
#> $EASTMIDS
#> [1] 1.243165
#> 
#> $WESTMIDS
#> [1] 1.875138
#> 
#> $EASTANGLIA
#> [1] 2.116871
#> 
#> $OUTERSEAST
#> [1] 2.19358
#> 
#> $OUTERMET
#> [1] 1.737381
#> 
#> $LONDON
#> [1] 2.958653
#> 
#> $SOUTHWEST
#> [1] 2.616094
#> 
#> $WALES
#> [1] 2.344308
#> 
#> $SCOTLAND
#> [1] 2.639797
#> 
#> $NIRELAND
#> <simpleError in arima(x, order = c(1, 0, 0)): non-stationary AR part from CSS>
#> 
#> $UK
#> [1] 1.62951

Created on 2022-08-18 by the reprex package (v2.0.1)

Now the same but using forecast::auto.arima. There is no need to specify the models' orders.

ar_resid <- apply(ar_data, 2, function(x){
  tryCatch(
    forecast::auto.arima(x) %>% resid(),
    error = function(e) e
  )
})
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
ar_resid
#>           NORTH YORKSANDTHEHUMBER    NORTHWEST   EASTMIDS   WESTMIDS EASTANGLIA OUTERSEAST   OUTERMET       LONDON SOUTHWEST      WALES   SCOTLAND   NIRELAND         UK
#> [1,] -0.8241361        -1.5703682  0.006578944 -0.9458425 -0.5163443 -0.6260734  0.3404426 -0.7273886  0.008117191  0.235308  0.5118309  0.8157770  0.7060625  0.1374697
#> [2,]  0.6228562        -0.1239649  0.373619233  2.2707021 -0.4269549  1.2176315  1.1657880  1.3122139  2.189335426  1.777657  2.7727395 -0.2161805  1.1081342  1.5715926
#> [3,]  2.3860435         3.9413293 -0.451959068  0.9841569  3.4938259  1.1032508  3.1025809  2.0934935 -3.983534056  1.691935  1.8272867  1.0646217  0.5891794  1.7639987
#> [4,] -0.1437493         0.4666691 -0.910191104 -0.7068561  1.3946137  3.3578411  2.1945269  1.8094144  1.328518776  3.514432  1.4283043  4.3393513 -0.8754157  1.5974241
#> [5,] -0.3002640        -2.5521772 -0.998304057 -0.3160795 -1.9457665 -2.5265466 -2.5479024 -0.8980584 -6.349615081 -3.472416 -4.4293121 -4.6165599  2.2279242 -2.3967132
#> [6,] -1.7407504        -0.1614883  0.108300851 -1.2860809 -1.9993739 -2.5261034 -4.2554359 -3.5896748  0.851625459 -3.746916 -2.1108493 -1.3870096 -3.7558847 -2.6737718

ar_rmse <- apply(ar_data, 2, function(x){
  tryCatch(
    forecast::auto.arima(x) %>% class(),
    error = function(e) e
  )
})
ar_rmse
#>      NORTH            YORKSANDTHEHUMBER NORTHWEST        EASTMIDS         WESTMIDS         EASTANGLIA       OUTERSEAST       OUTERMET         LONDON           SOUTHWEST        WALES           
#> [1,] "forecast_ARIMA" "forecast_ARIMA"  "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA"
#> [2,] "ARIMA"          "ARIMA"           "ARIMA"          "ARIMA"          "ARIMA"          "ARIMA"          "ARIMA"          "ARIMA"          "ARIMA"          "ARIMA"          "ARIMA"         
#> [3,] "Arima"          "Arima"           "Arima"          "Arima"          "Arima"          "Arima"          "Arima"          "Arima"          "Arima"          "Arima"          "Arima"         
#>      SCOTLAND         NIRELAND         UK              
#> [1,] "forecast_ARIMA" "forecast_ARIMA" "forecast_ARIMA"
#> [2,] "ARIMA"          "ARIMA"          "ARIMA"         
#> [3,] "Arima"          "Arima"          "Arima"

Created on 2022-08-18 by the reprex package (v2.0.1)

  • Related