Home > front end >  Problems with predict() using new data
Problems with predict() using new data

Time:06-17

My problem goes as follows. I have the following dataset.

This is the Ibex (Spanish stock market) index, ranging from 2000-01-01 to 2022-05-10.

tail(ibex, 3)
Date Open High Low Close Adj.Close Volume
2022-04-01 8482 8878 8345 8584 8584 3824235500
2022-05-01 8517 8651 8139 8139 8139 1454670100
2022-05-10 8219 8259 8139 8139 8139 204415904

dim(ibex) [1] 270 7

Step 1. I select Date and Open
# Parse the Date column, convert to a tibble, and keep only the first two
# columns (Date and Open).
ibex$Date <- as.Date(ibex$Date)
ibex <- as_tibble(ibex)
ibex.1 <- ibex[, c(1, 2)]

tail(ibex.1, 3)

Date Open
2022-04-01 8482
2022-05-01 8517
2022-05-10 8219

Step 2. I create a ts format

# Monthly series (frequency = 12) running from January 2000 to May 2022.
ibex.1 <- ts(
  data = ibex.1,
  start = c(2000, 1),
  end = c(2022, 5),
  frequency = 12
)

str(ibex.1)

Time-Series [1:269, 1:2] from 2000 to 2022: 10957 10988 11017 11048 11078 ...

  • attr(*, "dimnames")=List of 2 ..$ : NULL ..$ : chr [1:2] "Date" "Open"

tail(ibex.1, 3)

Date Open
Mar 2022 19052 8462.1
Apr 2022 19083 8481.7
May 2022 19113 8516.6

Step 3. I produce a spline model using cross validation
# Column 1 holds the (numeric) Date and column 2 the Open value; the smoothing
# parameter is chosen by cross-validation.
ibex.spl <- smooth.spline(x = ibex.1[, 1], y = ibex.1[, 2], cv = TRUE)

Step 4. I want to predict new Open values for June, July, August 2022, i.e., values which are not in the dataset. I do this:

# Monthly dates to predict for: June, July and August 2022.
nuevafecha <- seq(as.Date("2022-06-01"), as.Date("2022-08-01"), by = "1 month")
# Stored as a one-column tibble; the column is named `value` (see head() below).
nuevafecha <- as_tibble(nuevafecha)

head(nuevafecha)

value
2022-06-01
2022-07-01
2022-08-01

Step 5. And finally this:

# Append the three future dates to the series as additional rows.
ibex11 <- ibex.1 %>% add_row(Date=nuevafecha$value)

# NOTE(review): the new data is supplied under the name `ibex.1=` rather than
# `x=`. The output shown below only covers the original observations, which
# suggests this argument is not used as the prediction input -- confirm
# against the predict() method for smooth.spline objects.
l<- predict(ibex.spl, ibex.1=ibex11[270:273], se=TRUE)

tail(l$fit)

264 265 266 267 268 269
8354.129 8371.194 8391.543 8412.822 8439.675 8469.041

In other words: values 270:273, corresponding to June, July, August 2022 are not shown.

I just obtain predictions for the original values.

How can I get predictions for 270, 271, 272, 273?

CodePudding user response:

Make sure that your x value is numeric:

# Fit model
# smooth.spline() needs a numeric x, so the dates are stored as day counts.
dat$Date2 <- as.numeric(as.Date(dat$Date, format = "%Y-%m-%d"))
mymod <- smooth.spline(x = dat$Date2, y = dat$Open, cv = TRUE)

# Predict
class(mymod)
#> [1] "smooth.spline"
# New values must be passed through `x` and be on the same numeric scale as
# the x used for fitting.
x_new <- as.numeric(seq(as.Date("2022-06-01"), as.Date("2022-08-01"), by = "day"))
mypreds <- predict(mymod, x = x_new)

# Visualize results
# Axis limits cover the fitted, observed and predicted values together so that
# none of the three point sets is clipped.
plot(mymod, xlim = range(dat$Date2, x_new),
     ylim = range(dat$Open, mymod$y, mypreds$y),
     xlab = "date", ylab = "open value")
points(x = dat$Date2, y = dat$Open, col = "red")
points(mypreds$x, y = mypreds$y, col = "green")
legend("topleft",
       legend = c("fitted", "actual", "predicted"),
       pch = c(1, 1, 1),
       col = c("black", "red", "green"), inset = 0.05)

Created on 2022-06-15 by the reprex package (v2.0.1)

enter image description here

data (assign the following to `dat` before running the code above):

structure(list(Date = c("2020-01-02", "2020-01-03", "2020-01-06", 
"2020-01-07", "2020-01-08", "2020-01-09", "2020-01-10", "2020-01-13", 
"2020-01-14", "2020-01-15", "2020-01-16", "2020-01-17", "2020-01-20", 
"2020-01-21", "2020-01-22", "2020-01-23", "2020-01-24", "2020-01-27", 
"2020-01-28", "2020-01-29", "2020-01-30", "2020-01-31", "2020-02-03", 
"2020-02-04", "2020-02-05", "2020-02-06", "2020-02-07", "2020-02-10", 
"2020-02-11", "2020-02-12", "2020-02-13", "2020-02-14", "2020-02-17", 
"2020-02-18", "2020-02-19", "2020-02-20", "2020-02-21", "2020-02-24", 
"2020-02-25", "2020-02-26", "2020-02-27", "2020-02-28", "2020-03-02", 
"2020-03-03", "2020-03-04", "2020-03-05", "2020-03-06", "2020-03-09", 
"2020-03-10", "2020-03-11", "2020-03-12", "2020-03-13", "2020-03-16", 
"2020-03-17", "2020-03-18", "2020-03-19", "2020-03-20", "2020-03-23", 
"2020-03-24", "2020-03-25", "2020-03-26", "2020-03-27", "2020-03-30", 
"2020-03-31", "2020-04-01", "2020-04-02", "2020-04-03", "2020-04-06", 
"2020-04-07", "2020-04-08", "2020-04-09", "2020-04-14", "2020-04-15", 
"2020-04-16", "2020-04-17", "2020-04-20", "2020-04-21", "2020-04-22", 
"2020-04-23", "2020-04-24", "2020-04-27", "2020-04-28", "2020-04-29", 
"2020-04-30"), Open = c(9639.099609, 9631.200195, 9585.400391, 
9623.099609, 9535.099609, 9629.200195, 9611.299805, 9586.599609, 
9548.099609, 9521.200195, 9516.700195, 9616.099609, 9676.599609, 
9593.400391, 9622, 9549.400391, 9576.799805, 9447.099609, 9401.799805, 
9532.700195, 9460.799805, 9519.299805, 9404.400391, 9465.900391, 
9553.400391, 9767.099609, 9789.299805, 9790.599609, 9863.400391, 
9897.900391, 9893.900391, 9910.900391, 9979.200195, 9977.400391, 
10042, 10048.700195, 9893, 9649.799805, 9507, 9213, 9182.200195, 
8748.099609, 8910.200195, 8860.200195, 8827.799805, 8960.799805, 
8532.599609, 7884, 7815.600098, 7589.299805, 7040.799805, 6763.700195, 
6331, 6444.5, 6370.5, 6368.200195, 6645.399902, 6223.700195, 
6433.299805, 6985.899902, 6789, 6916.700195, 6748.600098, 6746.799805, 
6627.299805, 6633.700195, 6563.600098, 6807.899902, 6986.899902, 
6930.200195, 7049.299805, 7209.700195, 7065.600098, 6930.799805, 
6942.5, 6932.5, 6741.299805, 6685.100098, 6758.700195, 6641.299805, 
6780.899902, 6724.600098, 6829.399902, 7084.799805), High = c(9705.400391, 
9650.700195, 9618.200195, 9657.900391, 9604.299805, 9644.799805, 
9623.599609, 9586.599609, 9548.400391, 9530.200195, 9579.5, 9709.900391, 
9680.900391, 9620.700195, 9632.799805, 9604.700195, 9639.900391, 
9482.799805, 9488, 9574.299805, 9528.400391, 9545.099609, 9431.599609, 
9577.299805, 9721.900391, 9816.200195, 9816.200195, 9820.400391, 
9884, 9946.599609, 9910.099609, 9969.700195, 10022.200195, 10041.5, 
10100.200195, 10050.299805, 9946.400391, 9676.799805, 9518.900391, 
9362.799805, 9204.400391, 8818.599609, 8913.5, 9014.299805, 8963.700195, 
8962.5, 8542, 8022.899902, 8007.700195, 7717.100098, 7077.700195, 
7140.5, 6362.5, 6557.299805, 6524.200195, 6506.399902, 6769.899902, 
6472, 6717.299805, 7058.100098, 7033.200195, 6936.700195, 6789.100098, 
6802, 6687.799805, 6658.899902, 6632.5, 6874.899902, 7119.100098, 
6952.5, 7116.799805, 7209.700195, 7086.799805, 6950.899902, 7000.299805, 
6933.299805, 6760.899902, 6729.600098, 6797.600098, 6710.799805, 
6790.100098, 6857.899902, 7055.700195, 7128.399902), Low = c(9615.099609, 
9581.200195, 9492.700195, 9557.900391, 9520.299805, 9573.799805, 
9557.900391, 9507.400391, 9466, 9475, 9481.599609, 9611.299805, 
9623.099609, 9550, 9557, 9499.299805, 9552.5, 9357.799805, 9363.799805, 
9510.700195, 9439.299805, 9365.099609, 9361.5, 9462.400391, 9540.200195, 
9757, 9761.799805, 9773.5, 9817.799805, 9888.599609, 9794.299805, 
9905.200195, 9955.200195, 9967.799805, 10020.700195, 9931, 9843.5, 
9459.599609, 9248, 9030.700195, 8877.400391, 8582.700195, 8541.099609, 
8776.400391, 8745.900391, 8639.900391, 8310.400391, 7621.399902, 
7440.600098, 7364.600098, 6347, 6468.299805, 5814.5, 6083, 6174.100098, 
6228.299805, 6371.100098, 6148, 6403.600098, 6625.799805, 6759.100098, 
6661.899902, 6507, 6624.700195, 6565.200195, 6424.5, 6509.399902, 
6728.600098, 6903.600098, 6860.600098, 6918.700195, 7080.299805, 
6816.399902, 6733.100098, 6845, 6715, 6634.899902, 6654.200195, 
6713.600098, 6578.100098, 6667.299805, 6707.899902, 6816.799805, 
6918.299805), Close = c(9691.200195, 9646.599609, 9600.900391, 
9579.799805, 9591.400391, 9581.799805, 9573.599609, 9543.900391, 
9528.299805, 9511.700195, 9572.5, 9681.299805, 9658.799805, 9611.299805, 
9573.700195, 9518.5, 9562, 9366.299805, 9484.200195, 9546.700195, 
9477.900391, 9367.900391, 9404.700195, 9562.900391, 9717.799805, 
9811.299805, 9811, 9816, 9882.599609, 9940.400391, 9909.799805, 
9956.799805, 10022.200195, 10005.799805, 10083.599609, 9931, 
9886.200195, 9483.5, 9250.799805, 9316.799805, 8985.900391, 8723.200195, 
8741.5, 8811.599609, 8910, 8683, 8375.599609, 7708.700195, 7461.5, 
7436.399902, 6390.899902, 6629.600098, 6107.200195, 6498.5, 6274.799805, 
6395.799805, 6443.299805, 6230.200195, 6717.299805, 6942.399902, 
7033.200195, 6777.899902, 6659.899902, 6785.399902, 6579.399902, 
6574.100098, 6581.600098, 6844.299805, 7002, 6951.799805, 7070.600098, 
7108.600098, 6839.5, 6763.399902, 6875.799805, 6831.5, 6634.899902, 
6719.799805, 6746.5, 6613.899902, 6731.799805, 6836.399902, 7055.700195, 
6922.299805), Adj.Close = c(9691.200195, 9646.599609, 9600.900391, 
9579.799805, 9591.400391, 9581.799805, 9573.599609, 9543.900391, 
9528.299805, 9511.700195, 9572.5, 9681.299805, 9658.799805, 9611.299805, 
9573.700195, 9518.5, 9562, 9366.299805, 9484.200195, 9546.700195, 
9477.900391, 9367.900391, 9404.700195, 9562.900391, 9717.799805, 
9811.299805, 9811, 9816, 9882.599609, 9940.400391, 9909.799805, 
9956.799805, 10022.200195, 10005.799805, 10083.599609, 9931, 
9886.200195, 9483.5, 9250.799805, 9316.799805, 8985.900391, 8723.200195, 
8741.5, 8811.599609, 8910, 8683, 8375.599609, 7708.700195, 7461.5, 
7436.399902, 6390.899902, 6629.600098, 6107.200195, 6498.5, 6274.799805, 
6395.799805, 6443.299805, 6230.200195, 6717.299805, 6942.399902, 
7033.200195, 6777.899902, 6659.899902, 6785.399902, 6579.399902, 
6574.100098, 6581.600098, 6844.299805, 7002, 6951.799805, 7070.600098, 
7108.600098, 6839.5, 6763.399902, 6875.799805, 6831.5, 6634.899902, 
6719.799805, 6746.5, 6613.899902, 6731.799805, 6836.399902, 7055.700195, 
6922.299805), Volume = c(142379600L, 135130000L, 103520400L, 
133476100L, 133957600L, 151793500L, 132894500L, 124149000L, 143774600L, 
161995000L, 133841500L, 159823200L, 99959200L, 126448800L, 134567800L, 
169141000L, 154731100L, 164515500L, 154919400L, 187145800L, 164094200L, 
303553700L, 168665700L, 174133900L, 221922900L, 248416700L, 179586900L, 
132132500L, 167628300L, 199830200L, 161753500L, 127061800L, 105784200L, 
184042000L, 151866100L, 178849000L, 194954500L, 256063800L, 263346200L, 
299256300L, 341792600L, 497983900L, 355485000L, 351219900L, 321258300L, 
342803500L, 345901100L, 598769200L, 506034000L, 369150100L, 723609100L, 
473165800L, 583614700L, 388451800L, 398609500L, 346225500L, 486796200L, 
282033200L, 301211900L, 322631900L, 248806600L, 225316500L, 232005200L, 
258181200L, 235113700L, 233309800L, 215387500L, 225965400L, 260857500L, 
155910100L, 240348600L, 259986200L, 289924500L, 237962600L, 251272600L, 
186644500L, 200783000L, 177928500L, 163723500L, 182925300L, 147263100L, 
233782400L, 247762400L, 314979200L)), class = "data.frame", row.names = c(NA, 
-84L))
  • Related