My problem is as follows. I have the following dataset.
It is the IBEX (Spanish stock market) index, with monthly values ranging from 2000-01-01 to 2022-05-01.
tail(ibex, 3)
Date | Open | High | Low | Close | Adj.Close | Volume |
---|---|---|---|---|---|---|
2022-04-01 | 8482 | 8878 | 8345 | 8584 | 8584 | 3824235500 |
2022-05-01 | 8517 | 8651 | 8139 | 8139 | 8139 | 1454670100 |
2022-05-10 | 8219 | 8259 | 8139 | 8139 | 8139 | 204415904 |
dim(ibex)
[1] 270 7
Step 1. I select Date and Open
# Parse the Date column into Date objects, then convert the data to a tibble.
ibex$Date <- as.Date(ibex$Date)
ibex <- as_tibble(ibex)
# Keep only the first two columns (Date and Open).
ibex.1 <- ibex[,c(1,2)]
tail(ibex.1, 3)
Date | Open |
---|---|
2022-04-01 | 8482 |
2022-05-01 | 8517 |
2022-05-10 | 8219 |
Step 2. I create a ts format
# Build a monthly (frequency = 12) series running from January 2000 to May 2022.
# NOTE(review): the Date column ends up stored as a plain number (a day count,
# e.g. 10957) inside the ts matrix -- see the str() output below.
ibex.1 <- ts(data = ibex.1, start = c(2000,1), end = c(2022,5), frequency = 12)
str(ibex.1)
Time-Series [1:269, 1:2] from 2000 to 2022: 10957 10988 11017 11048 11078 ...
- attr(*, "dimnames")=List of 2 ..$ : NULL ..$ : chr [1:2] "Date" "Open"
tail(ibex.1, 3)
Date | Open | |
---|---|---|
Mar 2022 | 19052 | 8462.1 |
Apr 2022 | 19083 | 8481.7 |
May 2022 | 19113 | 8516.6 |
Step 3. I produce a spline model using cross validation
# Fit a smoothing spline of Open (column 2) on the numeric Date (column 1);
# cv = TRUE picks the smoothing parameter by leave-one-out cross-validation.
ibex.spl <- smooth.spline(ibex.1[,1], ibex.1[,2], cv=TRUE)
Step 4. I want to predict new Open values for June, July, August 2022, i.e., values which are not in the dataset. I do this:
# Build the three month-start dates to predict for (June to August 2022).
nuevafecha <- seq(as.Date("2022-06-01"), as.Date("2022-08-01"), by = "1 month")
# as_tibble() on a bare vector yields a one-column tibble whose column is
# named `value` (see the printed output below).
nuevafecha <- as_tibble(nuevafecha)
head(nuevafecha)
value |
---|
2022-06-01 |
2022-07-01 |
2022-08-01 |
Step 5. And finally this:
# Append the new dates as extra rows, then ask the spline for predictions.
# NOTE(review): predict() for a smooth.spline fit takes new points through its
# `x` argument; `ibex.1=` is not matched to it, which is presumably why only
# the original points come back -- confirm against ?predict.smooth.spline.
ibex11 <- ibex.1 %>% add_row(Date=nuevafecha$value)
l<- predict(ibex.spl, ibex.1=ibex11[270:273], se=TRUE)
tail(l$fit)
264 | 265 | 266 | 267 | 268 | 269 |
---|---|---|---|---|---|
8354.129 | 8371.194 | 8391.543 | 8412.822 | 8439.675 | 8469.041 |
In other words, the values for rows 270:273, corresponding to June, July and August 2022, are not shown.
I only obtain predictions for the original values.
How can I get predictions for rows 270:273?
CodePudding user response:
Pass the new dates to `predict()` through its `x` argument, and make sure the x values are numeric both when fitting and when predicting:
# Fit model
# smooth.spline() needs a numeric x, so keep the dates as a day count.
dat$Date2 <- as.numeric(as.Date(dat$Date, format = "%Y-%m-%d"))
mymod <- smooth.spline(x = dat$Date2, y = dat$Open, cv = TRUE)

# Predict
class(mymod)
#> [1] "smooth.spline"
# New points are supplied through `x`, on the same numeric scale as the fit.
x_new <- as.numeric(
  seq(as.Date("2022-06-01"), as.Date("2022-08-01"), by = "day")
)
mypreds <- predict(mymod, x = x_new)

# Visualize results
# Both axis ranges span the observed AND the predicted values, so neither the
# data nor the predictions are clipped when one lies outside the other's range.
plot(mymod,
  xlim = range(dat$Date2, x_new),
  ylim = range(dat$Open, mypreds$y),
  xlab = "date", ylab = "open value"
)
points(x = dat$Date2, y = dat$Open, col = "red")
points(x = mypreds$x, y = mypreds$y, col = "green")
legend("topleft",
  legend = c("fitted", "actual", "predicted"),
  pch = c(1, 1, 1),
  col = c("black", "red", "green"),
  inset = 0.05
)
Created on 2022-06-15 by the reprex package (v2.0.1)
Sample data (assumed to be stored as `dat` in the code above):
structure(list(Date = c("2020-01-02", "2020-01-03", "2020-01-06",
"2020-01-07", "2020-01-08", "2020-01-09", "2020-01-10", "2020-01-13",
"2020-01-14", "2020-01-15", "2020-01-16", "2020-01-17", "2020-01-20",
"2020-01-21", "2020-01-22", "2020-01-23", "2020-01-24", "2020-01-27",
"2020-01-28", "2020-01-29", "2020-01-30", "2020-01-31", "2020-02-03",
"2020-02-04", "2020-02-05", "2020-02-06", "2020-02-07", "2020-02-10",
"2020-02-11", "2020-02-12", "2020-02-13", "2020-02-14", "2020-02-17",
"2020-02-18", "2020-02-19", "2020-02-20", "2020-02-21", "2020-02-24",
"2020-02-25", "2020-02-26", "2020-02-27", "2020-02-28", "2020-03-02",
"2020-03-03", "2020-03-04", "2020-03-05", "2020-03-06", "2020-03-09",
"2020-03-10", "2020-03-11", "2020-03-12", "2020-03-13", "2020-03-16",
"2020-03-17", "2020-03-18", "2020-03-19", "2020-03-20", "2020-03-23",
"2020-03-24", "2020-03-25", "2020-03-26", "2020-03-27", "2020-03-30",
"2020-03-31", "2020-04-01", "2020-04-02", "2020-04-03", "2020-04-06",
"2020-04-07", "2020-04-08", "2020-04-09", "2020-04-14", "2020-04-15",
"2020-04-16", "2020-04-17", "2020-04-20", "2020-04-21", "2020-04-22",
"2020-04-23", "2020-04-24", "2020-04-27", "2020-04-28", "2020-04-29",
"2020-04-30"), Open = c(9639.099609, 9631.200195, 9585.400391,
9623.099609, 9535.099609, 9629.200195, 9611.299805, 9586.599609,
9548.099609, 9521.200195, 9516.700195, 9616.099609, 9676.599609,
9593.400391, 9622, 9549.400391, 9576.799805, 9447.099609, 9401.799805,
9532.700195, 9460.799805, 9519.299805, 9404.400391, 9465.900391,
9553.400391, 9767.099609, 9789.299805, 9790.599609, 9863.400391,
9897.900391, 9893.900391, 9910.900391, 9979.200195, 9977.400391,
10042, 10048.700195, 9893, 9649.799805, 9507, 9213, 9182.200195,
8748.099609, 8910.200195, 8860.200195, 8827.799805, 8960.799805,
8532.599609, 7884, 7815.600098, 7589.299805, 7040.799805, 6763.700195,
6331, 6444.5, 6370.5, 6368.200195, 6645.399902, 6223.700195,
6433.299805, 6985.899902, 6789, 6916.700195, 6748.600098, 6746.799805,
6627.299805, 6633.700195, 6563.600098, 6807.899902, 6986.899902,
6930.200195, 7049.299805, 7209.700195, 7065.600098, 6930.799805,
6942.5, 6932.5, 6741.299805, 6685.100098, 6758.700195, 6641.299805,
6780.899902, 6724.600098, 6829.399902, 7084.799805), High = c(9705.400391,
9650.700195, 9618.200195, 9657.900391, 9604.299805, 9644.799805,
9623.599609, 9586.599609, 9548.400391, 9530.200195, 9579.5, 9709.900391,
9680.900391, 9620.700195, 9632.799805, 9604.700195, 9639.900391,
9482.799805, 9488, 9574.299805, 9528.400391, 9545.099609, 9431.599609,
9577.299805, 9721.900391, 9816.200195, 9816.200195, 9820.400391,
9884, 9946.599609, 9910.099609, 9969.700195, 10022.200195, 10041.5,
10100.200195, 10050.299805, 9946.400391, 9676.799805, 9518.900391,
9362.799805, 9204.400391, 8818.599609, 8913.5, 9014.299805, 8963.700195,
8962.5, 8542, 8022.899902, 8007.700195, 7717.100098, 7077.700195,
7140.5, 6362.5, 6557.299805, 6524.200195, 6506.399902, 6769.899902,
6472, 6717.299805, 7058.100098, 7033.200195, 6936.700195, 6789.100098,
6802, 6687.799805, 6658.899902, 6632.5, 6874.899902, 7119.100098,
6952.5, 7116.799805, 7209.700195, 7086.799805, 6950.899902, 7000.299805,
6933.299805, 6760.899902, 6729.600098, 6797.600098, 6710.799805,
6790.100098, 6857.899902, 7055.700195, 7128.399902), Low = c(9615.099609,
9581.200195, 9492.700195, 9557.900391, 9520.299805, 9573.799805,
9557.900391, 9507.400391, 9466, 9475, 9481.599609, 9611.299805,
9623.099609, 9550, 9557, 9499.299805, 9552.5, 9357.799805, 9363.799805,
9510.700195, 9439.299805, 9365.099609, 9361.5, 9462.400391, 9540.200195,
9757, 9761.799805, 9773.5, 9817.799805, 9888.599609, 9794.299805,
9905.200195, 9955.200195, 9967.799805, 10020.700195, 9931, 9843.5,
9459.599609, 9248, 9030.700195, 8877.400391, 8582.700195, 8541.099609,
8776.400391, 8745.900391, 8639.900391, 8310.400391, 7621.399902,
7440.600098, 7364.600098, 6347, 6468.299805, 5814.5, 6083, 6174.100098,
6228.299805, 6371.100098, 6148, 6403.600098, 6625.799805, 6759.100098,
6661.899902, 6507, 6624.700195, 6565.200195, 6424.5, 6509.399902,
6728.600098, 6903.600098, 6860.600098, 6918.700195, 7080.299805,
6816.399902, 6733.100098, 6845, 6715, 6634.899902, 6654.200195,
6713.600098, 6578.100098, 6667.299805, 6707.899902, 6816.799805,
6918.299805), Close = c(9691.200195, 9646.599609, 9600.900391,
9579.799805, 9591.400391, 9581.799805, 9573.599609, 9543.900391,
9528.299805, 9511.700195, 9572.5, 9681.299805, 9658.799805, 9611.299805,
9573.700195, 9518.5, 9562, 9366.299805, 9484.200195, 9546.700195,
9477.900391, 9367.900391, 9404.700195, 9562.900391, 9717.799805,
9811.299805, 9811, 9816, 9882.599609, 9940.400391, 9909.799805,
9956.799805, 10022.200195, 10005.799805, 10083.599609, 9931,
9886.200195, 9483.5, 9250.799805, 9316.799805, 8985.900391, 8723.200195,
8741.5, 8811.599609, 8910, 8683, 8375.599609, 7708.700195, 7461.5,
7436.399902, 6390.899902, 6629.600098, 6107.200195, 6498.5, 6274.799805,
6395.799805, 6443.299805, 6230.200195, 6717.299805, 6942.399902,
7033.200195, 6777.899902, 6659.899902, 6785.399902, 6579.399902,
6574.100098, 6581.600098, 6844.299805, 7002, 6951.799805, 7070.600098,
7108.600098, 6839.5, 6763.399902, 6875.799805, 6831.5, 6634.899902,
6719.799805, 6746.5, 6613.899902, 6731.799805, 6836.399902, 7055.700195,
6922.299805), Adj.Close = c(9691.200195, 9646.599609, 9600.900391,
9579.799805, 9591.400391, 9581.799805, 9573.599609, 9543.900391,
9528.299805, 9511.700195, 9572.5, 9681.299805, 9658.799805, 9611.299805,
9573.700195, 9518.5, 9562, 9366.299805, 9484.200195, 9546.700195,
9477.900391, 9367.900391, 9404.700195, 9562.900391, 9717.799805,
9811.299805, 9811, 9816, 9882.599609, 9940.400391, 9909.799805,
9956.799805, 10022.200195, 10005.799805, 10083.599609, 9931,
9886.200195, 9483.5, 9250.799805, 9316.799805, 8985.900391, 8723.200195,
8741.5, 8811.599609, 8910, 8683, 8375.599609, 7708.700195, 7461.5,
7436.399902, 6390.899902, 6629.600098, 6107.200195, 6498.5, 6274.799805,
6395.799805, 6443.299805, 6230.200195, 6717.299805, 6942.399902,
7033.200195, 6777.899902, 6659.899902, 6785.399902, 6579.399902,
6574.100098, 6581.600098, 6844.299805, 7002, 6951.799805, 7070.600098,
7108.600098, 6839.5, 6763.399902, 6875.799805, 6831.5, 6634.899902,
6719.799805, 6746.5, 6613.899902, 6731.799805, 6836.399902, 7055.700195,
6922.299805), Volume = c(142379600L, 135130000L, 103520400L,
133476100L, 133957600L, 151793500L, 132894500L, 124149000L, 143774600L,
161995000L, 133841500L, 159823200L, 99959200L, 126448800L, 134567800L,
169141000L, 154731100L, 164515500L, 154919400L, 187145800L, 164094200L,
303553700L, 168665700L, 174133900L, 221922900L, 248416700L, 179586900L,
132132500L, 167628300L, 199830200L, 161753500L, 127061800L, 105784200L,
184042000L, 151866100L, 178849000L, 194954500L, 256063800L, 263346200L,
299256300L, 341792600L, 497983900L, 355485000L, 351219900L, 321258300L,
342803500L, 345901100L, 598769200L, 506034000L, 369150100L, 723609100L,
473165800L, 583614700L, 388451800L, 398609500L, 346225500L, 486796200L,
282033200L, 301211900L, 322631900L, 248806600L, 225316500L, 232005200L,
258181200L, 235113700L, 233309800L, 215387500L, 225965400L, 260857500L,
155910100L, 240348600L, 259986200L, 289924500L, 237962600L, 251272600L,
186644500L, 200783000L, 177928500L, 163723500L, 182925300L, 147263100L,
233782400L, 247762400L, 314979200L)), class = "data.frame", row.names = c(NA,
-84L))