I would like to calculate the rolling covariance for my panel dataset.
The data looks like this:
structure(list(Name = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C"), Date = c("01.08.2018", "02.08.2018", "03.08.2018",
"04.08.2018", "05.08.2018", "06.08.2018", "07.08.2018", "08.08.2018",
"09.08.2018", "10.08.2018", "11.08.2018", "12.08.2018", "13.08.2018",
"14.08.2018", "15.08.2018", "16.08.2018", "17.08.2018", "18.08.2018",
"01.08.2018", "02.08.2018", "03.08.2018", "04.08.2018", "05.08.2018",
"06.08.2018", "07.08.2018", "08.08.2018", "09.08.2018", "10.08.2018",
"11.08.2018", "12.08.2018", "13.08.2018", "14.08.2018", "15.08.2018",
"16.08.2018", "17.08.2018", "18.08.2018", "01.08.2018", "02.08.2018",
"03.08.2018", "04.08.2018", "05.08.2018", "06.08.2018", "07.08.2018",
"08.08.2018", "09.08.2018", "10.08.2018", "11.08.2018", "12.08.2018",
"13.08.2018", "14.08.2018", "15.08.2018", "16.08.2018", "17.08.2018",
"18.08.2018", "19.08.2018", "20.08.2018", "21.08.2018", "22.08.2018",
"23.08.2018", "24.08.2018", "25.08.2018", "26.08.2018", "27.08.2018"
), Y = c(-0.021104989, 0.005829159, -0.005993838, 0.012343494,
0.000735194, -0.005313411, -0.023474922, -0.006807478, -0.002674863,
-0.004020429, -0.001613125, 0.005300896, 0.018783846, 0.009664223,
-0.00810932, 0.00757471, 0.008253359, 0.003760981, -0.001052405,
-0.007286345, -0.008488191, -0.002538641, -0.009791452, 0.000446472,
-0.002980809, -0.009180535, 0.008837298, -0.0027743, 0.006254873,
0.001064582, 0.00178255, 0.005754829, 0.004967109, 0.005663851,
0.012421897, 0.002893901, -0.000674701, 0.005609272, -0.002664995,
-0.004614426, -0.020395375, 0.000400389, -0.007096134, 0.003788953,
-0.004816989, -0.012074368, -0.016623213, 0.002353228, -0.003833742,
-0.018048155, -0.003742604, 0.001912499, 0.012315676, -0.011545789,
-0.008412867, -0.008541933, -0.009458445, -0.016025502, -0.012383462,
-2.21e-05, -0.014338361, 0.016535732, -0.000234764), X = c(-0.000674701,
0.005609272, -0.002664995, -0.004614426, -0.020395375, 0.000400389,
-0.007096134, 0.003788953, -0.004816989, -0.012074368, -0.016623213,
-0.000674701, 0.005609272, -0.002664995, -0.004614426, -0.020395375,
0.000400389, -0.007096134, 0.003788953, -0.004816989, -0.012074368,
-0.016623213, -0.000674701, 0.005609272, -0.002664995, -0.004614426,
-0.020395375, 0.000400389, -0.007096134, 0.003788953, -0.004816989,
-0.012074368, -0.016623213, -0.000674701, 0.005609272, -0.002664995,
-0.004614426, -0.020395375, 0.000400389, -0.007096134, 0.003788953,
-0.004816989, -0.012074368, -0.016623213, -0.000674701, 0.005609272,
-0.002664995, -0.004614426, -0.020395375, 0.000400389, -0.007096134,
0.003788953, -0.004816989, -0.012074368, -0.016623213, -0.000674701,
0.005609272, -0.002664995, -0.004614426, -0.020395375, 0.000400389,
-0.007096134, 0.003788953)), class = "data.frame", row.names = c(NA,
-63L))
My real data consists of over 1000 observations per group. Now I would like to calculate the covariance between X and Y with a rolling time window of 250 observations for each group and for each Date. In order not to lose too many observations, I would like that I also have a covariance even if there are less than 250 prior observations available. In such cases just the the maximum amount of observations that are possible, e.g., in row 4 for Name A just use the three prior observations.
I have come so far:
Data <- Data %>%
group_by(Name) %>%
mutate(Covariance=cov(X,Y))
However, how can I add the rolling time window?
I appreciate a lot your help.
CodePudding user response:
You could do this using slide2_dbl
from the slide
package, which works nicely with dplyr
:
library(dplyr)
Data %>%
# mutate(Date = as.Date(Date, "%d.%m.%Y")) %>% # optional, might help later
# to make Date into a date
group_by(Name) %>%
mutate(cor_roll = slider::slide2_dbl(
.x = X,
.y = Y,
.f = ~cor(.x, .y),
.before = 249, # correlation using current row and the 249 preceding rows
.complete = FALSE # sequences shorter than 250 ok to calc on
)) %>%
ungroup()
Result
# A tibble: 63 × 5
Name Date Y X cor_roll
<chr> <chr> <dbl> <dbl> <dbl>
1 A 01.08.2018 -0.0211 -0.000675 NA
2 A 02.08.2018 0.00583 0.00561 1
3 A 03.08.2018 -0.00599 -0.00266 0.677
4 A 04.08.2018 0.0123 -0.00461 0.00368
5 A 05.08.2018 0.000735 -0.0204 -0.0937
6 A 06.08.2018 -0.00531 0.000400 -0.120
7 A 07.08.2018 -0.0235 -0.00710 -0.000141
8 A 08.08.2018 -0.00681 0.00379 -0.0155
9 A 09.08.2018 -0.00267 -0.00482 -0.0211
10 A 10.08.2018 -0.00402 -0.0121 -0.0316
# … with 53 more rows
CodePudding user response:
You can also use the roll
package.
For example, a rolling window of size 4 can be achieved like so:
library(dplyr)
library(roll)
df %>%
group_by(Name) %>%
mutate(rcov = roll_cov(X, Y, width = 4, min_obs = 1))
ungroup()
# A tibble: 63 × 5
Name Date Y X rcov
<chr> <chr> <dbl> <dbl> <dbl>
1 A 01.08.2018 -0.0211 -0.000675 NA
2 A 02.08.2018 0.00583 0.00561 0.0000846
3 A 03.08.2018 -0.00599 -0.00266 0.0000395
4 A 04.08.2018 0.0123 -0.00461 0.000000240
5 A 05.08.2018 0.000735 -0.0204 0.0000160
6 A 06.08.2018 -0.00531 0.000400 -0.0000153
7 A 07.08.2018 -0.0235 -0.00710 -0.0000107
8 A 08.08.2018 -0.00681 0.00379 -0.0000265
9 A 09.08.2018 -0.00267 -0.00482 0.0000259
10 A 10.08.2018 -0.00402 -0.0121 0.00000516
# … with 53 more rows