R: Calculate covariance for a rolling window and for different groups-CodePudding

I would like to calculate the rolling covariance for my panel dataset.

The data looks like this:

structure(list(Name = c("A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C"), Date = c("01.08.2018", "02.08.2018", "03.08.2018", 
"04.08.2018", "05.08.2018", "06.08.2018", "07.08.2018", "08.08.2018", 
"09.08.2018", "10.08.2018", "11.08.2018", "12.08.2018", "13.08.2018", 
"14.08.2018", "15.08.2018", "16.08.2018", "17.08.2018", "18.08.2018", 
"01.08.2018", "02.08.2018", "03.08.2018", "04.08.2018", "05.08.2018", 
"06.08.2018", "07.08.2018", "08.08.2018", "09.08.2018", "10.08.2018", 
"11.08.2018", "12.08.2018", "13.08.2018", "14.08.2018", "15.08.2018", 
"16.08.2018", "17.08.2018", "18.08.2018", "01.08.2018", "02.08.2018", 
"03.08.2018", "04.08.2018", "05.08.2018", "06.08.2018", "07.08.2018", 
"08.08.2018", "09.08.2018", "10.08.2018", "11.08.2018", "12.08.2018", 
"13.08.2018", "14.08.2018", "15.08.2018", "16.08.2018", "17.08.2018", 
"18.08.2018", "19.08.2018", "20.08.2018", "21.08.2018", "22.08.2018", 
"23.08.2018", "24.08.2018", "25.08.2018", "26.08.2018", "27.08.2018"
), Y = c(-0.021104989, 0.005829159, -0.005993838, 0.012343494, 
0.000735194, -0.005313411, -0.023474922, -0.006807478, -0.002674863, 
-0.004020429, -0.001613125, 0.005300896, 0.018783846, 0.009664223, 
-0.00810932, 0.00757471, 0.008253359, 0.003760981, -0.001052405, 
-0.007286345, -0.008488191, -0.002538641, -0.009791452, 0.000446472, 
-0.002980809, -0.009180535, 0.008837298, -0.0027743, 0.006254873, 
0.001064582, 0.00178255, 0.005754829, 0.004967109, 0.005663851, 
0.012421897, 0.002893901, -0.000674701, 0.005609272, -0.002664995, 
-0.004614426, -0.020395375, 0.000400389, -0.007096134, 0.003788953, 
-0.004816989, -0.012074368, -0.016623213, 0.002353228, -0.003833742, 
-0.018048155, -0.003742604, 0.001912499, 0.012315676, -0.011545789, 
-0.008412867, -0.008541933, -0.009458445, -0.016025502, -0.012383462, 
-2.21e-05, -0.014338361, 0.016535732, -0.000234764), X = c(-0.000674701, 
0.005609272, -0.002664995, -0.004614426, -0.020395375, 0.000400389, 
-0.007096134, 0.003788953, -0.004816989, -0.012074368, -0.016623213, 
-0.000674701, 0.005609272, -0.002664995, -0.004614426, -0.020395375, 
0.000400389, -0.007096134, 0.003788953, -0.004816989, -0.012074368, 
-0.016623213, -0.000674701, 0.005609272, -0.002664995, -0.004614426, 
-0.020395375, 0.000400389, -0.007096134, 0.003788953, -0.004816989, 
-0.012074368, -0.016623213, -0.000674701, 0.005609272, -0.002664995, 
-0.004614426, -0.020395375, 0.000400389, -0.007096134, 0.003788953, 
-0.004816989, -0.012074368, -0.016623213, -0.000674701, 0.005609272, 
-0.002664995, -0.004614426, -0.020395375, 0.000400389, -0.007096134, 
0.003788953, -0.004816989, -0.012074368, -0.016623213, -0.000674701, 
0.005609272, -0.002664995, -0.004614426, -0.020395375, 0.000400389, 
-0.007096134, 0.003788953)), class = "data.frame", row.names = c(NA, 
-63L))

My real data consists of over 1000 observations per group. Now I would like to calculate the covariance between X and Y with a rolling time window of 250 observations for each group and for each Date. In order not to lose too many observations, I would like that I also have a covariance even if there are less than 250 prior observations available. In such cases just the the maximum amount of observations that are possible, e.g., in row 4 for Name A just use the three prior observations.

I have come so far:

Data <- Data %>%
group_by(Name) %>%
mutate(Covariance=cov(X,Y))

However, how can I add the rolling time window?

I appreciate a lot your help.

CodePudding user response：

You could do this using slide2_dbl from the slide package, which works nicely with dplyr:

library(dplyr)
Data %>%
  # mutate(Date = as.Date(Date, "%d.%m.%Y")) %>% # optional, might help later
                                                 # to make Date into a date
  group_by(Name) %>%
  mutate(cor_roll = slider::slide2_dbl(
    .x = X,
    .y = Y,
    .f = ~cor(.x, .y),
    .before = 249,     # correlation using current row and the 249 preceding rows
    .complete = FALSE  # sequences shorter than 250 ok to calc on
  )) %>%
  ungroup()

Result

# A tibble: 63 × 5
   Name  Date               Y         X  cor_roll
   <chr> <chr>          <dbl>     <dbl>     <dbl>
 1 A     01.08.2018 -0.0211   -0.000675 NA       
 2 A     02.08.2018  0.00583   0.00561   1       
 3 A     03.08.2018 -0.00599  -0.00266   0.677   
 4 A     04.08.2018  0.0123   -0.00461   0.00368 
 5 A     05.08.2018  0.000735 -0.0204   -0.0937  
 6 A     06.08.2018 -0.00531   0.000400 -0.120   
 7 A     07.08.2018 -0.0235   -0.00710  -0.000141
 8 A     08.08.2018 -0.00681   0.00379  -0.0155  
 9 A     09.08.2018 -0.00267  -0.00482  -0.0211  
10 A     10.08.2018 -0.00402  -0.0121   -0.0316  
# … with 53 more rows

CodePudding user response：

You can also use the roll package. For example, a rolling window of size 4 can be achieved like so:

library(dplyr)
library(roll)
df %>%
  group_by(Name) %>% 
  mutate(rcov = roll_cov(X, Y, width = 4, min_obs = 1))
  ungroup()

# A tibble: 63 × 5
   Name  Date               Y         X         rcov
   <chr> <chr>          <dbl>     <dbl>        <dbl>
 1 A     01.08.2018 -0.0211   -0.000675 NA          
 2 A     02.08.2018  0.00583   0.00561   0.0000846  
 3 A     03.08.2018 -0.00599  -0.00266   0.0000395  
 4 A     04.08.2018  0.0123   -0.00461   0.000000240
 5 A     05.08.2018  0.000735 -0.0204    0.0000160  
 6 A     06.08.2018 -0.00531   0.000400 -0.0000153  
 7 A     07.08.2018 -0.0235   -0.00710  -0.0000107  
 8 A     08.08.2018 -0.00681   0.00379  -0.0000265  
 9 A     09.08.2018 -0.00267  -0.00482   0.0000259  
10 A     10.08.2018 -0.00402  -0.0121    0.00000516 
# … with 53 more rows