I would like to perform a rolling max using an (adaptive) non-equi join in data.table
. Basing myself on
CodePudding user response:
Do you need to do it with canonical data.table
? This seems reasonable given your data sample:
ex[, value2 := mapply(function(a, b) max(value[a:b], na.rm = TRUE), row, end_window)]
ex
# time value end_window row rowid value2
# <POSc> <num> <int> <int> <int> <num>
# 1: 2022-03-14 08:20:02 13344.77 540 1 1 13361.77
# 2: 2022-03-14 08:20:02 13343.52 541 2 2 13361.77
# 3: 2022-03-14 08:20:03 13342.27 547 3 3 13361.77
# 4: 2022-03-14 08:20:04 13343.27 541 4 4 13361.77
# 5: 2022-03-14 08:20:04 13343.02 541 5 5 13361.77
# 6: 2022-03-14 08:20:05 13343.27 541 6 6 13361.77
# 7: 2022-03-14 08:20:05 13343.27 541 7 7 13361.77
# 8: 2022-03-14 08:20:06 13342.27 547 8 8 13361.77
# 9: 2022-03-14 08:20:06 13344.77 540 9 9 13361.77
# 10: 2022-03-14 08:20:07 13345.02 233 10 10 13361.77
# ---
# 613: 2022-03-14 08:26:10 13324.42 630 613 613 13324.92
# 614: 2022-03-14 08:26:11 13324.27 630 614 614 13324.92
# 615: 2022-03-14 08:26:11 13323.55 630 615 615 13324.92
# 616: 2022-03-14 08:26:11 13324.92 625 616 616 13324.92
# 617: 2022-03-14 08:26:12 13324.77 625 617 617 13324.77
# 618: 2022-03-14 08:26:12 13324.02 630 618 618 13324.02
# 619: 2022-03-14 08:26:13 13323.17 630 619 619 13323.17
# 620: 2022-03-14 08:26:13 13323.02 630 620 620 13323.02
# 621: 2022-03-14 08:26:14 13322.52 638 621 621 13322.52
# 622: 2022-03-14 08:26:15 13322.27 638 622 622 13322.27
CodePudding user response:
Taking only the first element of the last instance of end_window
seems to take care of the immediate problem, but a join isn't going to work well for a rolling max
. A simple grouping operation will have better performance:
ex[, end_window := pmin(end_window, .N)] # don't search beyond the extent of ex
# join solution
f1 <- function(ex) res1 <- ex[ex, .(rollmax = value, row, end_window), on = .(row >= row, row <= end_window)][, .(rollmax = max(rollmax), end_window = end_window[1]), row]
# grouping operation
f2 <- function(ex) res2 <- ex[, .(rollmax = max(ex$value[row:end_window]), end_window), row]
f1(ex)[596]
#> row rollmax end_window
#> 1: 596 13328.67 622
f2(ex)[596]
#> row rollmax end_window
#> 1: 596 13328.67 622
max(ex$value[596:622])
#> [1] 13328.67
microbenchmark::microbenchmark(f1 = f1(ex),
f2 = f2(ex),
check = "identical")
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> f1 4.3879 4.91275 5.576867 5.0952 5.34325 11.6638 100
#> f2 1.8610 1.93600 2.120508 1.9838 2.07520 6.6689 100