Rolling mean in fixed intervals in R-CodePudding

I would like to calculate a rolling average for a fixed interval in my dataset.

start end value value_per_unit
4     20  20    1.25
21    33  40    3.33
34    45  30    2.73
46    60  10    0.71

I would like to obtain the value for a fixed interval of 10 as follows:

start end value_per_unit
4    13   1.25
14   23   1.874
24   33   3.33
.
.

Where:

for the interval c(4, 14): (1.25*10)/10 = 1.25
for the interval c(15, 25): (1.25*7   3.33*3)/10 = 1.874
for the interval c(26,36): (10*3.33)/10 = 3.33

Is it possible to achieve this in R?

CodePudding user response：

There seems to be some confusion in your question, however this approach gives the desired output:

library(dplyr, warn.conflicts = FALSE)

# Create the test data
df <- tribble(
  ~start, ~end, ~value, ~value_per_unit,
  4     ,   20,     20,            1.25,
  21    ,   33,     40,            3.33,
  34    ,   45,     30,            2.73,
  46    ,   60,     10,            0.71
)

# Some data prep to apply the transformation
df1 <- df %>% 
  rowwise() %>% 
  mutate(row = list(seq(from = start, to = end))) %>% 
  ungroup() %>% 
  tidyr::unnest(row) %>% 
  mutate(group = (row - 4) %/% 10)

# This hopefully illustrates what's happening here:
print(df1, n = 20)
#> # A tibble: 57 x 6
#>    start   end value value_per_unit   row group
#>    <dbl> <dbl> <dbl>          <dbl> <int> <dbl>
#>  1     4    20    20           1.25     4     0
#>  2     4    20    20           1.25     5     0
#>  3     4    20    20           1.25     6     0
#>  4     4    20    20           1.25     7     0
#>  5     4    20    20           1.25     8     0
#>  6     4    20    20           1.25     9     0
#>  7     4    20    20           1.25    10     0
#>  8     4    20    20           1.25    11     0
#>  9     4    20    20           1.25    12     0
#> 10     4    20    20           1.25    13     0
#> 11     4    20    20           1.25    14     1
#> 12     4    20    20           1.25    15     1
#> 13     4    20    20           1.25    16     1
#> 14     4    20    20           1.25    17     1
#> 15     4    20    20           1.25    18     1
#> 16     4    20    20           1.25    19     1
#> 17     4    20    20           1.25    20     1
#> 18    21    33    40           3.33    21     1
#> 19    21    33    40           3.33    22     1
#> 20    21    33    40           3.33    23     1
#> # ... with 37 more rows

# Summarise to create new values of start, end and value_per_unit
df1 %>% 
  group_by(group) %>% 
  summarise(
    start = min(row), 
    end = max(row), 
    value_per_unit = mean(value_per_unit), 
    .groups = "drop"
  ) %>% 
  select(-group)
#> # A tibble: 6 x 3
#>   start   end value_per_unit
#>   <int> <int>          <dbl>
#> 1     4    13           1.25
#> 2    14    23           1.87
#> 3    24    33           3.33
#> 4    34    43           2.73
#> 5    44    53           1.11
#> 6    54    60           0.71

^{Created on 2021-10-12 by the reprex package (v2.0.0)}

CodePudding user response：

Not very concise but this could help you too:

library(tidyverse)

seq(4, 60, 10) %>%
  enframe(value = "start") %>%
  mutate(end = ifelse(start   9 > max(df$end), max(df$end), start   9)) %>%
  {map2(.$start, .$end, ~ c(.x:.y))} %>%
  map_dfc(~ df %>%
            rowwise() %>%
            mutate(cnt = length(intersect(.x, seq(start, end, 1)))) %>%
            pull(cnt)) %>%
  bind_cols(as_tibble(df$value_per_unit)) %>%
  mutate(across(matches("\\d "), ~ sum(.x * value) / 10)) %>%
  select(-value) %>%
  slice_head() %>%
  pivot_longer(everything(), names_to = "name", values_to = "weighted_avg") %>%
  mutate(name = gsub("\\.{3}", "", name))

# A tibble: 6 x 2
  name  weighted_avg
  <chr>        <dbl>
1 1            1.25 
2 2            1.87 
3 3            3.33 
4 4            2.73 
5 5            1.11 
6 6            0.497