Home > OS >  Moving average by multiple group
Moving average by multiple group

Time:07-03

I have a following DF (demo). I would like to find the previous 3 month moving average of Amount column per ID, Year and Month.

       ID        YEAR       MONTH   AMOUNT    
1      ABC       2020        09      100       
2      ABC       2020        11      200       
3      ABC       2020        12      300        
4      ABC       2021        01      400      
5      ABC       2021        04      500       
6      PQR       2020        10      100      
7      PQR       2020        11      200      
8      PQR       2020        12      300       
9      PQR       2021        01      400      
10     PQR       2021        03      500       

Following is an attempt.

library(TTR)
library(dplyr)
DF %>% group_by(ID, YEAR, MONTH) %>% mutate(3MA = runMean(AMOUNT, 3))

resulting in error with n=3 is outside valid range.

Desired Output:

       ID        YEAR       MONTH   AMOUNT    3MA 
1      ABC       2020        09      100       NA
2      ABC       2020        11      200       NA
3      ABC       2020        12      300       NA 
4      ABC       2021        01      400      200 (100 200 300)/3
5      ABC       2021        04      500      300 (400 300 200)/3 
6      PQR       2020        10      100       NA
7      PQR       2020        11      200       NA
8      PQR       2020        12      300       NA 
9      PQR       2021        01      400      200 (100 200 300)/3
10     PQR       2021        03      500      300 (400 300 200)/3 

CodePudding user response:

An option using a sliding window:

library(tidyverse)
library(slider)

df <- tribble(
  ~id, ~year, ~month, ~amount,
  "ABC", 2020, 09, 100,
  "ABC", 2020, 11, 200,
  "ABC", 2020, 12, 300,
  "ABC", 2021, 01, 400,
  "ABC", 2021, 04, 500,
  "PQR", 2020, 10, 100,
  "PQR", 2020, 11, 200,
  "PQR", 2020, 12, 300,
  "PQR", 2021, 01, 400,
  "PQR", 2021, 03, 500
)

df |> 
  arrange(id, year, month) |> 
  group_by(id) |> 
  mutate(ma3 = slide_dbl(lag(amount), mean, .before = 2, complete = TRUE)) |> 
  ungroup() # if needed

#> # A tibble: 10 × 5
#>    id     year month amount   ma3
#>    <chr> <dbl> <dbl>  <dbl> <dbl>
#>  1 ABC    2020     9    100    NA
#>  2 ABC    2020    11    200    NA
#>  3 ABC    2020    12    300    NA
#>  4 ABC    2021     1    400   200
#>  5 ABC    2021     4    500   300
#>  6 PQR    2020    10    100    NA
#>  7 PQR    2020    11    200    NA
#>  8 PQR    2020    12    300    NA
#>  9 PQR    2021     1    400   200
#> 10 PQR    2021     3    500   300

Created on 2022-07-02 by the reprex package (v2.0.1)

CodePudding user response:

You can use the following code:

library(dplyr)   
arrange(DF,ID,YEAR) %>% 
  group_by(ID) %>%
  mutate(lag1=lag(AMOUNT),
         lag2=lag(AMOUNT,2),
         lag3=lag(AMOUNT,3),
         movave=(lag1 lag2 lag3)/3)
#> # A tibble: 10 × 8
#> # Groups:   ID [2]
#>    ID     YEAR MONTH AMOUNT  lag1  lag2  lag3 movave
#>    <chr> <int> <int>  <int> <int> <int> <int>  <dbl>
#>  1 ABC    2020     9    100    NA    NA    NA     NA
#>  2 ABC    2020    11    200   100    NA    NA     NA
#>  3 ABC    2020    12    300   200   100    NA     NA
#>  4 ABC    2021     1    400   300   200   100    200
#>  5 ABC    2021     4    500   400   300   200    300
#>  6 PQR    2020    10    100    NA    NA    NA     NA
#>  7 PQR    2020    11    200   100    NA    NA     NA
#>  8 PQR    2020    12    300   200   100    NA     NA
#>  9 PQR    2021     1    400   300   200   100    200
#> 10 PQR    2021     3    500   400   300   200    300

Created on 2022-07-02 by the reprex package (v2.0.1)

CodePudding user response:

Here is a way.

suppressPackageStartupMessages({
  library(dplyr)
  library(TTR)
})

x <- '       ID        YEAR       MONTH   AMOUNT    
1      ABC       2020        09      100       
2      ABC       2020        11      200       
3      ABC       2020        12      300        
4      ABC       2021        01      400      
5      ABC       2021        04      500       
6      PQR       2020        10      100      
7      PQR       2020        11      200      
8      PQR       2020        12      300       
9      PQR       2021        01      400      
10     PQR       2021        03      500  '
DF <- read.table(textConnection(x), header = TRUE)

DF %>%
  arrange(ID, YEAR, MONTH) %>%
  group_by(ID) %>%
  mutate(`3MA` = lag(runMean(AMOUNT, 3)))
#> # A tibble: 10 × 5
#> # Groups:   ID [2]
#>    ID     YEAR MONTH AMOUNT `3MA`
#>    <chr> <int> <int>  <int> <dbl>
#>  1 ABC    2020     9    100    NA
#>  2 ABC    2020    11    200    NA
#>  3 ABC    2020    12    300    NA
#>  4 ABC    2021     1    400   200
#>  5 ABC    2021     4    500   300
#>  6 PQR    2020    10    100    NA
#>  7 PQR    2020    11    200    NA
#>  8 PQR    2020    12    300    NA
#>  9 PQR    2021     1    400   200
#> 10 PQR    2021     3    500   300

Created on 2022-07-02 by the reprex package (v2.0.1)

CodePudding user response:

Try this

DF |> arrange(ID , YEAR , MONTH) |> group_by(ID) |>
 mutate(`3M` = (lag(AMOUNT)   lag(AMOUNT ,2)   lag(AMOUNT , 3)) / 3)

  • output
# A tibble: 10 × 5
# Groups:   ID [2]
   ID     YEAR MONTH AMOUNT  `3M`
   <chr> <int> <int>  <int> <dbl>
 1 ABC    2020     9    100    NA
 2 ABC    2020    11    200    NA
 3 ABC    2020    12    300    NA
 4 ABC    2021     1    400   200
 5 ABC    2021     4    500   300
 6 PQR    2020    10    100    NA
 7 PQR    2020    11    200    NA
 8 PQR    2020    12    300    NA
 9 PQR    2021     1    400   200
10 PQR    2021     3    500   300
  •  Tags:  
  • r
  • Related