Home > Enterprise >  R Lag Two Variables
R Lag Two Variables

Time:11-24

DATA = data.frame(STUDENT = c(1,1,1,1,1,1,2,2,2,2,2, 3,3,3),
                  YEAR = c(2000, 2000, 2000, 2001, 2001, 2001, 2000, 2000, 2001, 2001, 2001, 2001, 2001, 2001),
                  TRIMESTER= c(1,2,3,1,2,3,2,3,1,2,3,1,2,3),
                  SCORE = c(5,7,8,9,10,3,4,6,3,1,2,3,6, 9),
                  WANT = c(NA,NA,NA,4,3,-5,NA,NA,NA,-3,-4,NA,NA,NA))

I have DATA and wish to create 'WANT' which is calculate by:

For each STUDENT, find the SCORE where (YEAR = YEAR 1, TRIMESTER = TRIMESTER) and get the difference.

EX: SCORE(STUDENT = 1, TRIMESTER = 1, YEAR = 2001) - SCORE(STUDENT = 1, TRIMESTER = 1, YEAR = 2000)

CodePudding user response:

Does this do what you want?

library(tidyverse)

DATA = data.frame(STUDENT = c(1,1,1,1,1,1,2,2,2,2,2, 3,3,3),
                  YEAR = c(2000, 2000, 2000, 2001, 2001, 2001, 2000, 2000, 2001, 2001, 2001, 2001, 2001, 2001),
                  TRIMESTER= c(1,2,3,1,2,3,2,3,1,2,3,1,2,3),
                  SCORE = c(5,7,8,9,10,3,4,6,3,1,2,3,6, 9),
                  WANT = c(NA,NA,NA,4,3,-5,NA,NA,NA,-3,-4,NA,NA,NA))

DATA <- as_tibble(DATA)

DATA %>% 
    group_by(STUDENT, TRIMESTER) %>% 
    mutate(LAST = lag(SCORE),
           DIFF = SCORE - LAST)
#> # A tibble: 14 x 7
#> # Groups:   STUDENT, TRIMESTER [9]
#>    STUDENT  YEAR TRIMESTER SCORE  WANT  LAST  DIFF
#>      <dbl> <dbl>     <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1       1  2000         1     5    NA    NA    NA
#>  2       1  2000         2     7    NA    NA    NA
#>  3       1  2000         3     8    NA    NA    NA
#>  4       1  2001         1     9     4     5     4
#>  5       1  2001         2    10     3     7     3
#>  6       1  2001         3     3    -5     8    -5
#>  7       2  2000         2     4    NA    NA    NA
#>  8       2  2000         3     6    NA    NA    NA
#>  9       2  2001         1     3    NA    NA    NA
#> 10       2  2001         2     1    -3     4    -3
#> 11       2  2001         3     2    -4     6    -4
#> 12       3  2001         1     3    NA    NA    NA
#> 13       3  2001         2     6    NA    NA    NA
#> 14       3  2001         3     9    NA    NA    NA

Created on 2022-11-23 with reprex v2.0.2

CodePudding user response:

Your data

DATA = data.frame(STUDENT = c(1,1,1,1,1,1,2,2,2,2,2, 3,3,3),
                  YEAR = c(2000, 2000, 2000, 2001, 2001, 2001, 2000, 2000, 2001, 2001, 2001, 2001, 2001, 2001),
                  TRIMESTER= c(1,2,3,1,2,3,2,3,1,2,3,1,2,3),
                  SCORE = c(5,7,8,9,10,3,4,6,3,1,2,3,6, 9))

using merge

DATA=merge(
  DATA,
  cbind(
    DATA[,c("STUDENT","TRIMESTER")],
    DATA[,"YEAR",drop=F] 1,
    "MATCH"=DATA[,"SCORE"]
  ),
  by=c("STUDENT","YEAR","TRIMESTER"),
  all.x=T
)
DATA$WANT=DATA$SCORE-DATA$MATCH

result

   STUDENT YEAR TRIMESTER SCORE MATCH WANT
1        1 2000         1     5    NA   NA
2        1 2000         2     7    NA   NA
3        1 2000         3     8    NA   NA
4        1 2001         1     9     5    4
5        1 2001         2    10     7    3
6        1 2001         3     3     8   -5
7        2 2000         2     4    NA   NA
8        2 2000         3     6    NA   NA
9        2 2001         1     3    NA   NA
10       2 2001         2     1     4   -3
11       2 2001         3     2     6   -4
12       3 2001         1     3    NA   NA
13       3 2001         2     6    NA   NA
14       3 2001         3     9    NA   NA
  • Related