Home > Blockchain >  Lag Comparison By Year
Lag Comparison By Year

Time:11-21

DATA = data.frame(STUDENT = c(1,1,1,1,1,2,2,2,2,3,3,3),
                  YEAR = c(2000,2000,2001,2001,2002,2000,2001,2001,2002,2000,2001,2001),
                  SEMESTER = c(1,2,1,2,1,2,1,2,1,1,2,1),
                  SCORE = c(7,4,5,6,8,9,1,1,1,2,3,4),
                  WANT= c(NA, NA, 1, NA, 1, NA, 0, NA, 2, NA, 1, NA),
                  WANT2=c(NA, NA, 1, NA, 2, NA, -8, NA, 0, NA, 1, NA))

I have 'DATA' and wish to create the 'WANT' variable which does this:

if SCORE from SEMESTER = 1 and YEAR = N > SCORE from SEMESTER = 2 and YEAR = N-1, 1
if SCORE from SEMESTER = 1 and YEAR = N < SCORE from SEMESTER = 2 and YEAR = N-1, 2
if SCORE from SEMESTER = 1 and YEAR = N = SCORE from SEMESTER = 2 and YEAR = N-1, 0

CodePudding user response:

If I'm understanding your question correctly, this could be one way of doing it:

library(tidyverse)

DATA = data.frame(STUDENT = c(1,1,1,1,1,2,2,2,2,3,3,3),
                  YEAR = c(2000,2000,2001,2001,2002,2000,2001,2001,2002,2000,2001,2001),
                  SEMESTER = c(1,2,1,2,1,2,1,2,1,1,2,1),
                  SCORE = c(7,4,5,6,8,9,1,1,1,2,3,4),
                  WANT= c(NA, NA, 1, NA, 1, NA, 0, NA, 2, NA, 1, NA))

DATA |> 
  # These lines fill in 'missing' semesters
  complete(STUDENT, YEAR, SEMESTER) |> 
  arrange(STUDENT, YEAR, SEMESTER) |> 
  group_by(STUDENT) |> 
  # These lines check 'last score' for each student
  mutate(WANT = case_when(
    SEMESTER == 2 ~ NA,
    SCORE > lag(SCORE) ~ 1,
    SCORE < lag(SCORE) ~ 2,
    SCORE == lag(SCORE) ~ 0
  )
) |> 
  # These lines re-shorten code to only those containing scores
  filter(!is.na(SCORE))
#> # A tibble: 12 × 5
#> # Groups:   STUDENT [3]
#>    STUDENT  YEAR SEMESTER SCORE  WANT
#>      <dbl> <dbl>    <dbl> <dbl> <dbl>
#>  1       1  2000        1     7    NA
#>  2       1  2000        2     4    NA
#>  3       1  2001        1     5     1
#>  4       1  2001        2     6    NA
#>  5       1  2002        1     8     1
#>  6       2  2000        2     9    NA
#>  7       2  2001        1     1     2
#>  8       2  2001        2     1    NA
#>  9       2  2002        1     1     0
#> 10       3  2000        1     2    NA
#> 11       3  2001        1     4    NA
#> 12       3  2001        2     3    NA

CodePudding user response:

Using dplyr with case_when:

DATA %>%
  group_by(STUDENT) %>%
  arrange(YEAR, SEMESTER) %>%
  mutate(WANT = case_when(SEMESTER == 1 & lag(SEMESTER) == 2 & YEAR == lag(YEAR)   1 & SCORE > lag(SCORE) ~ 1,
                          SEMESTER == 1 & lag(SEMESTER) == 2 & YEAR == lag(YEAR)   1 & SCORE < lag(SCORE) ~ 2,
                          SEMESTER == 1 & lag(SEMESTER) == 2 & YEAR == lag(YEAR)   1 & SCORE == lag(SCORE) ~ 0)) %>%
  arrange(STUDENT)

Result:

   STUDENT  YEAR SEMESTER SCORE  WANT
     <dbl> <dbl>    <dbl> <dbl> <dbl>
 1       1  2000        1     7    NA
 2       1  2000        2     4    NA
 3       1  2001        1     5     1
 4       1  2001        2     6    NA
 5       1  2002        1     8     1
 6       2  2000        2     9    NA
 7       2  2001        1     1     2
 8       2  2001        2     1    NA
 9       2  2002        1     1     0
10       3  2000        1     2    NA
11       3  2001        1     4    NA
12       3  2001        2     3    NA

CodePudding user response:

Not exactly your expected results but i think you need to use pivot_wider

DATA |>
    select(STUDENT, YEAR, SEMESTER, SCORE) |>
    pivot_wider(names_from = SEMESTER, values_from = SCORE) |>
    complete(YEAR) |>
    arrange(YEAR) |>
    group_by(STUDENT) |>
    mutate(CHANGE = case_when(`1` >  lag(`2`,1) ~ 1,
                              `1` <  lag(`2`,1) ~ 2,
                              `1` == lag(`2`,1) ~ 0,
                              TRUE ~ NA_real_)) |>
    ungroup() |>
    arrange(STUDENT, YEAR)


##>   # A tibble: 8 × 5
##>    YEAR STUDENT   `1`   `2` CHANGE
##>   <dbl>   <dbl> <dbl> <dbl>  <dbl>
##> 1  2000       1     7     4     NA
##> 2  2001       1     5     6      1
##> 3  2002       1     8    NA      1
##> 4  2000       2    NA     9     NA
##> 5  2001       2     1     1      2
##> 6  2002       2     1    NA      0
##> 7  2000       3     2    NA     NA
##> 8  2001       3     4     3     NA
  • Related