Home > OS >  Mutate column values based on previous row in another column
Mutate column values based on previous row in another column

Time:10-11

For the sake of reproducibility and transparancy, I'm trying to do something rather difficult for me in R (which I could easily do in Excel otherwise but not very reproducibly). Here's some data:

structure(list(Subject = c(500, 500, 500, 500, 500, 500, 500, 
500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 
500, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 
502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 502, 
502), Block = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 10L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L), Colour = c("GREEN", "RED", "RED", "GREEN", "RED", 
"GREEN", "RED", "RED", "GREEN", "GREEN", "RED", "GREEN", "GREEN", 
"RED", "RED", "RED", "GREEN", "GREEN", "GREEN", "RED", "GREEN", 
"GREEN", "RED", "RED", "GREEN", "RED", "GREEN", "RED", "RED", 
"GREEN", "GREEN", "RED", "GREEN", "GREEN", "RED", "GREEN", "RED", 
"RED", "GREEN", "GREEN", "GREEN", "RED", "GREEN", "RED", "RED", 
"RED", "RED"), Cong = c(21L, 21L, 22L, 22L, 21L, 22L, 21L, 22L, 
22L, 21L, 21L, 22L, 21L, 22L, 21L, 22L, 21L, 22L, 21L, 22L, 22L, 
21L, 21L, 22L, 22L, 21L, 22L, 21L, 22L, 22L, 21L, 21L, 22L, 21L, 
21L, 22L, 22L, 21L, 22L, 22L, 21L, 22L, 21L, 21L, 22L, 22L, 21L
), CorrAns = c("GREEN", "RED", "circle", "triangle", "triangle", 
"GREEN", "triangle", "RED", "GREEN", "GREEN", "triangle", "GREEN", 
"GREEN", "RED", "triangle", "circle", "GREEN", "GREEN", "GREEN", 
"circle", "GREEN", "GREEN", "RED", "circle", "triangle", "triangle", 
"GREEN", "triangle", "RED", "GREEN", "GREEN", "triangle", "GREEN", 
"circle", "triangle", "GREEN", "RED", "triangle", "triangle", 
"GREEN", "GREEN", "RED", "GREEN", "triangle", "RED", "circle", 
"triangle"), CorrResp = c(4L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 4L, 
4L, 3L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 
3L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 4L, 4L, 
3L, 4L, 3L, 3L, 4L, 3L), Jitter1 = c(646L, 1294L, 639L, 1194L, 
758L, 1205L, 942L, 542L, 1256L, 1250L, 1031L, 1486L, 1426L, 1123L, 
1306L, 1426L, 1037L, 1208L, 1201L, 842L, 646L, 646L, 1294L, 639L, 
1194L, 758L, 1205L, 942L, 542L, 1256L, 1250L, 1031L, 1486L, 1342L, 
822L, 741L, 589L, 1025L, 914L, 1382L, 1384L, 875L, 615L, 922L, 
1296L, 923L, 595L), Jitter2 = c(1146L, 1794L, 1139L, 1694L, 1258L, 
1705L, 1442L, 1042L, 1756L, 1750L, 1531L, 1986L, 1926L, 1623L, 
1806L, 1926L, 1537L, 1708L, 1701L, 1342L, 1146L, 1146L, 1794L, 
1139L, 1694L, 1258L, 1705L, 1442L, 1042L, 1756L, 1750L, 1531L, 
1986L, 1842L, 1322L, 1241L, 1089L, 1525L, 1414L, 1882L, 1884L, 
1375L, 1115L, 1422L, 1796L, 1423L, 1095L), Procedure.Trial. = c("TrialProc2", 
"TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", 
"TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", 
"TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", 
"TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", "TrialProc5", 
"TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", 
"TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", "TrialProc2", 
"TrialProc2", "TrialProc2", "TrialProc5", "TrialProc5", "TrialProc5", 
"TrialProc5", "TrialProc5", "TrialProc5", "TrialProc5", "TrialProc5", 
"TrialProc5", "TrialProc5", "TrialProc5", "TrialProc5", "TrialProc5", 
"TrialProc5"), Shape = c("circle", "triangle", "circle", "triangle", 
"triangle", "triangle", "triangle", "circle", "triangle", "circle", 
"triangle", "triangle", "circle", "circle", "triangle", "circle", 
"circle", "triangle", "circle", "circle", "triangle", "circle", 
"triangle", "circle", "triangle", "triangle", "triangle", "triangle", 
"circle", "triangle", "circle", "triangle", "triangle", "circle", 
"triangle", "triangle", "circle", "triangle", "triangle", "triangle", 
"circle", "circle", "circle", "triangle", "circle", "circle", 
"triangle"), Shift = c(2L, 2L, 2L, 2L, 7L, 8L, 8L, 8L, 7L, 7L, 
8L, 8L, 7L, 7L, 8L, 7L, 8L, 7L, 7L, 8L, 8L, 2L, 2L, 2L, 2L, 7L, 
8L, 8L, 8L, 7L, 7L, 8L, 8L, 2L, 2L, 2L, 2L, 8L, 7L, 8L, 7L, 7L, 
7L, 8L, 8L, 8L, 7L)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 
208L, 209L, 210L, 211L, 212L, 213L, 214L, 215L, 216L, 217L, 218L, 
219L, 220L, 417L, 418L, 419L, 420L, 421L, 422L, 423L, 424L, 425L, 
426L, 427L, 428L, 429L, 430L), class = "data.frame")

What I'm trying to do is to mutate the rows in column Shift that have values 2 to a value 7 or 8 based on the preceding row, within that participant only. The rule is:

  • if participants have to respond to RED or GREEN (colours) both in the preceding and the current trial, the current trial counts as a stay trial and should be mutated from a 2 to a 7.
  • if participants have to respond to triangle or circle (shapes) both in the preceding and the current trial, the current trial counts as a stay trial and should be mutated from a 2 to a 7.
  • if participants have to respond to triangle or circle (shapes) in the preceding BUT to RED or GREEN (colours) in the current (or the other way around), the current trial counts as a shift trial and should be mutated from a 2 to an 8 in Shift column.

Also, if the current trial is their first data row (i.e. their first trial), it should be mutated to a 7. I imagine my code something like the below but I can't figure out how to make it work or specifically - refer to the previous row:

exampledata %>%
  group_by(Subject) %>% 
  mutate(Shift = case_when((CorrAns %in% "RED" | CorrAns %in% "GREEN") 
                           && (CorrAns %in% "RED" | CorrAns %in% "GREEN") ~ 7), #if previous trial is the same
         Shift = case_when((CorrAns %in% "triangle" | CorrAns %in% "circle") 
                           && (CorrAns %in% "RED" | CorrAns %in% "GREEN") ~ 8), #if previous trial is different
         Shift = case_when((CorrAns %in% "RED" | CorrAns %in% "GREEN") 
                           && (CorrAns %in% "triangle" | CorrAns %in% "circle") ~ 8), #if previous trial is different
         Shift = case_when((CorrAns %in% "triangle" | CorrAns %in% "circle") 
                           && (CorrAns %in% "triangle" | CorrAns %in% "circle") ~ 7), #if previous trial is the same
         Shift = case_when() ~ 7) #if previous trial is the first trial

If anyone has suggestions, I'll be very thankful!

Finally, here are is my version info (OS is Windows): "R version 4.0.3 (2020-10-10)"

CodePudding user response:

Maybe I misunderstood, but I don't get why do you need the previous row? If you want only the first row to be 7 and the other 8 then the code snippet below will do the work.

library(tidyverse)
df %>%
  group_by(Subject) %>% 
  mutate(shift_new = case_when( (CorrAns %in% c('GREEN', 'RED')) &
                                  (lag(CorrAns) %in% c('GREEN', 'RED')) &
                                  Shift == 2 ~ 7,
                                (CorrAns %in% c('triangle', 'circle')) &
                                  (lag(CorrAns) %in% c('triangle', 'circle')) &
                                  Shift == 2 ~ 7, 
                                (CorrAns %in% c('triangle', 'circle')) &
                                  (lag(CorrAns) %in% c('GREEN', 'RED')) &
                                  Shift == 2 ~ 8,
                                (CorrAns %in% c('GREEN', 'RED')) &
                                  (lag(CorrAns) %in% c('triangle', 'circle')) &
                                  Shift == 2 ~ 8)) %>% 
  mutate(shift_new = coalesce(shift_new, Shift))

coalesce is to remove NAs and place the values of Shift instead.

CodePudding user response:

You can also do this by creating an indicator column that defines shapes vs colors, and then subtract the difference between that indicator and its lag from 8, as below:

exampledata %>%
  mutate(typ = if_else(CorrAns %in% c("GREEN","RED"),0,1)) %>% 
  group_by(Subject) %>% 
  mutate(Shift = coalesce(8-(typ==lag(typ)),7))
  •  Tags:  
  • r
  • Related