Identify whenever values repeat in R - CodePudding

I have a dataframe like this.

# Example input: a single numeric column whose values restart at 1.
data <- data.frame(
  Condition = c(1, 1, 2, 3, 1, 1, 2, 2, 2, 3, 1, 1, 2, 3, 3)
)

I want to populate a new variable Sequence which identifies whenever Condition starts again from 1.

So the new dataframe would look like this.

Thanks in advance for the help!

# Desired result: Sequence goes up by one each time Condition restarts at 1.
data <- data.frame(
  Condition = c(1, 1, 2, 3, 1, 1, 2, 2, 2, 3, 1, 1, 2, 3, 3),
  Sequence = c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3)
)

CodePudding user response：

base R

# A run starts on the first row, and on any later row whose Condition is 1
# while the row before it is not 1. cumsum() over those start flags numbers
# the runs 1, 2, 3, ...
data$Sequence2 <- with(data, {
  # tail(x, -1) is every row but the first; head(x, -1) is every row but the
  # last, so the two vectors pair each row with its predecessor.
  starts_run <- tail(Condition, -1) == 1 & head(Condition, -1) != 1
  cumsum(c(TRUE, starts_run))
})
data
#    Condition Sequence Sequence2
# 1          1        1         1
# 2          1        1         1
# 3          2        1         1
# 4          3        1         1
# 5          1        2         2
# 6          1        2         2
# 7          2        2         2
# 8          2        2         2
# 9          2        2         2
# 10         3        2         2
# 11         1        3         3
# 12         1        3         3
# 13         2        3         3
# 14         3        3         3
# 15         3        3         3

dplyr

library(dplyr)
# Flag rows where Condition is 1 and the previous row was not 1. The first
# row has no predecessor, so lag() fills in FALSE for "previous was 1".
# The running total of the flags is the sequence number.
data %>%
  mutate(
    Sequence2 = cumsum(
      !lag(Condition == 1, default = FALSE) & Condition == 1
    )
  )
#    Condition Sequence Sequence2
# 1          1        1         1
# 2          1        1         1
# 3          2        1         1
# 4          3        1         1
# 5          1        2         2
# 6          1        2         2
# 7          2        2         2
# 8          2        2         2
# 9          2        2         2
# 10         3        2         2
# 11         1        3         3
# 12         1        3         3
# 13         2        3         3
# 14         3        3         3
# 15         3        3         3

CodePudding user response：

This took a while, but I finally found this solution:

library(dplyr)
# Group rows by a running sequence number that goes up by one each time
# Condition returns to 1 after a non-1 value.
#
# A row starts a new sequence when its Condition is 1 and the previous row's
# Condition is not 1. The first row has no predecessor, so lag() supplies TRUE
# for "previous was not 1". Unlike a lead()-based test, this does not depend
# on how many consecutive 1s there are and never yields NA when the last row
# is 1.
#
# The grouping column keeps the name `Sequnce` so it matches the printed
# result shown below.
data %>%
  group_by(
    Sequnce = cumsum(Condition == 1 & lag(Condition != 1, default = TRUE))
  )

   Condition Sequnce
       <dbl>   <int>
 1         1       1
 2         1       1
 3         2       1
 4         3       1
 5         1       2
 6         1       2
 7         2       2
 8         2       2
 9         2       2
10         3       2
11         1       3
12         1       3
13         2       3
14         3       3
15         3       3