I need to filter out n-1
consecutive rows if there are at least 3 consecutive values type=Yes
. For example for id=2
, there are 3 continuous Yes
so for this id I only keep it up to the first value =Yes
.
df <- data.frame(
id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7),
type = c("No", "Yes", "Yes", "DK", "Yes", "No", "Yes", "Yes", "Yes",
"DK", "No", "No", "Yes", "Yes", "DK", "No", "Yes", "Yes", "No",
NA, "No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes"
),
cumlsum_Yes = c(0, 1, 2, 0, 1, 0, 1, 2, 3, 0, 0, 0,
1, 2, 0, 0, 1, 2, 0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 4)
)
i.e.
id type cumlsum_Yes
1 1 No 0
2 1 Yes 1
3 1 Yes 2
4 1 DK 0
5 1 Yes 1
6 2 No 0
7 2 Yes 1
8 2 Yes 2
9 2 Yes 3
10 2 DK 0
11 3 No 0
12 3 No 0
13 3 Yes 1
14 3 Yes 2
15 3 DK 0
16 4 No 0
17 4 Yes 1
18 4 Yes 2
19 5 No 0
20 5 <NA> 0
21 6 No 0
22 6 Yes 1
23 6 Yes 2
24 6 Yes 3
25 7 No 0
26 7 Yes 1
27 7 Yes 2
28 7 Yes 3
29 7 Yes 4
The expected result looks like this:
id type cumlsum_Yes
1 1 No 0
2 1 Yes 1
3 1 Yes 2
4 1 DK 0
5 1 Yes 1
6 2 No 0
7 2 Yes 1
8 3 No 0
9 3 No 0
10 3 Yes 1
11 3 Yes 2
12 3 DK 0
13 4 No 0
14 4 Yes 1
15 4 Yes 2
16 5 No 0
17 5 <NA> 0
18 6 No 0
19 6 Yes 1
20 7 No 0
21 7 Yes 1
CodePudding user response:
This is ugly, but it works
library(tidyverse)
out <- data.frame(id=numeric(), type=character(), cumlsum_Yes=numeric())
for(i in 1:length(unique(df$id))) {
group <- df %>%
filter(id == i)
if(any(group$cumlsum_Yes >= 3 )) {
group <- group[1:(first(which(group$type == "Yes"))), ]
}
out <- rbind(out, group)
}
out
# id type cumlsum_Yes
# 1 1 No 0
# 2 1 Yes 1
# 3 1 Yes 2
# 4 1 DK 0
# 5 1 Yes 1
# 6 2 No 0
# 7 2 Yes 1
# 8 3 No 0
# 9 3 No 0
# 10 3 Yes 1
# 11 3 Yes 2
# 12 3 DK 0
# 13 4 No 0
# 14 4 Yes 1
# 15 4 Yes 2
# 16 5 No 0
# 17 5 <NA> 0
# 18 6 No 0
# 19 6 Yes 1
# 20 7 No 0
# 21 7 Yes 1