Home > front end >  removing scores after 4 consecutive errors
removing scores after 4 consecutive errors

Time:12-28

I have these scores per participant and I want to sum them up, with one condition: a participant may not make 4 errors in a row, so once there are four consecutive zeros, the remaining scores should not count. Is there a simple way to do this? In this example, pt4 is the problematic one: its last two scores should be excluded from the total.

library(dplyr)

# Accuracy vectors for four participants over 44 items (1 = correct, 0 = error).
pt1 <- rep(1, times = 44)
pt2 <- rep(c(1, 0), times = c(35, 9))
pt3 <- rep(c(1, 0), times = c(37, 7))
# pt4 has a run of four errors followed by two correct answers.
pt4 <- rep(c(1, 0, 1), times = c(38, 4, 2))

# Long format: one row per participant and item, participants stacked A to D.
df <- data.frame(
  item = rep(1:44, times = 4),
  pt = rep(LETTERS[1:4], each = 44),
  acc = c(pt1, pt2, pt3, pt4)
)

# Naive total per participant: every score is counted, with no cutoff applied.
df %>%
  dplyr::group_by(pt) %>%
  dplyr::summarise(total = sum(as.numeric(acc)))

CodePudding user response:

This can be solved with run-length encoding (rle). To make sure it really works, I'll first add a run of zeros that is shorter than four to the data; that run should not trigger the cutoff.

# Plant a run of three errors in rows 4 to 6; it is too short to trigger the cutoff.
df$acc <- replace(df$acc, 4:6, 0)

A quick function:

#' Flag the responses that still count toward a participant's score
#'
#' Scoring stops at the first run of at least `n_consecutive` errors. Shorter
#' runs of errors do not stop scoring and are flagged as still counting.
#'
#' @param z Logical vector in presentation order: `TRUE` for a correct
#'   (non-zero) response, `FALSE` for an error.
#' @param n_consecutive Number of errors in a row that ends scoring. Defaults
#'   to 4.
#' @return A logical vector the same length as `z`: `TRUE` for every position
#'   before the first run of at least `n_consecutive` errors, `FALSE` for that
#'   run and everything after it.
func <- function(z, n_consecutive = 4) {
  r <- rle(z)
  # A stopping run is a run of errors (FALSE values) that is long enough.
  is_stop_run <- r$lengths >= n_consecutive & !r$values
  # The running count stays at 0 until the first stopping run and is positive
  # for that run and every later one, so `== 0` marks the runs still in play.
  r$values <- cumsum(is_stop_run) == 0
  inverse.rle(r)
}

And using it.

base R

# ave() applies func() within each participant; the resulting mask selects rows.
df[ave(df[["acc"]] != 0, df[["pt"]], FUN = func), ]
#     item pt acc
# 1      1  A   1
# 2      2  A   1
# 3      3  A   1
# 4      4  A   0
# 5      5  A   0
# 6      6  A   0
# 7      7  A   1
# 8      8  A   1
# 9      9  A   1
# 10    10  A   1
# 11    11  A   1
# 12    12  A   1
# 13    13  A   1
# 14    14  A   1
# 15    15  A   1
# 16    16  A   1
# 17    17  A   1
# 18    18  A   1
# 19    19  A   1
# 20    20  A   1
# 21    21  A   1
# 22    22  A   1
# 23    23  A   1
# 24    24  A   1
# 25    25  A   1
# 26    26  A   1
# 27    27  A   1
# 28    28  A   1
# 29    29  A   1
# 30    30  A   1
# 31    31  A   1
# 32    32  A   1
# 33    33  A   1
# 34    34  A   1
# 35    35  A   1
# 36    36  A   1
# 37    37  A   1
# 38    38  A   1
# 39    39  A   1
# 40    40  A   1
# 41    41  A   1
# 42    42  A   1
# 43    43  A   1
# 44    44  A   1
# 45     1  B   1
# 46     2  B   1
# 47     3  B   1
# 48     4  B   1
# 49     5  B   1
# 50     6  B   1
# 51     7  B   1
# 52     8  B   1
# 53     9  B   1
# 54    10  B   1
# 55    11  B   1
# 56    12  B   1
# 57    13  B   1
# 58    14  B   1
# 59    15  B   1
# 60    16  B   1
# 61    17  B   1
# 62    18  B   1
# 63    19  B   1
# 64    20  B   1
# 65    21  B   1
# 66    22  B   1
# 67    23  B   1
# 68    24  B   1
# 69    25  B   1
# 70    26  B   1
# 71    27  B   1
# 72    28  B   1
# 73    29  B   1
# 74    30  B   1
# 75    31  B   1
# 76    32  B   1
# 77    33  B   1
# 78    34  B   1
# 79    35  B   1
# 89     1  C   1
# 90     2  C   1
# 91     3  C   1
# 92     4  C   1
# 93     5  C   1
# 94     6  C   1
# 95     7  C   1
# 96     8  C   1
# 97     9  C   1
# 98    10  C   1
# 99    11  C   1
# 100   12  C   1
# 101   13  C   1
# 102   14  C   1
# 103   15  C   1
# 104   16  C   1
# 105   17  C   1
# 106   18  C   1
# 107   19  C   1
# 108   20  C   1
# 109   21  C   1
# 110   22  C   1
# 111   23  C   1
# 112   24  C   1
# 113   25  C   1
# 114   26  C   1
# 115   27  C   1
# 116   28  C   1
# 117   29  C   1
# 118   30  C   1
# 119   31  C   1
# 120   32  C   1
# 121   33  C   1
# 122   34  C   1
# 123   35  C   1
# 124   36  C   1
# 125   37  C   1
# 133    1  D   1
# 134    2  D   1
# 135    3  D   1
# 136    4  D   1
# 137    5  D   1
# 138    6  D   1
# 139    7  D   1
# 140    8  D   1
# 141    9  D   1
# 142   10  D   1
# 143   11  D   1
# 144   12  D   1
# 145   13  D   1
# 146   14  D   1
# 147   15  D   1
# 148   16  D   1
# 149   17  D   1
# 150   18  D   1
# 151   19  D   1
# 152   20  D   1
# 153   21  D   1
# 154   22  D   1
# 155   23  D   1
# 156   24  D   1
# 157   25  D   1
# 158   26  D   1
# 159   27  D   1
# 160   28  D   1
# 161   29  D   1
# 162   30  D   1
# 163   31  D   1
# 164   32  D   1
# 165   33  D   1
# 166   34  D   1
# 167   35  D   1
# 168   36  D   1
# 169   37  D   1
# 170   38  D   1

dplyr

library(dplyr)
# Grouped filter: func() is evaluated once per participant, then the grouping
# is dropped again.
df %>%
  dplyr::group_by(pt) %>%
  dplyr::filter(func(acc != 0)) %>%
  dplyr::ungroup()
# # A tibble: 154 x 3
#     item pt      acc
#    <int> <chr> <dbl>
#  1     1 A         1
#  2     2 A         1
#  3     3 A         1
#  4     4 A         0
#  5     5 A         0
#  6     6 A         0
#  7     7 A         1
#  8     8 A         1
#  9     9 A         1
# 10    10 A         1
# # ... with 144 more rows
  • Related