How to identify each participant's digit span? - CodePudding

Issue: calculating the outcome measure 'digit span' for a digit span task using R.

Digit span task description: the participant is awarded one mark for each sequence of digits they recall correctly. The task is designed so that the sequence to recall starts at a string length of 3 digits and continues up to 9 digits. There are two trials at each string length. If the participant performs correctly on at least one of the two trials at a given string length, the string length is increased on the next trial. If the participant performs incorrectly on both trials at a given string length, the task is terminated. I wish to calculate digit span as the outcome measure: the maximum string length at which the participant repeated back the sequence in the correct order on at least 50% of trials (i.e., at least 1 out of the 2 trials).

I have a dataframe with 8 columns, where:

nDigs: string length
pair: the first (0) and second (1) trial at each string length
mark: sequence recalled correctly (1) or incorrectly (0)

structure(list(subject = c("participant_001", "participant_001", 
"participant_001", "participant_001", "participant_001", "participant_001", 
"participant_001", "participant_001", "participant_001", "participant_001", 
"participant_001", "participant_001", "participant_001", "participant_001", 
"participant_001", "participant_001", "participant_001", "participant_001", 
"participant_001", "participant_001", "participant_001", "participant_001", 
"participant_002", "participant_002", "participant_002", "participant_002", 
"participant_002", "participant_002", "participant_002", "participant_002", 
"participant_002", "participant_002", "participant_002", "participant_002", 
"participant_002", "participant_002", "participant_002", "participant_002", 
"participant_002", "participant_002", "participant_002", "participant_002", 
"participant_002", "participant_002", "participant_003", "participant_003", 
"participant_003", "participant_003", "participant_003", "participant_003", 
"participant_003", "participant_003", "participant_003", "participant_003", 
"participant_003", "participant_003", "participant_003", "participant_003", 
"participant_003", "participant_003", "participant_003", "participant_003", 
"participant_003", "participant_003", "participant_003", "participant_003", 
"participant_004", "participant_004", "participant_004", "participant_004", 
"participant_004", "participant_004", "participant_004", "participant_004", 
"participant_004", "participant_004", "participant_004", "participant_004", 
"participant_004", "participant_004", "participant_004", "participant_004", 
"participant_004", "participant_004", "participant_004", "participant_004", 
"participant_004", "participant_004", "participant_004", "participant_004"
), session = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), schedule = c("nonspaced", 
"nonspaced", "nonspaced", "nonspaced", "nonspaced", "nonspaced", 
"nonspaced", "nonspaced", "nonspaced", "nonspaced", "nonspaced", 
"nonspaced", "nonspaced", "nonspaced", "nonspaced", "nonspaced", 
"nonspaced", "nonspaced", "nonspaced", "nonspaced", "nonspaced", 
"nonspaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced"
), group = c("training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "training", "training", "training", "training", 
"training", "training", "control", "control", "control", "control", 
"control", "control", "control", "control", "control", "control", 
"control", "control", "control", "control", "control", "control", 
"control", "control", "control", "control", "control", "control", 
"control", "control"), counterbalancing = c("verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal", 
"spatialverbal", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial", 
"verbalspatial"), nDigs = c(3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 
7L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 3L, 3L, 4L, 
4L, 5L, 5L, 6L, 6L, 7L, 7L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 
7L, 8L, 8L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 3L, 
3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 
6L, 7L, 7L, 8L, 8L, 9L, 9L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 
7L), pair = c(0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L), mark = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 
0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 
0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L)), class = "data.frame", row.names = c(NA, 
-90L))

Goal: for each participant at each session I need to specify the highest string length at which they scored 50% or more. Essentially, a dataframe that looks like the following:

structure(list(subject = c("participant_001", "participant_001", 
"participant_002", "participant_002", "participant_003", "participant_003", 
"participant_004", "participant_004"), session = c(1L, 16L, 1L, 
16L, 1L, 16L, 1L, 16L), schedule = c("nonspaced", "nonspaced", 
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced"), 
    group = c("training", "training", "training", "training", 
    "training", "training", "control", "control"), counterbalancing = c("verbalspatial", 
    "verbalspatial", "spatialverbal", "spatialverbal", "spatialverbal", 
    "spatialverbal", "verbalspatial", "verbalspatial"), digitSpan = c(6L, 
    7L, 6L, 7L, 7L, 6L, 8L, 6L)), class = "data.frame", row.names = c(NA, 
-8L))

What I've tried: I thought I might need to filter for each subject and session (or preferably loop over them), then sum the total mark (out of 2) at each nDigs. Then use an if statement to check, starting from the highest nDigs pair (i.e., 9), whether the mark is 50% or greater. If it is, they are awarded digit span = 9; if not, continue down through the string lengths until it is. I've checked similar posts, but to no avail.

# Trials for a single participant in a single session.
# `session` is an integer column, so compare against an integer literal rather
# than relying on implicit coercion to character. Calling dplyr::filter()
# directly also avoids needing `%>%` to be attached for this snippet.
dsf <- dplyr::filter(df, subject == "participant_001", session == 1L)

# Total mark (0, 1 or 2) earned at a given string length within `dsf`.
# Returns 0 when the participant never reached that string length.
marks_at <- function(n_digits) {
  sum(dsf$mark[dsf$nDigs == n_digits])
}

# One total per string length, 3 through 9 digits.
df2 <- marks_at(3L)
df3 <- marks_at(4L)
df4 <- marks_at(5L)
df5 <- marks_at(6L)
df6 <- marks_at(7L)
df7 <- marks_at(8L)
df8 <- marks_at(9L)

CodePudding user response:

library(dplyr)

# Digit span per participant and session: the longest string length (nDigs)
# at which at least half of the trials were recalled correctly.
df %>%
  # Keep every descriptive column in the grouping so that none of them
  # (e.g. counterbalancing) is dropped from the result.
  group_by(subject, session, schedule, group, counterbalancing, nDigs) %>%
  # mean(mark) is the proportion correct; unlike sum(mark) / 2 it does not
  # assume exactly two trials per string length.
  summarise(success_rate = mean(mark), .groups = "drop") %>%
  group_by(subject, session, schedule, group, counterbalancing) %>%
  summarise(
    # A session in which no string length was passed yields NA instead of
    # silently disappearing from the output.
    digitSpan = if (any(success_rate >= 0.5)) {
      max(nDigs[success_rate >= 0.5])
    } else {
      NA_integer_
    },
    .groups = "drop"
  )