Issue: calculating the outcome measure 'digit span' for a digit span task using R.
Digit span task description: the participant is awarded one mark for each trial on which they correctly recall a sequence of digits. The task is designed so that the sequence to recall starts at a string length of 3 digits and continues up to 9 digits. There are two trials at each string length. If the participant performs correctly on at least one of the two trials at a specific string length, the string length is increased on the next trial. If the participant performs incorrectly on both trials at a specific string length, the task is terminated. I wish to calculate digit span as the outcome measure: the maximum string length at which the participant repeated back the sequence in the correct order on at least 50% of trials (i.e., at least 1 out of the 2 trials).
I have a dataframe (`df`, reproduced below with `dput()`) with 8 columns, where:
- nDigs: string length
- pair: the first (0) and second (1) trial at each string length
- mark: sequence recalled correctly (1) or incorrectly (0)
structure(list(subject = c("participant_001", "participant_001",
"participant_001", "participant_001", "participant_001", "participant_001",
"participant_001", "participant_001", "participant_001", "participant_001",
"participant_001", "participant_001", "participant_001", "participant_001",
"participant_001", "participant_001", "participant_001", "participant_001",
"participant_001", "participant_001", "participant_001", "participant_001",
"participant_002", "participant_002", "participant_002", "participant_002",
"participant_002", "participant_002", "participant_002", "participant_002",
"participant_002", "participant_002", "participant_002", "participant_002",
"participant_002", "participant_002", "participant_002", "participant_002",
"participant_002", "participant_002", "participant_002", "participant_002",
"participant_002", "participant_002", "participant_003", "participant_003",
"participant_003", "participant_003", "participant_003", "participant_003",
"participant_003", "participant_003", "participant_003", "participant_003",
"participant_003", "participant_003", "participant_003", "participant_003",
"participant_003", "participant_003", "participant_003", "participant_003",
"participant_003", "participant_003", "participant_003", "participant_003",
"participant_004", "participant_004", "participant_004", "participant_004",
"participant_004", "participant_004", "participant_004", "participant_004",
"participant_004", "participant_004", "participant_004", "participant_004",
"participant_004", "participant_004", "participant_004", "participant_004",
"participant_004", "participant_004", "participant_004", "participant_004",
"participant_004", "participant_004", "participant_004", "participant_004"
), session = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), schedule = c("nonspaced",
"nonspaced", "nonspaced", "nonspaced", "nonspaced", "nonspaced",
"nonspaced", "nonspaced", "nonspaced", "nonspaced", "nonspaced",
"nonspaced", "nonspaced", "nonspaced", "nonspaced", "nonspaced",
"nonspaced", "nonspaced", "nonspaced", "nonspaced", "nonspaced",
"nonspaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced", "spaced"
), group = c("training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "training", "training", "training", "training",
"training", "training", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control"), counterbalancing = c("verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial", "verbalspatial", "verbalspatial", "verbalspatial",
"verbalspatial"), nDigs = c(3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L,
7L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 3L, 3L, 4L,
4L, 5L, 5L, 6L, 6L, 7L, 7L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L,
7L, 8L, 8L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 3L,
3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 3L, 3L, 4L, 4L, 5L, 5L, 6L,
6L, 7L, 7L, 8L, 8L, 9L, 9L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L,
7L), pair = c(0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L), mark = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-90L))
Goal: for each participant at each session I need to specify the highest string length at which they scored 50% or more. Essentially, a dataframe that looks like the following:
structure(list(subject = c("participant_001", "participant_001",
"participant_002", "participant_002", "participant_003", "participant_003",
"participant_004", "participant_004"), session = c(1L, 16L, 1L,
16L, 1L, 16L, 1L, 16L), schedule = c("nonspaced", "nonspaced",
"spaced", "spaced", "spaced", "spaced", "spaced", "spaced"),
group = c("training", "training", "training", "training",
"training", "training", "control", "control"), counterbalancing = c("verbalspatial",
"verbalspatial", "spatialverbal", "spatialverbal", "spatialverbal",
"spatialverbal", "verbalspatial", "verbalspatial"), digitSpan = c(6L,
7L, 6L, 7L, 7L, 6L, 8L, 6L)), class = "data.frame", row.names = c(NA,
-8L))
What I've tried: I thought I might need to filter for each subject and session (or, preferably, loop over them), then sum the total mark (out of 2) at each nDigs. Then I would use an if statement to check, starting from the highest nDigs pair (i.e., 9), whether the mark is 50% or greater. If it is, the participant is awarded digit span = 9; if not, I would continue down through the string lengths until it is. I've checked similar posts, but to no avail.
# Asker's first attempt: isolate one participant/session, then total the marks
# at each string length from 3 to 9 digits (df2 = length 3, ..., df8 = length 9).
# `session` and `nDigs` are integer columns, so they are compared against
# integer literals rather than strings, and dplyr::filter() is called directly
# so the snippet does not depend on `%>%` being attached.
# A string length that was never administered yields sum(integer(0)) == 0L.
dsf <- dplyr::filter(df, subject == "participant_001", session == 1L)
df2 <- with(dsf, sum(mark[nDigs == 3L]))
df3 <- with(dsf, sum(mark[nDigs == 4L]))
df4 <- with(dsf, sum(mark[nDigs == 5L]))
df5 <- with(dsf, sum(mark[nDigs == 6L]))
df6 <- with(dsf, sum(mark[nDigs == 7L]))
df7 <- with(dsf, sum(mark[nDigs == 8L]))
df8 <- with(dsf, sum(mark[nDigs == 9L]))
CodePudding user response:
library(dplyr)

# Digit span per participant and session: the longest string length (nDigs) at
# which at least one of the two trials was recalled correctly (>= 50 %).
#
# Step 1: success rate per string length. `counterbalancing` is part of the
#         grouping so that it is carried through to the result.
# Step 2: keep only the string lengths that meet the 50 % criterion.
# Step 3: take the largest remaining string length as `digitSpan`.
#
# Note: a participant/session with no string length at >= 50 % has all of its
# rows removed in step 2 and therefore does not appear in the output.
df %>%
  group_by(subject, session, schedule, group, counterbalancing, nDigs) %>%
  # Two trials are administered at each string length, hence the division by 2.
  summarise(success_rate = sum(mark) / 2, .groups = "drop") %>%
  filter(success_rate >= 0.5) %>%
  group_by(subject, session, schedule, group, counterbalancing) %>%
  summarise(digitSpan = max(nDigs), .groups = "drop")