Objective: From a time-series df, make a plot of each occurrence of a particular state (or factor level) with x timepoints before, and y timepoints after, the onset (i.e. first row) of that state. The graph should be centered on zero (on the x-axis), such that the x timepoints before the event are negative values, and the y timepoints after the event are positive values. This is the same principal as a peristimulus time histogram.
The data: I have time-series data where different states can occur for variable amounts of time. First I use run length encoding (rle) to determine the start and stop of each run of each state (not shown). Second, I use a function,
CodePudding user response:
This approach subdivides the data into plotGroups where each group starts one step before each new A (except for the first grp), and the counter is set at zero for each group's first A. The division point prior is determined by the n
in lead()
, and we could add a filter to limit the points after.
# edit to fix first group counting
df %>%
mutate(start = state == "A" & lag(state, default = "") != "A") %>%
mutate(plotGroup = cumsum(lead(start, n = 1, default = FALSE))) %>%
group_by(plotGroup) %>%
mutate(counter = row_number() - row_number()[start]) %>%
ungroup() %>%
filter(counter <= 2) %>%
ggplot(aes(counter, data, group = plotGroup))
geom_line()
Result before plotting:
# A tibble: 14 × 6
state start rleGroup data plotGroup counter
<chr> <lgl> <chr> <dbl> <int> <int>
1 A TRUE 1 0.0198 0 0
2 A FALSE 1 0.338 0 1
3 A FALSE 1 0.635 0 2
4 B FALSE 2 0.0138 1 -1
5 A TRUE 3 0.218 1 0
6 A FALSE 3 0.208 1 1
7 X FALSE 4 0.0934 1 2
8 Z FALSE 6 0.499 2 -1
9 A TRUE 7 0.0417 2 0
10 A FALSE 7 0.934 2 1
11 A FALSE 7 0.507 2 2
12 B FALSE 8 0.555 3 -1
13 A TRUE 9 0.158 3 0
14 A FALSE 9 0.437 3 1
CodePudding user response:
#Define number of rows you want before and after the zero-centered graph
after <- 2
before <- 1
#made up data
df <- data.frame(
state = c("A","A","A","A","A","B","A","A","X","Y","Z","A","A","A","B","A","A"),
start = c("start","NA","NA","NA","NA","NA","start","NA","NA","NA","NA","start","NA","NA","NA","start","NA"),
rleGroup = c("1","1","1","1","1","2","3","3","4","5","6","7","7","7","8","9","9"),
data = runif(17)
)
df <- df %>% tidyr::unite(stateStart, c(state,start), sep = ".", remove = FALSE)
#extract the rows before and after the onset of a particular state
extract.with.context <- function(x, colname, rows, after = 0, before = 0) {
match.idx <- which(x[[colname]] %in% rows)
span <- seq(from = -before, to = after)
extend.idx <- c(outer(match.idx, span, ` `))
extend.idx <- Filter(function(i) i > 0 & i <= nrow(x), extend.idx)
extend.idx <- sort(unique(extend.idx))
return(x[extend.idx, , drop = FALSE])
}
extracted.df = extract.with.context(x=df, colname="stateStart", rows=c("A.start"), after = after, before = before)
# Create plotGroup
# if we go off starting cue = T/F, and start counting when lead (by "before") is T,
# then we should get correct plotGroup, regardless whether the desired state is in first row or not
boo <- extracted.df %>%
dplyr::mutate(start2 = state == "A" & lag(state, default = "") != "A") %>%
mutate(plotGroup = cumsum(lead(start2, n = before, default = FALSE)))
#create the counter/sequence to zero the graph
counter <- rep(NA, times = length(boo$start)) # make an empty counter
starts <- which(boo$start == "start") # find the start positions
counter[starts] <- 0
for(i in 1:after){ # for every position after a start, up to "after"
indexes <- starts i # index of positions "i" after the start
indexes_1 <- indexes[which(indexes %in% 1:length(counter))] # indexes can run over the length of the counter - we only want indexes that are within the length of the counter
counter[indexes_1] <- i # for those indexes, put in the count, i
}
for(i in 1:before){ # same as for "after", but in reverse for "before"
indexes <- starts - i
indexes_1 <- indexes[which(indexes %in% 1:length(counter))]
counter[indexes_1] <- -i
}
boo$span <- counter
boo
stateStart state start rleGroup data start2 plotGroup span
1 A.start A start 1 0.22771277 TRUE 0 0
2 A.NA A NA 1 0.39769158 FALSE 0 1
3 A.NA A NA 1 0.42416120 FALSE 0 2
6 B.NA B NA 2 0.06402964 FALSE 1 -1
7 A.start A start 3 0.22233942 TRUE 1 0
8 A.NA A NA 3 0.77667057 FALSE 1 1
9 X.NA X NA 4 0.36675437 FALSE 1 2
11 Z.NA Z NA 6 0.49100719 FALSE 2 -1
12 A.start A start 7 0.26012695 TRUE 2 0
13 A.NA A NA 7 0.88900224 FALSE 2 1
14 A.NA A NA 7 0.59714172 FALSE 2 2
15 B.NA B NA 8 0.15040234 FALSE 3 -1
16 A.start A start 9 0.85581300 TRUE 3 0
17 A.NA A NA 9 0.15780435 FALSE 3 1
# plot
ggplot(data=boo, aes(x=span, y = data, group = plotGroup))
geom_line()