Extract and plot n rows before and after the onset of each event, centered on zero-CodePudding

Objective: From a time-series df, make a plot of each occurrence of a particular state (or factor level) with x timepoints before, and y timepoints after, the onset (i.e. first row) of that state. The graph should be centered on zero (on the x-axis), such that the x timepoints before the event are negative values, and the y timepoints after the event are positive values. This is the same principal as a peristimulus time histogram.

The data: I have time-series data where different states can occur for variable amounts of time. First I use run length encoding (rle) to determine the start and stop of each run of each state (not shown). Second, I use a function,

CodePudding user response：

This approach subdivides the data into plotGroups where each group starts one step before each new A (except for the first grp), and the counter is set at zero for each group's first A. The division point prior is determined by the n in lead(), and we could add a filter to limit the points after.

# edit to fix first group counting
df %>%
  mutate(start = state == "A" & lag(state, default = "") != "A") %>%
  mutate(plotGroup = cumsum(lead(start, n = 1, default = FALSE))) %>%
  group_by(plotGroup) %>%
  mutate(counter = row_number() - row_number()[start]) %>%
  ungroup() %>%
  filter(counter <= 2) %>%
  ggplot(aes(counter, data, group = plotGroup))  
  geom_line()

Result before plotting:

# A tibble: 14 × 6
   state start rleGroup   data plotGroup counter
   <chr> <lgl> <chr>     <dbl>     <int>   <int>
 1 A     TRUE  1        0.0198         0       0
 2 A     FALSE 1        0.338          0       1
 3 A     FALSE 1        0.635          0       2
 4 B     FALSE 2        0.0138         1      -1
 5 A     TRUE  3        0.218          1       0
 6 A     FALSE 3        0.208          1       1
 7 X     FALSE 4        0.0934         1       2
 8 Z     FALSE 6        0.499          2      -1
 9 A     TRUE  7        0.0417         2       0
10 A     FALSE 7        0.934          2       1
11 A     FALSE 7        0.507          2       2
12 B     FALSE 8        0.555          3      -1
13 A     TRUE  9        0.158          3       0
14 A     FALSE 9        0.437          3       1

CodePudding user response：

#Define number of rows you want before and after the zero-centered graph
after <- 2
before <- 1

#made up data 
df <- data.frame(
  state =      c("A","A","A","A","A","B","A","A","X","Y","Z","A","A","A","B","A","A"),
  start =      c("start","NA","NA","NA","NA","NA","start","NA","NA","NA","NA","start","NA","NA","NA","start","NA"),
  rleGroup =   c("1","1","1","1","1","2","3","3","4","5","6","7","7","7","8","9","9"),
  data = runif(17)
)
df <- df %>% tidyr::unite(stateStart, c(state,start), sep = ".", remove = FALSE)

#extract the rows before and after the onset of a particular state
extract.with.context <- function(x, colname, rows, after = 0, before = 0) {
  match.idx  <- which(x[[colname]] %in% rows)
  span       <- seq(from = -before, to = after)
  extend.idx <- c(outer(match.idx, span, ` `))
  extend.idx <- Filter(function(i) i > 0 & i <= nrow(x), extend.idx)
  extend.idx <- sort(unique(extend.idx))
  return(x[extend.idx, , drop = FALSE]) 
}

extracted.df = extract.with.context(x=df, colname="stateStart", rows=c("A.start"), after = after, before = before)

# Create plotGroup
# if we go off starting cue = T/F, and start counting when lead (by "before") is T, 
# then we should get correct plotGroup, regardless whether the desired state is in first row or not 
boo <- extracted.df %>%
  dplyr::mutate(start2 = state == "A" & lag(state, default = "") != "A") %>%
  mutate(plotGroup = cumsum(lead(start2, n = before, default = FALSE)))

#create the counter/sequence to zero the graph 
counter <- rep(NA, times = length(boo$start)) # make an empty counter
starts <- which(boo$start == "start") # find the start positions
counter[starts] <- 0

for(i in 1:after){ # for every position after a start, up to "after"
  indexes <- starts   i # index of positions "i" after the start
  indexes_1 <- indexes[which(indexes %in% 1:length(counter))] # indexes can run over the length of the counter - we only want indexes that are within the length of the counter
  counter[indexes_1] <- i # for those indexes, put in the count, i
}
for(i in 1:before){ # same as for "after", but in reverse for "before"
  indexes <- starts - i
  indexes_1 <- indexes[which(indexes %in% 1:length(counter))]
  counter[indexes_1] <- -i
}

boo$span <- counter
boo
   stateStart state start rleGroup       data start2 plotGroup span
1     A.start     A start        1 0.22771277   TRUE         0    0
2        A.NA     A    NA        1 0.39769158  FALSE         0    1
3        A.NA     A    NA        1 0.42416120  FALSE         0    2
6        B.NA     B    NA        2 0.06402964  FALSE         1   -1
7     A.start     A start        3 0.22233942   TRUE         1    0
8        A.NA     A    NA        3 0.77667057  FALSE         1    1
9        X.NA     X    NA        4 0.36675437  FALSE         1    2
11       Z.NA     Z    NA        6 0.49100719  FALSE         2   -1
12    A.start     A start        7 0.26012695   TRUE         2    0
13       A.NA     A    NA        7 0.88900224  FALSE         2    1
14       A.NA     A    NA        7 0.59714172  FALSE         2    2
15       B.NA     B    NA        8 0.15040234  FALSE         3   -1
16    A.start     A start        9 0.85581300   TRUE         3    0
17       A.NA     A    NA        9 0.15780435  FALSE         3    1

# plot 
ggplot(data=boo, aes(x=span, y = data, group = plotGroup))   
  geom_line()