Given a matrix like the following:
p1 p2
[1,] 1 1065
[2,] 1 1465
[3,] 2 1464
[4,] 3 1463
[5,] 4 1462
[6,] 27 438
[7,] 29 635
[8,] 31 1012
[9,] 46 768
[10,] 53 1466
[11,] 63 401
[12,] 74 966
[13,] 75 1689
[14,] 86 682
[15,] 87 683
[16,] 90 612
[17,] 92 1608
How can i construct an index that identifies paired runs in p1 and p2? Can I identify that rows 2-5 represent paired runs and rows 14-15 represent paired runs, while 12-13 though paired in p1, are not paired in p2. If this question could be improved through clarification please let me know.
Sample data:
structure(c(1L, 1L, 2L, 3L, 4L, 27L, 29L, 31L, 46L, 53L, 63L,
74L, 75L, 86L, 87L, 90L, 92L, 1065L, 1465L, 1464L, 1463L, 1462L,
438L, 635L, 1012L, 768L, 1466L, 401L, 966L, 1689L, 682L, 683L,
612L, 1608L), .Dim = c(17L, 2L), .Dimnames = list(NULL, c("p1",
"p2")))
CodePudding user response:
Something like this?
m <- structure(c(1L, 1L, 2L, 3L, 4L, 27L, 29L, 31L, 46L, 53L, 63L,
74L, 75L, 86L, 87L, 90L, 92L, 1065L, 1465L, 1464L, 1463L, 1462L,
438L, 635L, 1012L, 768L, 1466L, 401L, 966L, 1689L, 682L, 683L,
612L, 1608L), .Dim = c(17L, 2L), .Dimnames = list(NULL, c("p1",
"p2")))
blnRun <- abs(diff(m[, "p1"])) == 1 & abs(diff(m[, "p2"])) == 1
cbind(m, idxRun = cumsum(c(blnRun, FALSE) & !c(FALSE, blnRun))*(c(FALSE, blnRun) | c(blnRun, FALSE)))
#> p1 p2 idxRun
#> [1,] 1 1065 0
#> [2,] 1 1465 1
#> [3,] 2 1464 1
#> [4,] 3 1463 1
#> [5,] 4 1462 1
#> [6,] 27 438 0
#> [7,] 29 635 0
#> [8,] 31 1012 0
#> [9,] 46 768 0
#> [10,] 53 1466 0
#> [11,] 63 401 0
#> [12,] 74 966 0
#> [13,] 75 1689 0
#> [14,] 86 682 2
#> [15,] 87 683 2
#> [16,] 90 612 0
#> [17,] 92 1608 0
CodePudding user response:
Another solution, based on dplyr
and data.table::rleid
:
library(tidyverse)
df <- structure(c(1L, 1L, 2L, 3L, 4L, 27L, 29L, 31L, 46L, 53L, 63L,
74L, 75L, 86L, 87L, 90L, 92L, 1065L, 1465L, 1464L, 1463L, 1462L,
438L, 635L, 1012L, 768L, 1466L, 401L, 966L, 1689L, 682L, 683L,
612L, 1608L), .Dim = c(17L, 2L), .Dimnames = list(NULL, c("p1",
"p2")))
df %>%
as.data.frame %>%
mutate(
idx1 = ifelse(abs(lag(p2)-p2) == 1 & abs(lag(p1)-p1) == 1,1,0),
idx2 = ifelse(abs(lead(p2)-p2) == 1 & abs(lead(p1)-p1) == 1,1,0),
idx = pmax(idx1,idx2, na.rm = T),
idx2 = idx,
idx = ifelse(idx != 0, data.table::rleid(idx), 0),
idx1 = NULL) %>%
group_by(idx2) %>%
mutate(idx = ifelse(idx == 0, 0, data.table::rleid(idx))) %>%
ungroup %>% select(-idx2)
#> # A tibble: 17 × 3
#> p1 p2 idx
#> <int> <int> <dbl>
#> 1 1 1065 0
#> 2 1 1465 1
#> 3 2 1464 1
#> 4 3 1463 1
#> 5 4 1462 1
#> 6 27 438 0
#> 7 29 635 0
#> 8 31 1012 0
#> 9 46 768 0
#> 10 53 1466 0
#> 11 63 401 0
#> 12 74 966 0
#> 13 75 1689 0
#> 14 86 682 2
#> 15 87 683 2
#> 16 90 612 0
#> 17 92 1608 0