Construct an index of paired blocks-CodePudding

Given a matrix like the following:

      p1   p2
 [1,]  1 1065
 [2,]  1 1465
 [3,]  2 1464
 [4,]  3 1463
 [5,]  4 1462
 [6,] 27  438
 [7,] 29  635
 [8,] 31 1012
 [9,] 46  768
[10,] 53 1466
[11,] 63  401
[12,] 74  966
[13,] 75 1689
[14,] 86  682
[15,] 87  683
[16,] 90  612
[17,] 92 1608

How can i construct an index that identifies paired runs in p1 and p2? Can I identify that rows 2-5 represent paired runs and rows 14-15 represent paired runs, while 12-13 though paired in p1, are not paired in p2. If this question could be improved through clarification please let me know.

Sample data:

structure(c(1L, 1L, 2L, 3L, 4L, 27L, 29L, 31L, 46L, 53L, 63L, 
74L, 75L, 86L, 87L, 90L, 92L, 1065L, 1465L, 1464L, 1463L, 1462L, 
438L, 635L, 1012L, 768L, 1466L, 401L, 966L, 1689L, 682L, 683L, 
612L, 1608L), .Dim = c(17L, 2L), .Dimnames = list(NULL, c("p1", 
"p2")))

CodePudding user response：

Something like this?

m <- structure(c(1L, 1L, 2L, 3L, 4L, 27L, 29L, 31L, 46L, 53L, 63L, 
                 74L, 75L, 86L, 87L, 90L, 92L, 1065L, 1465L, 1464L, 1463L, 1462L, 
                 438L, 635L, 1012L, 768L, 1466L, 401L, 966L, 1689L, 682L, 683L, 
                 612L, 1608L), .Dim = c(17L, 2L), .Dimnames = list(NULL, c("p1", 
                                                                           "p2")))

blnRun <- abs(diff(m[, "p1"])) == 1 & abs(diff(m[, "p2"])) == 1
cbind(m, idxRun = cumsum(c(blnRun, FALSE) & !c(FALSE, blnRun))*(c(FALSE, blnRun) | c(blnRun, FALSE)))
#>       p1   p2 idxRun
#>  [1,]  1 1065      0
#>  [2,]  1 1465      1
#>  [3,]  2 1464      1
#>  [4,]  3 1463      1
#>  [5,]  4 1462      1
#>  [6,] 27  438      0
#>  [7,] 29  635      0
#>  [8,] 31 1012      0
#>  [9,] 46  768      0
#> [10,] 53 1466      0
#> [11,] 63  401      0
#> [12,] 74  966      0
#> [13,] 75 1689      0
#> [14,] 86  682      2
#> [15,] 87  683      2
#> [16,] 90  612      0
#> [17,] 92 1608      0

CodePudding user response：

Another solution, based on dplyr and data.table::rleid:

library(tidyverse)

df <- structure(c(1L, 1L, 2L, 3L, 4L, 27L, 29L, 31L, 46L, 53L, 63L, 
            74L, 75L, 86L, 87L, 90L, 92L, 1065L, 1465L, 1464L, 1463L, 1462L, 
            438L, 635L, 1012L, 768L, 1466L, 401L, 966L, 1689L, 682L, 683L, 
            612L, 1608L), .Dim = c(17L, 2L), .Dimnames = list(NULL, c("p1", 
                                                                      "p2")))

df %>%
  as.data.frame %>% 
  mutate(
    idx1 = ifelse(abs(lag(p2)-p2) == 1 & abs(lag(p1)-p1) == 1,1,0),
    idx2 = ifelse(abs(lead(p2)-p2) == 1 & abs(lead(p1)-p1) == 1,1,0),
    idx = pmax(idx1,idx2, na.rm = T), 
    idx2 = idx,
    idx = ifelse(idx != 0, data.table::rleid(idx), 0), 
    idx1 = NULL) %>% 
  group_by(idx2) %>% 
  mutate(idx = ifelse(idx == 0, 0, data.table::rleid(idx))) %>%
  ungroup %>% select(-idx2)

#> # A tibble: 17 × 3
#>       p1    p2   idx
#>    <int> <int> <dbl>
#>  1     1  1065     0
#>  2     1  1465     1
#>  3     2  1464     1
#>  4     3  1463     1
#>  5     4  1462     1
#>  6    27   438     0
#>  7    29   635     0
#>  8    31  1012     0
#>  9    46   768     0
#> 10    53  1466     0
#> 11    63   401     0
#> 12    74   966     0
#> 13    75  1689     0
#> 14    86   682     2
#> 15    87   683     2
#> 16    90   612     0
#> 17    92  1608     0