Find difference between consecutive rows after grouping columns that starts

I want to group my df based on the column clone, and also based on what the type starts with, anc and ev.

After I group based on these conditions, I want to find the difference between consecutive rows

library(tidyverse)

set.seed(123)
df <- tibble(
  anc_0=c(runif(6,0.045,0.048)), 
  anc_3=c(runif(6,0.045,1.56)), 
  ev_0=c(runif(6,0.045,0.048)), 
  ev_3=c(runif(6,0.045,1.56)), 
  clone=c(rep(c("clone1","clone2"), each=3))
)

df %>% 
  pivot_longer(!clone, names_to = "type", values_to = "OD") %>% 
  group_by(across(starts_with("anc")), across(starts_with("ev")), clone) %>% 
  mutate(diff= OD - lag(OD))
#> # A tibble: 24 × 4
#> # Groups:   clone [2]
#>    clone  type      OD   diff
#>    <chr>  <chr>  <dbl>  <dbl>
#>  1 clone1 anc_0 0.0459 NA    
#>  2 clone1 anc_3 0.845   0.799
#>  3 clone1 ev_0  0.0470 -0.798
#>  4 clone1 ev_3  0.542   0.495
#>  5 clone1 anc_0 0.0474 -0.494
#>  6 clone1 anc_3 1.40    1.35 
#>  7 clone1 ev_0  0.0467 -1.35 
#>  8 clone1 ev_3  1.49    1.44 
#>  9 clone1 anc_0 0.0462 -1.44 
#> 10 clone1 anc_3 0.880   0.834
#> # … with 14 more rows

^{Created on 2022-12-25 with reprex v2.0.2}

I want my data to look like this

#>    clone  type      OD   diff
#>    <chr>  <chr>  <dbl>  <dbl>
#>  1 clone1 anc_0 0.0459 NA    
#>  2 clone1 anc_3 0.845   0.799
#>  3 clone1 ev_0  0.0470 NA
#>  4 clone1 ev_3  0.542   0.495
#>  5 clone1 anc_0 0.0474 NA
#>  6 clone1 anc_3 1.40    1.35 
#>  7 clone1 ev_0  0.0467 NA
#>  8 clone1 ev_3  1.49    1.44 
#>  9 clone1 anc_0 0.0462 NA 
#> 10 clone1 anc_3 0.880   0.834
#> # … with 14 more rows

any idea or help is appreciated

CodePudding user response：

We reshape to long with pivot_longer, do the grouping by clone and the rleid of substring of type i.e. after removing the _ followed by one or more digits (\\d ) and create the diff column

library(dplyr)
library(tidyr)
library(data.table)
 df %>% 
  pivot_longer(!clone, names_to = "type", values_to = "OD") %>% 
  group_by(clone, grp2 = rleid(str_remove(type, '_\\d '))) %>% 
  mutate(diff = OD - lag(OD)) %>%
  ungroup %>%
  select(-grp2)

-output

# A tibble: 24 × 4
   clone  type      OD   diff
   <chr>  <chr>  <dbl>  <dbl>
 1 clone1 anc_0 0.0459 NA    
 2 clone1 anc_3 0.845   0.799
 3 clone1 ev_0  0.0470 NA    
 4 clone1 ev_3  0.542   0.495
 5 clone1 anc_0 0.0474 NA    
 6 clone1 anc_3 1.40    1.35 
 7 clone1 ev_0  0.0467 NA    
 8 clone1 ev_3  1.49    1.44 
 9 clone1 anc_0 0.0462 NA    
10 clone1 anc_3 0.880   0.834
# … with 14 more rows