How to check if pairs remain the same between years?-CodePudding

I have a data frame with pairs of individual birds (male and female) that were observed in several years. I am trying to figure out whether these pairs have changed from one year to the next so that I can do some further analyses.

My data is structured like this:

# Example data: one row per observed pair per year
# (three pairs in each of three years).
dat <- tibble(
  year = rep(seq_len(3), each = 3),
  Male = c(
    "A1", "B1", "C1", # year 1
    "A1", "B1", "C1", # year 2
    "A1", "B1", "C2"  # year 3
  ),
  Female = c(
    "X1", "Y1", "Z1", # year 1
    "X1", "Y2", "Z2", # year 2
    "X1", "Y2", "Z2"  # year 3
  )
)

# A tibble: 9 x 3
   year Male  Female
  <int> <chr> <chr> 
1     1 A1    X1    
2     1 B1    Y1    
3     1 C1    Z1    
4     2 A1    X1    
5     2 B1    Y2    
6     2 C1    Z2    
7     3 A1    X1    
8     3 B1    Y2    
9     3 C2    Z2

And my expected output is something like:

# A tibble: 9 x 5
   year Male  Female male_state female_state
  <int> <chr> <chr>  <chr>      <chr>       
1     1 A1    X1     new        new         
2     1 B1    Y1     new        new         
3     1 C1    Z1     new        new         
4     2 A1    X1     reunited   reunited    
5     2 B1    Y2     divorced   new         
6     2 C1    Z2     divorced   new         
7     3 A1    X1     reunited   reunited    
8     3 B1    Y2     reunited   reunited    
9     3 C2    Z2     new        divorced

I cannot figure out how to check, for each individual, whether its partner in a given year is the same as in the year before (e.g. whether a certain female has the same male ID in year 2 or 3 as she had in the previous year). Any ideas?

CodePudding user response：

This (probably overcomplicated) pipe produces the following output.

# Same example data as in the question: three pairs per year, years 1 to 3.
dat <- tibble(
  year = rep(seq_len(3), each = 3),
  Male = c(
    "A1", "B1", "C1", # year 1
    "A1", "B1", "C1", # year 2
    "A1", "B1", "C2"  # year 3
  ),
  Female = c(
    "X1", "Y1", "Z1", # year 1
    "X1", "Y2", "Z2", # year 2
    "X1", "Y2", "Z2"  # year 3
  )
)

# Flag, for every row, whether the pair has been recorded before
# ("old couple" vs "new couple") and whether the male is with a different
# female than in his previous record ("divorce").
#
# Both comparisons are made within a group and ordered by `year`, so a row is
# only ever compared with the chronologically previous record of the same
# pair / the same male -- never with whichever row happens to sort next to it
# in the concatenated `pair` string.
dat %>%
  mutate(pair = paste0(Male, Female)) %>%
  group_by(pair) %>%
  mutate(
    # First record of a pair has no previous year (lag is NA) -> "new couple".
    check = if_else(
      year > lag(year, order_by = year),
      "old couple", "new couple",
      missing = "new couple"
    )
  ) %>%
  group_by(Male) %>%
  mutate(
    # First record of a male has no previous female (lag is NA) -> "".
    divorced = if_else(
      Female != lag(Female, order_by = year),
      "divorce", "",
      missing = ""
    )
  ) %>%
  ungroup() %>%
  # Same presentation order as before: one block of rows per pair, by year.
  arrange(pair, year)

OUTPUT:

# A tibble: 9 × 6
   year Male  Female pair  check      divorced 
  <int> <chr> <chr>  <chr> <chr>      <chr>    
1     1 A1    X1     A1X1  new couple ""       
2     2 A1    X1     A1X1  old couple ""       
3     3 A1    X1     A1X1  old couple ""       
4     1 B1    Y1     B1Y1  new couple ""       
5     2 B1    Y2     B1Y2  new couple "divorce"
6     3 B1    Y2     B1Y2  old couple ""       
7     1 C1    Z1     C1Z1  new couple ""       
8     2 C1    Z2     C1Z2  new couple "divorce"
9     3 C2    Z2     C2Z2  new couple ""

CodePudding user response：

Try this:

library(tidyverse)

# Example data from the question: each vector lists year 1, then year 2,
# then year 3 (three pairs per year).
dat <- tibble(
  year = rep(seq_len(3), each = 3),
  Male = c("A1", "B1", "C1", "A1", "B1", "C1", "A1", "B1", "C2"),
  Female = c("X1", "Y1", "Z1", "X1", "Y2", "Z2", "X1", "Y2", "Z2")
)

# Classify each bird's status in every year:
#   "new"      - the bird's first record in the data
#   "reunited" - this exact pairing was already recorded in an earlier row
#                (any earlier year, not only the immediately preceding one)
#   "divorced" - the bird was seen before, but this pairing is new
dat |>
  # The row_number() tests below are order-dependent, so make sure the rows
  # are in chronological order before classifying anything.
  arrange(year) |>
  mutate(pairing = str_c(Male, "|", Female)) |>
  add_count(pairing) |>
  group_by(pairing) |>
  mutate(
    # Every record of a pairing after its first one is a reunion; the first
    # one stays NA and is resolved per bird below.
    male_state = if_else(row_number() > 1, "reunited", NA_character_),
    female_state = male_state
  ) |>
  group_by(Male) |>
  mutate(
    male_state = case_when(
      row_number() == 1 ~ "new",
      is.na(male_state) ~ "divorced",
      TRUE ~ male_state
    )
  ) |>
  group_by(Female) |>
  mutate(
    female_state = case_when(
      row_number() == 1 ~ "new",
      is.na(female_state) ~ "divorced",
      TRUE ~ female_state
    )
  ) |>
  # Drop the leftover grouping so downstream verbs operate on the whole table.
  ungroup() |>
  arrange(year, Male)
#> # A tibble: 9 × 7
#>    year Male  Female pairing     n male_state female_state
#>   <int> <chr> <chr>  <chr>   <int> <chr>      <chr>       
#> 1     1 A1    X1     A1|X1       3 new        new         
#> 2     1 B1    Y1     B1|Y1       1 new        new         
#> 3     1 C1    Z1     C1|Z1       1 new        new         
#> 4     2 A1    X1     A1|X1       3 reunited   reunited    
#> 5     2 B1    Y2     B1|Y2       2 divorced   new         
#> 6     2 C1    Z2     C1|Z2       1 divorced   new         
#> 7     3 A1    X1     A1|X1       3 reunited   reunited    
#> 8     3 B1    Y2     B1|Y2       2 reunited   reunited    
#> 9     3 C2    Z2     C2|Z2       1 new        divorced

<sup>Created on 2022-05-03 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>