Home > database >  ifelse applied to multiple rows defined by date range
ifelse applied to multiple rows defined by date range

Time:06-21

Sample data here

dat <- structure(list(UserEmail = c("[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]", "[email protected]", 
"[email protected]"), State = c("NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", 
"NSW"), date = structure(c(14853, 14883, 14975, 14975, 15006, 
15006, 15006, 15006, 15065, 15095, 15156, 15156, 15187, 15218, 
15248, 15309, 15309, 15340, 15340, 15340, 15371, 15371, 15431, 
15706, 15706, 15765, 15857, 15918, 16010, 16191, 16222, 16252, 
16283, 16344, 16375, 16375, 16375, 16436, 16526, 16617, 16617, 
16648, 16648, 16709, 16709, 16709, 16709, 16709, 16709, 16770, 
16770, 16770, 16770, 16770, 16801, 16801, 16832, 16832, 16832, 
16832, 16861, 16861, 16861, 16861, 16861, 16861, 16861, 16861, 
16892, 16922, 16922, 16922, 16953, 16953, 16953, 16953, 16953, 
16953, 16953, 16953, 16953, 16953, 16953, 16953, 16983, 16983, 
16983, 16983, 16983, 16983, 16983, 17014, 17014, 17014, 17014, 
17014, 17014, 17014, 17014, 17014, 17045, 17045, 17045, 17045, 
17045, 17045, 17045, 17045, 17045, 17045, 17045, 17045, 17045, 
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17106, 17106, 
17106, 17106, 17106, 17136, 17136, 17136, 17167, 17167, 17167, 
17167, 17167, 17167, 17167, 17198, 17198, 17198, 17198, 17198, 
17198, 17198, 17198, 17318, 17318, 17318, 17318, 17348, 17348, 
17348, 17379, 17379, 17379, 17410, 17410, 17440, 17440, 17440, 
17440, 17440, 17440, 17440, 17440, 17440, 17440, 17440, 17440, 
17440, 17440, 17440, 17440, 17440, 17440), class = "Date"), rhdv = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 
1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), pindone = c(0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0)), row.names = c(NA, -200L), groups = structure(list(
    UserEmail = c("[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]", 
    "[email protected]", "[email protected]", "[email protected]"
    ), .rows = structure(list(155L, 72L, 171:172, 66L, 30L, 174L, 
        c(1L, 13L), 3L, 6L, 22L, 96L, 40L, 173L, 26L, 97L, 70L, 
        c(164L, 167L, 168L), 57:59, 2L, 25L, c(44L, 45L, 46L, 
        47L, 48L, 49L, 50L, 51L, 54L, 55L, 71L, 84L, 87L, 89L, 
        90L, 91L, 94L, 95L, 102L, 105L, 110L, 111L, 112L, 113L, 
        149L, 157L, 158L, 159L, 175L, 179L), 61L, 56L, c(15L, 
        16L, 17L, 18L, 19L, 21L), 98L, 115:147, 4L, 60L, 8L, 
        24L, 154L, 177L, 5L, 34L, 38L, 176L, 28L, 150L, 148L, 
        14L, 35:37, 183L, 12L, 7L, c(153L, 156L), 62:65, 23L, 
        c(178L, 182L), 33L, 103L, 32L, 43L, c(41L, 67L, 68L), 
        108:109, 11L, 180L, 165L, 73:83, 69L, 9L, c(181L, 190L, 
        191L, 192L, 193L, 194L, 195L, 196L, 197L, 198L, 199L, 
        200L), 29L, 31L, c(85L, 86L, 88L, 93L, 101L, 104L, 106L, 
        107L, 114L, 151L, 160L, 161L, 162L, 163L, 166L, 170L), 
        184:189, 100L, 169L, 52L, 10L, 27L, 99L, 20L, 152L, c(42L, 
        53L, 92L), 39L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, 75L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Question: For each dat$rhdv record within unique groupings of dat$UserEmail I would like to identify all instances within a 12 month window either side of the date of the dat$rhdv record where dat$pindone == 1. The solution I'm looking for is a new column appended to dat, dat$pindone12, which indicates 0 if the above is false or 1 if the above is true (i.e if true then there should be a dat$pindone == 1 record occurring within 12 months before or after a dat$rhdv == 1 record for that specific dat$UserEmail).

I have been through may iterations of attempted solutions without any success to date. I suspect the beginnings of the solution are

dat$pindone12 <- dat %>% group_by(UserEmail) %>% ifelse(rhdv == 1 & *condition to refer to required date range across any rows* & pindone == 1, 1, 0) 

CodePudding user response:

It's probably easier to break it down into steps, i.e. work out the 'rhdv - 12mo' and 'rhdv 12mo' then check whether the pindone's fall within the timeframe. Not sure if this solution will work on your actual data, but perhaps:

library(tidyverse)
library(lubridate)

dat2 <- dat %>%
  group_by(UserEmail) %>%
  mutate(Start_date = ifelse(rhdv == 1, date - years(1), NA),
         End_date = ifelse(rhdv == 1, date   years(1), NA)) %>%
  mutate(Start_date = as.Date(Start_date, origin = "1970-01-01"),
         End_date = as.Date(End_date, origin = "1970-01-01")) %>%
  fill(c(Start_date, End_date), .direction = "downup") %>%
  mutate(pindone12 = ifelse(date > Start_date &
                              date < End_date &
                              pindone == 1,
                            1, 0))
dat2 %>%
  arrange(desc(pindone12))
#> # A tibble: 200 × 8
#> # Groups:   UserEmail [75]
#>    UserEmail      State date        rhdv pindone Start_date End_date   pindone12
#>    <chr>          <chr> <date>     <dbl>   <dbl> <date>     <date>         <dbl>
#>  1 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  2 samuel.porter… NSW   2016-06-01     1       1 2015-06-01 2017-06-01         1
#>  3 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  4 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  5 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  6 samuel.porter… NSW   2016-06-01     0       1 2015-06-01 2017-06-01         1
#>  7 benradic@gmai… NSW   2010-09-01     0       0 NA         NA                 0
#>  8 gettrapped@op… NSW   2010-10-01     0       0 NA         NA                 0
#>  9 bradley.grove… NSW   2011-01-01     0       0 NA         NA                 0
#> 10 jimt@tadaust.… NSW   2011-01-01     0       0 NA         NA                 0
#> # … with 190 more rows

Created on 2022-06-21 by the reprex package (v2.0.1)

This shows a single UserEmail with 6 pindone12 records; is this what you're expecting to see?

CodePudding user response:

jared_mamrot solution looks good. I read your directions as: within each group of UserEmail, if a record has rhdv ==1, find records within a year of that date and if any of those other records have pindone ==1, that particular record gets pindone12 = 1, otherwise 0. I think jared's solution is giving you pindone12 = 1 if pindone for that record == 1. Also, his solution is giving pindon12 = 1 for records with rhdv == 0. If that's what you want, great. Based on my understanding:

library(purrr)
library(dplyr)
# Group and nest data
dat %>% 
  group_by(UserEmail) %>% 
  nest() %>% 
  # pmap to retain UserEmail column
  purrr::pmap_dfr( function(UserEmail, data){
    data %>% 
      # tmp data to index within each group
      {. -> tmp
        lapply(1:nrow(tmp), function(i){
          # If more than one record, otherwise pindone12 is 0 because no other records with pindone == 1
          if(nrow(tmp) > 1) {
            tmp %>% 
              # For each record get other records within one year
              filter(date <= (date[i]  365) & date >= (date[i] - 365)) %>% 
              # Find other records that have pindone == 1 and that record has rhdv == 1
              dplyr::mutate(pindone12 = ifelse(any(pindone[-i] == 1 & rhdv[i] == 1), 1, 0),
                            email = UserEmail) %>% 
              # Retain that one record
              dplyr::slice(i)
          } else {
            tmp %>% 
              mutate(pindone12 = 0,
                     email = UserEmail)
          }
        })
      } %>% 
      bind_rows
  })

When I then filter by those records with pindone12 == 1

... %>% filter(pindone12 == 1)
# A tibble: 6 × 6
  State date        rhdv pindone pindone12 email                             
  <chr> <date>     <dbl>   <dbl>     <dbl> <chr>                             
1 NSW   2016-06-01     1       1         1 [email protected]…
2 NSW   2016-06-01     1       0         1 [email protected]…
3 NSW   2016-06-01     1       0         1 [email protected]…
4 NSW   2016-06-01     1       0         1 [email protected]…
5 NSW   2016-06-01     1       0         1 [email protected]…
6 NSW   2016-06-01     1       0         1 [email protected]
  •  Tags:  
  • r
  • Related