Sample data here
dat <- structure(list(UserEmail = c("[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]"), State = c("NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW", "NSW",
"NSW"), date = structure(c(14853, 14883, 14975, 14975, 15006,
15006, 15006, 15006, 15065, 15095, 15156, 15156, 15187, 15218,
15248, 15309, 15309, 15340, 15340, 15340, 15371, 15371, 15431,
15706, 15706, 15765, 15857, 15918, 16010, 16191, 16222, 16252,
16283, 16344, 16375, 16375, 16375, 16436, 16526, 16617, 16617,
16648, 16648, 16709, 16709, 16709, 16709, 16709, 16709, 16770,
16770, 16770, 16770, 16770, 16801, 16801, 16832, 16832, 16832,
16832, 16861, 16861, 16861, 16861, 16861, 16861, 16861, 16861,
16892, 16922, 16922, 16922, 16953, 16953, 16953, 16953, 16953,
16953, 16953, 16953, 16953, 16953, 16953, 16953, 16983, 16983,
16983, 16983, 16983, 16983, 16983, 17014, 17014, 17014, 17014,
17014, 17014, 17014, 17014, 17014, 17045, 17045, 17045, 17045,
17045, 17045, 17045, 17045, 17045, 17045, 17045, 17045, 17045,
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075,
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075,
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075, 17075,
17075, 17075, 17075, 17075, 17075, 17075, 17075, 17106, 17106,
17106, 17106, 17106, 17136, 17136, 17136, 17167, 17167, 17167,
17167, 17167, 17167, 17167, 17198, 17198, 17198, 17198, 17198,
17198, 17198, 17198, 17318, 17318, 17318, 17318, 17348, 17348,
17348, 17379, 17379, 17379, 17410, 17410, 17440, 17440, 17440,
17440, 17440, 17440, 17440, 17440, 17440, 17440, 17440, 17440,
17440, 17440, 17440, 17440, 17440, 17440), class = "Date"), rhdv = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), pindone = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0)), row.names = c(NA, -200L), groups = structure(list(
UserEmail = c("[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]",
"[email protected]", "[email protected]", "[email protected]"
), .rows = structure(list(155L, 72L, 171:172, 66L, 30L, 174L,
c(1L, 13L), 3L, 6L, 22L, 96L, 40L, 173L, 26L, 97L, 70L,
c(164L, 167L, 168L), 57:59, 2L, 25L, c(44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 54L, 55L, 71L, 84L, 87L, 89L,
90L, 91L, 94L, 95L, 102L, 105L, 110L, 111L, 112L, 113L,
149L, 157L, 158L, 159L, 175L, 179L), 61L, 56L, c(15L,
16L, 17L, 18L, 19L, 21L), 98L, 115:147, 4L, 60L, 8L,
24L, 154L, 177L, 5L, 34L, 38L, 176L, 28L, 150L, 148L,
14L, 35:37, 183L, 12L, 7L, c(153L, 156L), 62:65, 23L,
c(178L, 182L), 33L, 103L, 32L, 43L, c(41L, 67L, 68L),
108:109, 11L, 180L, 165L, 73:83, 69L, 9L, c(181L, 190L,
191L, 192L, 193L, 194L, 195L, 196L, 197L, 198L, 199L,
200L), 29L, 31L, c(85L, 86L, 88L, 93L, 101L, 104L, 106L,
107L, 114L, 151L, 160L, 161L, 162L, 163L, 166L, 170L),
184:189, 100L, 169L, 52L, 10L, 27L, 99L, 20L, 152L, c(42L,
53L, 92L), 39L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 75L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Question: For each dat$rhdv
record within unique groupings of dat$UserEmail
I would like to identify all instances within a 12 month window either side of the date of the dat$rhdv
record where dat$pindone == 1
. The solution I'm looking for is a new column appended to dat
, dat$pindone12
, which indicates 0
if the above is false or 1
if the above is true (i.e if true then there should be a dat$pindone == 1
record occurring within 12 months before or after a dat$rhdv == 1
record for that specific dat$UserEmail
).
I have been through may iterations of attempted solutions without any success to date. I suspect the beginnings of the solution are
dat$pindone12 <- dat %>% group_by(UserEmail) %>% ifelse(rhdv == 1 & *condition to refer to required date range across any rows* & pindone == 1, 1, 0)
CodePudding user response:
It's probably easier to break it down into steps, i.e. work out the 'rhdv - 12mo' and 'rhdv 12mo' then check whether the pindone's fall within the timeframe. Not sure if this solution will work on your actual data, but perhaps:
library(tidyverse)
library(lubridate)
dat2 <- dat %>%
group_by(UserEmail) %>%
mutate(Start_date = ifelse(rhdv == 1, date - years(1), NA),
End_date = ifelse(rhdv == 1, date years(1), NA)) %>%
mutate(Start_date = as.Date(Start_date, origin = "1970-01-01"),
End_date = as.Date(End_date, origin = "1970-01-01")) %>%
fill(c(Start_date, End_date), .direction = "downup") %>%
mutate(pindone12 = ifelse(date > Start_date &
date < End_date &
pindone == 1,
1, 0))
dat2 %>%
arrange(desc(pindone12))
#> # A tibble: 200 × 8
#> # Groups: UserEmail [75]
#> UserEmail State date rhdv pindone Start_date End_date pindone12
#> <chr> <chr> <date> <dbl> <dbl> <date> <date> <dbl>
#> 1 samuel.porter… NSW 2016-06-01 0 1 2015-06-01 2017-06-01 1
#> 2 samuel.porter… NSW 2016-06-01 1 1 2015-06-01 2017-06-01 1
#> 3 samuel.porter… NSW 2016-06-01 0 1 2015-06-01 2017-06-01 1
#> 4 samuel.porter… NSW 2016-06-01 0 1 2015-06-01 2017-06-01 1
#> 5 samuel.porter… NSW 2016-06-01 0 1 2015-06-01 2017-06-01 1
#> 6 samuel.porter… NSW 2016-06-01 0 1 2015-06-01 2017-06-01 1
#> 7 benradic@gmai… NSW 2010-09-01 0 0 NA NA 0
#> 8 gettrapped@op… NSW 2010-10-01 0 0 NA NA 0
#> 9 bradley.grove… NSW 2011-01-01 0 0 NA NA 0
#> 10 jimt@tadaust.… NSW 2011-01-01 0 0 NA NA 0
#> # … with 190 more rows
Created on 2022-06-21 by the reprex package (v2.0.1)
This shows a single UserEmail with 6 pindone12 records; is this what you're expecting to see?
CodePudding user response:
jared_mamrot solution looks good. I read your directions as: within each group of UserEmail, if a record has rhdv ==1, find records within a year of that date and if any of those other records have pindone ==1, that particular record gets pindone12 = 1, otherwise 0. I think jared's solution is giving you pindone12 = 1 if pindone for that record == 1. Also, his solution is giving pindon12 = 1 for records with rhdv == 0. If that's what you want, great. Based on my understanding:
library(purrr)
library(dplyr)
# Group and nest data
dat %>%
group_by(UserEmail) %>%
nest() %>%
# pmap to retain UserEmail column
purrr::pmap_dfr( function(UserEmail, data){
data %>%
# tmp data to index within each group
{. -> tmp
lapply(1:nrow(tmp), function(i){
# If more than one record, otherwise pindone12 is 0 because no other records with pindone == 1
if(nrow(tmp) > 1) {
tmp %>%
# For each record get other records within one year
filter(date <= (date[i] 365) & date >= (date[i] - 365)) %>%
# Find other records that have pindone == 1 and that record has rhdv == 1
dplyr::mutate(pindone12 = ifelse(any(pindone[-i] == 1 & rhdv[i] == 1), 1, 0),
email = UserEmail) %>%
# Retain that one record
dplyr::slice(i)
} else {
tmp %>%
mutate(pindone12 = 0,
email = UserEmail)
}
})
} %>%
bind_rows
})
When I then filter by those records with pindone12 == 1
... %>% filter(pindone12 == 1)
# A tibble: 6 × 6
State date rhdv pindone pindone12 email
<chr> <date> <dbl> <dbl> <dbl> <chr>
1 NSW 2016-06-01 1 1 1 [email protected]…
2 NSW 2016-06-01 1 0 1 [email protected]…
3 NSW 2016-06-01 1 0 1 [email protected]…
4 NSW 2016-06-01 1 0 1 [email protected]…
5 NSW 2016-06-01 1 0 1 [email protected]…
6 NSW 2016-06-01 1 0 1 [email protected]…