Home > Software design >  Get date last condition by group
Get date last condition by group

Time:11-01

Consider this data frame:

   idPerson idAppt decision date      
 1 A             1 a        2021-09-10
 2 A             1 b        2021-09-11
 3 A             1 c        2021-09-12
 4 A             1 d        2021-09-13
 5 A             2 a        2021-09-20
 6 A             2 b        2021-09-21
 7 A             3 a        2021-09-10
 8 A             3 b        2021-09-11
 9 B             1 a        2021-09-10
10 B             1 b        2021-09-11
11 B             1 c        2021-09-12
12 B             1 d        2021-09-13
13 B             2 a        2021-09-11
14 B             2 b        2021-09-12
15 B             3 a        2021-09-14
16 B             3 b        2021-09-15

For each groups of idPerson and idAppt, I'd like to get a date2 column, but with certain conditions:

  • For any groups (idPerson x idAppt) whose decision == "a" starts later than the date of decision == "d" of any other idAppt group with the same idPerson, report the date when decision == "d" of that group.
  • For any other group that do not meet this requirement, date2 should be the earliest date for this given idPerson.

Which yields this data frame:

   idPerson idAppt decision date       date2     
 1 A             1 a        2021-09-10 2021-09-10
 2 A             1 b        2021-09-11 2021-09-10
 3 A             1 c        2021-09-12 2021-09-10
 4 A             1 d        2021-09-13 2021-09-10
 5 A             2 a        2021-09-20 2021-09-13
 6 A             2 b        2021-09-21 2021-09-13
 7 A             3 a        2021-09-10 2021-09-10
 8 A             3 b        2021-09-11 2021-09-10
 9 B             1 a        2021-09-10 2021-09-10
10 B             1 b        2021-09-11 2021-09-10
11 B             1 c        2021-09-12 2021-09-10
12 B             1 d        2021-09-13 2021-09-10
13 B             2 a        2021-09-11 2021-09-10
14 B             2 b        2021-09-12 2021-09-10
15 B             3 a        2021-09-14 2021-09-13
16 B             3 b        2021-09-15 2021-09-13

Data:

df <- structure(list(idPerson = c("A", "A", "A", "A", "A", "A", "A", 
"A", "B", "B", "B", "B", "B", "B", "B", "B"), idAppt = c(1L, 
1L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L), 
    decision = c("a", "b", "c", "d", "a", "b", "a", "b", "a", 
    "b", "c", "d", "a", "b", "a", "b"), date = structure(c(18880, 
    18881, 18882, 18883, 18890, 18891, 18880, 18881, 18880, 18881, 
    18882, 18883, 18881, 18882, 18884, 18885), class = "Date")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -16L))

EO <- structure(list(idPerson = c("A", "A", "A", "A", "A", "A", "A", 
"A", "B", "B", "B", "B", "B", "B", "B", "B"), idAppt = c(1L, 
1L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L), 
    decision = c("a", "b", "c", "d", "a", "b", "a", "b", "a", 
    "b", "c", "d", "a", "b", "a", "b"), date = structure(c(18880, 
    18881, 18882, 18883, 18890, 18891, 18880, 18881, 18880, 18881, 
    18882, 18883, 18881, 18882, 18884, 18885), class = "Date"), 
    date2 = c("2021-09-10", "2021-09-10", "2021-09-10", "2021-09-10", 
    "2021-09-13", "2021-09-13", "2021-09-10", "2021-09-10", "2021-09-10", 
    "2021-09-10", "2021-09-10", "2021-09-10", "2021-09-10", "2021-09-10", 
    "2021-09-13", "2021-09-13")), row.names = c(NA, -16L), class = c("tbl_df", 
"tbl", "data.frame"))

CodePudding user response:

df %>%
  group_by(idPerson) %>%
  mutate(d_date = min(date[decision == "d"]), min_date_person = min(date)) %>%
  group_by(idPerson, idAppt) %>%
  mutate(date2 = if_else(date[decision == "a"] > d_date, d_date, min_date_person)) %>%
  ungroup()
# # A tibble: 16 × 7
#    idPerson idAppt decision date       d_date     min_date_person date2     
#    <chr>     <int> <chr>    <date>     <date>     <date>          <date>    
#  1 A             1 a        2021-09-10 2021-09-13 2021-09-10      2021-09-10
#  2 A             1 b        2021-09-11 2021-09-13 2021-09-10      2021-09-10
#  3 A             1 c        2021-09-12 2021-09-13 2021-09-10      2021-09-10
#  4 A             1 d        2021-09-13 2021-09-13 2021-09-10      2021-09-10
#  5 A             2 a        2021-09-20 2021-09-13 2021-09-10      2021-09-13
#  6 A             2 b        2021-09-21 2021-09-13 2021-09-10      2021-09-13
#  7 A             3 a        2021-09-10 2021-09-13 2021-09-10      2021-09-10
#  8 A             3 b        2021-09-11 2021-09-13 2021-09-10      2021-09-10
#  9 B             1 a        2021-09-10 2021-09-13 2021-09-10      2021-09-10
# 10 B             1 b        2021-09-11 2021-09-13 2021-09-10      2021-09-10
# 11 B             1 c        2021-09-12 2021-09-13 2021-09-10      2021-09-10
# 12 B             1 d        2021-09-13 2021-09-13 2021-09-10      2021-09-10
# 13 B             2 a        2021-09-11 2021-09-13 2021-09-10      2021-09-10
# 14 B             2 b        2021-09-12 2021-09-13 2021-09-10      2021-09-10
# 15 B             3 a        2021-09-14 2021-09-13 2021-09-10      2021-09-13
# 16 B             3 b        2021-09-15 2021-09-13 2021-09-10      2021-09-13

CodePudding user response:

#Select earlist date for decision d by idPerson
df_d <- df%>%
  filter(decision=="d")%>%
  group_by(idPerson)%>%
  arrange(date)%>%
  slice(1)%>%
  rename(earliest_d=date)%>%
  select(idPerson,earliest_d)%>%
  ungroup

#Select earlist date for decision a
df_a <- df%>%
  filter(decision=="a")%>%
  rename(earliest_a=date)%>%
  select(-decision)

# merge above datasets with df, create date2 by given conditions
df2 <- df%>%
  base::merge(df_d,by=c("idPerson"))%>%
  base::merge(df_a,by=c("idPerson","idAppt"))%>%
  mutate(date2=case_when(earliest_d>earliest_a~earliest_a,
                         T~earliest_d))%>%
  select(-earliest_d,-earliest_a)
df2

Output:

> df2
   idPerson idAppt decision       date      date2
1         A      1        a 2021-09-10 2021-09-10
2         A      1        b 2021-09-11 2021-09-10
3         A      1        c 2021-09-12 2021-09-10
4         A      1        d 2021-09-13 2021-09-10
5         A      2        a 2021-09-20 2021-09-13
6         A      2        b 2021-09-21 2021-09-13
7         A      3        a 2021-09-10 2021-09-10
8         A      3        b 2021-09-11 2021-09-10
9         B      1        a 2021-09-10 2021-09-10
10        B      1        b 2021-09-11 2021-09-10
11        B      1        c 2021-09-12 2021-09-10
12        B      1        d 2021-09-13 2021-09-10
13        B      2        a 2021-09-11 2021-09-11
14        B      2        b 2021-09-12 2021-09-11
15        B      3        a 2021-09-14 2021-09-13
16        B      3        b 2021-09-15 2021-09-13
  • Related