Home > Enterprise >  Check if date intervals overlap
Check if date intervals overlap

Time:09-19

I have a following dataframe in which each client has a different program period. I am trying to create a loop which would check if a client participated in a program during a chosen period (monthly frequency).

Client <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L") 
EntryDate <- as.Date(c("2004-01-01","2004-02-11", "2004-03-21","2004-04-14","2004-05-22","2004-05-08","2004-07-20",
                  "2004-07-28","2004-09-30","2004-10-25","2004-11-28","2004-12-30"))
ExitDate <- as.Date(c("2005-01-22","2004-03-01", "2004-05-21","2004-04-28","2004-08-09","2004-08-08","2005-06-20",
                       "2004-10-28","2004-10-30","2004-11-01","2004-12-28","2005-12-30"))
df <- data.frame(Client,EntryDate, ExitDate)
df$interval_client = interval(df$EntryDate,df$ExitDate)

   Client  EntryDate   ExitDate                interval_client
1       A 2004-01-01 2005-01-22 2004-01-01 UTC--2005-01-22 UTC
2       B 2004-02-11 2004-03-01 2004-02-11 UTC--2004-03-01 UTC
3       C 2004-03-21 2004-05-21 2004-03-21 UTC--2004-05-21 UTC
4       D 2004-04-14 2004-04-28 2004-04-14 UTC--2004-04-28 UTC
5       E 2004-05-22 2004-08-09 2004-05-22 UTC--2004-08-09 UTC
6       F 2004-05-08 2004-08-08 2004-05-08 UTC--2004-08-08 UTC
7       G 2004-07-20 2005-06-20 2004-07-20 UTC--2005-06-20 UTC
8       H 2004-07-28 2004-10-28 2004-07-28 UTC--2004-10-28 UTC
9       I 2004-09-30 2004-10-30 2004-09-30 UTC--2004-10-30 UTC
10      J 2004-10-25 2004-11-01 2004-10-25 UTC--2004-11-01 UTC
11      K 2004-11-28 2004-12-28 2004-11-28 UTC--2004-12-28 UTC
12      L 2004-12-30 2005-12-30 2004-12-30 UTC--2005-12-30 UTC

Basically, I want to see in which month (during a period from January 2004 to December 2005) a particular client participated in a program. I tried different codes but none got me needed results.

years <- 2004:2005
months <- 1:12

# option 1
for (y in years){
  for (m in months){
    df[ ,paste0(y, '-', m)] <- ifelse(df$interval_client %within% interval(ymd(paste0(y, '-', m, '-', 1)), ceiling_date(ymd(paste0(y, '-', m, '-', 1)),"month")-1), 1, 0)
  }
}

# option 2
for (y in years){
  for (m in months){
    df[ ,paste0(y, '-', m)] <- ifelse(df$interval_client >= as.Date(paste0(y, '-', m, '-', 1), '%Y-%m-%d') & df$interval_client <= ceiling_date(as.Date(paste0(y, '-', m, '-', 1), '%Y-%m%d'),"month")-1 ,1, 0)
  }
}

I want to have something like this (showing only the first 3 rows)

Client  EntryDate   ExitDate                interval_client   2004-1 2004-2 2004-3 2004-4 2004-5 2004-6 2004-7 2004-8 2004-9
1      A 2004-01-01 2005-01-22 2004-01-01 UTC--2005-01-22 UTC      1      1      1      1      1      1      1      1      1
2      B 2004-02-11 2004-03-01 2004-02-11 UTC--2004-03-01 UTC      0      1      1      0      0      0      0      0      0
3      C 2004-03-21 2004-05-21 2004-03-21 UTC--2004-05-21 UTC      0      0      1      1      1      0      0      0      0
   2004-10 2004-11 2004-12 2005-1 2005-2 2005-3 2005-4 2005-5 2005-6 2005-7 2005-8 2005-9 2005-10  2005-11 2005-12
1       1       1       1      1      0      0       0       0       0       0       0      0       0       0       0
2       0       0       0      0      0      0       0       0       0       0       0      0       0       0       0
3       0       0       0      0      0      0       0       0       0       0       0      0       0       0       0

CodePudding user response:

This is could be an option. I don't need interval_client, just putting a condition between ExitDate and EntryDate.

for (y in years){
  for (m in months){
    df[ ,paste0(y, '-', m)] <- ifelse(ExitDate >= ym(paste0(y, '-', m)) & 
                                      EntryDate <= ym(paste0(y, '-', m)), 
                                      1, 0)
  }
}

#Client  EntryDate   ExitDate                interval_client 2004-1 2004-2 2004-3 2004-4 2004-5 2004-6 2004-7 2004-8 2004-9 2004-10
#1       A 2004-01-01 2005-01-22 2004-01-01 UTC--2005-01-22 UTC      1      1      1      1      1      1      1      1      1       1
#2       B 2004-02-11 2004-03-01 2004-02-11 UTC--2004-03-01 UTC      0      0      1      0      0      0      0      0      0       0
#3       C 2004-03-21 2004-05-21 2004-03-21 UTC--2004-05-21 UTC      0      0      0      1      1      0      0      0      0       0
#4       D 2004-04-14 2004-04-28 2004-04-14 UTC--2004-04-28 UTC      0      0      0      0      0      0      0      0      0       0
#5       E 2004-05-22 2004-08-09 2004-05-22 UTC--2004-08-09 UTC      0      0      0      0      0      1      1      1      0       0
#6       F 2004-05-08 2004-08-08 2004-05-08 UTC--2004-08-08 UTC      0      0      0      0      0      1      1      1      0       0
#7       G 2004-07-20 2005-06-20 2004-07-20 UTC--2005-06-20 UTC      0      0      0      0      0      0      0      1      1       1
#8       H 2004-07-28 2004-10-28 2004-07-28 UTC--2004-10-28 UTC      0      0      0      0      0      0      0      1      1       1
#9       I 2004-09-30 2004-10-30 2004-09-30 UTC--2004-10-30 UTC      0      0      0      0      0      0      0      0      0       1
#10      J 2004-10-25 2004-11-01 2004-10-25 UTC--2004-11-01 UTC      0      0      0      0      0      0      0      0      0       0
#11      K 2004-11-28 2004-12-28 2004-11-28 UTC--2004-12-28 UTC      0      0      0      0      0      0      0      0      0       0
#12      L 2004-12-30 2005-12-30 2004-12-30 UTC--2005-12-30 UTC      0      0      0      0      0      0      0      0      0       0

CodePudding user response:

Here is another approach with data.table that should give a similar result. You can create a sequence of months from the entry and exit dates (using floor_date to start with first day of the month). Then use dcast to put into wide format of year-month.

library(lubridate)
library(data.table)

setDT(df)[ , list(Client = Client, month = format(seq(floor_date(EntryDate, "month"), floor_date(ExitDate, "month"), by = "month"), format = "%Y-%m")), by = 1:nrow(df)][
  , dcast(.SD, Client ~ month, fun = length)
]

Output

    Client 2004-01 2004-02 2004-03 2004-04 2004-05 2004-06 2004-07 2004-08 2004-09 2004-10 2004-11 2004-12 2005-01 2005-02 2005-03 2005-04 2005-05 2005-06 2005-07 2005-08 2005-09
 1:      A       1       1       1       1       1       1       1       1       1       1       1       1       1       0       0       0       0       0       0       0       0
 2:      B       0       1       1       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 3:      C       0       0       1       1       1       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 4:      D       0       0       0       1       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
 5:      E       0       0       0       0       1       1       1       1       0       0       0       0       0       0       0       0       0       0       0       0       0
 6:      F       0       0       0       0       1       1       1       1       0       0       0       0       0       0       0       0       0       0       0       0       0
 7:      G       0       0       0       0       0       0       1       1       1       1       1       1       1       1       1       1       1       1       0       0       0
 8:      H       0       0       0       0       0       0       1       1       1       1       0       0       0       0       0       0       0       0       0       0       0
 9:      I       0       0       0       0       0       0       0       0       1       1       0       0       0       0       0       0       0       0       0       0       0
10:      J       0       0       0       0       0       0       0       0       0       1       1       0       0       0       0       0       0       0       0       0       0
11:      K       0       0       0       0       0       0       0       0       0       0       1       1       0       0       0       0       0       0       0       0       0
12:      L       0       0       0       0       0       0       0       0       0       0       0       1       1       1       1       1       1       1       1       1       1
    2005-10 2005-11 2005-12
 1:       0       0       0
 2:       0       0       0
 3:       0       0       0
 4:       0       0       0
 5:       0       0       0
 6:       0       0       0
 7:       0       0       0
 8:       0       0       0
 9:       0       0       0
10:       0       0       0
11:       0       0       0
12:       1       1       1
  • Related