Home > Net >  Count the number of occurrences of an event in the past 1 year
Count the number of occurrences of an event in the past 1 year

Time:03-21

I am trying to count the number of occurences of an event within 1 year of the date of that row. I have came up with the count for number of days since the last event but cannot figure out how to continue as I need to look back 365 days, and not just look forward from the date of last dummy.

I need the counts only when level is not NA, but that is not a big issue.

dtIhave = data.table(
  id = c(rep(1,17),rep(2,13)),
  date = c(as.Date("2014-12-05"),
           as.Date("2015-01-23"),
           as.Date("2015-03-06"),
           as.Date("2015-05-15"),
           as.Date("2015-08-06"),
           as.Date("2015-10-29"),
           as.Date("2016-01-21"),
           as.Date("2016-04-06"),
           as.Date("2016-07-11"),
           as.Date("2016-10-03"),
           as.Date("2016-11-11"),
           as.Date("2016-12-07"),
           as.Date("2017-10-25"),
           as.Date("2018-01-09"),
           as.Date("2018-02-12"),
           as.Date("2018-07-04"),
           as.Date("2018-11-30"),
           as.Date("2014-05-14"),
           as.Date("2014-09-03"),
           as.Date("2014-09-04"),
           as.Date("2014-10-15"),
           as.Date("2014-11-08"),
           as.Date("2014-12-05"),
           as.Date("2014-12-18"),
           as.Date("2014-12-20"),
           as.Date("2014-12-23"),
           as.Date("2015-05-15"),
           as.Date("2015-08-19"),
           as.Date("2016-06-23"),
           as.Date("2017-04-21")),
  level = c(rnorm(10,7,1),
            NA,
            rnorm(9,7,1),
            NA,NA,
            7,
            NA,NA,NA,
            rnorm(4,7,1)), 
  dummyflag = c(rep("NA",10), 
               1,
               rep("NA",9),
               1,
               1,
               "NA",
               1,
               1,
               1,
               rep("NA",4)),
  dayssincedummy = c(rep(NA,11),
                     26,348,424,458,600,749,
                     rep(NA,4),
                     24,27,40,2,3,143,239,548,850)
  )
> dtIhave
    id       date    level dummyflag dayssincedummy
 1:  1 2014-12-05 6.804714        NA             NA
 2:  1 2015-01-23 7.502166        NA             NA
 3:  1 2015-03-06 8.477009        NA             NA
 4:  1 2015-05-15 8.215358        NA             NA
 5:  1 2015-08-06 5.331809        NA             NA
 6:  1 2015-10-29 7.176035        NA             NA
 7:  1 2016-01-21 8.660609        NA             NA
 8:  1 2016-04-06 6.296125        NA             NA
 9:  1 2016-07-11 7.429104        NA             NA
10:  1 2016-10-03 8.232085        NA             NA
11:  1 2016-11-11       NA         1             NA
12:  1 2016-12-07 7.362405        NA             26
13:  1 2017-10-25 6.918330        NA            348
14:  1 2018-01-09 6.251580        NA            424
15:  1 2018-02-12 7.755554        NA            458
16:  1 2018-07-04 7.410075        NA            600
17:  1 2018-11-30 7.082293        NA            749
18:  2 2014-05-14 6.656496        NA             NA
19:  2 2014-09-03 7.250200        NA             NA
20:  2 2014-09-04 8.224639        NA             NA
21:  2 2014-10-15       NA         1             NA
22:  2 2014-11-08       NA         1             24
23:  2 2014-12-05 7.000000        NA             27
24:  2 2014-12-18       NA         1             40
25:  2 2014-12-20       NA         1              2
26:  2 2014-12-23       NA         1              3
27:  2 2015-05-15 5.999202        NA            143
28:  2 2015-08-19 8.090059        NA            239
29:  2 2016-06-23 6.663109        NA            548
30:  2 2017-04-21 6.901531        NA            850
    id       date    level dummyflag dayssincedummy

dtIwant = data.table(
  id = c(rep(1,17),rep(2,13)),
  date = c(as.Date("2014-12-05"),
           as.Date("2015-01-23"),
           as.Date("2015-03-06"),
           as.Date("2015-05-15"),
           as.Date("2015-08-06"),
           as.Date("2015-10-29"),
           as.Date("2016-01-21"),
           as.Date("2016-04-06"),
           as.Date("2016-07-11"),
           as.Date("2016-10-03"),
           as.Date("2016-11-11"),
           as.Date("2016-12-07"),
           as.Date("2017-10-25"),
           as.Date("2018-01-09"),
           as.Date("2018-02-12"),
           as.Date("2018-07-04"),
           as.Date("2018-11-30"),
           as.Date("2014-05-14"),
           as.Date("2014-09-03"),
           as.Date("2014-09-04"),
           as.Date("2014-10-15"),
           as.Date("2014-11-08"),
           as.Date("2014-12-05"),
           as.Date("2014-12-18"),
           as.Date("2014-12-20"),
           as.Date("2014-12-23"),
           as.Date("2015-05-15"),
           as.Date("2015-08-19"),
           as.Date("2016-06-23"),
           as.Date("2017-04-21")),
  level = c(rnorm(10,7,1),
            NA,
            rnorm(9,7,1),
            NA,NA,
            7,
            NA,NA,NA,
            rnorm(4,7,1)), 
  dummyflag = c(rep("NA",10), 
                1,
                rep("NA",9),
                1,
                1,
                "NA",
                1,
                1,
                1,
                rep("NA",4)),
  dayssincedummy = c(rep(NA,11),
                     26,348,424,458,600,749,
                     rep(NA,4),
                     24,27,40,2,3,143,239,548,850),
  countofdummieswithin1yr = c(rep(0,11),
                              rep(1,2),
                              rep(0,9),
                              2,0,0,0,5,5,0,0)
                              
)

> dtIwant
    id       date    level dummyflag dayssincedummy countofdummieswithin1yr
 1:  1 2014-12-05 6.815477        NA             NA                       0
 2:  1 2015-01-23 8.378549        NA             NA                       0
 3:  1 2015-03-06 7.650022        NA             NA                       0
 4:  1 2015-05-15 5.836217        NA             NA                       0
 5:  1 2015-08-06 6.808881        NA             NA                       0
 6:  1 2015-10-29 6.538186        NA             NA                       0
 7:  1 2016-01-21 7.262269        NA             NA                       0
 8:  1 2016-04-06 7.623513        NA             NA                       0
 9:  1 2016-07-11 5.138703        NA             NA                       0
10:  1 2016-10-03 6.438781        NA             NA                       0
11:  1 2016-11-11       NA         1             NA                       0
12:  1 2016-12-07 6.678947        NA             26                       1
13:  1 2017-10-25 7.372212        NA            348                       1
14:  1 2018-01-09 7.173373        NA            424                       0
15:  1 2018-02-12 6.895957        NA            458                       0
16:  1 2018-07-04 4.507874        NA            600                       0
17:  1 2018-11-30 7.784138        NA            749                       0
18:  2 2014-05-14 8.253700        NA             NA                       0
19:  2 2014-09-03 6.471196        NA             NA                       0
20:  2 2014-09-04 6.555935        NA             NA                       0
21:  2 2014-10-15       NA         1             NA                       0
22:  2 2014-11-08       NA         1             24                       0
23:  2 2014-12-05 7.000000        NA             27                       2
24:  2 2014-12-18       NA         1             40                       0
25:  2 2014-12-20       NA         1              2                       0
26:  2 2014-12-23       NA         1              3                       0
27:  2 2015-05-15 6.703456        NA            143                       5
28:  2 2015-08-19 7.016929        NA            239                       5
29:  2 2016-06-23 7.870895        NA            548                       0
30:  2 2017-04-21 7.178562        NA            850                       0
    id       date    level dummyflag dayssincedummy countofdummieswithin1yr

typing gibberish as there is too much code in my question: ajksdfcksjadbf jklsdaakjsdhfkjsdhafkajsdfasdf asd gasd fgasdfsadfasdfas dfa dfas dfasd fa

CodePudding user response:

Try seq.Date with '-1 year' and loop over the rows using sapply.

library(data.table)
sapply(seq_len(nrow(dtIhave)), \(i) 
       dtIhave[date            
  • Related