Error in UseMethod("summarise") : no applicable method for 'summarise' applied to an object of class "Date" - CodePudding

I'm trying to analyse data with dates.

Here is the code for my data:

# dput() of the data: 12 rows for ids 1, 2 and 6 with columns id, drug, hospital, start_date.y (Date), total.price, Diff and discontinuation (character "0"/"1"); classed grouped_df with groups keyed on id.
structure(list(id = c(1L, 1L, 1L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 6L), drug = c("b", "b", "c", "b", "b", "c", "a", "a", "a", "a", "a", "b"), hospital = c(142953L, 142953L, 142953L, 12035L, 12035L, 12035L, 133163L, 133163L, 133163L, 133163L, 133163L, 133163L), start_date.y = structure(c(12173, 12204, 12753, 12311, 12341, 12400, 12877, 12907, 12938, 13091, 13121, 13152), class = "Date"), total.price = c(100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L), Diff = c(89L, 31L, 549L, 0L, 30L, 59L, 31L, 30L, 31L, 153L, 30L, 31L), discontinuation = c("0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0")), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -12L), groups = structure(list(id = c(1L, 2L, 6L), .rows = structure(list(1:3, 4:6, 7:12), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -3L), .drop = TRUE))

What I'm trying to calculate is the start date and end date of the drug prescriptions for each id.
First, I grouped data by "id" and "drug" variables.

If the variable 'discontinuation' == 1, the person's end date will be the discontinuation date.
If the variable 'discontinuation' == 0, the person's end date will be the last date of the prescriptions (max(start_date.y)).

I tried to calculate this by the code below.

# Attempt: group by id, take the earliest start_date.y as start, and pick end via ifelse() on the discontinuation flag.
bio_exp_dc <-bio_exp_dc %>% group_by(id) %>% summarise(start=min(start_date.y,na.rm = TRUE),end= ifelse(discontinuation==1,start_date.y,max(start_date.y)))

However, the following error occurred:

>Error in UseMethod("summarise") :   
  no applicable method for 'summarise' applied to an object of class "Date"   
In addition: Warning message:   
In min.default(c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,  :
  no non-missing arguments to min; returning Inf

CodePudding user response：

You could try aggregate with range. This first yields plain numbers (days since the start of the UNIX epoch), but the "Date" format is easily restored.

# Running count of discontinuation flags within each id/drug series:
# disc stays 0 until the first row flagged "1", so disc == 0 keeps only
# the prescriptions recorded before any discontinuation.
bio <- transform(bio, disc = ave(discontinuation, id, drug, FUN = cumsum))
# range() gives c(min, max) of start_date.y per id/drug group as a
# two-column matrix; do.call(data.frame, ...) spreads it into two columns.
a <- aggregate(start_date.y ~ id + drug, bio[bio$disc == 0, ], range) |>
  do.call(what = data.frame)
# The dates come back as day counts, so convert those columns back to Date.
dt <- grep('date', names(a))
a[dt] <- lapply(a[dt], as.Date, origin = '1970-01-01')
a
#   id drug start_date.y.1 start_date.y.2
# 1  6    a     2005-04-04     2005-06-04
# 2  1    b     2003-05-01     2003-06-01
# 3  2    b     2003-09-16     2003-10-16
# 4  6    b     2006-01-04     2006-01-04
# 5  2    c     2003-12-14     2003-12-14

Data:

# Example data: 12 prescription rows for ids 1, 2 and 6. start_date.y is a
# Date column, discontinuation is a character flag ("0"/"1"), and the object
# carries the classes grouped_df/tbl_df with its groups keyed on id.
bio <- structure(list(id = c(1L, 1L, 1L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 
6L, 6L), drug = c("b", "b", "c", "b", "b", "c", "a", "a", "a", 
"a", "a", "b"), hospital = c(142953L, 142953L, 142953L, 12035L, 
12035L, 12035L, 133163L, 133163L, 133163L, 133163L, 133163L, 
133163L), start_date.y = structure(c(12173, 12204, 12753, 12311, 
12341, 12400, 12877, 12907, 12938, 13091, 13121, 13152), class = "Date"), 
    total.price = c(100L, 100L, 100L, 100L, 100L, 100L, 100L, 
    100L, 100L, 100L, 100L, 100L), Diff = c(89L, 31L, 549L, 0L, 
    30L, 59L, 31L, 30L, 31L, 153L, 30L, 31L), discontinuation = c("0", 
    "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0")), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -12L), groups = structure(list(
    id = c(1L, 2L, 6L), .rows = structure(list(1:3, 4:6, 7:12), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))

CodePudding user response：

# For every id/drug combination: start is the earliest prescription date;
# end is the date of the first row flagged as discontinued when the group
# has one, otherwise the latest prescription date.
df %>%
  group_by(id, drug) %>%
  summarise(
    start = min(start_date.y, na.rm = TRUE),
    end = {
      stopped <- any(discontinuation == 1)
      # match() gives the position of the first "1" flag (NA if none).
      first_stop <- start_date.y[match(1, discontinuation)]
      last_date <- max(start_date.y)
      if_else(stopped, first_stop, last_date)
    }
  )

# A tibble: 6 × 4
# Groups:   id [3]
     id drug  start      end       
  <int> <chr> <date>     <date>    
1     1 b     2003-05-01 2003-06-01
2     1 c     2004-12-01 2004-12-01
3     2 b     2003-09-16 2003-10-16
4     2 c     2003-12-14 2003-12-14
5     6 a     2005-04-04 2005-11-04
6     6 b     2006-01-04 2006-01-04