I have a data frame, DF_1, that shows the hospital admission date (date and time), the hospital discharge date (date and time), and whether the admission is a hospital readmission within 30 days. For example:
# Sample data: one row per hospital stay; timestamps are day/month/year text.
ID <- c(111, 222, 222, 333, 444, 444, 555, 666, 1010, 1010, 1010)
PATIENT_ADMISSION <- c(
  "18/03/2022 15:30", "24/03/2022 12:28", "27/03/2022 01:38",
  "31/03/2022 08:53", "16/04/2022 22:45", "22/04/2022 13:15",
  "05/04/2022 05:44", "30/03/2022 06:16", "10/01/2022 17:30",
  "16/03/2022 22:00", "08/04/2022 14:49"
)
PATIENT_DISCHARGE <- c(
  "01/04/2022 11:20", "26/03/2022 12:56", "27/03/2022 17:52",
  "01/04/2022 16:15", "17/04/2022 12:26", "25/04/2022 14:54",
  "05/04/2022 11:44", "07/04/2022 05:23", "12/01/2022 06:35",
  "06/04/2022 11:35", "12/04/2022 12:36"
)
# "Y" marks a stay flagged as a readmission within 30 days, "N" otherwise.
PATIENT_READMISSION_30D <- c(
  "N", "N", "Y", "N", "N", "Y", "N", "N", "N", "N", "Y"
)
DF_1 <- data.frame(
  ID = ID,
  PATIENT_ADMISSION = PATIENT_ADMISSION,
  PATIENT_DISCHARGE = PATIENT_DISCHARGE,
  PATIENT_READMISSION_30D = PATIENT_READMISSION_30D
)
I want to add one more piece of information to DF_1: whether each readmission (PATIENT_READMISSION_30D = Y) happened within 72 hours. DF_1 would then have one more variable and would look like this:
# Desired result: the sample data plus a PATIENT_READMISSION_72H column that
# is filled in ("Y"/"N") only on rows where PATIENT_READMISSION_30D is "Y".
ID <- c(
  111, 222, 222, 333, 444, 444,
  555, 666, 1010, 1010, 1010
)
PATIENT_ADMISSION <- c(
  "18/03/2022 15:30", "24/03/2022 12:28", "27/03/2022 01:38", "31/03/2022 08:53",
  "16/04/2022 22:45", "22/04/2022 13:15", "05/04/2022 05:44", "30/03/2022 06:16",
  "10/01/2022 17:30", "16/03/2022 22:00", "08/04/2022 14:49"
)
PATIENT_DISCHARGE <- c(
  "01/04/2022 11:20", "26/03/2022 12:56", "27/03/2022 17:52", "01/04/2022 16:15",
  "17/04/2022 12:26", "25/04/2022 14:54", "05/04/2022 11:44", "07/04/2022 05:23",
  "12/01/2022 06:35", "06/04/2022 11:35", "12/04/2022 12:36"
)
PATIENT_READMISSION_30D <- c("N", "N", "Y", "N", "N", "Y", "N", "N", "N", "N", "Y")
PATIENT_READMISSION_72H <- c("", "", "Y", "", "", "N", "", "", "", "", "Y")
DF_1 <- data.frame(
  ID = ID,
  PATIENT_ADMISSION = PATIENT_ADMISSION,
  PATIENT_DISCHARGE = PATIENT_DISCHARGE,
  PATIENT_READMISSION_30D = PATIENT_READMISSION_30D,
  PATIENT_READMISSION_72H = PATIENT_READMISSION_72H
)
How can I compute this new variable and add it to DF_1?
CodePudding user response:
Based on the time elapsed since the same patient's previous discharge:
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(lubridate))
# Sample data from the question, rebuilt as a tibble.
ID <- c(111, 222, 222, 333, 444, 444, 555, 666, 1010, 1010, 1010)
PATIENT_ADMISSION <- c(
  "18/03/2022 15:30", "24/03/2022 12:28", "27/03/2022 01:38",
  "31/03/2022 08:53", "16/04/2022 22:45", "22/04/2022 13:15",
  "05/04/2022 05:44", "30/03/2022 06:16", "10/01/2022 17:30",
  "16/03/2022 22:00", "08/04/2022 14:49"
)
PATIENT_DISCHARGE <- c(
  "01/04/2022 11:20", "26/03/2022 12:56", "27/03/2022 17:52",
  "01/04/2022 16:15", "17/04/2022 12:26", "25/04/2022 14:54",
  "05/04/2022 11:44", "07/04/2022 05:23", "12/01/2022 06:35",
  "06/04/2022 11:35", "12/04/2022 12:36"
)
PATIENT_READMISSION_30D <- c(
  "N", "N", "Y", "N", "N", "Y", "N", "N", "N", "N", "Y"
)
DF_1 <- tibble(
  ID = ID,
  PATIENT_ADMISSION = PATIENT_ADMISSION,
  PATIENT_DISCHARGE = PATIENT_DISCHARGE,
  PATIENT_READMISSION_30D = PATIENT_READMISSION_30D
)
# Add READMISSION_72H: for rows flagged as 30-day readmissions, "Y" when the
# admission is at most 72 hours after the previous discharge within the same
# ID, "N" when it is later; NA on every other row.
DF_1 |>
  # Drop the "PATIENT_" prefix just to shorten the column names.
  rename_with(\(col_name) str_remove_all(col_name, "PATIENT_")) |>
  # Grouping keeps lag() inside each patient's own rows.
  group_by(ID) |>
  mutate(
    across(c(ADMISSION, DISCHARGE), dmy_hm),
    READMISSION_72H = if_else(
      READMISSION_30D == "Y",
      if_else(
        difftime(ADMISSION, lag(DISCHARGE), units = "hours") <= 72,
        "Y",
        "N"
      ),
      NA_character_
    )
  )
#> # A tibble: 11 × 5
#> # Groups: ID [7]
#> ID ADMISSION DISCHARGE READMISSION_30D READMISSION_72H
#> <dbl> <dttm> <dttm> <chr> <chr>
#> 1 111 2022-03-18 15:30:00 2022-04-01 11:20:00 N <NA>
#> 2 222 2022-03-24 12:28:00 2022-03-26 12:56:00 N <NA>
#> 3 222 2022-03-27 01:38:00 2022-03-27 17:52:00 Y Y
#> 4 333 2022-03-31 08:53:00 2022-04-01 16:15:00 N <NA>
#> 5 444 2022-04-16 22:45:00 2022-04-17 12:26:00 N <NA>
#> 6 444 2022-04-22 13:15:00 2022-04-25 14:54:00 Y N
#> 7 555 2022-04-05 05:44:00 2022-04-05 11:44:00 N <NA>
#> 8 666 2022-03-30 06:16:00 2022-04-07 05:23:00 N <NA>
#> 9 1010 2022-01-10 17:30:00 2022-01-12 06:35:00 N <NA>
#> 10 1010 2022-03-16 22:00:00 2022-04-06 11:35:00 N <NA>
#> 11 1010 2022-04-08 14:49:00 2022-04-12 12:36:00 Y Y
Created on 2022-05-19 by the reprex package (v2.0.1)
CodePudding user response:
Here is a possible dplyr solution:
library(lubridate)
library(dplyr)
# Add `hours` (whole hours between an admission and the same patient's
# previous discharge) and READMISSION_72H ("Y"/"N" for rows flagged as 30-day
# readmissions, NA otherwise).
# Assumes rows are in chronological order within each ID.
# NOTE: the printout that follows was produced by an earlier, ungrouped
# version. With this code `hours` is NA on each patient's first row, and if
# DF_1 was a plain data frame the result is now a tibble.
DF_1 %>%
  # Select the date columns by name; positions break if columns are reordered.
  mutate(across(c(PATIENT_ADMISSION, PATIENT_DISCHARGE), dmy_hm)) %>%
  # Group so lag() never picks up a different patient's discharge.
  group_by(ID) %>%
  mutate(
    gap = difftime(
      PATIENT_ADMISSION,
      lag(PATIENT_DISCHARGE),
      units = "hours"
    ),
    # Whole hours, kept for display only.
    hours = floor(gap),
    # Compare the exact gap: flooring first would count e.g. 72.5 hours
    # as "within 72 hours".
    READMISSION_72H = case_when(
      PATIENT_READMISSION_30D == "Y" & gap <= 72 ~ "Y",
      PATIENT_READMISSION_30D == "Y" & gap > 72 ~ "N"
    )
  ) %>%
  ungroup() %>%
  # `gap` was only a helper for the comparison.
  select(-gap)
ID PATIENT_ADMISSION PATIENT_DISCHARGE PATIENT_READMISSION_30D hours READMISSION_72H
1 111 2022-03-18 15:30:00 2022-04-01 11:20:00 N NA hours <NA>
2 222 2022-03-24 12:28:00 2022-03-26 12:56:00 N -191 hours <NA>
3 222 2022-03-27 01:38:00 2022-03-27 17:52:00 Y 12 hours Y
4 333 2022-03-31 08:53:00 2022-04-01 16:15:00 N 87 hours <NA>
5 444 2022-04-16 22:45:00 2022-04-17 12:26:00 N 366 hours <NA>
6 444 2022-04-22 13:15:00 2022-04-25 14:54:00 Y 120 hours N
7 555 2022-04-05 05:44:00 2022-04-05 11:44:00 N -490 hours <NA>
8 666 2022-03-30 06:16:00 2022-04-07 05:23:00 N -150 hours <NA>
9 1010 2022-01-10 17:30:00 2022-01-12 06:35:00 N -2076 hours <NA>
10 1010 2022-03-16 22:00:00 2022-04-06 11:35:00 N 1527 hours <NA>
11 1010 2022-04-08 14:49:00 2022-04-12 12:36:00 Y 51 hours Y
CodePudding user response:
You can use difftime() with dplyr::lag() to calculate the time differences in hours and dplyr::case_when() to apply the conditions:
# Ensure proper format for dates: parse the day/month/year timestamps.
# Columns are picked by name so the code does not depend on column order.
DF_1[c("PATIENT_ADMISSION", "PATIENT_DISCHARGE")] <- lapply(
  DF_1[c("PATIENT_ADMISSION", "PATIENT_DISCHARGE")],
  lubridate::dmy_hm
)

# Add PATIENT_READMISSION_72H_NEW: "" when the row is not a 30-day
# readmission, otherwise "Y"/"N" depending on whether the admission is at most
# 72 hours after the same patient's previous discharge (NA when that patient
# has no earlier row).
# Assumes rows are in chronological order within each ID.
DF_1 %>%
  # Group so lag() never compares against a different patient's discharge.
  group_by(ID) %>%
  mutate(
    # Computed once and reused by both conditions below.
    gap = difftime(
      PATIENT_ADMISSION,
      lag(PATIENT_DISCHARGE),
      units = "hours"
    ),
    PATIENT_READMISSION_72H_NEW = case_when(
      PATIENT_READMISSION_30D == "N" ~ "",
      gap <= 72 ~ "Y",
      gap > 72 ~ "N"
    )
  ) %>%
  ungroup() %>%
  # `gap` was only a helper for the comparison.
  select(-gap)
Output:
# ID PATIENT_ADMISSION PATIENT_DISCHARGE PATIENT_READMISSION_30D PATIENT_READMISSION_72H
# 1 111 2022-03-18 15:30:00 2022-04-01 11:20:00 N
# 2 222 2022-03-24 12:28:00 2022-03-26 12:56:00 N
# 3 222 2022-03-27 01:38:00 2022-03-27 17:52:00 Y Y
# 4 333 2022-03-31 08:53:00 2022-04-01 16:15:00 N
# 5 444 2022-04-16 22:45:00 2022-04-17 12:26:00 N
# 6 444 2022-04-22 13:15:00 2022-04-25 14:54:00 Y N
# 7 555 2022-04-05 05:44:00 2022-04-05 11:44:00 N
# 8 666 2022-03-30 06:16:00 2022-04-07 05:23:00 N
# 9 1010 2022-01-10 17:30:00 2022-01-12 06:35:00 N
# 10 1010 2022-03-16 22:00:00 2022-04-06 11:35:00 N
# 11 1010 2022-04-08 14:49:00 2022-04-12 12:36:00 Y Y