I am trying to calculate the difference between multiple dates in a data frame. I provided sample data below.
I know I can find the difference between dates using difftime(Date2, Date1, unit = "days")
. Also, I tried the following code and it works, too. I just want to do it in a tidier way in the data frame.
library(glue)
library(tidyverse)
col_of_interest <- c("InstantDate")
col_orders <- paste0(col_of_interest, "_", rep(c(1:11), each = 1))
data_date <- data %>%
select(any_of(col_orders))
datadiff <- data_date[2:11] - data_date[1:10]
My unsuccessful try is here.
data_date <- data %>%
select(any_of(col_orders)) %>%
mutate(for(i in (seq(vars) - 1)) "Day_diff_{i}" := difftime(vars[i 1], vars[i], units = "days"))
Here is sample code.
data <- structure(list(Total_1 = c("NULL", "NULL", "NULL", "NULL", "NULL",
"NULL"), Total_2 = c("17", "5", "3", "13", "NULL", "0"), Total_3 = c("15",
"NULL", NA, "2", "6", NA), Total_4 = c("9", NA, NA, "8", NA,
NA), Total_5 = c("15", NA, NA, "14", NA, NA), Total_6 = c("NULL",
NA, NA, NA, NA, NA), Total_7 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
Total_8 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Total_9 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), Total_10 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), Total_11 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), InstantDate_1 = structure(c(18327,
18330, 18332, 18332, 18332, 18333), class = "Date"), InstantDate_2 = structure(c(18673,
18858, 18794, 18527, 18516, 18533), class = "Date"), InstantDate_3 = structure(c(18703,
19044, NA, 18673, 18726, NA), class = "Date"), InstantDate_4 = structure(c(18786,
NA, NA, 18905, NA, NA), class = "Date"), InstantDate_5 = structure(c(18855,
NA, NA, 19006, NA, NA), class = "Date"), InstantDate_6 = structure(c(19229,
NA, NA, NA, NA, NA), class = "Date"), InstantDate_7 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"),
InstantDate_8 = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), class = "Date"), InstantDate_9 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"),
InstantDate_10 = structure(c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), class = "Date"), InstantDate_11 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"),
VisitType_1 = c("NULL", "NULL", "NULL", "NULL", "NULL", "NULL"
), VisitType_2 = c("FOLLOW UP", "FOLLOW UP", "VIRTUAL VISIT",
"OFFICE VISIT", "NULL", "VIRTUAL VISIT"), VisitType_3 = c("FOLLOW UP",
"FOLLOW UP", NA, "VIRTUAL VISIT", "VIRTUAL VISIT", NA), VisitType_4 = c("FOLLOW UP",
NA, NA, "VIRTUAL VISIT", NA, NA), VisitType_5 = c("FOLLOW UP",
NA, NA, "FOLLOW UP", NA, NA), VisitType_6 = c("FOLLOW UP",
NA, NA, NA, NA, NA), VisitType_7 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), VisitType_8 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), VisitType_9 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), VisitType_10 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), VisitType_11 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_)), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
We may use across2
from dplyover
library(dplyover)
out <- data %>%
select(any_of(col_orders)) %>%
mutate(across2(2:11, 1:10, ~ difftime(.x, .y, units = "days"),
.names = "Day_diff_{idx}"))
-output
> out %>% select(starts_with("Day_diff"))
# A tibble: 6 × 10
Day_diff_1 Day_diff_2 Day_diff_3 Day_diff_4 Day_diff_5 Day_diff_6 Day_diff_7 Day_diff_8 Day_diff_9 Day_diff_10
<drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn>
1 346 days 30 days 83 days 69 days 374 days NA days NA days NA days NA days NA days
2 528 days 186 days NA days NA days NA days NA days NA days NA days NA days NA days
3 462 days NA days NA days NA days NA days NA days NA days NA days NA days NA days
4 195 days 146 days 232 days 101 days NA days NA days NA days NA days NA days NA days
5 184 days 210 days NA days NA days NA days NA days NA days NA days NA days NA days
6 200 days NA days NA days NA days NA days NA days NA days NA days NA days NA days
If we want to use the for
loop
out1 <- data %>%
select(any_of(col_orders))
for(i in 2:11) out1 <- out1 %>%
mutate( "Day_diff_{i-1}" := difftime(pick(i)[[1]], pick(i-1)[[1]],
units = "days"))
-output
> out1 %>% select(starts_with("Day_diff"))
# A tibble: 6 × 10
Day_diff_1 Day_diff_2 Day_diff_3 Day_diff_4 Day_diff_5 Day_diff_6 Day_diff_7 Day_diff_8 Day_diff_9 Day_diff_10
<drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn>
1 346 days 30 days 83 days 69 days 374 days NA days NA days NA days NA days NA days
2 528 days 186 days NA days NA days NA days NA days NA days NA days NA days NA days
3 462 days NA days NA days NA days NA days NA days NA days NA days NA days NA days
4 195 days 146 days 232 days 101 days NA days NA days NA days NA days NA days NA days
5 184 days 210 days NA days NA days NA days NA days NA days NA days NA days NA days
6 200 days NA days NA days NA days NA days NA days NA days NA days NA days NA days