Home > Net >  How to calculate date difference between multiple columns in r data frame?
How to calculate date difference between multiple columns in r data frame?

Time:12-30

I am trying to calculate the difference between multiple dates in a data frame. I provided sample data below.

I know I can find the difference between dates using difftime(Date2, Date1, unit = "days"). Also, I tried the following code and it works, too. I just want to do it in a tidier way in the data frame.

library(glue)
library(tidyverse)

col_of_interest <- c("InstantDate")
col_orders <- paste0(col_of_interest, "_", rep(c(1:11), each = 1))

data_date <- data %>% 
  select(any_of(col_orders))

datadiff <- data_date[2:11] - data_date[1:10]

My unsuccessful try is here.

data_date <- data %>% 
  select(any_of(col_orders)) %>%
  mutate(for(i in (seq(vars) - 1)) "Day_diff_{i}" := difftime(vars[i 1], vars[i], units = "days"))

Here is sample code.

data <- structure(list(Total_1 = c("NULL", "NULL", "NULL", "NULL", "NULL", 
"NULL"), Total_2 = c("17", "5", "3", "13", "NULL", "0"), Total_3 = c("15", 
"NULL", NA, "2", "6", NA), Total_4 = c("9", NA, NA, "8", NA, 
NA), Total_5 = c("15", NA, NA, "14", NA, NA), Total_6 = c("NULL", 
NA, NA, NA, NA, NA), Total_7 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
Total_8 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_), Total_9 = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_), Total_10 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), Total_11 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_), InstantDate_1 = structure(c(18327, 
18330, 18332, 18332, 18332, 18333), class = "Date"), InstantDate_2 = structure(c(18673, 
18858, 18794, 18527, 18516, 18533), class = "Date"), InstantDate_3 = structure(c(18703, 
19044, NA, 18673, 18726, NA), class = "Date"), InstantDate_4 = structure(c(18786, 
NA, NA, 18905, NA, NA), class = "Date"), InstantDate_5 = structure(c(18855, 
NA, NA, 19006, NA, NA), class = "Date"), InstantDate_6 = structure(c(19229, 
NA, NA, NA, NA, NA), class = "Date"), InstantDate_7 = structure(c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), 
InstantDate_8 = structure(c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), class = "Date"), InstantDate_9 = structure(c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), 
InstantDate_10 = structure(c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), class = "Date"), InstantDate_11 = structure(c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), 
VisitType_1 = c("NULL", "NULL", "NULL", "NULL", "NULL", "NULL"
), VisitType_2 = c("FOLLOW UP", "FOLLOW UP", "VIRTUAL VISIT", 
"OFFICE VISIT", "NULL", "VIRTUAL VISIT"), VisitType_3 = c("FOLLOW UP", 
"FOLLOW UP", NA, "VIRTUAL VISIT", "VIRTUAL VISIT", NA), VisitType_4 = c("FOLLOW UP", 
NA, NA, "VIRTUAL VISIT", NA, NA), VisitType_5 = c("FOLLOW UP", 
NA, NA, "FOLLOW UP", NA, NA), VisitType_6 = c("FOLLOW UP", 
NA, NA, NA, NA, NA), VisitType_7 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), VisitType_8 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_), VisitType_9 = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_), VisitType_10 = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), VisitType_11 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_)), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

CodePudding user response:

We may use across2 from dplyover

library(dplyover)
out <- data %>% 
  select(any_of(col_orders)) %>%
   mutate(across2(2:11, 1:10, ~ difftime(.x, .y, units = "days"),
     .names = "Day_diff_{idx}"))

-output

> out %>% select(starts_with("Day_diff"))
# A tibble: 6 × 10
  Day_diff_1 Day_diff_2 Day_diff_3 Day_diff_4 Day_diff_5 Day_diff_6 Day_diff_7 Day_diff_8 Day_diff_9 Day_diff_10
  <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     
1 346 days    30 days    83 days    69 days   374 days   NA days    NA days    NA days    NA days    NA days    
2 528 days   186 days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
3 462 days    NA days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
4 195 days   146 days   232 days   101 days    NA days   NA days    NA days    NA days    NA days    NA days    
5 184 days   210 days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
6 200 days    NA days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    

If we want to use the for loop

out1 <- data %>% 
   select(any_of(col_orders))
for(i in 2:11) out1 <- out1 %>%
    mutate( "Day_diff_{i-1}" := difftime(pick(i)[[1]], pick(i-1)[[1]], 
        units = "days"))

-output

> out1 %>% select(starts_with("Day_diff"))
# A tibble: 6 × 10
  Day_diff_1 Day_diff_2 Day_diff_3 Day_diff_4 Day_diff_5 Day_diff_6 Day_diff_7 Day_diff_8 Day_diff_9 Day_diff_10
  <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     <drtn>     
1 346 days    30 days    83 days    69 days   374 days   NA days    NA days    NA days    NA days    NA days    
2 528 days   186 days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
3 462 days    NA days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
4 195 days   146 days   232 days   101 days    NA days   NA days    NA days    NA days    NA days    NA days    
5 184 days   210 days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
6 200 days    NA days    NA days    NA days    NA days   NA days    NA days    NA days    NA days    NA days    
  • Related