Home > database >  Calculate the time interval between two timestamps
Calculate the time interval between two timestamps

Time:10-22

Having a dataframe like this:

data.frame(id = c(1,2,3,4), time_stamp_1 = c("Nov 2016-Current", "May 2013-Current", "Oct 2015-Current", "May 2014-Current"), time_stamp_2 = c("Mar 2015-Nov 2016", "May 2008-May 2013", "Aug 2005-Current", "Oct 2014-Jan 2015"))

How is it possible to add new columns which have the time difference in months for every row and whete current insert the "Sept 2022".

Example output:

data.frame(id = c(1,2,3,4), time_stamp_1 = c("Nov 2016-Current", "May 2013-Current", "Oct 2015-Current", "May 2014-Current"), time_stamp_2 = c("Mar 2015-Nov 2016", "May 2008-May 2013", "Aug 2005-Current", "Oct 2014-Jan 2015"), time_stamp_1_duration = c(41,43,24,53), time_stamp_2_duration = c(32,12,45,32))

duration is an example only it is not the real, just for example.

CodePudding user response:

This should do the trick. First replace all the "Current" and "Sept" with the R-recognized abbreviation "Sep", then use tidy::separate and zoo::as.yearmon() to convert to year-month format, then calculate the intervals (in months (x12) per OP):

library(tidyr)
library(zoo)

df <- data.frame(id = c(1,2,3,4), time_stamp_1 = c("Nov 2016-Current", "May 2013-Current", "Oct 2015-Current", "May 2014-Current"), time_stamp_2 = c("Mar 2015-Nov 2016", "May 2008-May 2013", "Aug 2005-Current", "Oct 2014-Jan 2015"))

# convert current and Sept to "Sep 2022"
df[2:3] <- lapply(df[2:3], function(x) gsub("-Current|-Sept 2022", "-Sep 2022", x))

df %>%
  separate(time_stamp_1, into = c("my1a", "my1b"), sep = "-") %>%
  separate(time_stamp_2, into = c("my2a", "my2b"), sep = "-") %>%
  mutate(across(my1a:my2b, ~ as.yearmon(.x, format = "%b %Y"))) %>%
  mutate(interval_1 = (my1b - my1a) * 12,
         interval_2 = (my2b - my2a) * 12) %>%
  left_join(df) %>% select(names(df), "interval_1", "interval_2")

Output:

  id      time_stamp_1      time_stamp_2 interval_1 interval_2
1  1 Nov 2016-Sep 2022 Mar 2015-Nov 2016         70         20
2  2 May 2013-Sep 2022 May 2008-May 2013        112         60
3  3 Oct 2015-Sep 2022 Aug 2005-Sep 2022         83        205
4  4 May 2014-Sep 2022 Oct 2014-Jan 2015        100          3

As G. Grothendieck mentions in the comments, we could wrap this in a function:

# thanks to G. Grothendieck

ts2mos <- function(x) {
    x <- gsub("-Current|-Sept 2022", "-Sep 2022", x)
    12 * (as.yearmon(sub(".*-", "", x)) - as.yearmon(x, "%b %Y"))
}

df %>%  mutate(interval_1 = ts2mos(time_stamp_1),
                interval_2 = ts2mos(time_stamp_2))

CodePudding user response:

library(stringr)

timespan_to_duration <- function(x) {
  x[ x == 'Current' ] <- 'Sep 2022'
  x <- str_replace_all(x, '\\s ', ' 01 ')
  x <- as.POSIXct(x, format = '%b %d %Y')
  ((difftime(x[ 2 ], x[ 1 ], units = 'days') |> 
      as.integer()) / 30) |> 
    round()
}

df <- data.frame(id = c(1,2,3,4), 
           time_stamp_1 = c("Nov 2016-Current", "May 2013-Current", "Oct 2015-Current", "May 2014-Current"), 
           time_stamp_2 = c("Mar 2015-Nov 2016", "May 2008-May 2013", "Aug 2005-Current", "Oct 2014-Jan 2015"))
df$time_stamp_1_duration <- df$time_stamp_1 |> 
  str_split('-') |> 
  lapply(timespan_to_duration) |> 
  unlist()
df$time_stamp_2_duration <- df$time_stamp_2 |> 
  str_split('-') |> 
  lapply(timespan_to_duration) |> 
  unlist()
df

CodePudding user response:

Using tidyverse

library(dplyr)
library(lubridate)
library(stringr)
df1 %>% 
   mutate(across(starts_with('time_stamp'),  ~ {
  tmp <- str_replace(.x, "Current", 'Sep 2022') %>%
    str_replace("(\\w ) (\\d )-(\\w ) (\\d )", "\\2-\\1-01/\\4-\\3-01") %>% 
     interval
   tmp %/% months(1)}, .names = "{.col}_duration"))

-output

id     time_stamp_1      time_stamp_2 time_stamp_1_duration time_stamp_2_duration
1  1 Nov 2016-Current Mar 2015-Nov 2016                    70                    20
2  2 May 2013-Current May 2008-May 2013                   112                    60
3  3 Oct 2015-Current  Aug 2005-Current                    83                   205
4  4 May 2014-Current Oct 2014-Jan 2015                   100                     3

CodePudding user response:

A tidyverse approach

library(dplyr)
library(lubridate)
library(stringr)

df %>% 
  mutate(across(starts_with("time_stamp"), ~ str_replace(.x, "Current", "Sep 2022")), 
    time_stamp_1_duration = sapply(str_split(time_stamp_1, "-"), function(x) 
      interval(my(x[1]), my(x[2])) %/% months(1)), 
    time_stamp_2_duration = sapply(str_split(time_stamp_2, "-"), function(x) 
      interval(my(x[1]), my(x[2])) %/% months(1)), 
    across(starts_with("time_stamp"), ~ str_replace(.x, "Sep 2022", "Current")))
  id     time_stamp_1      time_stamp_2 time_stamp_1_duration
1  1 Nov 2016-Current Mar 2015-Nov 2016                    70
2  2 May 2013-Current May 2008-May 2013                   112
3  3 Oct 2015-Current  Aug 2005-Current                    83
4  4 May 2014-Current Oct 2014-Jan 2015                   100
  time_stamp_2_duration
1                    20
2                    60
3                   205
4                     3

CodePudding user response:

One possible solution using the function tstrsplit from data.table package. Not that I am also using the built-in constant month.abb.

df[c("duration1", "duration2")] = lapply(df[2:3], function(x) {
  x = data.table::tstrsplit(sub("Current", "Sep 2022", x), 
                            split="\\s|-", 
                            type.convert=TRUE, 
                            names=c("mo1", "yr1", "mo2", "yr2"))
  x[c("mo1", "mo2")] = lapply(x[c("mo1", "mo2")], match, month.abb)
  pmax(x$yr2 - x$yr1-1, 0) * 12   12-x$mo1   x$mo2 
})

  id     time_stamp_1      time_stamp_2 duration1 duration2
1  1 Nov 2016-Current Mar 2015-Nov 2016        70        20
2  2 May 2013-Current May 2008-May 2013       112        60
3  3 Oct 2015-Current  Aug 2005-Current        83       205
4  4 May 2014-Current Oct 2014-Jan 2015       100         3
  •  Tags:  
  • r
  • Related