I have a tibble which has different itemIDs.
What I want is the difference between timestamps, based on the itemID.
time itemID order salesPrice time_year time_week time_day time_diff
<dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <date> <dbl>
1 2018-02-24 19:35:26 2 1 9.15 2018 8 2018-02-24 113
2 2018-02-26 21:39:53 2 1 9.15 2018 9 2018-02-26 101
3 2018-05-27 16:33:25 2 1 9.15 2018 21 2018-05-27 208
4 2018-05-29 22:10:40 2 2 9.15 2018 22 2018-05-29 25
So the time_diff
should not be like above, it should be like follow (time_diff in seconds):
time itemID order salesPrice time_year time_week time_day time_diff
<dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <date> <dbl>
1 2018-02-24 19:35:26 2 1 9.15 2018 8 2018-02-24 0
2 2018-02-26 21:39:53 2 1 9.15 2018 9 2018-02-26 180267
3 2018-05-27 16:33:25 2 1 9.15 2018 21 2018-05-27 7754012
4 2018-05-29 22:10:40 2 2 9.15 2018 22 2018-05-29 193035
e.g.
"2018-05-29 22:10:40" (last transaction) - "2018-05-27 16:33:25" (actual transaction) = 193035 seconds
Reproduction:
df <- structure(list(time = structure(c(1514764916, 1514764916, 1514765231,
1514765364, 1514765364, 1514767166, 1514767919, 1514767919, 1514767919,
1514767919, 1514767919, 1514767919, 1514768104, 1514768214, 1514768214,
1514768214, 1514768214, 1514768214, 1514768214, 1514770106, 1514770983,
1514770983, 1514782167, 1514784574, 1514787174, 1514788305, 1514791067,
1514791068, 1514791069, 1514791070, 1514793524, 1514793525, 1514795443,
1514795443, 1514795444, 1514795445, 1514795446, 1514796571, 1514798636,
1514798637, 1514798638, 1514798639, 1514798640, 1514798641, 1514799127,
1514799230, 1514799230, 1514799230, 1514799231, 1514799837, 1514800396,
1514800396, 1514800705, 1514800742, 1514800812, 1514801630, 1514801631,
1514801632, 1514801633, 1514801634, 1514801635, 1514801636, 1514801637,
1514801638, 1514801639, 1514801639, 1514801640, 1514801641, 1514801642,
1514801643, 1514801643, 1514801644, 1514801645, 1514801646, 1514801647,
1514801648, 1514801649, 1514801649, 1514801650, 1514801651, 1514801652,
1514801653, 1514801654, 1514801654, 1514801655, 1514801656, 1514801657,
1514801658, 1514801659, 1514801660, 1514801661, 1514801662, 1514801663,
1514801664, 1514801665, 1514801666, 1514801667, 1514801668, 1514801669,
1514801670), tzone = "UTC", class = c("POSIXct", "POSIXt")),
itemID = c(83, 450, 7851, 83, 450, 9375, 19, 83, 109, 295,
297, 450, 2049, 19, 83, 109, 295, 297, 450, 4322, 7975, 8192,
5007, 2013, 6744, 386, 529, 529, 529, 529, 222, 222, 5094,
6653, 5094, 5094, 5094, 138, 561, 561, 561, 561, 561, 561,
5022, 222, 6574, 6575, 9368, 222, 8075, 8107, 520, 520, 520,
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 520, 520, 520, 520,
222, 520, 520, 520, 520, 520, 520, 222, 520, 520, 520, 520,
520, 113, 520, 113, 113, 113, 113, 113, 113, 113, 113, 113,
113, 113, 113, 113, 113, 113, 113), order = c(1, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 1, 2, 3, 3, 4, 1, 2, 2, 2, 3, 3, 1, 1, 3, 3, 3, 5,
4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 3, 4, 3, 3, 4,
2, 4, 3, 2, 2, 3, 2, 3, 1, 3, 2, 3, 4, 5, 3, 1, 4, 4, 3,
3, 3, 3, 2, 3, 3, 5, 7, 4, 2, 6, 3, 3, 3, 6, 2, 3, 3, 3,
3), salesPrice = c(5.19, 17.42, 20.47, 5.19, 17.42, 31.02,
77.64, 5.19, 5.8, 37.79, 43.53, 17.42, 35.75, 77.64, 5.19,
5.8, 37.79, 43.53, 17.42, 22.86, 2.84, 29.92, 23.11, 17.11,
24.66, 45.53, 69.56, 69.56, 69.56, 69.56, 4.7, 4.7, 112.26,
3.11, 112.26, 112.26, 112.26, 11.85, 116.16, 116.16, 116.16,
116.16, 116.16, 116.16, 13.46, 4.7, 12.69, 17.93, 420.67,
4.7, 2.02, 25.24, 9.32, 9.32, 9.32, 368.13, 368.13, 368.13,
368.13, 368.13, 368.13, 368.13, 368.13, 368.13, 368.13, 9.32,
9.32, 9.32, 9.32, 4.7, 9.32, 9.32, 9.32, 9.32, 9.32, 9.32,
4.7, 9.32, 9.32, 9.32, 9.32, 9.32, 3.42, 9.32, 3.42, 3.42,
3.42, 3.42, 3.42, 3.42, 3.42, 3.42, 3.42, 3.42, 3.42, 3.42,
3.42, 3.42, 3.42, 3.42), time_year = c(2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018), time_week = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1), time_day = structure(c(17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532, 17532,
17532, 17532, 17532, 17532, 17532, 17532, 17532), class = "Date"),
time_diff = c(NA, 0, 315, 133, 0, 1802, 753, 0, 0, 0, 0,
0, 185, 110, 0, 0, 0, 0, 0, 1892, 877, 0, 11184, 2407, 2600,
1131, 2762, 1, 1, 1, 2454, 1, 1918, 0, 1, 1, 1, 1125, 2065,
1, 1, 1, 1, 1, 486, 103, 0, 0, 1, 606, 559, 0, 309, 37, 70,
818, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1)), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
itemID = c(19, 19, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
83, 83, 83, 83, 109, 109, 113, 113, 113, 113, 113, 113, 113,
113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 138, 222,
222, 222, 222, 222, 222, 295, 295, 297, 297, 386, 450, 450,
450, 450, 520, 520, 520, 520, 520, 520, 520, 520, 520, 520,
520, 520, 520, 520, 520, 520, 520, 520, 520, 529, 529, 529,
529, 561, 561, 561, 561, 561, 561, 2013, 2049, 4322, 5007,
5022, 5094, 5094, 5094, 5094, 6574, 6575, 6653, 6744, 7851,
7975, 8075, 8107, 8192, 9368, 9375), time = structure(c(1514767919,
1514768214, 1514801630, 1514801631, 1514801632, 1514801633,
1514801634, 1514801635, 1514801636, 1514801637, 1514801638,
1514801639, 1514764916, 1514765364, 1514767919, 1514768214,
1514767919, 1514768214, 1514801654, 1514801655, 1514801656,
1514801657, 1514801658, 1514801659, 1514801660, 1514801661,
1514801662, 1514801663, 1514801664, 1514801665, 1514801666,
1514801667, 1514801668, 1514801669, 1514801670, 1514796571,
1514793524, 1514793525, 1514799230, 1514799837, 1514801643,
1514801649, 1514767919, 1514768214, 1514767919, 1514768214,
1514788305, 1514764916, 1514765364, 1514767919, 1514768214,
1514800705, 1514800742, 1514800812, 1514801639, 1514801640,
1514801641, 1514801642, 1514801643, 1514801644, 1514801645,
1514801646, 1514801647, 1514801648, 1514801649, 1514801650,
1514801651, 1514801652, 1514801653, 1514801654, 1514791067,
1514791068, 1514791069, 1514791070, 1514798636, 1514798637,
1514798638, 1514798639, 1514798640, 1514798641, 1514784574,
1514768104, 1514770106, 1514782167, 1514799127, 1514795443,
1514795444, 1514795445, 1514795446, 1514799230, 1514799230,
1514795443, 1514787174, 1514765231, 1514770983, 1514800396,
1514800396, 1514770983, 1514799231, 1514767166), tzone = "UTC", class = c("POSIXct",
"POSIXt")), .rows = structure(list(7L, 14L, 56L, 57L, 58L,
59L, 60L, 61L, 62L, 63L, 64L, 65L, 1L, 4L, 8L, 15L, 9L,
16L, 83L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L,
94L, 95L, 96L, 97L, 98L, 99L, 100L, 38L, 31L, 32L, 46L,
50L, 70L, 77L, 10L, 17L, 11L, 18L, 26L, 2L, 5L, 12L,
19L, 53L, 54L, 55L, 66L, 67L, 68L, 69L, 71L, 72L, 73L,
74L, 75L, 76L, 78L, 79L, 80L, 81L, 82L, 84L, 27L, 28L,
29L, 30L, 39L, 40L, 41L, 42L, 43L, 44L, 24L, 13L, 20L,
23L, 45L, 33L, 35L, 36L, 37L, 47L, 48L, 34L, 25L, 3L,
21L, 51L, 52L, 22L, 49L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -100L), .drop = TRUE))
CodePudding user response:
Here a solution using difftime
:
> difftime(as.POSIXct("2018-05-29 22:10:40"), as.POSIXct("2018-05-27 16:33:25"), units = "secs")
Time difference of 193035 secs
Solution with dplyr. First you order by itemID, then group, and finally get the time lagged with function lag
. Finally use difftime
setting units to secs:
library(dplyr)
df <- df %>% arrange(desc(itemID)) %>% group_by(itemID) %>%
mutate(lag.time = dplyr::lag(time, n = 1, default = as.POSIXct(NA)))
df <- transform(df,time_diff = difftime(as.POSIXct(time),
as.POSIXct(lag.time), units = "secs"))
Another posible solution with shift
function from data.table
, that also keep the datetime format:
library(dplyr)
library(data.table)
df <- df %>% arrange(desc(itemID)) %>% data.table::data.table()
df <- df %>% group_by(itemID) %>%
mutate(lag.time = data.table::shift(time))
df <- transform(df,time_diff = difftime(as.POSIXct(time),
as.POSIXct(lag.time), units = "secs"))
Output:
time itemID order salesPrice time_year time_week time_day time_diff lag.time
83 2018-01-01 00:51:59 109 1 5.80 2018 1 2018-01-01 NA secs <NA>
84 2018-01-01 00:56:54 109 1 5.80 2018 1 2018-01-01 295 secs 2018-01-01 00:51:59
85 2018-01-01 00:01:56 83 1 5.19 2018 1 2018-01-01 NA secs <NA>
86 2018-01-01 00:09:24 83 1 5.19 2018 1 2018-01-01 448 secs 2018-01-01 00:01:56
87 2018-01-01 00:51:59 83 1 5.19 2018 1 2018-01-01 2555 secs 2018-01-01 00:09:24
88 2018-01-01 00:56:54 83 1 5.19 2018 1 2018-01-01 295 secs 2018-01-01 00:51:59
89 2018-01-01 10:13:50 35 2 368.13 2018 1 2018-01-01 NA secs <NA>
90 2018-01-01 10:13:51 35 3 368.13 2018 1 2018-01-01 1 secs 2018-01-01 10:13:50
CodePudding user response:
Here's a one-liner using ave
.
df$time_diff <- with(df, ave(as.numeric(time), itemID, FUN=\(x) -c(0, x[-length(x)] - x[-1])))
## check
df[df$itemID == 450, ]
# time itemID order salesPrice time_year time_week time_day time_diff
# 2 2018-01-01 00:01:56 450 1 17.42 2018 1 2018-01-01 0
# 5 2018-01-01 00:09:24 450 1 17.42 2018 1 2018-01-01 448
# 12 2018-01-01 00:51:59 450 1 17.42 2018 1 2018-01-01 2555
# 19 2018-01-01 00:56:54 450 1 17.42 2018 1 2018-01-01 295