My data set contains observations taken every 30 minutes. I would like to aggregate them to hourly values: for example, the new time would be 01:00:00, the date 02/01/2019, and the value the sum of the measurements for 00:00:00-00:30:00 and 00:30:00-01:00:00. The desired output looks like this:
time variable value
01:00:00 02/01/2019 234.3 (example)
How can I aggregate my data to 1 hour?
Sample data (only the first 300 rows are included):
structure(list(time = structure(c(1800, 3600, 5400, 7200, 9000,
10800, 12600, 14400, 16200, 18000, 19800, 21600, 23400, 25200,
27000, 28800, 30600, 32400, 34200, 36000, 37800, 39600, 41400,
43200, 45000, 46800, 48600, 50400, 52200, 54000, 55800, 57600,
59400, 61200, 63000, 64800, 66600, 68400, 70200, 72000, 73800,
75600, 77400, 79200, 81000, 82800, 84600, 86400, 1800, 3600,
5400, 7200, 9000, 10800, 12600, 14400, 16200, 18000, 19800, 21600,
23400, 25200, 27000, 28800, 30600, 32400, 34200, 36000, 37800,
39600, 41400, 43200, 45000, 46800, 48600, 50400, 52200, 54000,
55800, 57600, 59400, 61200, 63000, 64800, 66600, 68400, 70200,
72000, 73800, 75600, 77400, 79200, 81000, 82800, 84600, 86400,
1800, 3600, 5400, 7200, 9000, 10800, 12600, 14400, 16200, 18000,
19800, 21600, 23400, 25200, 27000, 28800, 30600, 32400, 34200,
36000, 37800, 39600, 41400, 43200, 45000, 46800, 48600, 50400,
52200, 54000, 55800, 57600, 59400, 61200, 63000, 64800, 66600,
68400, 70200, 72000, 73800, 75600, 77400, 79200, 81000, 82800,
84600, 86400, 1800, 3600, 5400, 7200, 9000, 10800, 12600, 14400,
16200, 18000, 19800, 21600, 23400, 25200, 27000, 28800, 30600,
32400, 34200, 36000, 37800, 39600, 41400, 43200, 45000, 46800,
48600, 50400, 52200, 54000, 55800, 57600, 59400, 61200, 63000,
64800, 66600, 68400, 70200, 72000, 73800, 75600, 77400, 79200,
81000, 82800, 84600, 86400, 1800, 3600, 5400, 7200, 9000, 10800,
12600, 14400, 16200, 18000, 19800, 21600, 23400, 25200, 27000,
28800, 30600, 32400, 34200, 36000, 37800, 39600, 41400, 43200,
45000, 46800, 48600, 50400, 52200, 54000, 55800, 57600, 59400,
61200, 63000, 64800, 66600, 68400, 70200, 72000, 73800, 75600,
77400, 79200, 81000, 82800, 84600, 86400, 1800, 3600, 5400, 7200,
9000, 10800, 12600, 14400, 16200, 18000, 19800, 21600, 23400,
25200, 27000, 28800, 30600, 32400, 34200, 36000, 37800, 39600,
41400, 43200, 45000, 46800, 48600, 50400, 52200, 54000, 55800,
57600, 59400, 61200, 63000, 64800, 66600, 68400, 70200, 72000,
73800, 75600, 77400, 79200, 81000, 82800, 84600, 86400, 1800,
3600, 5400, 7200, 9000, 10800, 12600, 14400, 16200, 18000, 19800,
21600), class = c("hms", "difftime"), units = "secs"), variable = c("02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019", "02/01/2019",
"02/01/2019", "02/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019", "03/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019", "04/01/2019",
"04/01/2019", "04/01/2019", "04/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019", "05/01/2019",
"05/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019",
"06/01/2019", "06/01/2019", "06/01/2019", "06/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019", "07/01/2019",
"07/01/2019", "07/01/2019", "08/01/2019", "08/01/2019", "08/01/2019",
"08/01/2019", "08/01/2019", "08/01/2019", "08/01/2019", "08/01/2019",
"08/01/2019", "08/01/2019", "08/01/2019", "08/01/2019"), value = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0)), row.names = c(NA, -300L), class = c("tbl_df",
"tbl", "data.frame"))
CodePudding user response:
I've got an answer that uses {lubridate}:
library(dplyr)      # %>%, mutate(), arrange(), group_by(), summarise(), ...
library(stringr)    # str_c()
library(lubridate)  # dmy_hms(), ceiling_date()

# Aggregate the half-hourly readings in `a_df` to hourly totals.
#
# Input columns used: `time` (hms), `variable` (date as "dd/mm/yyyy" text)
# and `value` (numeric). Each reading is assigned to the clock hour that
# ends at or after its timestamp, so the 00:30:00 and 01:00:00 readings are
# summed into the row labelled 01:00:00. The result is printed with columns
# `value`, `time`, `variable` and `datetime`.
a_df %>%
  # Join date and time with an explicit space so dmy_hms() receives an
  # unambiguous "dd/mm/yyyy HH:MM:SS" string.
  mutate(
    datetime = dmy_hms(str_c(variable, as.character(time), sep = " "))
  ) %>%
  # last() below depends on row order, so sort chronologically first.
  arrange(datetime) %>%
  # Label every reading with the end of its hour. For date-times,
  # ceiling_date() leaves values that already sit on an hour boundary
  # unchanged, so 01:00:00 stays in the 01:00:00 bucket. Unlike counting
  # elapsed hours from the first row, this does not depend on where the
  # series starts.
  mutate(hourly_group = ceiling_date(datetime, unit = "hour")) %>%
  # Diagnostic view of the intermediate columns; glimpse() passes the data
  # through unchanged.
  glimpse() %>%
  group_by(hourly_group) %>%
  summarise(
    value = sum(value),
    # Keep the time/date labels of the last reading in each hour.
    time = last(time),
    variable = last(variable),
    datetime = last(datetime)
  ) %>%
  select(-hourly_group) %>%
  print(width = Inf, n = 50)
Note that `a_df` is the data frame you've provided.
This should work, and it should be robust if you have missing numbers, etc.