I have a data set where audio recorders where set up to be recorded. The technician recorded the date and time when the audio recorder was deployed. The technician then would return and swap batteries and memory cards in which the date would be recorded. So I have dates for each time a new memory was installed or as I think of it a date for when the recording started. I am struggling how to fill the "date_of_pickup" column based on knowing when they were deployed in sequential order. Basically I know the that the "date_of_deployment" for second observation per recorder is the date I need to insert into the "date_of_pickup".
data <- structure(list(recorder_id = c("smm03978", "smm03895", "smm03922",
"smm03886", "smm03918", "smm04004", "smm03988", "smm03890", "smm03753",
"smm03976", "smm03988", "smm04004", "smm03918", "smm03922", "smm03886",
"smm03976", "smm03895", "smm03753", "smm04004", "smm03753", "smm03890",
"smm03978", "smm03918", "smm03895", "smm03978", "smm03976", "smm04004",
"smm03988", "smm03922", "smm03886", "smm03753", "smm03890"),
card_id = c("msd-1285\n", "msd-1326", "msd- 0198", "msd-1463",
"msd-1478", "msd-0722", "msd-1365", "msd-0807", "msd-0247",
"msd-1430", "msd-2683", NA, "msd-2687", "msd-0530", "msd-2682",
"msd-2688", "msd-2684", NA, "msd-2826", "msd-2830", "msd-2829",
"msd-2828", "msd-2885", "msd-2881", "msd-2878", "msd-2882",
"msd-2879", "msd-2877", "msd-2884", "msd-2880", "msd-2886",
"msd-2883"), date_of_deployment = c("2022-04-02 13:50:00",
"2022-04-02 14:03:00", "2022-04-02 14:41:00", "2022-04-02 14:58:00",
"2022-04-02 17:34:00", "2022-04-02 18:27:00", "2022-04-02 18:49:00",
"2022-04-03 15:19:00", "2022-04-03 15:32:00", "2022-04-03 16:06:00",
"2022-06-24 01:42:00", "2022-06-23 15:44:00", "2022-06-23 16:15:00",
"2022-06-23 17:17:00", "2022-06-23 17:32:00", "2022-06-23 18:00:00",
"2022-06-23 18:26:00", "2022-06-29 01:25:00", "2022-07-14 13:46:00",
"2022-07-15 17:30:00", "2022-07-15 17:48:00", "2022-07-15 18:27:00",
"2022-09-10 15:00:00", "2022-09-10 15:27:00", "2022-09-10 15:37:00",
"2022-09-10 16:03:00", "2022-09-10 16:38:00", "2022-09-10 16:45:00",
"2022-09-10 18:29:00", "2022-09-10 18:38:00", "2022-09-10 19:04:00",
"2022-09-10 19:14:00"), date_of_pickup = c("", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "")), row.names = c(NA,
-32L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
library(dplyr)
data |>
mutate(date_of_deployment = readr::parse_datetime(date_of_deployment)) |>
group_by(recorder_id) |>
arrange(date_of_deployment) |>
mutate(date_of_pickup = nth(date_of_deployment, 2)) |>
ungroup() |>
arrange(recorder_id)
# A tibble: 32 × 4
recorder_id card_id date_of_deployment date_of_pickup
<chr> <chr> <dttm> <dttm>
1 smm03753 msd-0247 2022-04-03 15:32:00 2022-06-29 01:25:00
2 smm03753 NA 2022-06-29 01:25:00 2022-06-29 01:25:00
3 smm03753 msd-2830 2022-07-15 17:30:00 2022-06-29 01:25:00
4 smm03753 msd-2886 2022-09-10 19:04:00 2022-06-29 01:25:00
5 smm03886 msd-1463 2022-04-02 14:58:00 2022-06-23 17:32:00
6 smm03886 msd-2682 2022-06-23 17:32:00 2022-06-23 17:32:00
7 smm03886 msd-2880 2022-09-10 18:38:00 2022-06-23 17:32:00
8 smm03890 msd-0807 2022-04-03 15:19:00 2022-07-15 17:48:00
9 smm03890 msd-2829 2022-07-15 17:48:00 2022-07-15 17:48:00
10 smm03890 msd-2883 2022-09-10 19:14:00 2022-07-15 17:48:00
# … with 22 more rows
# ℹ Use `print(n = ...)` to see more rows
EDIT: Maybe you want mutate(date_of_pickup = lead(date_of_deployment))
instead of mutate(date_of_pickup = nth(date_of_deployment, 2))
? This would result in this output:
# A tibble: 32 × 4
recorder_id card_id date_of_deployment date_of_pickup
<chr> <chr> <dttm> <dttm>
1 smm03753 msd-0247 2022-04-03 15:32:00 2022-06-29 01:25:00
2 smm03753 NA 2022-06-29 01:25:00 2022-07-15 17:30:00
3 smm03753 msd-2830 2022-07-15 17:30:00 2022-09-10 19:04:00
4 smm03753 msd-2886 2022-09-10 19:04:00 NA
5 smm03886 msd-1463 2022-04-02 14:58:00 2022-06-23 17:32:00
6 smm03886 msd-2682 2022-06-23 17:32:00 2022-09-10 18:38:00
7 smm03886 msd-2880 2022-09-10 18:38:00 NA
8 smm03890 msd-0807 2022-04-03 15:19:00 2022-07-15 17:48:00
9 smm03890 msd-2829 2022-07-15 17:48:00 2022-09-10 19:14:00
10 smm03890 msd-2883 2022-09-10 19:14:00 NA
# … with 22 more rows
# ℹ Use `print(n = ...)` to see more rows