I am looking to calculate the number of days that traps were working for various sites within a study area.
I have a data set that contains multiple survey sites, with each checked on multiple dates. I am seeking a way to calculate the number of days between each check for each site. There is a slight catch, as for some sites and dates, the trap was broken. For these, I will need all the days until the closest previous date when the trap was working to be classed as "non-working days", and as such not counted.
here is a subset of the data -
structure(list(Date.of.Survey = c("12/04/2022", "16/04/2022",
"12/04/2022", "13/04/2022", "14/04/2022", "15/04/2022", "18/04/2022",
"19/04/2022", "20/04/2022", "22/04/2022", "26/04/2022", "27/04/2022",
"28/04/2022", "29/04/2022", "01/05/2022", "03/05/2022", "04/05/2022",
"05/05/2022", "06/05/2022", "12/05/2022", "10/05/2022", "11/05/2022",
"12/05/2022", "13/05/2022", "19/05/2022", "17/05/2022", "18/05/2022",
"19/05/2022", "20/05/2022", "27/05/2022", "24/05/2022", "25/05/2022",
"26/05/2022", "27/05/2022", "04/06/2022", "31/05/2022", "01/06/2022",
"02/06/2022", "03/06/2022", "07/06/2022", "08/06/2022", "09/06/2022",
"10/06/2022", "18/06/2022", "14/06/2022", "15/06/2022", "16/06/2022",
"17/06/2022", "21/06/2022", "22/06/2022", "23/06/2022", "24/06/2022",
"28/06/2022", "29/06/2022", "30/06/2022", "01/07/2022", "05/07/2022",
"06/07/2022", "07/07/2022", "08/07/2022"), Location = c("Wandle - Merton Abbey Mills",
"Wandle - Merton Abbey Mills", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Wandle - Merton Abbey Mills", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Wandle - Merton Abbey Mills", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Wandle - Merton Abbey Mills",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Wandle - Merton Abbey Mills", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Wandle - Merton Abbey Mills", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Wandle - Merton Abbey Mills", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir", "Medway - Allington Weir",
"Medway - Allington Weir", "Medway - Allington Weir"), Was.the.trap.working.when.you.checked.it. = c("Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"No", "Yes", "Yes", "Yes", "Yes"), Number.of.eels = c(0L, 1L,
0L, 0L, 0L, 20L, 2L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 2L, 1L, 0L, 1L, 2L, 0L, 1L, 16L, 7L, 2L, 1L, 1L, 0L, 0L,
0L, 1L, 1L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 3L, 3L,
0L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L)), row.names = c(NA,
-60L), class = c("tbl_df", "tbl", "data.frame"))
Ideally I would like to have a table that shows total number of days that each sites trap was functioning, with a column showing the total number of fish caught at each trap.
Thanks in advance
CodePudding user response:
I have seen these traps being laid - it's very interesting and the workers very kindly explained what they were doing.
Anyway, regarding the question. You can do something along the lines of:
library(dplyr)
dat |>
mutate(survey_date = as.Date(
Date.of.Survey,
format = "%d/%m/%Y"
)) |>
arrange(Location, survey_date) |>
group_by(Location) |>
mutate(
lag_date = lag(survey_date),
num_days = as.integer(survey_date - lag_date)
) |>
filter(
Was.the.trap.working.when.you.checked.it. == "Yes"
) |>
summarise(
total_days_trap_working = sum(num_days, na.rm = TRUE),
total_eels = sum(Number.of.eels)
)
# A tibble: 2 x 3
# Location total_days_trap_working total_eels
# <chr> <int> <int>
# 1 Medway - Allington Weir 82 78
# 2 Wandle - Merton Abbey Mills 67 7