I have a dataset that looks like this:
# Example data (this looks like dput() output): a data.frame with three columns,
#   Fish_ID - character fish identifier ("Fork1", "Fork10", ...)
#   Date2   - sampling date, class "Date" (stored as days since 1970-01-01)
#   Lat2    - numeric value recorded for that fish on that date
# Not every Fish_ID appears on every date, and the dates are irregularly spaced.
# NOTE: the expression is not assigned to a name here; the answer's snippets
# refer to this data as `tbl`.
structure(list(Fish_ID = c("Fork1", "Fork10", "Fork15", "Fork20",
"Fork21", "Fork22", "Fork23", "Fork4", "Fork5", "Fork7", "Fork9",
"Fork12", "Fork13", "Fork14", "Fork16", "Fork17", "Fork18", "Fork19",
"Fork20", "Fork21", "Fork22", "Fork23", "Fork3", "Fork1", "Fork10",
"Fork12", "Fork13", "Fork15", "Fork16", "Fork17", "Fork18", "Fork19",
"Fork20", "Fork21", "Fork22", "Fork4", "Fork5", "Fork7", "Fork9",
"Fork1", "Fork10", "Fork12", "Fork13", "Fork14", "Fork15", "Fork16",
"Fork17", "Fork18", "Fork19", "Fork20", "Fork21", "Fork22", "Fork4",
"Fork5", "Fork7", "Fork8", "Fork9", "Fork2", "Fork1", "Fork13",
"Fork14", "Fork15", "Fork16", "Fork17", "Fork18", "Fork19", "Fork20",
"Fork21", "Fork4", "Fork5", "Fork7", "Fork9"), Date2 = structure(c(18428,
18428, 18428, 18428, 18428, 18428, 18428, 18428, 18428, 18428,
18428, 18438, 18438, 18438, 18438, 18438, 18438, 18438, 18438,
18438, 18438, 18438, 18438, 18445, 18445, 18445, 18445, 18445,
18445, 18445, 18445, 18445, 18445, 18445, 18445, 18445, 18445,
18445, 18445, 18456, 18456, 18456, 18456, 18456, 18456, 18456,
18456, 18456, 18456, 18456, 18456, 18456, 18456, 18456, 18456,
18456, 18456, 18463, 18471, 18471, 18471, 18471, 18471, 18471,
18471, 18471, 18471, 18471, 18471, 18471, 18471, 18471), class = "Date"),
Lat2 = c(32.9394, 32.92935, 32.9160666666667, 32.9455166666667,
32.9431, 32.90365, 32.9056166666667, 32.94325, 32.9288833333333,
32.9297, 32.9303, 32.9047333333333, 32.9093833333333, 32.9509833333333,
32.9074333333333, 32.9029, 32.90775, 32.9094, 32.9459166666667,
32.9437666666667, 32.9044333333333, 32.90585, 32.9475333333333,
32.9443666666667, 32.92935, 32.9047333333333, 32.9093333333333,
32.9161, 32.9075333333333, 32.9030333333333, 32.9088333333333,
32.9058666666667, 32.9461166666667, 32.9442666666667, 32.9042,
32.9442833333333, 32.9288833333333, 32.9298, 32.93135, 32.9432,
32.9289, 32.9051166666667, 32.9095166666667, 32.8966666666667,
32.9162666666667, 32.9074, 32.9052833333333, 32.90865, 32.9056333333333,
32.9461666666667, 32.9430666666667, 32.9037833333333, 32.9470666666667,
32.9291166666667, 32.9287666666667, 32.9297666666667, 32.9313,
32.9086333333333, 32.94365, 32.9084166666667, 32.91455, 32.9160166666667,
32.9063166666667, 32.8989833333333, 32.9086333333333, 32.9052333333333,
32.9453833333333, 32.9431666666667, 32.9467833333333, 32.9289833333333,
32.9291333333333, 32.9303833333333)), row.names = c(1L, 25L,
78L, 173L, 198L, 229L, 239L, 242L, 259L, 277L, 281L, 27L, 32L,
56L, 101L, 121L, 143L, 163L, 176L, 199L, 224L, 238L, 240L, 11L,
24L, 29L, 34L, 89L, 100L, 133L, 148L, 160L, 182L, 201L, 225L,
244L, 262L, 272L, 284L, 22L, 26L, 28L, 47L, 73L, 87L, 113L, 126L,
150L, 155L, 193L, 209L, 227L, 243L, 261L, 273L, 279L, 286L, 170L,
18L, 43L, 67L, 96L, 98L, 119L, 149L, 161L, 185L, 213L, 241L,
246L, 276L, 293L), class = "data.frame")
I'm trying to use the padr::thicken() function so that I can fill in the missing Lat2
values for each Fish_ID on each sampled day; however, when I do this, I get the error `Error: interval is not valid`.
I'm imagining this is because the interval isn't exactly week to week, with a week missing in there. I'm wondering if there is another way to pad this dataframe so that I have a value for each individual Fish_ID even though there is not an equal interval between each Fish_ID. I've been trying the padr::pad_cust() function, but I'm not sure how to set it up with a dataset that looks like this.
CodePudding user response:
`complete()` from tidyr should do the job. For weekly observations:
library(tidyr)

# `tbl` is the data frame from the question.
# Step 1: padr::thicken() adds a `Date2_week` column holding the week each
#         `Date2` falls into.
# Step 2: complete() expands the result to every combination of Fish_ID and
#         week, where full_seq(..., period = 7) supplies every week between
#         the first and last one, including weeks with no observations at all.
#         Combinations that were not observed get NA in the other columns.
complete(
  padr::thicken(tbl, interval = "week"),
  Fish_ID,
  Date2_week = full_seq(Date2_week, period = 7)
)
#> # A tibble: 147 × 4
#> Fish_ID Date2_week Date2 Lat2
#> <chr> <date> <date> <dbl>
#> 1 Fork1 2020-06-14 2020-06-15 32.9
#> 2 Fork1 2020-06-21 NA NA
#> 3 Fork1 2020-06-28 2020-07-02 32.9
#> 4 Fork1 2020-07-05 NA NA
#> 5 Fork1 2020-07-12 2020-07-13 32.9
#> 6 Fork1 2020-07-19 NA NA
#> 7 Fork1 2020-07-26 2020-07-28 32.9
#> 8 Fork10 2020-06-14 2020-06-15 32.9
#> 9 Fork10 2020-06-21 NA NA
#> 10 Fork10 2020-06-28 2020-07-02 32.9
#> # … with 137 more rows
Or for original observation dates with irregular intervals:
# Expand `tbl` to every combination of Fish_ID and the dates that actually
# occur in Date2 (no regular interval is imposed); combinations that were not
# observed get NA in Lat2.
complete(tbl, Fish_ID, Date2)
#> # A tibble: 126 × 3
#> Fish_ID Date2 Lat2
#> <chr> <date> <dbl>
#> 1 Fork1 2020-06-15 32.9
#> 2 Fork1 2020-06-25 NA
#> 3 Fork1 2020-07-02 32.9
#> 4 Fork1 2020-07-13 32.9
#> 5 Fork1 2020-07-20 NA
#> 6 Fork1 2020-07-28 32.9
#> 7 Fork10 2020-06-15 32.9
#> 8 Fork10 2020-06-25 NA
#> 9 Fork10 2020-07-02 32.9
#> 10 Fork10 2020-07-13 32.9
#> # … with 116 more rows