I have a temperature and times database from two weather stations that looks like this:
# A tibble: 6 × 7
Station Date Time Temperature Tmin Tmed Tmax
<chr> <date> <time> <dbl> <dbl> <dbl> <dbl>
1 F 2021-10-15 00:11:46 16.8 15.2 17.1 20.4
2 F 2021-10-15 00:41:46 16.5 15.2 17.1 20.4
3 F 2021-10-15 01:11:46 16.2 15.2 17.1 20.4
4 F 2021-10-15 01:41:46 15.6 15.2 17.1 20.4
5 F 2021-10-15 02:11:46 15.9 15.2 17.1 20.4
6 F 2021-10-15 02:41:46 16.1 15.2 17.1 20.4
Here is a reproducible example of the first two days (sorry, I know it's a mess), obtained via `dput()`:
structure(list(Station = c("F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F"), Date = structure(c(18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916), class = "Date"), Time = structure(c(706,
2506, 4306, 6106, 7906, 9706, 11506, 13306, 15106, 16906, 18706,
20506, 22306, 24106, 25906, 27706, 29506, 31306, 33106, 34906,
36706, 38506, 40306, 42106, 43906, 45706, 47506, 49306, 51106,
52906, 54706, 56506, 58306, 60106, 61906, 63706, 65506, 67306,
69106, 70906, 72706, 74506, 76306, 78106, 79906, 81706, 83506,
85306, 706, 2506, 4306, 6106, 7906, 9706, 11506, 13306, 15106,
16906, 18706, 20506, 22306, 24106, 25906, 27706, 29506, 31306,
33106, 34906, 36706, 38506, 40306, 42106, 43906, 45706, 47506,
49306, 51106, 52906, 54706, 56506, 58306, 60106, 61906, 63706,
65506, 67306, 69106, 70906, 72706, 74506, 76306, 78106, 79906,
81706, 83506, 85306), class = c("hms", "difftime"), units = "secs"),
Temperature = c(16.8, 16.5, 16.2, 15.6, 15.9, 16.1, 16.4,
16.2, 16, 16, 16.2, 16.2, 15.9, 16, 16, 16.4, 16.2, 16.5,
16.1, 16.4, 16.8, 16.6, 18.6, 16.9, 18.6, 19.5, 18.5, 18.5,
20.4, 19.1, 19.8, 19.7, 18.1, 17.4, 17.4, 16.9, 15.8, 16.8,
16.9, 16.8, 17, 15.2, 16.2, 17.4, 18.1, 18.3, 18, 17.9, 17.6,
17.9, 17.7, 17.7, 17.7, 17.8, 18.1, 18.3, 18.1, 16.2, 18,
18.8, 18.6, 19.1, 18.9, 17.9, 16.2, 17.3, 19.3, 20.2, 20.7,
20.9, 22.2, 22.3, 21.2, 21.1, 20.1, 23.3, 21.4, 20.2, 19.8,
18.9, 19.8, 20.1, 20.4, 19.5, 18.8, 18, 17.9, 17.9, 17.8,
18, 17.9, 16.5, 16.8, 16.5, 16.7, 16.7), Tmin = c(15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2), Tmed = c(17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333), Tmax = c(20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3)), row.names = c(NA, -96L), class = c("tbl_df",
"tbl", "data.frame"))
I would like to add a column that tells me whether the temperature at a given time is close to the daily minimum temperature.
The best way to do this seemed to be the `dplyr::between()` function, and I tried to write it like this:
TimeTempReprod %>%
group_by(Date, Station) %>%
mutate(y = between(Temperature, Tmin, Tmin + 2))
What I get in the console when I run this code is:
Error in `mutate()`:
! Problem while computing `y = dplyr::between(Temperature, Tmin, Tmin + 2)`.
ℹ The error occurred in group 1: Date = 2021-10-15, Station = "F".
Caused by error in `dplyr::between()`:
! `left` must be length 1
I tried to look for answers to this problem, but I couldn't find anything elsewhere that relates it to the `between()` function.
I hope this question is understandable, and I am sorry if it has problems. It is the first question I have posted to Stack Exchange after learning from it for two years, so I still have to learn how to use it properly. Thanks to whoever finds the time to help me!
CodePudding user response:
`between()` needs a single value for each of its bounds, but inside a grouped `mutate()` `Tmin` refers to the entire vector of values for each group. To solve the problem, you can use a function that extracts a single value from that vector. Since all the values in the vector are the same within a group, many functions will work, e.g. `min()` or `first()`:
TimeTempReprod %>%
group_by(Date, Station) %>%
mutate(y = between(Temperature, min(Tmin), min(Tmin) + 2))
which gives:
# A tibble: 96 × 8
# Groups: Date, Station [2]
Station Date Time Temperature Tmin Tmed Tmax y
<chr> <date> <time> <dbl> <dbl> <dbl> <dbl> <lgl>
1 F 2021-10-15 00:11:46 16.8 15.2 17.1 20.4 TRUE
2 F 2021-10-15 00:41:46 16.5 15.2 17.1 20.4 TRUE
3 F 2021-10-15 01:11:46 16.2 15.2 17.1 20.4 TRUE
4 F 2021-10-15 01:41:46 15.6 15.2 17.1 20.4 TRUE
5 F 2021-10-15 02:11:46 15.9 15.2 17.1 20.4 TRUE
6 F 2021-10-15 02:41:46 16.1 15.2 17.1 20.4 TRUE
7 F 2021-10-15 03:11:46 16.4 15.2 17.1 20.4 TRUE
8 F 2021-10-15 03:41:46 16.2 15.2 17.1 20.4 TRUE
9 F 2021-10-15 04:11:46 16 15.2 17.1 20.4 TRUE
10 F 2021-10-15 04:41:46 16 15.2 17.1 20.4 TRUE
# … with 86 more rows