I have the following table :
Date | Values | Days |
---|---|---|
12/28/2020 | 0.485002 | 0 |
12/29/2020 | 0.385427 | 1 |
12/30/2020 | 0.323091 | 2 |
12/31/2020 | 0.19967 | 3 |
1/4/2021 | 0.009859 | 4 |
1/5/2021 | -0.15653 | 0 |
1/6/2021 | -0.26286 | 1 |
1/7/2021 | -0.18659 | 2 |
1/8/2021 | -0.18207 | 3 |
1/22/2021 | 1.077845 | 0 |
1/25/2021 | 1.300135 | 1 |
1/26/2021 | 1.390824 | 2 |
1/27/2021 | 1.256247 | 3 |
1/28/2021 | 1.09526 | 4 |
The column "Days" shows the number of days since the last time the "Values" sign switched from positive to negative and vice versa.
How can I create the column "Days" using "dplyr mutate" in R ?
Thanks.
CodePudding user response:
If using both dplyr
and data.table
allowed,
Data
df <- read.table(text = "Date Values Days
12/28/2020 0.485002 0
12/29/2020 0.385427 1
12/30/2020 0.323091 2
12/31/2020 0.19967 3
1/4/2021 0.009859 4
1/5/2021 -0.15653 0
1/6/2021 -0.26286 1
1/7/2021 -0.18659 2
1/8/2021 -0.18207 3
1/22/2021 1.077845 0
1/25/2021 1.300135 1
1/26/2021 1.390824 2
1/27/2021 1.256247 3
1/28/2021 1.09526 4", header = T)
Code
library(data.table)
library(dplyr)
df %>%
select(-Days) %>% # This line may not needed if Days is not at your data at the beginning
mutate(key = sign(Values) == sign(lag(Values)),
key = ifelse(!key | is.na(key), 0, 1)) %>%
data.table() %>%
.[,Days := cumsum(key), by = rleid(key)] %>%
select(-key)
Date Values days
1: 12/28/2020 0.485002 0
2: 12/29/2020 0.385427 1
3: 12/30/2020 0.323091 2
4: 12/31/2020 0.199670 3
5: 1/4/2021 0.009859 4
6: 1/5/2021 -0.156530 0
7: 1/6/2021 -0.262860 1
8: 1/7/2021 -0.186590 2
9: 1/8/2021 -0.182070 3
10: 1/22/2021 1.077845 0
11: 1/25/2021 1.300135 1
12: 1/26/2021 1.390824 2
13: 1/27/2021 1.256247 3
14: 1/28/2021 1.095260 4
CodePudding user response:
We can use rleid
from the data.table
package to create a grouping variable, and then create the count of days.
library(dplyr)
library(data.table)
dat2 <- dat %>%
mutate(G = rleid(Values > 0)) %>%
group_by(G) %>%
mutate(Days = 0:(n() - 1)) %>%
ungroup() %>%
select(-G)
dat2
# # A tibble: 14 x 3
# Date Values Days
# <chr> <dbl> <int>
# 1 12/28/2020 0.485 0
# 2 12/29/2020 0.385 1
# 3 12/30/2020 0.323 2
# 4 12/31/2020 0.200 3
# 5 1/4/2021 0.00986 4
# 6 1/5/2021 -0.157 0
# 7 1/6/2021 -0.263 1
# 8 1/7/2021 -0.187 2
# 9 1/8/2021 -0.182 3
# 10 1/22/2021 1.08 0
# 11 1/25/2021 1.30 1
# 12 1/26/2021 1.39 2
# 13 1/27/2021 1.26 3
# 14 1/28/2021 1.10 4
Data
dat <- read.table(text = "Date Values
'12/28/2020' 0.485002
'12/29/2020' 0.385427
'12/30/2020' 0.323091
'12/31/2020' 0.19967
'1/4/2021' 0.009859
'1/5/2021' -0.15653
'1/6/2021' -0.26286
'1/7/2021' -0.18659
'1/8/2021' -0.18207
'1/22/2021' 1.077845
'1/25/2021' 1.300135
'1/26/2021' 1.390824
'1/27/2021' 1.256247
'1/28/2021' 1.09526",
header = TRUE)