I have a data with entry and exit dates per individual. My data looks like this:
ID entry exit
1 1 01.01.2010 31/01/2016
2 2 01/02/2016 01/01/2021
3 3 01/10/2010 30/09/2019
4 4 01.09.2013 31/12/2015
5 5 01/01/2016 30/09/2020
I would like to create extra columns, in which I would like to identify if the individual was active (so between the dates of entry and exit) to a given date.
The data should look something like this:
ID entry exit active_31_12_2011 active_31_12_2016
1 1 01/01/2010 31/01/2016 yes no
2 2 01/02/2016 01/01/2021 no yes
3 3 01/10/2010 30/09/2019 yes yes
4 4 01/09/2013 31/12/2015 no no
5 5 01/01/2016 30/09/2020 no yes
Does anyone has an idea how I could do this?
CodePudding user response:
Using data.table::between
df %>%
mutate(entry = as.Date(entry, format = "%d/%m/%Y"),
exit = as.Date(exit, format = "%d/%m/%Y")) %>%
rowwise %>%
mutate(active_31_12_2011 = between(as.Date("2011/12/31"),lower = entry, upper = exit),
active_31_12_2016 = between(as.Date("2016/12/31"),lower = entry, upper = exit))
ID entry exit active_31_12_2011 active_31_12_2016
<int> <date> <date> <lgl> <lgl>
1 1 2010-01-01 2016-01-31 TRUE FALSE
2 2 2016-02-01 2021-01-01 FALSE TRUE
3 3 2010-10-01 2019-09-30 TRUE TRUE
4 4 2013-09-01 2015-12-31 FALSE FALSE
5 5 2016-01-01 2020-09-30 FALSE TRUE
To get yes/no output,
df %>%
mutate(entry = as.Date(entry, format = "%d/%m/%Y"),
exit = as.Date(exit, format = "%d/%m/%Y")) %>%
rowwise %>%
mutate(active_31_12_2011 = ifelse(between(as.Date("2011/12/31"),lower = entry, upper = exit), "Yes", "No"),
active_31_12_2016 = ifelse(between(as.Date("2016/12/31"),lower = entry, upper = exit), "Yes", "No"))
ID entry exit active_31_12_2011 active_31_12_2016
<int> <date> <date> <chr> <chr>
1 1 2010-01-01 2016-01-31 Yes No
2 2 2016-02-01 2021-01-01 No Yes
3 3 2010-10-01 2019-09-30 Yes Yes
4 4 2013-09-01 2015-12-31 No No
5 5 2016-01-01 2020-09-30 No Yes
CodePudding user response:
When the dates are dates
or are formatted like 2010-01-01 you can use ==
, >
, <
, >=
or <=
.
x$active_31_12_2011 <- c("no", "yes")[1 (x$entry <= "2011-12-31" & "2011-12-31" <= x$exit)]
x$active_31_12_2016 <- c("no", "yes")[1 (x$entry <= "2016-12-31" & "2016-12-31" <= x$exit)]
x
# ID entry exit active_31_12_2011 active_31_12_2016
#1 1 2010-01-01 2016-01-31 yes no
#2 2 2016-02-01 2021-01-01 no yes
#3 3 2010-10-01 2019-09-30 yes yes
#4 4 2013-09-01 2015-12-31 no no
#5 5 2016-01-01 2020-09-30 no yes
Data:
x <- read.table(header=TRUE, text="ID entry exit
1 1 01/01/2010 31/01/2016
2 2 01/02/2016 01/01/2021
3 3 01/10/2010 30/09/2019
4 4 01/09/2013 31/12/2015
5 5 01/01/2016 30/09/2020")
x$entry <- as.Date(x$entry, "%d/%m/%Y")
x$exit <- as.Date(x$exit, "%d/%m/%Y")
CodePudding user response:
This is how to do it using lubridate
:
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
data <- tribble(
~ID, ~entry, ~exit,
1, "01/01/2010", "31/01/2016",
2, "01/02/2016", "01/01/2021",
3, "01/10/2010", "30/09/2019",
4, "01/09/2013", "31/12/2015",
5, "01/01/2016", "30/09/2020"
)
data %>%
mutate_at(c("entry", "exit"), ~ .x %>% parse_date(format = "%d/%m/%Y")) %>%
mutate(
interval = interval(entry, exit),
active_31_12_2011 = ymd("2011-12-31") %within% interval,
active_31_12_2016 = ymd("2016-12-31") %within% interval
)
#> # A tibble: 5 x 6
#> ID entry exit interval active_31_12_2011
#> <dbl> <date> <date> <Interval> <lgl>
#> 1 1 2010-01-01 2016-01-31 2010-01-01 UTC--2016-01-31 UTC TRUE
#> 2 2 2016-02-01 2021-01-01 2016-02-01 UTC--2021-01-01 UTC FALSE
#> 3 3 2010-10-01 2019-09-30 2010-10-01 UTC--2019-09-30 UTC TRUE
#> 4 4 2013-09-01 2015-12-31 2013-09-01 UTC--2015-12-31 UTC FALSE
#> 5 5 2016-01-01 2020-09-30 2016-01-01 UTC--2020-09-30 UTC FALSE
#> # … with 1 more variable: active_31_12_2016 <lgl>
Created on 2021-10-06 by the reprex package (v2.0.1)
CodePudding user response:
You could voluntarily coerce the dates to integers and use data.table::between
.
dat_n <- as.data.frame(lapply(dat, as.integer))
library(data.table)
act <- t(apply(dat_n[-1], 1, \(z) between(as.numeric(x), z[1], z[2]))) |>
`colnames<-`(paste0('active_', x))
res <- cbind(dat, act)
res
# ID entry exit active_2011-12-31 active_2016-12-31
# 1 1 2010-01-01 2016-01-31 TRUE FALSE
# 2 2 2016-02-01 2021-01-01 FALSE TRUE
# 3 3 2010-10-01 2019-09-30 TRUE TRUE
# 4 4 2013-09-01 2015-12-31 FALSE FALSE
# 5 5 2016-01-01 2020-09-30 FALSE TRUE
Data
dat <- structure(list(ID = 1:5, entry = structure(c(14610, 16832, 14883,
15949, 16801), class = "Date"), exit = structure(c(16831, 18628,
18169, 16800, 18535), class = "Date")), row.names = c("1", "2",
"3", "4", "5"), class = "data.frame")
x <- c(as.Date('2011-12-31'), as.Date('2016-12-31'))