I have a list of names with multiple dates, I want to filter only by dates within my period of interest. i have a df like this
Name Date
Luis 01/05/22
Luis 02/05/22
Luis 03/05/22
Luis 04/05/22
Luis 05/05/22
Pedro 01/05/22
Pedro 02/05/22
Pedro 03/05/22
Pedro 04/05/22
Pedro 05/05/22
Pedro 06/05/22
Pedro 07/05/22
Pedro 08/05/22
Pedro 09/05/22
Pedro 10/05/22
John 03/05/22
John 04/05/22
John 05/05/22
John 06/05/22
John 07/05/22
John 08/05/22
Ingrid 01/05/22
Ingrid 02/05/22
Ingrid 03/05/22
Ingrid 04/05/22
Ingrid 05/05/22
and another with time periods:
Name Onset End
Luis 01/05/22 03/05/22
Pedro 04/05/22 10/05/22
John 03/05/22 06/05/22
Ingrid 01/05/22 04/05/22
and I expect a result already filtered like the following:
Name Date
Luis 01/05/22
Luis 02/05/22
Luis 03/05/22
Pedro 04/05/22
Pedro 05/05/22
Pedro 06/05/22
Pedro 07/05/22
Pedro 08/05/22
Pedro 09/05/22
Pedro 10/05/22
John 03/05/22
John 04/05/22
John 05/05/22
John 06/05/22
Ingrid 01/05/22
Ingrid 02/05/22
Ingrid 03/05/22
Ingrid 04/05/22
CodePudding user response:
Convert the 'date' columns to Date
class, do a join by 'Name' and filter
after creating a logical expression with <=
, >=
library(dplyr)
library(lubridate)
df1 %>%
left_join(df2) %>%
filter(dmy(Date) >= dmy(Onset), dmy(Date) <= dmy(End)) %>%
select(-c(Onset, End))
-output
Name Date
1 Luis 01/05/22
2 Luis 02/05/22
3 Luis 03/05/22
4 Pedro 04/05/22
5 Pedro 05/05/22
6 Pedro 06/05/22
7 Pedro 07/05/22
8 Pedro 08/05/22
9 Pedro 09/05/22
10 Pedro 10/05/22
11 John 03/05/22
12 John 04/05/22
13 John 05/05/22
14 John 06/05/22
15 Ingrid 01/05/22
16 Ingrid 02/05/22
17 Ingrid 03/05/22
18 Ingrid 04/05/22
data
df1 <- structure(list(Name = c("Luis", "Luis", "Luis", "Luis", "Luis",
"Pedro", "Pedro", "Pedro", "Pedro", "Pedro", "Pedro", "Pedro",
"Pedro", "Pedro", "Pedro", "John", "John", "John", "John", "John",
"John", "Ingrid", "Ingrid", "Ingrid", "Ingrid", "Ingrid"), Date = c("01/05/22",
"02/05/22", "03/05/22", "04/05/22", "05/05/22", "01/05/22", "02/05/22",
"03/05/22", "04/05/22", "05/05/22", "06/05/22", "07/05/22", "08/05/22",
"09/05/22", "10/05/22", "03/05/22", "04/05/22", "05/05/22", "06/05/22",
"07/05/22", "08/05/22", "01/05/22", "02/05/22", "03/05/22", "04/05/22",
"05/05/22")), class = "data.frame", row.names = c(NA, -26L))
df2 <- structure(list(Name = c("Luis", "Pedro", "John", "Ingrid"),
Onset = c("01/05/22",
"04/05/22", "03/05/22", "01/05/22"), End = c("03/05/22", "10/05/22",
"06/05/22", "04/05/22")), class = "data.frame", row.names = c(NA,
-4L))