The code below works fine, but I'd like to create another variable called JOV
after SPV
. This variable would have some condition as follows:
If I have "Category
","Week"
and "DTT"
in group_cols
, do:
SPV %>% filter(date2 == dmda, Category == CategoryChosse, DTT==DTest)
If I have "Category"
and "Week"
in group_cols
, do:
SPV %>% filter(date2 == dmda, Category == CategoryChosse)
If I only have "Category"
in group_cols
, do:
SPV %>% filter(date2 == dmda)
Executable code below
library(dplyr)
library(tidyverse)
library(lubridate)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-23","2021-06-24","2021-06-30","2021-07-01"),
DTT= c("Hol","Hol","Hol",0),
Week= c("Wednesday","Thursday","Wednesday","Thursday"),
Category = c("ABC","FDE","ABC","FDE"),
DR1 = c(4,1,1,2),
DR01 = c(4,1,2,3), DR02= c(4,2,0,2),DR03= c(9,5,0,1),
DR04 = c(5,4,3,2),DR05 = c(5,4,0,2)),
class = "data.frame", row.names = c(NA, -4L))
dmda<-"2021-07-01"
CategoryChosse<-"FDE"
DTest<-"Hol"
Wk<-"Thursday"
Dx<-subset(df1,df1$date2<df1$date1)
x<-Dx %>% select(starts_with("DR0"))
x<-cbind(Dx, setNames(Dx$DR1 - x, paste0(names(x), "_PV")))
PV<-select(x, date2,Week, Category, DTT, DR1, ends_with("PV"))
group_cols <-
if (any(PV$DTT == DTest & PV$Week == Wk, na.rm = TRUE)) {
c("Category", "Week", "DTT")
} else if (any(PV$Week == Wk & PV$Category == CategoryChosse & PV$DTT != DTest, na.rm=TRUE)) {
c("Category", "Week")
} else {
"Week"
}
med <- PV %>%
group_by(across(all_of(group_cols))) %>%
summarize(across(ends_with("PV"), median),.groups = 'drop')
SPV <- df1 %>%
inner_join(med, by = group_cols) %>%
mutate(across(matches("^DR0\\d $"), ~.x
get(paste0(cur_column(), '_PV')),
.names = '{col}_{col}_PV')) %>%
select(date1:Category, DR01_DR01_PV:last_col())
CodePudding user response:
Try:
SPV %>%
filter(
date2 == dmda,
!("Category" %in% group_cols) | Category == CategoryChosse,
!all(c("Category", "DTT") %in% group_cols) | DTT == DTest
)
That's a literal translation of your conditions. However, if I'm reading it right, it can be simplified a little with
SPV %>%
filter(
date2 == dmda,
!("Category" %in% group_cols) | Category == CategoryChosse,
!("DTT" %in% group_cols) | DTT == DTest
)
if you ever imagine allowing "DTT"
and not "Category"
in your group_cols
. (This works even if that will never happen.)
CodePudding user response:
You can store the different cases in a list and just pull the elements as needed.
- Change this function to return vector and list indices.
filter_condition <-
if (any(PV$DTT == DTest & PV$Week == Wk, na.rm = TRUE)) {
1:3 # if you had options out of order, you could have something like c(1, 3)
} else if (any(PV$Week == Wk & PV$Category == CategoryChosse & PV$DTT != DTest, na.rm=TRUE)) {
1:2
} else {
1
}
- Create the vectors and lists for grouping and filtering.
group_cols <- c("Week", "Category", "DTT")
filter_opts <- rlang::exprs(date2 == dmda,
Category == CategoryChosse,
DTT == DTest)
- Change your
group_by()
to pull from the variable based on which case you are in.
med <- PV %>%
group_by(across(all_of(group_cols[filter_condition]))) %>%
summarize(across(ends_with("PV"), median),.groups = 'drop')
- Same code as before.
inner_join(med, by = group_cols) %>%
mutate(across(matches("^DR0\\d $"), ~.x
get(paste0(cur_column(), '_PV')),
.names = '{col}_{col}_PV')) %>%
select(date1:Category, DR01_DR01_PV:last_col())
- Apply the filter conditions based on which case you are in.
SPV %>%
filter(!!!filter_opts[filter_condition])
As mentioned in another answer, you can shorten this since it seems like date2
is always desired. But this draws up a bit of a framework for things like this.