The code below uses the data.table
function to generate an output table. However, I would like to know if it is possible to optimize the code somehow and still get the same result? The idea is to reduce the code in order to decrease the processing time.
library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
DR1 = c(4,1,4,3,6,4,3,5),
DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
DR08 = c(4,0,0,1,2,4,4,4),DR09 = c(2,5,4,4,9,4,7,8),DR010 = c(2,5,4,4,9,4,7,8),DR011 = c(4,7,3,2,2,7,7,7),
DR012 = c(4,4,2,3,0,4,4,5),DR013 = c(4,4,1,4,0,3,2,0),DR014 = c(0,3,1,2,0,2,NA,NA)),
class = "data.frame", row.names = c(NA, -8L))
selection = startsWith(names(df1), "DR0")
df1[selection][is.na(df1[selection])] = 0
dt1 <- as.data.table(df1)
cols <- grep("^DR0", colnames(dt1), value = TRUE)
medi_ana <-
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]
f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\\d $")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana, (nm2) := Map(` `, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV1 <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]
dmda<-"2021-07-09"
code<-"CDE"
SPV2<-melt(SPV1[date2 == dmda & Code == code][,
lapply(.SD, sum, na.rm = TRUE), by = Code,
.SDcols = patterns("^DR0")],
id.var = "Code", variable.name = "name", value.name = "val")[,
name := readr::parse_number(as.character(name))][]
> SPV2
Code name val
1: CDE 1 5
2: CDE 2 5
3: CDE 3 5
4: CDE 4 5
5: CDE 5 5
6: CDE 6 5
7: CDE 7 5
8: CDE 8 5
9: CDE 9 5
10: CDE 10 5
11: CDE 11 5
12: CDE 12 5
13: CDE 13 5
14: CDE 14 5
result <- SPV2 %>%
group_by(Code) %>%
slice((as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name) 1) %>%
ungroup
> result
# A tibble: 3 x 3
Code name val
<chr> <dbl> <dbl>
1 CDE 12 5
2 CDE 13 5
3 CDE 14 5
CodePudding user response:
The dplyr
code can be converted to data.table
as
SPV2[na.omit(SPV2[, .I[(as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name) 1], .(Code)]$V1)]
-output
Code name val
<char> <num> <num>
1: CDE 12 5
2: CDE 13 5
3: CDE 14 5