Home > Blockchain >  Adjust code that uses data.table function
Adjust code that uses data.table function

Time:04-20

The code below uses the data.table function to generate an output table. However, I would like to know if it is possible to optimize the code somehow and still get the same result? The idea is to reduce the code in order to decrease the processing time.

library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)

df1 <- structure(
  list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
                "2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
       Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
       Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
       DR1 = c(4,1,4,3,6,4,3,5),
       DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
       DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
       DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
       DR08 = c(4,0,0,1,2,4,4,4),DR09 = c(2,5,4,4,9,4,7,8),DR010 = c(2,5,4,4,9,4,7,8),DR011 = c(4,7,3,2,2,7,7,7), 
       DR012 = c(4,4,2,3,0,4,4,5),DR013 = c(4,4,1,4,0,3,2,0),DR014 = c(0,3,1,2,0,2,NA,NA)),
  class = "data.frame", row.names = c(NA, -8L))

selection = startsWith(names(df1), "DR0")

df1[selection][is.na(df1[selection])] = 0

dt1 <- as.data.table(df1)

cols <- grep("^DR0", colnames(dt1), value = TRUE)

medi_ana <- 
  dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
  ][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]

f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\\d $")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana,  (nm2) := Map(` `, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV1 <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]

dmda<-"2021-07-09"
code<-"CDE"

SPV2<-melt(SPV1[date2 == dmda & Code == code][, 
   lapply(.SD, sum, na.rm = TRUE), by = Code, 
   .SDcols = patterns("^DR0")],
    id.var = "Code", variable.name = "name", value.name = "val")[, 
      name := readr::parse_number(as.character(name))][]

 > SPV2
    Code name val
 1:  CDE    1   5
 2:  CDE    2   5
 3:  CDE    3   5
 4:  CDE    4   5
 5:  CDE    5   5
 6:  CDE    6   5
 7:  CDE    7   5
 8:  CDE    8   5
 9:  CDE    9   5
10:  CDE   10   5
11:  CDE   11   5
12:  CDE   12   5
13:  CDE   13   5
14:  CDE   14   5

result <- SPV2 %>% 
    group_by(Code) %>% 
    slice((as.Date(dmda) - min(as.Date(df1$date1) [
      df1$Code == first(Code)])):max(name) 1) %>%
    ungroup

> result

# A tibble: 3 x 3
  Code   name   val
  <chr> <dbl> <dbl>
1 CDE      12     5
2 CDE      13     5
3 CDE      14     5

CodePudding user response:

The dplyr code can be converted to data.table as

SPV2[na.omit(SPV2[, .I[(as.Date(dmda) - min(as.Date(df1$date1) [ 
         df1$Code == first(Code)])):max(name) 1], .(Code)]$V1)]

-output

    Code  name   val
   <char> <num> <num>
1:    CDE    12     5
2:    CDE    13     5
3:    CDE    14     5
  • Related