I have data as follows:
library(data.table)
# Reproducible toy panel: 100 rows. Shorter columns are recycled by
# data.table() to the length of the longest one.
set.seed(1)
# Five consecutive observations per year, 2000 through 2004 (25 values).
year <- rep(c(2000, 2001, 2002, 2003, 2004), each = 5)
DT <- data.table(
  panelID         = sample(10, 10),
  some_type       = as.factor(sample(0:5, 6)),
  some_other_type = as.factor(sample(0:5, 6)),
  Group           = rep(c(1, 2, 3, 4, 5), each = 20),
  wt              = 15 * round(runif(100) / 10, 2),
  Income          = round(rnorm(10, -5, 5), 2),
  Income_proxy    = round(rnorm(10, -6, 6), 2),
  year            = rep(year, 4),
  Happiness       = sample(10, 10),
  Sex             = round(rnorm(10, 0.75, 0.3), 2),
  Age             = sample(100, 100),
  Height          = 150 * round(rnorm(10, 0.75, 0.3), 2)
)
I am trying to write a function that automatically creates certain calculations, just by providing the grouping variables.
# Question's attempt: for each grouping variable in `groups`, add a column
# named "rel_deviation_<group>" holding 100 * mean((varA - varB) / varA),
# computed per group; when `years` is supplied the intent appears to be to
# group by year as well.
# NOTE(review): this is the non-working version the question is asking about.
# The code is left exactly as posted; the comments only point at the suspect
# lines.
calulate_relative_dev <- function(DT, varA="Income", varB="Income_proxy", groups, years=NULL) {
if (is.null(years)) {
# NOTE(review): `i` is used here before the loop below defines it.
out_names <- paste0("rel_deviation_", groups[i])
for (i in seq_along(groups)) {
# NOTE(review): varA / varB default to column names given as strings, so
# `varA - varB` is presumably arithmetic on character values rather than on
# the columns they name -- confirm against the quoted error.
setDT(DT)[, (out_names[i]) := 100*mean((varA - varB) / varA), by=eval(groups[i])]
}
# NOTE(review): the `else if` below has no opening brace, so only the single
# assignment that follows it belongs to that branch; the nested loops after it
# are ordinary statements of the function body and run in either case.
} else if (!is.null(years))
out_names <- paste0("rel_deviation_", groups[i], years[i])
for (i in seq_along(groups)) {
for (j in seq_along(years)) {
# NOTE(review): the inner index `j` is never used; `years[i]` is passed as the
# second argument of eval(), which is the evaluation environment, not an
# additional grouping column.
setDT(DT)[, (out_names[i]) := 100*mean((varA - varB) / varA), by=eval(groups[i], years[i])]
}
}
}
In order to do:
calulate_relative_dev(DT, groups = c("Group", "some_type"))
and
# Same call, additionally passing the `year` vector defined with the sample data.
calulate_relative_dev(DT, groups = c("Group", "some_type"), years = year)
But when I do, I get the following error:
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'mean': object 'Income' not found
Called from: h(simpleError(msg, call))
If I try to put `Income` in quotes, I get:
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'mean': non-numeric argument to binary operator
Called from: h(simpleError(msg, call))
How should I write the syntax here?
CodePudding user response:
Based on your comment/reply to my question, I understand `years` is intended to be a logical. Here is one possible function:
# Add one relative-deviation column per grouping variable.
#
# For every name in `groups`, a column "rel_deviation_<group>" (with the
# suffix "_by_year" when `year` is TRUE) is added, holding
# 100 * mean((varA - varB) / varA) computed within each group, or within each
# group/year combination when `year` is TRUE.
#
# DT      table to start from; the function works on a copy of it
# varA    name of the reference column
# varB    name of the column compared against varA
# groups  character vector of grouping column names
# year    logical; also group by the "year" column?
#
# Returns the copy with the new columns added.
calulate_relative_dev <- function(DT, varA="Income", varB="Income_proxy", groups, year=FALSE) {
  work <- copy(DT)
  # Temporarily give the two input columns fixed names so the expression in
  # `j` can refer to them directly; the original names are restored below.
  placeholder <- c("varA", "varB")
  setnames(work, old = c(varA, varB), new = placeholder)
  for (grp in groups) {
    target <- paste0("rel_deviation_", grp)
    if (year) {
      target <- paste0(target, "_by_year")
    }
    work[, (target) := 100 * mean((varA - varB) / varA),
         by = c(grp, if (year) "year")]
  }
  setnames(work, old = placeholder, new = c(varA, varB))
  # The trailing [] makes the result print when the call is not assigned.
  work[]
}
# Per-group deviations only.
calulate_relative_dev(DT, groups = c("Group", "some_type"))
# Per-group deviations, additionally split by year.
calulate_relative_dev(DT, groups = c("Group", "some_type"), year = TRUE)
I did temporary renames to make the `data.table` code simpler to read/write. Returning `dt[]` ensures the `data.table` is printed after the function is evaluated.
CodePudding user response:
Here is a slightly simpler approach to this function:
# Add one relative-deviation column per grouping variable.
#
# For every name in `groups`, a column "<group>_rel_dev" (or
# "<group>_rel_dev_by_yr" when `year` is TRUE) is added, holding
# 100 * mean((varA - varB) / varA) within each group, or within each
# group/year combination when `year` is TRUE.
#
# DT      data.table; `:=` adds the new columns to it by reference, so the
#         caller's table is modified
# varA    name of the reference column
# varB    name of the column compared against varA
# groups  character vector of grouping column names
# year    logical; also group by the "year" column?
#
# Returns DT (printed when the call is not assigned).
calculate_relative_dev <- function(DT, varA="Income", varB="Income_proxy", groups, year=FALSE) {
  for (grp in groups) {
    new_col <- paste0(grp, "_rel_dev")
    if (year) {
      new_col <- paste0(new_col, "_by_yr")
    }
    # The grouping columns are passed as an explicit c(...) call rather than
    # as a bare variable: a bare symbol in `by` is taken to be a column of DT
    # when a column of that name exists, which would silently change the
    # grouping.
    DT[, (new_col) := 100 * mean((get(varA) - get(varB)) / get(varA)),
       by = c(grp, if (year) "year")]
  }
  DT[]
}