My dataframe crisisdata consists of many vectors: year, country, a, b, c
I want to create lags and leads for a, b and c in a for loop:
My attempt:
crisis_variables <- c("a", "b", "c")
for (var in crisis_variables){
# add lags
crisisdata[, (paste0("l",1:4, "_", var)):= shift(as.name(var),1:4), by=country]
# add leads
crisisdata[, (paste0("f",0:4, "_", var)):= shift(as.name(var),0:-4), by=country]
But in every lag and lead variables I only have NA values.
CodePudding user response:
You could use the high flexibility of shift
function to generate all lags and leads at once as follow:
# create column names
n = -4:4
cols = paste0(ifelse(n>0, "l", "f"), abs(n))
cols = paste0(cols, "_", rep(crisis_variables, each=length(cols)))
# f4_a, f3_a, ..., l3_c, l4_c
# generate all lags and leads
crisisdata[, (cols) := shift(.SD, n), by=country, .SDcols=crisis_variables]
CodePudding user response:
The as.name
needs eval
to return the value of the column (assuming 'crisisdata' is data.table
)
library(data.table)
for (var in crisis_variables){
# add lags
crisisdata[, (paste0("l",1:4, "_", var)):=
shift(eval(as.name(var)),1:4), by = country]
# add leads
crisisdata[, (paste0("f",0:4, "_", var)):=
shift(eval(as.name(var)),0:-4), by = country]
}
CodePudding user response:
You can take my original approach, and wrap it in a flexible function:
f <- function(df, var, shifts, stem) {
df[, (paste0(stem, abs(shifts),"_",var)):= shift(get(var), shifts), by=country]
}
for(col in c("a","b", "c")) {
f(crisisdata, col, 1:4, "l")
f(crisisdata, col, -1:-4, "f")
}
Alternatively, you can do this with purrr::walk()
instead of a for loop:
purrr::walk(c("a","b","c"), \(v) {
coln = c(paste0("l",1:4,"_",v),paste0("f",1:4,"_",v))
crisisdata[, (coln):= shift(get(v),c(1:4, -1:-4)), by=country]
})
Output:
year country a b c l1_a l2_a l3_a l4_a f1_a f2_a f3_a f4_a l1_b l2_b
<int> <char> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
1: 2000 A -0.56047565 1.84386201 -0.6647694 NA NA NA NA -0.23017749 1.55870831 0.07050839 0.1292877 NA NA
2: 2001 A -0.23017749 -0.65194990 0.4854600 -0.56047565 NA NA NA 1.55870831 0.07050839 0.12928774 1.7150650 1.84386201 NA
3: 2002 A 1.55870831 0.23538657 -0.3756029 -0.23017749 -0.5604756 NA NA 0.07050839 0.12928774 1.71506499 0.4609162 -0.65194990 1.8438620
4: 2003 A 0.07050839 0.07796085 -0.5618764 1.55870831 -0.2301775 -0.5604756 NA 0.12928774 1.71506499 0.46091621 -1.2650612 0.23538657 -0.6519499
5: 2004 A 0.12928774 -0.96185663 -0.3439172 0.07050839 1.5587083 -0.2301775 -0.5604756 1.71506499 0.46091621 -1.26506123 -0.6868529 0.07796085 0.2353866
---
120: 2026 D -1.02412879 -1.13730362 -0.9454088 -0.84970435 -0.6407060 0.1056762 0.3011534 0.11764660 -0.94747461 -0.49055744 -0.2560922 1.49606067 -0.5021987
121: 2027 D 0.11764660 -0.17905159 0.8569230 -1.02412879 -0.8497043 -0.6407060 0.1056762 -0.94747461 -0.49055744 -0.25609219 NA -1.13730362 1.4960607
122: 2028 D -0.94747461 1.90236182 -0.4610383 0.11764660 -1.0241288 -0.8497043 -0.6407060 -0.49055744 -0.25609219 NA NA -0.17905159 -1.1373036
123: 2029 D -0.49055744 -0.10097489 2.4167734 -0.94747461 0.1176466 -1.0241288 -0.8497043 -0.25609219 NA NA NA 1.90236182 -0.1790516
124: 2030 D -0.25609219 -1.35984070 -1.6510489 -0.49055744 -0.9474746 0.1176466 -1.0241288 NA NA NA NA -0.10097489 1.9023618
l3_b l4_b f1_b f2_b f3_b f4_b l1_c l2_c l3_c l4_c f1_c f2_c f3_c f4_c
<num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
1: NA NA -0.65194990 0.23538657 0.07796085 -0.96185663 NA NA NA NA 0.48545998 -0.37560287 -0.56187636 -0.34391723
2: NA NA 0.23538657 0.07796085 -0.96185663 -0.07130809 -0.6647694 NA NA NA -0.37560287 -0.56187636 -0.34391723 0.09049665
3: NA NA 0.07796085 -0.96185663 -0.07130809 1.44455086 0.4854600 -0.6647694 NA NA -0.56187636 -0.34391723 0.09049665 1.59850877
4: 1.8438620 NA -0.96185663 -0.07130809 1.44455086 0.45150405 -0.3756029 0.4854600 -0.6647694 NA -0.34391723 0.09049665 1.59850877 -0.08856511
5: -0.6519499 1.8438620 -0.07130809 1.44455086 0.45150405 0.04123292 -0.5618764 -0.3756029 0.4854600 -0.6647694 0.09049665 1.59850877 -0.08856511 1.08079950
---
120: -0.7886220 -0.7815365 -0.17905159 1.90236182 -0.10097489 -1.35984070 0.3781678 -0.2097932 0.8176594 1.0246732 0.85692301 -0.46103834 2.41677335 -1.65104890
121: -0.5021987 -0.7886220 1.90236182 -0.10097489 -1.35984070 NA -0.9454088 0.3781678 -0.2097932 0.8176594 -0.46103834 2.41677335 -1.65104890 NA
122: 1.4960607 -0.5021987 -0.10097489 -1.35984070 NA NA 0.8569230 -0.9454088 0.3781678 -0.2097932 2.41677335 -1.65104890 NA NA
123: -1.1373036 1.4960607 -1.35984070 NA NA NA -0.4610383 0.8569230 -0.9454088 0.3781678 -1.65104890 NA NA NA
124: -0.1790516 -1.1373036 NA NA NA NA 2.4167734 -0.4610383 0.8569230 -0.9454088 NA NA NA NA
Input:
set.seed(123)
crisisdata = data.frame(
year = rep(2000:2030, 4),
country = rep(LETTERS[1:4], each=31),
a = rnorm(124),
b = rnorm(124),
c = rnorm(124)
)