DATA BELOW
library(fixest)
analysis<-tibble(off_race = c("hispanic", "hispanic", "white","white", "hispanic", "white", "hispanic", "white", "white", "white","hispanic"), any_black_uof = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), any_black_arrest = c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), prop_white_scale = c(0.866619646027524, -1.14647499712298, 1.33793539994219, 0.593565300512359, -0.712819809606193, 0.3473585867755, -1.37025501425243, 1.16596624239715, 0.104521426674564, 0.104521426674564, -1.53728347122581), prop_hisp_scale=c(-0.347382203637802, 1.54966785579018,
-0.833021026477168, -0.211470492567308, 1.48353691981021,
0.421968013870802, 2.63739845069911, -0.61002505397242, 0.66674880256898,0.66674880256898, 2.93190487813111))
I would like to run a series of regressions that iterate over these vectors
officer_race = c("black", "white", "hispanic")
primary_ind<-c("prop_white_scale","prop_hisp_scale","prop_black_scale")
outcome<-c("any_black_uof","any_white_uof","any_hisp_uof","any_black_arrest","any_white_arrest","any_hisp_arrest","any_black_stop","any_white_stop","any_hisp_stop")
Also of note, I would like to use the fixest package where the regressions would look like this
feols(any_black_uof~ prop_white_scale,data=analysis[analysis$off_race =="black"])
feols(any_black_uof~ prop_white_scale,data=analysis[analysis$off_race =="white"])
feols(any_black_uof~ prop_white_scale,data=analysis[analysis$off_race=="hispanic"])
feols(any_black_uof~ prop_hisp_scale,data=analysis[analysis$off_race =="black"])
feols(any_black_uof~ prop_hisp_scale,data=analysis[analysis$off_race =="white"])
etc. iterating through all possible combinations and creating a list of fixest regression objects.
I tried this and it works with the lm() function. Yet, it breaks with feols.
result <- lapply(split(analysis, analysis$off_race), function(x) {
sapply(primary_ind2, function(y) {
sapply(outcome2, function(z) {
feols(paste(y, z, sep = "~"), x)
}, simplify = FALSE)
}, simplify = FALSE)
})
CodePudding user response:
The reason lm()
works is because we can supply formulas as strings (probably because its such a common mistake to make?) so it works with paste()
.
Usually we use reformulate()
to create formulas based on strings and with this feols()
will work.
I changed your model input data a little because we only have a fraction of your data and your model inputs contain column names that are not present in the example data.
library(fixest)
library(tibble)
officer_race <- c("black", "white", "hispanic")
primary_ind <-c("prop_white_scale","prop_hisp_scale")
outcome <- c("any_black_uof","any_black_arrest")
result <- lapply(split(analysis, analysis$off_race), function(x) {
sapply(primary_ind, function(y) {
sapply(outcome, function(z) {
feols(reformulate(y, z), x)
}, simplify = FALSE)
}, simplify = FALSE)
})
#> The variable 'any_black_uofTRUE' has been removed because of collinearity (see $collin.var).
#> The variable 'any_black_uofTRUE' has been removed because of collinearity (see $collin.var).
#> The variable 'any_black_uofTRUE' has been removed because of collinearity (see $collin.var).
#> The variable 'any_black_arrestTRUE' has been removed because of collinearity (see $collin.var).
#> The variable 'any_black_uofTRUE' has been removed because of collinearity (see $collin.var).
#> The variable 'any_black_arrestTRUE' has been removed because of collinearity (see $collin.var).
result
#> $hispanic
#> $hispanic$prop_white_scale
#> $hispanic$prop_white_scale$any_black_uof
#> OLS estimation, Dep. Var.: prop_white_scale
#> Observations: 5
#> Standard-errors: IID
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.780043 0.434284 -1.79616 0.14689
#> ... 1 variable was removed because of collinearity (any_black_uofTRUE)
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.868568
#>
#> $hispanic$prop_white_scale$any_black_arrest
#> OLS estimation, Dep. Var.: prop_white_scale
#> Observations: 5
#> Standard-errors: IID
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1.19171 0.178578 -6.67332 0.0068613 **
#> any_black_arrestTRUE 2.05833 0.399313 5.15468 0.0141561 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.276652 Adj. R2: 0.864731
#>
#>
#> $hispanic$prop_hisp_scale
#> $hispanic$prop_hisp_scale$any_black_uof
#> OLS estimation, Dep. Var.: prop_hisp_scale
#> Observations: 5
#> Standard-errors: IID
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1.65103 0.576435 2.8642 0.045735 *
#> ... 1 variable was removed because of collinearity (any_black_uofTRUE)
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 1.15287
#>
#> $hispanic$prop_hisp_scale$any_black_arrest
#> OLS estimation, Dep. Var.: prop_hisp_scale
#> Observations: 5
#> Standard-errors: IID
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 2.15063 0.371203 5.79366 0.010230 *
#> any_black_arrestTRUE -2.49801 0.830036 -3.00952 0.057234 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.575066 Adj. R2: 0.668248
#>
#>
#>
#> $white
#> $white$prop_white_scale
#> $white$prop_white_scale$any_black_uof
#> OLS estimation, Dep. Var.: prop_white_scale
#> Observations: 6
#> Standard-errors: IID
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.608978 0.217505 2.79984 0.038001 *
#> ... 1 variable was removed because of collinearity (any_black_uofTRUE)
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.486355
#>
#> $white$prop_white_scale$any_black_arrest
#> OLS estimation, Dep. Var.: prop_white_scale
#> Observations: 6
#> Standard-errors: IID
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.608978 0.217505 2.79984 0.038001 *
#> ... 1 variable was removed because of collinearity (any_black_arrestTRUE)
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.486355
#>
#>
#> $white$prop_hisp_scale
#> $white$prop_hisp_scale$any_black_uof
#> OLS estimation, Dep. Var.: prop_hisp_scale
#> Observations: 6
#> Standard-errors: IID
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.016825 0.269335 0.062468 0.95261
#> ... 1 variable was removed because of collinearity (any_black_uofTRUE)
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.602251
#>
#> $white$prop_hisp_scale$any_black_arrest
#> OLS estimation, Dep. Var.: prop_hisp_scale
#> Observations: 6
#> Standard-errors: IID
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.016825 0.269335 0.062468 0.95261
#> ... 1 variable was removed because of collinearity (any_black_arrestTRUE)
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.602251
Created on 2022-10-19 by the reprex package (v2.0.1)
CodePudding user response:
Here is an option
library(dplyr)
library(purrr)
library(fixest)
library(tidyr)
fmla_dat <- crossing(outcome, primary_ind) %>%
transmute(fmla = map2(primary_ind, outcome, ~ reformulate(.x, response = .y)))
analysis %>%
group_by(off_race) %>%
summarise(out = map(fmla_dat$fmla,
~ possibly(feols, otherwise = NA)(.x, data = cur_data())), .groups = 'drop')