I am trying to run a loop that takes each column of a dataset in turn as the dependent variable, uses the remaining columns as the independent variables, and fits a linear model with lm. Here's my code:
# Intended to fit one linear model per column of `a` (that column as the
# response, all other columns as predictors) and to store one value from each
# fit in comp_matrix. As posted it fails; see the NOTE(review) comments.
#
# a: the dataset whose columns are looped over.
# NOTE(review): the last expression is the for loop and comp_matrix is never
# returned, so the caller does not receive the collected values.
quant<-function(a){
i=1
colnames1<-colnames(a)
# Model specification: linear regression, 'lm' engine, regression mode.
lm_model <- linear_reg() %>%
set_engine('lm') %>% # adds lm implementation of linear regression
set_mode('regression')
for (i in 1:ncol(a)) {
# NOTE(review): in the formula below the left-hand side is the expression
# colnames1[i] itself (a length-one character vector), not the column it
# names, which is consistent with the "variable lengths differ" error.
# NOTE(review): `data` is the global set1, not the argument `a`.
lm_fit <- lm_model %>%
fit(colnames1[i] ~ ., data = set1)
# Stores row 1, column 2 of the tidied fit. comp_matrix is not created
# anywhere in this function -- presumably it is expected to exist already.
comp_matrix[i]<-tidy(lm_fit)[1,2]
# NOTE(review): `i 1` is not valid R; presumably `i + 1` lost its operator.
# The for loop already advances i, so no manual increment is needed.
i<-i 1
}
}
When I provide it with a dataset, it shows this error:
> quant(set1)
Error in model.frame.default(formula = colnames1[i] ~ ., data = data, : variable lengths differ (found for 'Imp of Family')
I will be using comp_matrix for coefficient comparison among models later on. Is there a better way to do this fundamentally?
Packages used:
library(dplyr)
library(haven)
library(ggplot2)
library(tidyverse)
library(broom)
library(modelsummary)
library(parsnip)
CodePudding user response:
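In `colnames1[i] ~ .` the left-hand side is not replaced by the column name; it is evaluated as the length-one character vector `colnames1[i]`, which is why the variable lengths differ. We could build the formula from a string instead, replacing the `fit` line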
with
fit(as.formula(paste(colnames1[i], "~ .")), data = a)
-full function
# Fit one linear regression per column of `a`, using that column as the
# response and every remaining column as a predictor.
#
# a: a data frame. Its column names are first normalised with
#    janitor::clean_names() so each name can be pasted into a formula string.
#
# Returns an unnamed list with one element per column (in column order); each
# element is the 1 x 1 tibble found at row 1, column 2 of the tidied fit.
quant <- function(a) {
  a <- janitor::clean_names(a)
  responses <- colnames(a)

  # Shared model specification: linear regression, "lm" engine.
  spec <- set_mode(set_engine(linear_reg(), "lm"), "regression")

  lapply(responses, function(response) {
    # Build "<response> ~ ." as text so the column name, not the indexing
    # expression, ends up on the left-hand side of the formula.
    model_formula <- as.formula(paste(response, "~ ."))
    fitted_model <- fit(spec, model_formula, data = a)
    tidy(fitted_model)[1, 2]
  })
}
-testing
> dat <- tibble(col1 = 1:5, col2 = 5:1)
> quant(dat)
[[1]]
# A tibble: 1 × 1
estimate
<dbl>
1 6
[[2]]
# A tibble: 1 × 1
estimate
<dbl>
1 6