Given the mtcars
data:
> head(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
What I want to do is to perform linear fit with y = mpg
as the outcome with all other variables as predictors x
, in one on one basis (i.e. from mpg ~ cyl
up to mpg ~ carb
).
How can I do that with dplyr?
So far I have this code (as suggested by Maurits):
library(broom)
library(tidyr)
library(dplyr)
lm(mpg ~ ., data = mtcars) %>%
# glance() # this fail
tidy()
It gave me this result:
# A tibble: 11 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 12.3 18.7 0.657 0.518
2 cyl -0.111 1.05 -0.107 0.916
3 disp 0.0133 0.0179 0.747 0.463
4 hp -0.0215 0.0218 -0.987 0.335
5 drat 0.787 1.64 0.481 0.635
6 wt -3.72 1.89 -1.96 0.0633
7 qsec 0.821 0.731 1.12 0.274
8 vs 0.318 2.10 0.151 0.881
9 am 2.52 2.06 1.23 0.234
10 gear 0.655 1.49 0.439 0.665
11 carb -0.199 0.829 -0.241 0.812
But when I do
> tidy(lm(mpg ~ cyl, data = mtcars))
# A tibble: 2 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 37.9 2.07 18.3 8.37e-18
2 cyl -2.88 0.322 -8.92 6.11e-10
Notice the difference in estimate from -2.88 to -0.11.
CodePudding user response:
You can use the following code
data(mtcars)
x = names(mtcars[,-1])
out <- unlist(lapply(1, function(n) combn(x, 1, FUN=function(row) paste0("mpg ~ ", paste0(row, collapse = " ")))))
out
#> [1] "mpg ~ cyl" "mpg ~ disp" "mpg ~ hp" "mpg ~ drat" "mpg ~ wt"
#> [6] "mpg ~ qsec" "mpg ~ vs" "mpg ~ am" "mpg ~ gear" "mpg ~ carb"
library(broom)
library(dplyr)
#To have the regression coefficients
tmp1 = bind_rows(lapply(out, function(frml) {
a = tidy(lm(frml, data=mtcars))
a$frml = frml
return(a)
}))
head(tmp1)
#To have the regression results i.e. R2, AIC, BIC
tmp2 = bind_rows(lapply(out, function(frml) {
a = glance(lm(frml, data=mtcars))
a$frml = frml
return(a)
}))
head(tmp2)
write.csv(tmp1, "Try_lm_coefficients.csv")
write.csv(tmp2, "Try_lm_results.csv")
CodePudding user response:
Here is another option
library(tidyverse)
library(broom)
idx_var <- which(!names(mtcars) %in% "mpg") %>% set_names(names(mtcars)[.])
idx_res <- which(names(mtcars) %in% "mpg")
map_dfr(
idx_var,
~ tidy(lm(mtcars[[idx_res]] ~ mtcars[[.x]])),
.id = "variable")
## A tibble: 20 x 6
# variable term estimate std.error statistic p.value
# <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 cyl (Intercept) 37.9 2.07 18.3 8.37e-18
# 2 cyl mtcars[[.x]] -2.88 0.322 -8.92 6.11e-10
# 3 disp (Intercept) 29.6 1.23 24.1 3.58e-21
# 4 disp mtcars[[.x]] -0.0412 0.00471 -8.75 9.38e-10
# 5 hp (Intercept) 30.1 1.63 18.4 6.64e-18
# 6 hp mtcars[[.x]] -0.0682 0.0101 -6.74 1.79e- 7
# 7 drat (Intercept) -7.52 5.48 -1.37 1.80e- 1
# 8 drat mtcars[[.x]] 7.68 1.51 5.10 1.78e- 5
# 9 wt (Intercept) 37.3 1.88 19.9 8.24e-19
#10 wt mtcars[[.x]] -5.34 0.559 -9.56 1.29e-10
#...