I am re-writting an earlier question I had which was deleted for lack of details.
I would like to extract the p-value of a number of regression that I am running with a loop and have the p-values stored for each of the independant variable as a list. My issue is with storing the new lists correctly. You will find below the code, the sample data and the expected results.
Many thanks!
ind_var <- list(c("Man","Below_Master","Worker","Rural", "Less_than_26"))
depending_var <- list(c("Q1","Q2","Q3","Q4", "Q5", "Q6"))
x <- list()
for (var_p in depending_var) {
{ for (var in ind_var)
x1 <- lm(paste(var, "~", var_p), data = Ech_final_nom_SD, weights = pond)
x[[var_p]] <- summary(x1)$coefficients[,4] #Where shit hits the fan
}
}
Here is the sample data:
ID | Man | Below_Master | Worker | Rural | Less than 26 | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 1 | 1 | 0 | 0 | 1 | 4 | 3 | 1 | 2 | 2 | 1 |
2 | 0 | 0 | 0 | 1 | 1 | 2 | 2 | 2 | 2 | 3 | 1 |
3 | 0 | 1 | 0 | 0 | 1 | 4 | 3 | 1 | 1 | 3 | 1 |
4 | 0 | 0 | 1 | 0 | 0 | 1 | 4 | 1 | 2 | 3 | 1 |
5 | 0 | 1 | 1 | 0 | 1 | 4 | 3 | 4 | 2 | 3 | 2 |
6 | 1 | 0 | 1 | 1 | 1 | 3 | 2 | 1 | 2 | 2 | 2 |
7 | 0 | 1 | 0 | 0 | 0 | 3 | 3 | 3 | 2 | 1 | 2 |
8 | 1 | 0 | 0 | 0 | 1 | 3 | 3 | 1 | 4 | 1 | 3 |
9 | 1 | 0 | 0 | 1 | 1 | 2 | 3 | 1 | 2 | 2 | 3 |
10 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 2 | 3 |
Expected results (with different p-values for all) :
x_Man <- list(c("0.0234","0.9894", "0.0045", "0.9892", "0.0229", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")
x_Below_Master <- list(c("0.0434","0.9854", "0.0545", "0.0092", "0.0729", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")
x_Worker <- list(c("0.0234","0.9894", "0.0045", "0.9892", "0.0229", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")
x_Rural <- list(c("0.0234","0.9894", "0.0045", "0.9892", "0.0229", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")
x_Less_than_26 <- list(c("0.0234","0.9894", "0.0045", "0.9892", "0.0229", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")
CodePudding user response:
This should work:
l <- lapply(independant_var, function(x) summary(lm(reformulate(depending_var, x), data = Ech_final_nom_SD))$coefficients[, 4])
names(l) <- independant_var
output
$Man
(Intercept) Q1 Q2 Q3 Q4 Q5
0.7006954 0.4183649 0.4814779 0.1129608 0.7429505 0.8469599
Q6
0.3121451
$Below_Master
(Intercept) Q1 Q2 Q3 Q4 Q5
0.4269917 0.3294021 0.8975661 0.4513400 0.1453265 0.4121115
Q6
0.9905834
$Worker
(Intercept) Q1 Q2 Q3 Q4 Q5
0.9251373 0.6595554 0.9057457 0.7430144 0.9231273 0.5541379
Q6
0.6826245
$Rural
(Intercept) Q1 Q2 Q3 Q4 Q5
0.9051546 0.8906284 0.5703472 0.8245217 0.6760297 0.6800893
Q6
0.9835419
$Less_than_26
(Intercept) Q1 Q2 Q3 Q4 Q5
0.23702999 0.05804822 0.37926448 0.26210405 0.16896185 0.12015247
Q6
0.47013899
CodePudding user response:
Here is a way with the nested for
loop in the question. The results object is of a different shape, it's a data.frame.
ind_var <- c("Man","Below_Master","Worker","Rural", "Less_than_26")
depending_var <- c("Q1","Q2","Q3","Q4", "Q5", "Q6")
n_rows <- length(depending_var) * length(ind_var)
x <- data.frame(
dependent = character(n_rows),
independent = character(n_rows),
pval_intercept = numeric(n_rows),
pval_indep = numeric(n_rows)
)
i <- 0L
for (var_p in depending_var) {
for (var in ind_var) {
i <- i 1L
fmla <- reformulate(var, response = var_p)
x1 <- lm(fmla, data = Ech_final_nom_SD, weights = pond)
x[i, "dependent"] <- var_p
x[i, "independent"] <- var
x[i, "pval_intercept"] <- summary(x1)$coefficients[1, 4]
x[i, "pval_indep"] <- summary(x1)$coefficients[2, 4]
}
}
x
#> dependent independent pval_intercept pval_indep
#> 1 Q1 Man 9.160936e-04 0.80277173
#> 2 Q1 Below_Master 2.026789e-03 0.18690481
#> 3 Q1 Worker 2.231686e-04 0.34553732
#> 4 Q1 Rural 2.331972e-04 0.54473730
#> 5 Q1 Less_than_26 1.768920e-02 0.05845715
#> 6 Q2 Man 3.287724e-05 0.27313922
#> 7 Q2 Below_Master 8.982180e-05 0.72446583
#> 8 Q2 Worker 3.896158e-05 0.56210550
#> 9 Q2 Rural 1.712317e-05 0.38768132
#> 10 Q2 Less_than_26 7.359017e-04 0.93892908
#> 11 Q3 Man 6.976609e-04 0.07359019
#> 12 Q3 Below_Master 3.373039e-02 0.26225475
#> 13 Q3 Worker 1.177118e-02 0.74104616
#> 14 Q3 Rural 3.748440e-03 0.63631632
#> 15 Q3 Less_than_26 3.502726e-02 0.90655491
#> 16 Q4 Man 1.337132e-03 0.47136168
#> 17 Q4 Below_Master 8.923744e-05 0.12647490
#> 18 Q4 Worker 2.199469e-04 0.46193445
#> 19 Q4 Rural 2.862687e-04 1.00000000
#> 20 Q4 Less_than_26 8.392753e-03 0.43036607
#> 21 Q5 Man 3.584230e-05 0.11143429
#> 22 Q5 Below_Master 3.701074e-04 1.00000000
#> 23 Q5 Worker 2.603420e-04 0.35588376
#> 24 Q5 Rural 1.346479e-04 0.74828807
#> 25 Q5 Less_than_26 2.975062e-03 0.62876991
#> 26 Q6 Man 2.911220e-03 0.06558768
#> 27 Q6 Below_Master 1.270703e-03 0.74043945
#> 28 Q6 Worker 1.257419e-03 0.78704595
#> 29 Q6 Rural 7.220649e-04 0.82866767
#> 30 Q6 Less_than_26 5.691830e-03 0.82866767
Created on 2022-08-22 by the reprex package (v2.0.1)
And a solution with a double lapply
loop.
res <- lapply(depending_var, \(y) {
lapply(ind_var, \(x) {
fit <- lm(reformulate(x, y), data = Ech_final_nom_SD, weights = pond)
data.frame(
dependent = y,
independent = x,
pval_intercept = summary(fit)$coefficients[1, 4],
pval_indep = summary(fit)$coefficients[2, 4]
)
})
})
res <- do.call(rbind, unlist(res, recursive = FALSE))
res
#> dependent independent pval_intercept pval_indep
#> 1 Q1 Man 9.160936e-04 0.80277173
#> 2 Q1 Below_Master 2.026789e-03 0.18690481
#> 3 Q1 Worker 2.231686e-04 0.34553732
#> 4 Q1 Rural 2.331972e-04 0.54473730
#> 5 Q1 Less_than_26 1.768920e-02 0.05845715
#> 6 Q2 Man 3.287724e-05 0.27313922
#> 7 Q2 Below_Master 8.982180e-05 0.72446583
#> 8 Q2 Worker 3.896158e-05 0.56210550
#> 9 Q2 Rural 1.712317e-05 0.38768132
#> 10 Q2 Less_than_26 7.359017e-04 0.93892908
#> 11 Q3 Man 6.976609e-04 0.07359019
#> 12 Q3 Below_Master 3.373039e-02 0.26225475
#> 13 Q3 Worker 1.177118e-02 0.74104616
#> 14 Q3 Rural 3.748440e-03 0.63631632
#> 15 Q3 Less_than_26 3.502726e-02 0.90655491
#> 16 Q4 Man 1.337132e-03 0.47136168
#> 17 Q4 Below_Master 8.923744e-05 0.12647490
#> 18 Q4 Worker 2.199469e-04 0.46193445
#> 19 Q4 Rural 2.862687e-04 1.00000000
#> 20 Q4 Less_than_26 8.392753e-03 0.43036607
#> 21 Q5 Man 3.584230e-05 0.11143429
#> 22 Q5 Below_Master 3.701074e-04 1.00000000
#> 23 Q5 Worker 2.603420e-04 0.35588376
#> 24 Q5 Rural 1.346479e-04 0.74828807
#> 25 Q5 Less_than_26 2.975062e-03 0.62876991
#> 26 Q6 Man 2.911220e-03 0.06558768
#> 27 Q6 Below_Master 1.270703e-03 0.74043945
#> 28 Q6 Worker 1.257419e-03 0.78704595
#> 29 Q6 Rural 7.220649e-04 0.82866767
#> 30 Q6 Less_than_26 5.691830e-03 0.82866767
Created on 2022-08-22 by the reprex package (v2.0.1)
But the simplest solution is to remember that lm
can fit multiple regression models and fit all responses at once.
resp <- as.matrix(Ech_final_nom_SD[depending_var])
out <- lapply(ind_var, \(x) {
fit <- lm(resp ~ get(x), data = Ech_final_nom_SD)
cfs <- sapply(summary(fit), \(s) s$coefficients[, 4])
rownames(cfs)[2] <- x
cfs
})
do.call(rbind.data.frame, out)
#> Response Q1 Response Q2 Response Q3 Response Q4 Response Q5 Response Q6
#> (Intercept) 0.0009160936 3.287724e-05 0.0006976609 1.337132e-03 0.0000358423 0.0029112201
#> Man 0.8027717312 2.731392e-01 0.0735901860 4.713617e-01 0.1114342894 0.0655876769
#> (Intercept)1 0.0020267889 8.982180e-05 0.0337303906 8.923744e-05 0.0003701074 0.0012707033
#> Below_Master 0.1869048103 7.244658e-01 0.2622547504 1.264749e-01 1.0000000000 0.7404394537
#> (Intercept)2 0.0002231686 3.896158e-05 0.0117711783 2.199469e-04 0.0002603420 0.0012574186
#> Worker 0.3455373232 5.621055e-01 0.7410461602 4.619345e-01 0.3558837647 0.7870459482
#> (Intercept)3 0.0002331972 1.712317e-05 0.0037484405 2.862687e-04 0.0001346479 0.0007220649
#> Rural 0.5447373008 3.876813e-01 0.6363163232 1.000000e 00 0.7482880706 0.8286676662
#> (Intercept)4 0.0176892034 7.359017e-04 0.0350272586 8.392753e-03 0.0029750616 0.0056918299
#> Less_than_26 0.0584571528 9.389291e-01 0.9065549125 4.303661e-01 0.6287699080 0.8286676662
Created on 2022-08-22 by the reprex package (v2.0.1)
Data
x<-'ID Man Below_Master Worker Rural Less_than_26 Q1 Q2 Q3 Q4 Q5 Q6
1 1 1 0 0 1 4 3 1 2 2 1
2 0 0 0 1 1 2 2 2 2 3 1
3 0 1 0 0 1 4 3 1 1 3 1
4 0 0 1 0 0 1 4 1 2 3 1
5 0 1 1 0 1 4 3 4 2 3 2
6 1 0 1 1 1 3 2 1 2 2 2
7 0 1 0 0 0 3 3 3 2 1 2
8 1 0 0 0 1 3 3 1 4 1 3
9 1 0 0 1 1 2 3 1 2 2 3
10 1 1 1 0 0 1 1 1 1 2 3'
Ech_final_nom_SD <- read.table(textConnection(x), header = TRUE)
pond <- rep(1, nrow(Ech_final_nom_SD))
Created on 2022-08-22 by the reprex package (v2.0.1)