Home > database >  Updating list in nested loop in R
Updating list in nested loop in R

Time:08-24

I am re-writting an earlier question I had which was deleted for lack of details.

I would like to extract the p-value of a number of regression that I am running with a loop and have the p-values stored for each of the independant variable as a list. My issue is with storing the new lists correctly. You will find below the code, the sample data and the expected results.

Many thanks!

ind_var <- list(c("Man","Below_Master","Worker","Rural", "Less_than_26"))
depending_var <- list(c("Q1","Q2","Q3","Q4", "Q5", "Q6"))


x <- list()

for (var_p in depending_var) {
 { for (var in ind_var)  
    
x1 <- lm(paste(var, "~", var_p), data = Ech_final_nom_SD, weights = pond)
 
x[[var_p]] <- summary(x1)$coefficients[,4]    #Where shit hits the fan

  }
}

Here is the sample data:

ID Man Below_Master Worker Rural Less than 26 Q1 Q2 Q3 Q4 Q5 Q6
1 1 1 0 0 1 4 3 1 2 2 1
2 0 0 0 1 1 2 2 2 2 3 1
3 0 1 0 0 1 4 3 1 1 3 1
4 0 0 1 0 0 1 4 1 2 3 1
5 0 1 1 0 1 4 3 4 2 3 2
6 1 0 1 1 1 3 2 1 2 2 2
7 0 1 0 0 0 3 3 3 2 1 2
8 1 0 0 0 1 3 3 1 4 1 3
9 1 0 0 1 1 2 3 1 2 2 3
10 1 1 1 0 0 1 1 1 1 2 3

Expected results (with different p-values for all) :

x_Man <- list(c("0.0234","0.9894", "0.0045", "0.9892", "0.0229", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")

x_Below_Master <- list(c("0.0434","0.9854", "0.0545", "0.0092", "0.0729", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")

x_Worker <- list(c("0.0234","0.9894", "0.0045", "0.9892", "0.0229", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")

x_Rural <- list(c("0.0234","0.9894", "0.0045", "0.9892", "0.0229", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")

x_Less_than_26 <- list(c("0.0234","0.9894", "0.0045", "0.9892", "0.0229", "0.0254","0.0894", "0.0047", "0.2392", "0.0029")

CodePudding user response:

This should work:

l <- lapply(independant_var, function(x) summary(lm(reformulate(depending_var, x), data = Ech_final_nom_SD))$coefficients[, 4])
names(l) <- independant_var

output

$Man
(Intercept)          Q1          Q2          Q3          Q4          Q5 
  0.7006954   0.4183649   0.4814779   0.1129608   0.7429505   0.8469599 
         Q6 
  0.3121451 

$Below_Master
(Intercept)          Q1          Q2          Q3          Q4          Q5 
  0.4269917   0.3294021   0.8975661   0.4513400   0.1453265   0.4121115 
         Q6 
  0.9905834 

$Worker
(Intercept)          Q1          Q2          Q3          Q4          Q5 
  0.9251373   0.6595554   0.9057457   0.7430144   0.9231273   0.5541379 
         Q6 
  0.6826245 

$Rural
(Intercept)          Q1          Q2          Q3          Q4          Q5 
  0.9051546   0.8906284   0.5703472   0.8245217   0.6760297   0.6800893 
         Q6 
  0.9835419 

$Less_than_26
(Intercept)          Q1          Q2          Q3          Q4          Q5 
 0.23702999  0.05804822  0.37926448  0.26210405  0.16896185  0.12015247 
         Q6 
 0.47013899

CodePudding user response:

Here is a way with the nested for loop in the question. The results object is of a different shape, it's a data.frame.

ind_var <- c("Man","Below_Master","Worker","Rural", "Less_than_26")
depending_var <- c("Q1","Q2","Q3","Q4", "Q5", "Q6")

n_rows <- length(depending_var) * length(ind_var)
x <- data.frame(
  dependent = character(n_rows),
  independent = character(n_rows),
  pval_intercept = numeric(n_rows),
  pval_indep = numeric(n_rows)
)

i <- 0L
for (var_p in depending_var) {
  for (var in ind_var) {
    i <- i   1L
    fmla <- reformulate(var, response = var_p)
    x1 <- lm(fmla, data = Ech_final_nom_SD, weights = pond)
    x[i, "dependent"] <- var_p
    x[i, "independent"] <- var
    x[i, "pval_intercept"] <- summary(x1)$coefficients[1, 4]
    x[i, "pval_indep"] <- summary(x1)$coefficients[2, 4]
  }
}
x
#>    dependent  independent pval_intercept pval_indep
#> 1         Q1          Man   9.160936e-04 0.80277173
#> 2         Q1 Below_Master   2.026789e-03 0.18690481
#> 3         Q1       Worker   2.231686e-04 0.34553732
#> 4         Q1        Rural   2.331972e-04 0.54473730
#> 5         Q1 Less_than_26   1.768920e-02 0.05845715
#> 6         Q2          Man   3.287724e-05 0.27313922
#> 7         Q2 Below_Master   8.982180e-05 0.72446583
#> 8         Q2       Worker   3.896158e-05 0.56210550
#> 9         Q2        Rural   1.712317e-05 0.38768132
#> 10        Q2 Less_than_26   7.359017e-04 0.93892908
#> 11        Q3          Man   6.976609e-04 0.07359019
#> 12        Q3 Below_Master   3.373039e-02 0.26225475
#> 13        Q3       Worker   1.177118e-02 0.74104616
#> 14        Q3        Rural   3.748440e-03 0.63631632
#> 15        Q3 Less_than_26   3.502726e-02 0.90655491
#> 16        Q4          Man   1.337132e-03 0.47136168
#> 17        Q4 Below_Master   8.923744e-05 0.12647490
#> 18        Q4       Worker   2.199469e-04 0.46193445
#> 19        Q4        Rural   2.862687e-04 1.00000000
#> 20        Q4 Less_than_26   8.392753e-03 0.43036607
#> 21        Q5          Man   3.584230e-05 0.11143429
#> 22        Q5 Below_Master   3.701074e-04 1.00000000
#> 23        Q5       Worker   2.603420e-04 0.35588376
#> 24        Q5        Rural   1.346479e-04 0.74828807
#> 25        Q5 Less_than_26   2.975062e-03 0.62876991
#> 26        Q6          Man   2.911220e-03 0.06558768
#> 27        Q6 Below_Master   1.270703e-03 0.74043945
#> 28        Q6       Worker   1.257419e-03 0.78704595
#> 29        Q6        Rural   7.220649e-04 0.82866767
#> 30        Q6 Less_than_26   5.691830e-03 0.82866767

Created on 2022-08-22 by the reprex package (v2.0.1)


And a solution with a double lapply loop.

res <- lapply(depending_var, \(y) {
  lapply(ind_var, \(x) {
    fit <- lm(reformulate(x, y), data = Ech_final_nom_SD, weights = pond)
    data.frame(
      dependent = y,
      independent = x,
      pval_intercept = summary(fit)$coefficients[1, 4],
      pval_indep = summary(fit)$coefficients[2, 4]
    )
  })
})
res <- do.call(rbind, unlist(res, recursive = FALSE))
res
#>    dependent  independent pval_intercept pval_indep
#> 1         Q1          Man   9.160936e-04 0.80277173
#> 2         Q1 Below_Master   2.026789e-03 0.18690481
#> 3         Q1       Worker   2.231686e-04 0.34553732
#> 4         Q1        Rural   2.331972e-04 0.54473730
#> 5         Q1 Less_than_26   1.768920e-02 0.05845715
#> 6         Q2          Man   3.287724e-05 0.27313922
#> 7         Q2 Below_Master   8.982180e-05 0.72446583
#> 8         Q2       Worker   3.896158e-05 0.56210550
#> 9         Q2        Rural   1.712317e-05 0.38768132
#> 10        Q2 Less_than_26   7.359017e-04 0.93892908
#> 11        Q3          Man   6.976609e-04 0.07359019
#> 12        Q3 Below_Master   3.373039e-02 0.26225475
#> 13        Q3       Worker   1.177118e-02 0.74104616
#> 14        Q3        Rural   3.748440e-03 0.63631632
#> 15        Q3 Less_than_26   3.502726e-02 0.90655491
#> 16        Q4          Man   1.337132e-03 0.47136168
#> 17        Q4 Below_Master   8.923744e-05 0.12647490
#> 18        Q4       Worker   2.199469e-04 0.46193445
#> 19        Q4        Rural   2.862687e-04 1.00000000
#> 20        Q4 Less_than_26   8.392753e-03 0.43036607
#> 21        Q5          Man   3.584230e-05 0.11143429
#> 22        Q5 Below_Master   3.701074e-04 1.00000000
#> 23        Q5       Worker   2.603420e-04 0.35588376
#> 24        Q5        Rural   1.346479e-04 0.74828807
#> 25        Q5 Less_than_26   2.975062e-03 0.62876991
#> 26        Q6          Man   2.911220e-03 0.06558768
#> 27        Q6 Below_Master   1.270703e-03 0.74043945
#> 28        Q6       Worker   1.257419e-03 0.78704595
#> 29        Q6        Rural   7.220649e-04 0.82866767
#> 30        Q6 Less_than_26   5.691830e-03 0.82866767

Created on 2022-08-22 by the reprex package (v2.0.1)


But the simplest solution is to remember that lm can fit multiple regression models and fit all responses at once.

  resp <- as.matrix(Ech_final_nom_SD[depending_var])
  out <- lapply(ind_var, \(x) {
    fit <- lm(resp ~ get(x), data = Ech_final_nom_SD)
    cfs <- sapply(summary(fit), \(s) s$coefficients[, 4])  
    rownames(cfs)[2] <- x
    cfs
  })
  do.call(rbind.data.frame, out)
#>               Response Q1  Response Q2  Response Q3  Response Q4  Response Q5  Response Q6
#> (Intercept)  0.0009160936 3.287724e-05 0.0006976609 1.337132e-03 0.0000358423 0.0029112201
#> Man          0.8027717312 2.731392e-01 0.0735901860 4.713617e-01 0.1114342894 0.0655876769
#> (Intercept)1 0.0020267889 8.982180e-05 0.0337303906 8.923744e-05 0.0003701074 0.0012707033
#> Below_Master 0.1869048103 7.244658e-01 0.2622547504 1.264749e-01 1.0000000000 0.7404394537
#> (Intercept)2 0.0002231686 3.896158e-05 0.0117711783 2.199469e-04 0.0002603420 0.0012574186
#> Worker       0.3455373232 5.621055e-01 0.7410461602 4.619345e-01 0.3558837647 0.7870459482
#> (Intercept)3 0.0002331972 1.712317e-05 0.0037484405 2.862687e-04 0.0001346479 0.0007220649
#> Rural        0.5447373008 3.876813e-01 0.6363163232 1.000000e 00 0.7482880706 0.8286676662
#> (Intercept)4 0.0176892034 7.359017e-04 0.0350272586 8.392753e-03 0.0029750616 0.0056918299
#> Less_than_26 0.0584571528 9.389291e-01 0.9065549125 4.303661e-01 0.6287699080 0.8286676662

Created on 2022-08-22 by the reprex package (v2.0.1)


Data

x<-'ID  Man     Below_Master    Worker  Rural   Less_than_26    Q1  Q2  Q3  Q4  Q5  Q6
1   1   1   0   0   1   4   3   1   2   2   1
2   0   0   0   1   1   2   2   2   2   3   1
3   0   1   0   0   1   4   3   1   1   3   1
4   0   0   1   0   0   1   4   1   2   3   1
5   0   1   1   0   1   4   3   4   2   3   2
6   1   0   1   1   1   3   2   1   2   2   2
7   0   1   0   0   0   3   3   3   2   1   2
8   1   0   0   0   1   3   3   1   4   1   3
9   1   0   0   1   1   2   3   1   2   2   3
10  1   1   1   0   0   1   1   1   1   2   3'
Ech_final_nom_SD <- read.table(textConnection(x), header = TRUE)
pond <- rep(1, nrow(Ech_final_nom_SD))

Created on 2022-08-22 by the reprex package (v2.0.1)

  • Related