I'm trying to loop over multiple multilevel ordered logistic regressions with random intercepts on the country
, dropping one observation at a time from the main dataset, while producing one massive, augmented tidy data frame with the results. Given that I am just having trouble with the looping, and I can produce everything one data frame at a time, I will first show what I am trying to do with two data frames. Then, I will (poorly) attempt to solve the problem with a loop, which is the part for which I would greatly appreciate your assistance.
First, let's load some libraries:
# load libraries
library(dplyr)
library(purrr)
library(tibble)
library(ordinal)
library(reprex)
library(magrittr)
Next, let's create some sample data:
# create original df in the right format
data0 = data.frame(country = rep(c("Algeria","Belgium","Canada","Denmark","England", "France"),times=10),
x1 = rep(0:1),times=30,
x2 = rnorm(n = 60, mean = 100, sd = 5),
y = rep(1:6),times=10)
data0$country = factor(data0$country)
data0$y = factor(data0$y)
data0 <- data0 %>% dplyr::select(country,x1,x2,y)
Building on this post, I'll create a custom tidier for clmm2
models:
tidy.clmm2 <-
function(fit){
results = as.data.frame(coefficients(summary(fit)))
colnames(results) = c("estimate","std.error","statistic","p.value")
results %>% tibble::rownames_to_column("term")
}
Estimate the first model, tidy it, and augment it with confidence intervals, odds ratios, and model observation number:
# model 1: get the dataset with one less observation, removing the top row
data1 <- data0 %>% dplyr::slice(-1)
# model 1: estimate
m1 <- clmm2(y ~ x1 x2, random=country, data=data1, Hess = TRUE)
summary(m1)
# model 1: get odds ratio and renames columns for model 1
or1 <- as.data.frame(exp(coef(m1)))
or1 <- or1 %>% rename(estimate.odds =1)
or1$term <- row.names(or1)
# model 1: tidy the model, get the CIs, and store the observation/model number
tidy_m1 <- tidy.clmm2(m1)
tidy_m1$obs <- m1$nobs[[1]]
tidy_m1$conf.low <- tidy_m1$estimate - (1.96 * tidy_m1$std.error)
tidy_m1$conf.high <- tidy_m1$estimate (1.96 * tidy_m1$std.error)
# model 1: merge over the odds ratios and make final data
tidy_m1 <- left_join(tidy_m1, or1, by=c("term"))
Do the same for model 2, and bind the final output with model 1:
# model 2: get the dataset with one less observation, removing the top row
data2 <- data1 %>% dplyr::slice(-1)
# model 2: estimate
m2 <- clmm2(y ~ x1 x2, random=country, data=data2, Hess = TRUE)
summary(m2)
# model 2: get odds ratio and renames columns for model 1
or2 <- as.data.frame(exp(coef(m2)))
or2 <- or2 %>% rename(estimate.odds =1)
or2$term <- row.names(or2)
# model 2: tidy the model, get the CIs, and store the observation/model number
tidy_m2 <- tidy.clmm2(m2)
tidy_m2$obs <- m2$nobs[[1]]
tidy_m2$conf.low <- tidy_m2$estimate - (1.96 * tidy_m2$std.error)
tidy_m2$conf.high <- tidy_m2$estimate (1.96 * tidy_m2$std.error)
# model 2: merge over the odds ratios and make final data
tidy_m2 <- left_join(tidy_m2, or2, by=c("term"))
## Bind everything together to get a final data frame
tidy_final <- dplyr::bind_rows(tidy_m1,tidy_m2)
Here is my (poor) attempt at a loop, building on this excellent post:
# create vector to store observation/data frame numbers
vector <- 1:59
# start of model
df_final <- purrr::map_dfr(1:59,
function(i) data.frame(model = i,
tidy.clmm2(glm(as.formula(paste0('y ~ ', i)),
random=country,
Hess = TRUE,
data = data[vector]))))
How do I fix the above in line with what I was doing one model at a time? Don't worry about the NaN
s, as I have only provided a highly-stylized example to facilitate responses.
CodePudding user response:
First I would improve your tidier function to do all of the extra tidying you were doing for each model (I'm assuming a tidier for this model class doesn't already exist in some package or other):
tidy.clmm2 <-
function(fit){
results = as.data.frame(coefficients(summary(fit)))
colnames(results) = c("estimate","std.error","statistic","p.value")
tidy_fit <- results %>% tibble::rownames_to_column("term")
or1 <- as.data.frame(exp(coef(fit)))
or1 <- or1 %>% rename(estimate.odds =1)
or1$term <- row.names(or1)
#tidy the model, get the CIs, and store the observation/model number
tidy_fit$obs <- fit$nobs[[1]]
tidy_fit$conf.low <- tidy_fit$estimate - (1.96 * tidy_fit$std.error)
tidy_fit$conf.high <- tidy_fit$estimate (1.96 * tidy_fit$std.error)
#merge over the odds ratios and make final data
left_join(tidy_fit, or1, by=c("term"))
}
I don't know purrr
, but you can then use the base function lapply
(which I guess is similar) to estimate the model and the get the tidy parameters on successively smaller datasets, befor passing the results to bind_rows
. Here I only do it for the first 10:
df_final <- lapply(1:10, function(i) {
mod <- clmm2(y ~ x1 x2, random=country, data=data0[(i 1):60,], Hess = TRUE)
tidy.clmm2(mod)
}) %>% bind_rows(, .id="Dataset")
> df_final
Dataset term estimate std.error statistic p.value obs conf.low conf.high estimate.odds
1 1 1|2 -6.438544e 01 NaN NaN NaN 59 NaN NaN 1.090838e-28
2 1 2|3 -3.518858e 01 NaN NaN NaN 59 NaN NaN 5.221475e-16
3 1 3|4 2.961620e-01 NaN NaN NaN 59 NaN NaN 1.344688e 00
4 1 4|5 2.971124e 01 2.207366e 01 1.346004e 00 1.783011e-01 59 -13.55313129 72.97561932 8.006253e 12
5 1 5|6 7.273474e 01 NaN NaN NaN 59 NaN NaN 3.875214e 31
6 1 x1 2.054609e 01 NaN NaN NaN 59 NaN NaN 8.376306e 08
7 1 x2 -8.899366e-02 3.978817e-03 -2.236686e 01 8.275733e-111 59 -0.09679214 -0.08119518 9.148514e-01
8 2 1|2 -7.093800e 01 NaN NaN NaN 58 NaN NaN 1.556034e-31
9 2 2|3 -2.937359e 01 7.293763e 00 -4.027221e 00 5.644000e-05 58 -43.66936969 -15.07781910 1.750693e-13
10 2 3|4 -4.496101e 00 5.083061e 00 -8.845263e-01 3.764122e-01 58 -14.45889969 5.46669816 1.115240e-02
11 2 4|5 2.637413e 01 1.028467e 00 2.564412e 01 4.917528e-145 58 24.35833179 28.38992198 2.845364e 11
12 2 5|6 8.163547e 01 5.497370e-02 1.484991e 03 0.000000e 00 58 81.52771782 81.74321472 2.843364e 35
13 2 x1 1.821630e 01 5.424801e 00 3.357967e 00 7.851802e-04 58 7.58369175 28.84891081 8.151530e 07
14 2 x2 -7.476304e-02 NaN NaN NaN 58 NaN NaN 9.279633e-01
15 3 1|2 -1.105259e 02 NaN NaN NaN 57 NaN NaN 9.981611e-49
16 3 2|3 -4.660975e 01 NaN NaN NaN 57 NaN NaN 5.723279e-21
17 3 3|4 -1.713909e 00 5.880686e 00 -2.914471e-01 7.707094e-01 57 -13.24005308 9.81223521 1.801602e-01
18 3 4|5 4.382707e 01 NaN NaN NaN 57 NaN NaN 1.081073e 19
19 3 5|6 1.265335e 02 3.425069e-03 3.694335e 04 0.000000e 00 57 126.52681215 126.54023843 8.970400e 54
20 3 x1 3.988252e 01 5.999608e-04 6.647520e 04 0.000000e 00 57 39.88133918 39.88369103 2.092937e 17
21 3 x2 -2.175413e-01 2.615869e-04 -8.316215e 02 0.000000e 00 57 -0.21805405 -0.21702863 8.044943e-01
22 4 1|2 -6.646351e 01 NaN NaN NaN 56 NaN NaN 1.365411e-29
23 4 2|3 -3.064726e 01 1.722949e 01 -1.778768e 00 7.527783e-02 56 -64.41706724 3.12253810 4.898489e-14
24 4 3|4 -3.327152e 00 NaN NaN NaN 56 NaN NaN 3.589518e-02
25 4 4|5 3.188512e 01 NaN NaN NaN 56 NaN NaN 7.039323e 13
26 4 5|6 7.214246e 01 NaN NaN NaN 56 NaN NaN 2.143249e 31
27 4 x1 2.524262e 01 1.420819e 01 1.776624e 00 7.563011e-02 56 -2.60544100 53.09068131 9.177632e 10
28 4 x2 -1.139253e-01 5.874164e-04 -1.939430e 02 0.000000e 00 56 -0.11507663 -0.11277396 8.923246e-01
29 5 1|2 -5.520472e 01 1.024174e-03 -5.390171e 04 0.000000e 00 55 -55.20673057 -55.20271580 1.058994e-24
30 5 2|3 -3.324923e 01 NaN NaN NaN 55 NaN NaN 3.631150e-15
31 5 3|4 7.955460e-01 5.817109e 00 1.367597e-01 8.912208e-01 55 -10.60598823 12.19708026 2.215650e 00
32 5 4|5 2.816002e 01 5.061437e 00 5.563641e 00 2.642027e-08 55 18.23960201 38.08043320 1.697228e 12
33 5 5|6 6.669283e 01 NaN NaN NaN 55 NaN NaN 9.211492e 28
34 5 x1 3.351064e 01 1.024452e-03 3.271080e 04 0.000000e 00 55 33.50863540 33.51265125 3.576741e 14
35 5 x2 -1.850497e-01 1.173298e-03 -1.577175e 02 0.000000e 00 55 -0.18734938 -0.18275005 8.310630e-01
36 6 1|2 -3.286871e 01 NaN NaN NaN 54 NaN NaN 5.312531e-15
37 6 2|3 -1.394418e 01 NaN NaN NaN 54 NaN NaN 8.792619e-07
38 6 3|4 1.402239e 01 NaN NaN NaN 54 NaN NaN 1.229831e 06
39 6 4|5 4.198923e 01 NaN NaN NaN 54 NaN NaN 1.720640e 18
40 6 5|6 6.091421e 01 NaN NaN NaN 54 NaN NaN 2.849095e 26
41 6 x1 2.791695e 01 9.151861e 00 3.050412e 00 2.285273e-03 54 9.97930271 45.85459762 1.330998e 12
42 6 x2 6.368881e-04 NaN NaN NaN 54 NaN NaN 1.000637e 00
43 7 1|2 -8.540551e 01 NaN NaN NaN 53 NaN NaN 8.106962e-38
44 7 2|3 -4.981897e 01 NaN NaN NaN 53 NaN NaN 2.311503e-22
45 7 3|4 1.323464e-02 NaN NaN NaN 53 NaN NaN 1.013323e 00
46 7 4|5 4.199212e 01 3.930682e 00 1.068317e 01 1.220384e-26 53 34.28798714 49.69626009 1.725630e 18
47 7 5|6 9.757102e 01 NaN NaN NaN 53 NaN NaN 2.368942e 42
48 7 x1 2.742401e 01 NaN NaN NaN 53 NaN NaN 8.130075e 11
49 7 x2 -1.149872e-01 NaN NaN NaN 53 NaN NaN 8.913776e-01
50 8 1|2 -3.428518e 01 2.418647e 01 -1.417536e 00 1.563264e-01 52 -81.69065068 13.12029594 1.288655e-15
51 8 2|3 -1.468742e 01 2.158542e 01 -6.804326e-01 4.962306e-01 52 -56.99484187 27.61999711 4.181514e-07
52 8 3|4 -2.123523e 00 2.585943e 01 -8.211793e-02 9.345529e-01 52 -52.80800668 48.56096089 1.196095e-01
53 8 4|5 1.428943e 01 2.595126e 01 5.506258e-01 5.818902e-01 52 -36.57503632 65.15390302 1.606283e 06
54 8 5|6 3.811502e 01 1.071360e 00 3.557631e 01 3.257489e-277 52 36.01515820 40.21488826 3.573915e 16
55 8 x1 8.597913e 00 2.399747e 01 3.582841e-01 7.201307e-01 52 -38.43712837 55.63295352 5.420333e 03
56 8 x2 -3.759098e-02 NaN NaN NaN 52 NaN NaN 9.631068e-01
57 9 1|2 -1.381079e 02 NaN NaN NaN 51 NaN NaN 1.048377e-60
58 9 2|3 -5.910938e 01 NaN NaN NaN 51 NaN NaN 2.133649e-26
59 9 3|4 -9.084277e 00 NaN NaN NaN 51 NaN NaN 1.134354e-04
60 9 4|5 5.875811e 01 NaN NaN NaN 51 NaN NaN 3.298542e 25
61 9 5|6 1.564752e 02 NaN NaN NaN 51 NaN NaN 9.043358e 67
62 9 x1 4.528686e 01 NaN NaN NaN 51 NaN NaN 4.654054e 19
63 9 x2 -2.132578e-01 2.314244e-05 -9.215010e 03 0.000000e 00 51 -0.21330318 -0.21321246 8.079478e-01
64 10 1|2 -5.537366e 01 2.398432e 01 -2.308744e 00 2.095778e-02 50 -102.38292260 -8.36439034 8.943892e-25
65 10 2|3 -2.567911e 01 1.644416e 01 -1.561594e 00 1.183836e-01 50 -57.90967366 6.55145288 7.042130e-12
66 10 3|4 -3.561160e 00 1.421535e 01 -2.505152e-01 8.021890e-01 50 -31.42323652 24.30091746 2.840587e-02
67 10 4|5 2.622061e 01 1.739894e 01 1.507024e 00 1.318045e-01 50 -7.88130243 60.32253224 2.440441e 11
68 10 5|6 6.147091e 01 NaN NaN NaN 50 NaN NaN 4.971402e 26
69 10 x1 1.720323e 01 NaN NaN NaN 50 NaN NaN 2.959845e 07
70 10 x2 -6.799849e-02 NaN NaN NaN 50 NaN NaN 9.342619e-01