r nested glm with two subgroups-CodePudding

I am trying to run a simple glm model like this.

         library(dplyr)
         library(purrr)
         library(tidyr)
         library(broom)

         data("mtcars")
         head(mtcars)
         
         mtcars$Name <- row.names(mtcars)
         row.names(mtcars) <- NULL

         glm(mpg ~ wt, data=mtcars)

No issues so far.

Next I am trying to run this model on every subgroup of gear i.e gear=3, gear=4, gear=5 so I am running my glm model within a dlply function like this.


Model1 <- plyr::dlply(mtcars, "gear",
            function(x)
              tryCatch(
                glm(mpg ~ wt,
                    data =x ),
                error = function(e) NA), .drop = TRUE)

SummaryCars <- map2_df(Model1,
                        names(Model1),
                          ~broom::tidy(.x, confint = TRUE)[2,] %>%
                              mutate(gear = .y))

Now I have a third subgroup carb. This variable has 6 levels

table(mtcars$carb)

1  2  3  4  6  8 
7 10  3 10  1  1

Exclude carb levels 6 and 8. I like to run my model on carb levels 1,2,3,&4. For each level of gear. But I like to exclude one level of carb during each iteration.

Model1 - Carb Level 1,2,3 (Exclude data from carb= 4)

 ```
 Model1 <- plyr::dlply(mtcars, "gear",
        function(x)
          tryCatch(
            glm(mpg ~ wt,
                data =x ),
            error = function(e) NA), .drop = TRUE)

  SummaryCars <- map2_df(Model1,
                    names(Model1),
                      ~broom::tidy(.x, confint = TRUE)[2,] %>%
                          mutate(gear = .y))
   ```

**Model2 ** - Carb Level 1,2,4 (Exclude data from carb= 3)

 ```
 Model1 <- plyr::dlply(mtcars, "gear",
        function(x)
          tryCatch(
            glm(mpg ~ wt,
                data =x ),
            error = function(e) NA), .drop = TRUE)

  SummaryCars <- map2_df(Model1,
                    names(Model1),
                      ~broom::tidy(.x, confint = TRUE)[2,] %>%
                          mutate(gear = .y))
   ```

**Model3 ** - Carb Level 1,3,4 (Exclude data from carb= 2)

 ```
 Model1 <- plyr::dlply(mtcars, "gear",
        function(x)
          tryCatch(
            glm(mpg ~ wt,
                data =x ),
            error = function(e) NA), .drop = TRUE)

  SummaryCars <- map2_df(Model1,
                    names(Model1),
                      ~broom::tidy(.x, confint = TRUE)[2,] %>%
                          mutate(gear = .y))
   ```

**Mode4l ** - Carb Level 2,3,4 (Exclude data from carb= 1)

 ```
 Model1 <- plyr::dlply(mtcars, "gear",
        function(x)
          tryCatch(
            glm(mpg ~ wt,
                data =x ),
            error = function(e) NA), .drop = TRUE)

  SummaryCars <- map2_df(Model1,
                    names(Model1),
                      ~broom::tidy(.x, confint = TRUE)[2,] %>%
                          mutate(gear = .y))
   ```

As you can see I can run the model within each levels of Gear (3,4,5) but I am not sure how to add another loop on top of this where data from one level is exclude and rest are considered.

Expected Final Results

         Model            SubModel     Estimate    Lower(CI)      Upper(CI)    stdError   p
         Exclude carb= 4  Gear = 3     xxx         xxxx           xxxx         xxx        x
         Exclude carb= 4  Gear = 4     xxx         xxxx           xxxx         xxx        x
         Exclude carb= 4  Gear = 5     xxx         xxxx           xxxx         xxx        x

         Exclude carb= 3  Gear = 3     xxx         xxxx           xxxx         xxx        x
         Exclude carb= 3  Gear = 4     xxx         xxxx           xxxx         xxx        x
         Exclude carb= 3  Gear = 5     xxx         xxxx           xxxx         xxx        x

         Exclude carb= 2  Gear = 3     xxx         xxxx           xxxx         xxx        x
         Exclude carb= 2  Gear = 4     xxx         xxxx           xxxx         xxx        x
         Exclude carb= 2  Gear = 5     xxx         xxxx           xxxx         xxx        x

         Exclude carb= 1  Gear = 3     xxx         xxxx           xxxx         xxx        x
         Exclude carb= 1  Gear = 4     xxx         xxxx           xxxx         xxx        x
         Exclude carb= 1  Gear = 5     xxx         xxxx           xxxx         xxx        x

Any help is much appreciated. Thanks in advance.

I have included the code in my question showing what I have tried.

CodePudding user response：

Using nested purrr::map_dfr() calls around broom::tidy(lm()):

library(dplyr)
library(purrr)
library(broom)

# rmv carb == 6 or 8, then split by gear
mtc_gears <- filter(mtcars, carb <= 4)
mtc_gears <- split(mtc_gears, mtc_gears$gear)

map_dfr(
  1:4,
  \(carb_drop) map_dfr(
    mtc_gears,
    \(mtc_gear) tidy(
      lm(
        mpg ~ wt, 
        data = filter(mtc_gear, carb != carb_drop)
      ),
      conf.int = TRUE
    ),
    .id = "Gear"
  ),
  .id = "Exclude Carb"
)

# A tibble: 24 × 9
   `Exclude Carb` Gear  term     estim…¹ std.e…² stati…³ p.value conf.…⁴ conf.…⁵
   <chr>          <chr> <chr>      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
 1 1              3     (Interc…   25.1    3.51     7.14 3.15e-5   17.2   32.9  
 2 1              3     wt         -2.44   0.842   -2.90 1.59e-2   -4.32  -0.563
 3 1              4     (Interc…   37.8    4.39     8.61 1.35e-4   27.0   48.5  
 4 1              4     wt         -5.37   1.49    -3.60 1.14e-2   -9.02  -1.72 
 5 1              5     (Interc…   44.4    1.82    24.3  2.62e-2   21.2   67.5  
 6 1              5     wt         -8.92   0.769  -11.6  5.47e-2  -18.7    0.846
 7 2              3     (Interc…   28.9    3.01     9.58 5.11e-6   22.1   35.7  
 8 2              3     wt         -3.28   0.733   -4.47 1.55e-3   -4.93  -1.62 
 9 2              4     (Interc…   45.9    5.41     8.48 1.48e-4   32.6   59.1  
10 2              4     wt         -8.31   2.04    -4.07 6.61e-3  -13.3   -3.31 
# … with 14 more rows, and abbreviated variable names ¹estimate, ²std.error,
#   ³statistic, ⁴conf.low, ⁵conf.high
Warning messages:
1: In qt(a, object$df.residual) : NaNs produced
2: In qt(a, object$df.residual) : NaNs produced

CodePudding user response：

You could do:

mtcars %>%
  filter(!carb %in% c(6,8))%>%
  mutate(Exclude = list(sort(unique(carb))))%>%
  group_by(gear)%>%
  summarise(.groups = 'drop',
            Exclude = Exclude[[1]],
            result = map(Exclude, 
                  ~tidy(glm(mpg~wt, data = cur_data(), subset = carb!=.x))))%>%
  unnest(result)
# A tibble: 24 × 7
    gear Exclude term        estimate std.e…¹ stati…² p.value
   <dbl>   <dbl> <chr>          <dbl>   <dbl>   <dbl>   <dbl>
 1     3       1 (Intercept)    25.1    3.51     7.14 3.15e-5
 2     3       1 wt             -2.44   0.842   -2.90 1.59e-2
 3     3       2 (Intercept)    28.9    3.01     9.58 5.11e-6
 4     3       2 wt             -3.28   0.733   -4.47 1.55e-3
 5     3       3 (Intercept)    28.4    3.15     9.04 3.96e-6
 6     3       3 wt             -3.18   0.786   -4.04 2.36e-3
 7     3       4 (Intercept)    29.8    5.18     5.76 4.25e-4
 8     3       4 wt             -3.43   1.47    -2.33 4.82e-2
 9     4       1 (Intercept)    37.8    4.39     8.61 1.35e-4
10     4       1 wt             -5.37   1.49    -3.60 1.14e-2
# … with 14 more rows, and abbreviated variable names
#   ¹std.error, ²statistic
# ℹ Use `print(n = ...)` to see more rows