Home > Enterprise >  Predict new values with lm in R
Predict new values with lm in R

Time:09-28

I'm trying to fit multiple lm models to my data and then plot them with some new predicted values. The data I work with is:

    structure(list(group = c("Group_1", "Group_1", "Group_1", "Group_1", 
"Group_2", "Group_2", "Group_2", "Group_2", "Group_3", "Group_3", 
"Group_3", "Group_3"), numb = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 
3, 4), total = c(616597744.48, 516080403.54476, 990894258.72, 
923317167.70895, 3850620416.96513, 3823237639.55, 4150030206.48, 
4317861944.93, 6403590027.27012, 6078175252.18719, 6951291610.00877, 
6432993298.93888)), class = c("grouped_df", "tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -12L), groups = structure(list(
    group = c("Group_1", "Group_2", "Group_3"), .rows = structure(list(
        1:4, 5:8, 9:12), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))

I used the forecasting

2. Model coefficients and etc

test_data_regression <- test_data %>% 
  group_by(group, numb) %>%
  summarize(total = sum(total)) %>% 
  nest() %>% 
  mutate(model = map(data, ~ lm(total ~ numb, data = .)),
         tidied = map(model, tidy),
         glanced = map(model, glance),
         augmented = map(model, augment)
         )  

test_data_regression %>% unnest(tidied)

  group   data             model  term           estimate  std.error statistic p.value glanced           augmented       
  <chr>   <list>           <list> <chr>             <dbl>      <dbl>     <dbl>   <dbl> <list>            <list>          
1 Group_1 <tbl_df [4 x 2]> <lm>   (Intercept)  412979362. 216915429.     1.90  0.197   <tbl_df [1 x 12]> <tbl_df [4 x 8]>
2 Group_1 <tbl_df [4 x 2]> <lm>   numb         139497212.  79206316.     1.76  0.220   <tbl_df [1 x 12]> <tbl_df [4 x 8]>
3 Group_2 <tbl_df [4 x 2]> <lm>   (Intercept) 3603308264. 130458653.    27.6   0.00131 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
4 Group_2 <tbl_df [4 x 2]> <lm>   numb         172851715.  47636765.     3.63  0.0683  <tbl_df [1 x 12]> <tbl_df [4 x 8]>
5 Group_3 <tbl_df [4 x 2]> <lm>   (Intercept) 6226181004. 508447621.    12.2   0.00660 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
6 Group_3 <tbl_df [4 x 2]> <lm>   numb          96132617. 185658821.     0.518 0.656   <tbl_df [1 x 12]> <tbl_df [4 x 8]>

test_data_regression %>% unnest(glanced)

  group   data  model tidied r.squared adj.r.squared  sigma statistic p.value    df logLik   AIC   BIC deviance df.residual
  <chr>   <lis> <lis> <list>     <dbl>         <dbl>  <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>    <dbl>       <int>
1 Group_1 <tbl~ <lm>  <tbl_~     0.608         0.412 1.77e8     3.10   0.220      1  -80.3  167.  165.  6.27e16           2
2 Group_2 <tbl~ <lm>  <tbl_~     0.868         0.802 1.07e8    13.2    0.0683     1  -78.2  162.  161.  2.27e16           2
3 Group_3 <tbl~ <lm>  <tbl_~     0.118        -0.323 4.15e8     0.268  0.656      1  -83.7  173.  171.  3.45e17           2

test_data_regression %>% unnest(augmented)

   group   data             model  tidied           glanced    total  numb .fitted  .resid  .hat .sigma .cooksd .std.resid
   <chr>   <list>           <list> <list>           <list>     <dbl> <dbl>   <dbl>   <dbl> <dbl>  <dbl>   <dbl>      <dbl>
 1 Group_1 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  6.17e8     1  5.52e8  6.41e7   0.7 2.21e8  0.510       0.661
 2 Group_1 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  5.16e8     2  6.92e8 -1.76e8   0.3 1.36e8  0.302      -1.19 
 3 Group_1 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  9.91e8     3  8.31e8  1.59e8   0.3 1.63e8  0.248       1.08 
 4 Group_1 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  9.23e8     4  9.71e8 -4.77e7   0.7 2.35e8  0.282      -0.491
 5 Group_2 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  3.85e9     1  3.78e9  7.45e7   0.7 6.49e7  1.90        1.28 
 6 Group_2 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  3.82e9     2  3.95e9 -1.26e8   0.3 9.69e6  0.427      -1.41 
 7 Group_2 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  4.15e9     3  4.12e9  2.82e7   0.3 1.47e8  0.0214      0.316
 8 Group_2 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  4.32e9     4  4.29e9  2.31e7   0.7 1.45e8  0.184       0.397
 9 Group_3 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  6.40e9     1  6.32e9  8.13e7   0.7 5.68e8  0.149       0.357
10 Group_3 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  6.08e9     2  6.42e9 -3.40e8   0.3 4.23e8  0.206      -0.980
11 Group_3 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  6.95e9     3  6.51e9  4.37e8   0.3 2.69e8  0.339       1.26 
12 Group_3 <tbl_df [4 x 2]> <lm>   <tbl_df [2 x 5]> <tbl_df~  6.43e9     4  6.61e9 -1.78e8   0.7 4.89e8  0.713      -0.782

3. Prediction

additional_test_data <- data.frame(group = rep(c("Group_1","Group_2","Group_3"), each = 3), numb = rep(c(5:7),3), total = rep(0,9)) %>%
  group_by(group, numb) %>%
  nest(-group) %>%
  pull(data)
test_data %>% 
  group_by(group, numb) %>%
  summarize(total = sum(total)) %>%
  nest() %>% 
  mutate(model = map(data, ~ lm(total ~ numb, data = .)),
         predicted = map2(model, additional_test_data, predict)) %>%
  unnest(predicted)

  group   data             model    predicted
  <chr>   <list>           <list>       <dbl>
1 Group_1 <tbl_df [4 x 2]> <lm>   1110465425.
2 Group_1 <tbl_df [4 x 2]> <lm>   1249962637.
3 Group_1 <tbl_df [4 x 2]> <lm>   1389459850.
4 Group_2 <tbl_df [4 x 2]> <lm>   4467566840.
5 Group_2 <tbl_df [4 x 2]> <lm>   4640418555.
6 Group_2 <tbl_df [4 x 2]> <lm>   4813270270.
7 Group_3 <tbl_df [4 x 2]> <lm>   6706844090.
8 Group_3 <tbl_df [4 x 2]> <lm>   6802976708.
9 Group_3 <tbl_df [4 x 2]> <lm>   6899109325.
  • Related