I'm trying to fit multiple lm models to my data and then plot them with some new predicted values. The data I work with is:
structure(list(group = c("Group_1", "Group_1", "Group_1", "Group_1",
"Group_2", "Group_2", "Group_2", "Group_2", "Group_3", "Group_3",
"Group_3", "Group_3"), numb = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4), total = c(616597744.48, 516080403.54476, 990894258.72,
923317167.70895, 3850620416.96513, 3823237639.55, 4150030206.48,
4317861944.93, 6403590027.27012, 6078175252.18719, 6951291610.00877,
6432993298.93888)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -12L), groups = structure(list(
group = c("Group_1", "Group_2", "Group_3"), .rows = structure(list(
1:4, 5:8, 9:12), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))
2. Model coefficients and etc
test_data_regression <- test_data %>%
group_by(group, numb) %>%
summarize(total = sum(total)) %>%
nest() %>%
mutate(model = map(data, ~ lm(total ~ numb, data = .)),
tidied = map(model, tidy),
glanced = map(model, glance),
augmented = map(model, augment)
)
test_data_regression %>% unnest(tidied)
group data model term estimate std.error statistic p.value glanced augmented
<chr> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Group_1 <tbl_df [4 x 2]> <lm> (Intercept) 412979362. 216915429. 1.90 0.197 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
2 Group_1 <tbl_df [4 x 2]> <lm> numb 139497212. 79206316. 1.76 0.220 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
3 Group_2 <tbl_df [4 x 2]> <lm> (Intercept) 3603308264. 130458653. 27.6 0.00131 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
4 Group_2 <tbl_df [4 x 2]> <lm> numb 172851715. 47636765. 3.63 0.0683 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
5 Group_3 <tbl_df [4 x 2]> <lm> (Intercept) 6226181004. 508447621. 12.2 0.00660 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
6 Group_3 <tbl_df [4 x 2]> <lm> numb 96132617. 185658821. 0.518 0.656 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
test_data_regression %>% unnest(glanced)
group data model tidied r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual
<chr> <lis> <lis> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Group_1 <tbl~ <lm> <tbl_~ 0.608 0.412 1.77e8 3.10 0.220 1 -80.3 167. 165. 6.27e16 2
2 Group_2 <tbl~ <lm> <tbl_~ 0.868 0.802 1.07e8 13.2 0.0683 1 -78.2 162. 161. 2.27e16 2
3 Group_3 <tbl~ <lm> <tbl_~ 0.118 -0.323 4.15e8 0.268 0.656 1 -83.7 173. 171. 3.45e17 2
test_data_regression %>% unnest(augmented)
group data model tidied glanced total numb .fitted .resid .hat .sigma .cooksd .std.resid
<chr> <list> <list> <list> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Group_1 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.17e8 1 5.52e8 6.41e7 0.7 2.21e8 0.510 0.661
2 Group_1 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 5.16e8 2 6.92e8 -1.76e8 0.3 1.36e8 0.302 -1.19
3 Group_1 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 9.91e8 3 8.31e8 1.59e8 0.3 1.63e8 0.248 1.08
4 Group_1 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 9.23e8 4 9.71e8 -4.77e7 0.7 2.35e8 0.282 -0.491
5 Group_2 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 3.85e9 1 3.78e9 7.45e7 0.7 6.49e7 1.90 1.28
6 Group_2 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 3.82e9 2 3.95e9 -1.26e8 0.3 9.69e6 0.427 -1.41
7 Group_2 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 4.15e9 3 4.12e9 2.82e7 0.3 1.47e8 0.0214 0.316
8 Group_2 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 4.32e9 4 4.29e9 2.31e7 0.7 1.45e8 0.184 0.397
9 Group_3 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.40e9 1 6.32e9 8.13e7 0.7 5.68e8 0.149 0.357
10 Group_3 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.08e9 2 6.42e9 -3.40e8 0.3 4.23e8 0.206 -0.980
11 Group_3 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.95e9 3 6.51e9 4.37e8 0.3 2.69e8 0.339 1.26
12 Group_3 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.43e9 4 6.61e9 -1.78e8 0.7 4.89e8 0.713 -0.782
3. Prediction
additional_test_data <- data.frame(group = rep(c("Group_1","Group_2","Group_3"), each = 3), numb = rep(c(5:7),3), total = rep(0,9)) %>%
group_by(group, numb) %>%
nest(-group) %>%
pull(data)
test_data %>%
group_by(group, numb) %>%
summarize(total = sum(total)) %>%
nest() %>%
mutate(model = map(data, ~ lm(total ~ numb, data = .)),
predicted = map2(model, additional_test_data, predict)) %>%
unnest(predicted)
group data model predicted
<chr> <list> <list> <dbl>
1 Group_1 <tbl_df [4 x 2]> <lm> 1110465425.
2 Group_1 <tbl_df [4 x 2]> <lm> 1249962637.
3 Group_1 <tbl_df [4 x 2]> <lm> 1389459850.
4 Group_2 <tbl_df [4 x 2]> <lm> 4467566840.
5 Group_2 <tbl_df [4 x 2]> <lm> 4640418555.
6 Group_2 <tbl_df [4 x 2]> <lm> 4813270270.
7 Group_3 <tbl_df [4 x 2]> <lm> 6706844090.
8 Group_3 <tbl_df [4 x 2]> <lm> 6802976708.
9 Group_3 <tbl_df [4 x 2]> <lm> 6899109325.