Is there a way to make ggplot2 visualize a model with two dependent variables and multiple interacting predictors?
Let's say I have a model:
# Multivariate-response linear model: the two outcomes are bound into a
# response matrix and regressed on the full interaction of six predictors.
lm_comb1 <- lm(
  formula = cbind(MD_EARN_WNE_P10, X40.year.NPV) ~
    SAT_AVG * TUITIONFEE_OUT * AVGFACSAL *
      RET_FT4_POOLED * INEXPFTE * C100_4_POOLED,
  data = training_set_NORM_sample
)
And here's some data:
# Reproducible sample: the `dput()` output of the first 35 rows of
# `training_set_NORM_sample`, assigned directly so the snippet can be pasted
# and run as-is. Two outcome columns (MD_EARN_WNE_P10, X40.year.NPV) and six
# predictor columns.
training_set_NORM_sample <- structure(
  list(
    MD_EARN_WNE_P10 = c(
      0.309428236924474, 0.29019425130815, 0.340154827610924,
      0.131137552863594, 0.202243566769407, 0.130145987145485,
      0.0872936229183093, 0.141208515518601, 0.118486130026521,
      0.12969201729864, 0.113038491864382, 0.29583303466896,
      0.129966788521731, 0.190655389099945, 0.32601008290923,
      0.319511146154398, 0.177800874489284, 0.277662294220247,
      0.0420997300074069, 0.111043413853248, 0.167395407736602,
      0.139046185458629, 0.227701717917473, 0.205074905024729,
      0.278653859938356, 0.130062361121067, 0.092848780254701,
      0.128198695434019, 0.35046472176427, 0.229899887702196,
      0.292464100542374, 1, 0.373019855207512, 0.268797935631854,
      0.421379590471412
    ),
    SAT_AVG = c(
      0.709756097560976, 0.639024390243902, 0.686585365853659,
      0.401219512195122, 0.413414634146341, 0.423170731707317,
      0.370731707317073, 0.523170731707317, 0.485365853658537,
      0.501219512195122, 0.332926829268293, 0.592682926829268,
      0.417073170731707, 0.35609756097561, 0.601219512195122,
      0.598780487804878, 0.521951219512195, 0.629268292682927,
      0.29390243902439, 0.55, 0.548780487804878, 0.353658536585366,
      0.569512195121951, 0.715853658536585, 0.609756097560976,
      0.376829268292683, 0.396341463414634, 0.424390243902439,
      0.498780487804878, 0.56219512195122, 0.43780487804878, 1,
      0.525609756097561, 0.302439024390244, 0.495121951219512
    ),
    TUITIONFEE_OUT = c(
      0.377600752915868, 0.490739807065309, 0.505428388961716,
      0.357298914322208, 0.442842257403113, 0.33259386239118,
      0.319989244059023, 0.383348458875332, 0.4315485193775,
      0.32217404456993, 0.318308628281402, 0.534435817283453,
      0.263150818459884, 0.362407986286175, 0.476925145373265,
      0.59944203556183, 0.472084971933717, 0.417162448321065,
      0.232899734462707, 0.248899196665658, 0.251218446438775,
      0.262176061308864, 0.330829215824678, 0.782326644482538,
      0.447346307687137, 0.418473328627609, 0.285872743773319,
      0.210581156935901, 0.658297200094115, 0.687808813149138,
      0.56858592988471, 0.899969748916003, 0.728244428758697,
      0.297721085005546, 0.306309031629189
    ),
    AVGFACSAL = c(
      0.36104042974272, 0.395137121854679, 0.420412779191405,
      0.152558665535765, 0.190839694656489, 0.224257845631891,
      0.179700310998021, 0.106813683912921, 0.251795306757139,
      0.284591461690698, 0.108283856375459, 0.326604467062482,
      0.189652247667515, 0.221713316369805, 0.476844783715013,
      0.449703138252757, 0.185128640090472, 0.381962114786542,
      0.141588917161436, 0.221260955612101, 0.215889171614362,
      0.11314673452078, 0.230251625671473, 0.235849590048063,
      0.213740458015267, 0.193214588634436, 0.122420130053718,
      0.20056545094713, 0.322533220243144, 0.247271699180096,
      0.368221656771275, 0.977777777777778, 0.401413627367826,
      0.367825841108284, 0.42199604184337
    ),
    RET_FT4_POOLED = c(
      0.807788198978986, 0.861569512050338, 0.900035616763623,
      0.504333372907515, 0.59028849578535, 0.720408405556215,
      0.587914044877122, 0.718390122284222, 0.713403775376944,
      0.71827139973881, 0.669595156120147, 0.889113142585777,
      0.700581740472516, 0.681704855752107, 0.869167754956666,
      0.799952510981835, 0.649412323400214, 0.820847678974237,
      0.669595156120147, 0.724088804463968, 0.706874035379318,
      0.505876765997863, 0.828802089516799, 0.858482725869643,
      0.785943250623293, 0.664846254303692, 0.421108868574142,
      0.60845304523329, 0.779176065534845, 0.825952748426926,
      0.74154101863944, 0.987771577822629, 0.811112430250505,
      0.845779413510626, 0.870948593137837
    ),
    INEXPFTE = c(
      0.0462962253598817, 0.0577496188586444, 0.0476445847282979,
      0.0263236522152165, 0.0314566111745283, 0.0329658543312214,
      0.0389032322319178, 0.0382596970788101, 0.0737077584291613,
      0.036421025212788, 0.0401060300776073, 0.100889457515188,
      0.03451340315179, 0.0730718844088287, 0.109469926223291,
      0.0868466011384443, 0.0815297749925304, 0.0664679879566993,
      0.0396233787127765, 0.0208152977499253, 0.0442966697055827,
      0.00727807613633752, 0.0612201120057612, 0.0886086616767155,
      0.0546238766864068, 0.0409564158156425, 0.0186471971745742,
      0.0292272215369765, 0.06528051237656, 0.0768718062652744,
      0.0510537888132139, 0.787104781312965, 0.0540109860643995,
      0.0352105662343234, 0.0351416160393476
    ),
    C100_4_POOLED = c(
      0.242140845070423, 0.533295774647887, 0.531830985915493,
      0.11369014084507, 0.377577464788732, 0.18230985915493,
      0.193464788732394, 0.378366197183099, 0.312225352112676,
      0.200112676056338, 0.248112676056338, 0.694760563380282,
      0.224901408450704, 0.32056338028169, 0.532845070422535,
      0.494760563380282, 0.478197183098592, 0.504338028169014,
      0.128338028169014, 0.338704225352113, 0.251267605633803,
      0.106929577464789, 0.495887323943662, 0.711098591549296,
      0.632, 0.388169014084507, 0.339154929577465, 0.279211267605634,
      0.489915492957746, 0.597521126760563, 0.412957746478873,
      0.894760563380282, 0.698816901408451, 0.114591549295775,
      0.21487323943662
    ),
    X40.year.NPV = c(
      0.300526315789474, 0.263157894736842, 0.312631578947368,
      0.108421052631579, 0.178947368421053, 0.122105263157895,
      0.0831578947368421, 0.127894736842105, 0.105789473684211,
      0.125789473684211, 0.0784210526315789, 0.25, 0.123684210526316,
      0.130526315789474, 0.326315789473684, 0.31, 0.163157894736842,
      0.267368421052632, 0.0421052631578947, 0.107894736842105,
      0.156842105263158, 0.127894736842105, 0.208421052631579,
      0.187368421052632, 0.251052631578947, 0.126842105263158,
      0.0794736842105263, 0.121052631578947, 0.316842105263158,
      0.183684210526316, 0.263157894736842, 1, 0.337368421052632,
      0.281052631578947, 0.418421052631579
    )
  ),
  row.names = c(NA, 35L),
  class = "data.frame"
)
This is a question about ggplot2's functionality, so it doesn't particularly matter which type of plot we make; I'm trying to determine how (or whether) ggplot2 handles a model with multiple dependent variables and multiple interacting predictors.
CodePudding user response:
It is possible to show the output of an lm with two dependent variables and a single predictor variable.
Let's create an example data set to demonstrate:
library(tidyverse)
# Simulate two responses with opposite linear trends against one predictor.
# The seed is fixed so the printed values are reproducible.
set.seed(1)
# `length.out` is spelled out in full rather than relying on partial matching
# of `length`.
dv1 <- rnorm(100, seq(1, 5, length.out = 100)) * 0.25
dv2 <- rnorm(100, seq(5, 1, length.out = 100)) * 0.7
iv1 <- 1:100
df <- data.frame(dv1, dv2, iv1)
head(df)
#> dv1 dv2 iv1
#> 1 0.09338655 3.065743 1
#> 2 0.30601184 3.501198 2
#> 3 0.06129487 2.805789 3
#> 4 0.67912323 3.525772 4
#> 5 0.37278098 2.928659 5
#> 6 0.09538795 4.595687 6
And now we make our model:
# Fit one linear model with a two-column response matrix (dv1 and dv2),
# both regressed on the single predictor iv1.
mod <- lm(
  formula = cbind(dv1, dv2) ~ iv1,
  data = df
)
We can get predictions for both dvs from our model in a data frame like so:
# Fitted values for the training rows, one column per response, with the
# predictor attached so they can be plotted against it.
fitted_both <- predict(mod)
pred <- as.data.frame(fitted_both)
pred[["iv1"]] <- df[["iv1"]]
Now we can plot both the original points and the prediction lines by pivoting both the original and prediction data frames to long format:
# Plot the observed points and the fitted lines for both responses.
# Each data frame is pivoted to long format so the response name (`name`)
# can be mapped to colour. Layers must be chained with `+`; without it each
# line is a separate statement and the geoms are never added to the plot.
ggplot(pivot_longer(df, cols = 1:2), aes(iv1, value, colour = name)) +
  geom_point() +
  geom_line(data = pivot_longer(pred, cols = 1:2), linetype = 2)
Created on 2022-04-22 by the reprex package (v2.0.1)