I have the following data for two groups ("Healthy" and "CAD"):
df <- structure(list(VE = c(22.75, 23.75, 26.75, 32.5, 39.5, 48.25,
55.75, 24, 26.5, 29, 36.5, 44.75, 55.25, 63.75, 78, 19, 20.75,
25, 29, 39.25, 51, 25, 25.5, 28.5, 34.25, 35.5, 40.25, 47, 52.5,
59.5, 64.5, 83, 92.25, 18.25, 19.75, 20.75, 23.5, 26.25, 28,
32.75, 37.25, 24.25, 26.75, 29.5, 31, 32, 39, 43.25, 46.25, 51.75,
52.75, 64.5, 72.75, 15.75, 16.25, 20.25, 28, 34.75, 45.75, 64.75,
18.25, 21.25, 22.25, 24.75, 29.5, 21.25, 20.75, 26, 30.75, 34,
38.5, 46.75, 53.75, 70.5, 82.75, 98.25, 17.25, 22, 25.75, 29.75,
33.75, 38.75, 44.5, 50.75, 56.25, 65, 71.75, 81, 99, 30, 29,
29.75, 38.25, 40.75, 47.25, 52.75, 65.5, 22.75, 24.75, 28.75,
34, 48, 53.75, 61.5, 73.75, 22, 21.75, 24.25, 28, 34.5, 40, 48.5,
23.75, 23.75, 25.75, 29, 31.25, 33.5, 35.25, 38.5, 40.5, 50,
62.5, 77.25, 19.5, 21.25, 24.25, 30.75, 38.5, 17, 17.25, 20.75,
24.25, 28.5, 32.25, 38.75, 54.25, 21, 23.25, 28.25, 32.75, 37,
46.5, 56.25, 65.5, 24.5, 25.75, 26.75, 29.5, 33.25, 37.75, 40,
45.75, 50, 56.5, 23.75, 26.75, 29.75, 38.25, 43.75, 50.5, 58.5,
69.75, 78.25, 92.75, 109, 123.25, 22.75, 23.75, 28.5, 32.75,
39.75, 46.5, 56.5, 65.75, 76, 26.75, 26.5, 30.5, 31, 40.25, 44.75,
52.5, 60, 65, 70.75, 78, 40.5, 43.75, 50.25, 63, 75, 82, 95.25,
25.75, 27.5, 33, 37.75, 44, 53.5, 58.75, 69), percent_power = c(21.7391304347826,
34.7826086956522, 47.8260869565217, 60.8695652173913, 73.9130434782609,
86.9565217391304, 100, 19.2307692307692, 30.7692307692308, 42.3076923076923,
53.8461538461538, 65.3846153846154, 76.9230769230769, 88.4615384615385,
100, 25, 40, 55, 70, 85, 100, 13.1578947368421, 21.0526315789474,
28.9473684210526, 36.8421052631579, 44.7368421052632, 52.6315789473684,
60.5263157894737, 68.4210526315789, 76.3157894736842, 84.2105263157895,
92.1052631578947, 100, 22.2222222222222, 33.3333333333333, 44.4444444444444,
55.5555555555556, 66.6666666666667, 77.7777777777778, 88.8888888888889,
100, 15.3846153846154, 23.0769230769231, 30.7692307692308, 38.4615384615385,
46.1538461538462, 53.8461538461538, 61.5384615384615, 69.2307692307692,
76.9230769230769, 84.6153846153846, 92.3076923076923, 100, 21.7391304347826,
34.7826086956522, 47.8260869565217, 60.8695652173913, 73.9130434782609,
86.9565217391304, 100, 29.4117647058824, 47.0588235294118, 64.7058823529412,
82.3529411764706, 100, 14.2857142857143, 22.8571428571429, 31.4285714285714,
40, 48.5714285714286, 57.1428571428571, 65.7142857142857, 74.2857142857143,
82.8571428571429, 91.4285714285714, 100, 12.1951219512195, 19.5121951219512,
26.8292682926829, 34.1463414634146, 41.4634146341463, 48.780487804878,
56.0975609756098, 63.4146341463415, 70.7317073170732, 78.0487804878049,
85.3658536585366, 92.6829268292683, 100, 22.2222222222222, 33.3333333333333,
44.4444444444444, 55.5555555555556, 66.6666666666667, 77.7777777777778,
88.8888888888889, 100, 41.6666666666667, 50, 58.3333333333333,
66.6666666666667, 75, 83.3333333333333, 91.6666666666667, 100,
25, 37.5, 50, 62.5, 75, 87.5, 100, 15.3846153846154, 23.0769230769231,
30.7692307692308, 38.4615384615385, 46.1538461538462, 53.8461538461538,
61.5384615384615, 69.2307692307692, 76.9230769230769, 84.6153846153846,
92.3076923076923, 100, 33.3333333333333, 50, 66.6666666666667,
83.3333333333333, 100, 19.2307692307692, 30.7692307692308, 42.3076923076923,
53.8461538461538, 65.3846153846154, 76.9230769230769, 88.4615384615385,
100, 19.2307692307692, 30.7692307692308, 42.3076923076923, 53.8461538461538,
65.3846153846154, 76.9230769230769, 88.4615384615385, 100, 18.1818181818182,
27.2727272727273, 36.3636363636364, 45.4545454545455, 54.5454545454545,
63.6363636363636, 72.7272727272727, 81.8181818181818, 90.9090909090909,
100, 15.3846153846154, 23.0769230769231, 30.7692307692308, 38.4615384615385,
46.1538461538462, 53.8461538461538, 61.5384615384615, 69.2307692307692,
76.9230769230769, 84.6153846153846, 92.3076923076923, 100, 17.2413793103448,
27.5862068965517, 37.9310344827586, 48.2758620689655, 58.6206896551724,
68.9655172413793, 79.3103448275862, 89.6551724137931, 100, 14.2857142857143,
22.8571428571429, 31.4285714285714, 40, 48.5714285714286, 57.1428571428571,
65.7142857142857, 74.2857142857143, 82.8571428571429, 91.4285714285714,
100, 33.3333333333333, 44.4444444444444, 55.5555555555556, 66.6666666666667,
77.7777777777778, 88.8888888888889, 100, 19.2307692307692, 30.7692307692308,
42.3076923076923, 53.8461538461538, 65.3846153846154, 76.9230769230769,
88.4615384615385, 100), group = c("CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD",
"CAD", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD",
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy",
"Healthy")), row.names = c(NA, -202L), class = "data.frame")
I fitted a separate linear regression model for each group:
df_healthy <- subset(df, group == "Healthy")
df_CAD <- subset(df, group == "CAD")
lm_healthy <- glm(VE ~ poly(percent_power, 2), data = df_healthy)
lm_CAD <- glm(VE ~ poly(percent_power,2), data = df_CAD)
I then made predictions from two models:
VE_predict_healthy <- predict(lm_healthy, newdata = data.frame(percent_power = 70))
VE_predict_CAD <- predict(lm_CAD, newdata = data.frame(percent_power = 80))
Now I want to compare these 2 predicted values statistically. How should I proceed? I tried anova
and summary
but they do not give what I need.
CodePudding user response:
You need to pool the results of two independent models.
## independent models for two different groups
## let `df` be your data frame
## for Gaussian response, glm() is identical to lm()
lm_healthy <- lm(VE ~ poly(percent_power, 2), data = df,
subset = df$group == "Healthy")
lm_CAD <- lm(VE ~ poly(percent_power, 2), data = df,
subset = df$group == "CAD")
## pooled deviance (i.e., (weighted) residual sum of squares)
deviance.pooled <- deviance(lm_healthy) deviance(lm_CAD)
## pooled residual degree of freedom
rdf.pooled <- df.residual(lm_healthy) df.residual(lm_CAD)
## pooled residual standard error
se.pooled <- sqrt(deviance.pooled / rdf.pooled)
## make prediction, using pooled residual standard error for `scale`
VE_predict_healthy <- predict(lm_healthy, newdata = data.frame(percent_power = 70),
se.fit = TRUE, scale = se.pooled, df = rdf.pooled)
VE_predict_CAD <- predict(lm_CAD, newdata = data.frame(percent_power = 80),
se.fit = TRUE, scale = se.pooled, df = rdf.pooled)
## difference in mean
mean.diff <- VE_predict_healthy$fit - VE_predict_CAD$fit
#[1] -4.278526
## standard error of the difference
se.diff <- sqrt(VE_predict_healthy$se.fit ^ 2 VE_predict_CAD$se.fit ^ 2)
#[1] 2.440557
## t-statistic for the difference
t.value.diff <- mean.diff / se.diff
#[1] -1.753094
## p-value for the difference
p.value.diff <- 2 * pt(-abs(t.value.diff), df = rdf.pooled)
#[1] 0.08114925