I am trying to calculate the value of x where y = 0. I could able to do it for single x using the following code
lm.model <- lm(y ~ x)
cc <- coef(lm.model)
f <- function(x) cc[2]*x cc[1]
plot(x, y)
abline(coef(lm.model))
abline(h=0, col="blue")
(threshold <- uniroot(f, interval = c(0, 100))$root)
abline(v=threshold, col="blue")
x = c(33.05, 14.22, 15.35, 13.52, 8.7, 13.73, 8.28, 21.02, 9.97,
11.98, 12.87, 5.05, 11.23, 11.65, 10.05, 12.58, 13.88, 9.66,
4.62, 4.56, 5.35, 3.7, 3.29, 4.87, 3.75, 6.55, 4.51, 7.77, 4.7,
4.18, 25.14, 18.08, 10.41)
y = c(16.22699279, 15.78620732, 9.656361014, -17.32805679, -20.85685895,
7.601993251, -4.776053714, 10.50972236, 3.853479771, 7.713563136,
8.579366561, 14.16989395, 7.484692081, -1.2807472, -12.13759458,
-0.29138513, -5.238157067, -2.033194068, -38.12157566, -33.61912493,
-9.763657548, -0.240863712, 9.090638907, 7.345492608, 6.949676888,
-19.94866471, 0.995659732, -1.162616185, 5.497998429, 1.656653092,
2.116687436, 22.23175649, 10.33039543)
But I have multiple x variables. Now how can I apply it for multiple x variables at a time? Here is an example data
df = structure(list(y = c(16.2269927925813, 15.7862073196372, 9.65636101412767,
-17.3280567922775, -20.8568589521297, 7.6019932507973, -4.77605371404423,
10.5097223644541, 3.85347977129367, 7.71356313645697, 8.57936656085966,
14.1698939499927, 7.4846920807874, -1.28074719969249, -12.1375945758837,
-0.291385130176774, -5.23815706681139, -2.03319406769161, -38.1215756639013,
-33.6191249261727, -9.76365754821171, -0.240863712421707, 9.09063890677045,
7.34549260800693, 6.94967688778232, -19.9486647079697, 0.995659731521127,
-1.16261618452931, 5.49799842947493, 1.65665309209479, 2.11668743610013,
22.2317564898722, 10.3303954315884), x1 = c(8.56, 8.66, 9.09,
8.36, 8.3, 8.63, 8.78, 8.44, 8.34, 8.46, 8.33, 8.19, 8.58, 8.65,
8.75, 8.34, 8.77, 9.06, 9.31, 9.11, 9.26, 9.81, 9.68, 9.79, 9.26,
9.53, 8.89, 8.89, 10.37, 9.58, 10.27, 10.16, 10.27), x2 = c(164,
328.3, 0, 590.2, 406.6, 188.4, 423.8, 355.3, 337.6, 0, 0, 200.1,
0, 315.8, 547.5, 225.6, 655.7, 387.2, 0, 487.4, 400.4, 0, 234.9,
275.5, 0, 0, 613.2, 207.4, 184.4, 162.8, 220, 174.8, 0), x3 = c(4517.7,
2953.4, 2899.3, 2573.8, 3310.7, 3880.3, 3016.8, 3552.3, 2960.1,
323, 2638.5, 3343.1, 3274.7, 3218, 3268.3, 3507.9, 3709.2, 3537.5,
2634.4, 1964.6, 3333.7, 2809.7, 3326.8, 3524.5, 3893.9, 3166.7,
3992.1, 4324.7, 3077.9, 3069.9, 4218.9, 3897.4, 2693.9), x4 = c(14.7,
14.5, 15.5, 17, 16.2, 15.9, 15.7, 15.3, 13.5, 14, 15.4, 16.2,
15.6, 15.7, 15.1, 15.8, 15.3, 14.9, 15.7, 16.3, 15.21000004,
16.7, 15.6, 16.2, 15.7, 16.3, 17.3, 16.9, 15.7, 14.9, 13.81999969,
14.90754509, 12.42847157), x5 = c(28.3, 29.1, 28.3, 29.1, 28.7,
29.3, 28.9, 28.4, 29.3, 29.3, 29.1, 29, 29.9, 29.5, 28.4, 30.3,
29.1, 29.1, 29, 29.5, 29.3, 28.5, 29, 28.7, 29.4, 28.8, 29.2,
30.1, 28.3, 28.7, 24.96999931, 25.79496384, 25.3072052), x6 = c(33.05,
14.22, 15.35, 13.52, 8.7, 13.73, 8.28, 21.02, 9.97, 11.98, 12.87,
5.05, 11.23, 11.65, 10.05, 12.58, 13.88, 9.66, 4.62, 4.56, 5.35,
3.7, 3.29, 4.87, 3.75, 6.55, 4.51, 7.77, 4.7, 4.18, 25.14, 18.08,
10.41), x7 = c(13.8425, 11.1175, 8.95, 13.5375, 5.4025, 13.5625,
13.735, 14.14, 8.0875, 5.565, 12.255, 3.3075, 6.345, 4.8125,
4.0325, 11.475, 10.32, 17.71, 2.3375, 3.92, 5.7, 2.42, 8.3075,
7.4725, 7.7925, 10.8725, 8.005, 11.7475, 13.405, 8.425, 47.155,
26.1, 6.6675), x8 = c(0.95, 3.01, 1.92, 1.51, 2.61, 1.32, 3.55,
1.21, 2.14, 1.1, 1.32, 0.76, 1.34, 5.41, 9.38, 6.55, 4.44, 7.37,
9.84, 12.68, 15.52, 23.01, 18.59, 21.64, 19.69, 25.22, 22.38,
25.03, 37.42, 22.26, 2.1, 3.01, 0.82), x9 = c(26.2, 25.8, 25.8,
25.5, 26, 24.7, 22.9, 25.3, 26.3, 26.1, 22.5, 25.9, 26.4, 25.2,
25.8, 25.4, 25, 23.2, 26.4, 25.8, 26.6, 26.2, 25.8, 26.8, 25,
25.4, 25.6, 26.1, 25.7, 25.8, 24.78000069, 24.98148918, 26.39899826
), x10 = c(35.4, 39, 37.5, 36.4, 37.1, 36.2, 37.3, 36.4, 37.5,
36, 36.6, 35.6, 37.3, 38.3, 37, 37.5, 37.5, 39.6, 37.8, 36.8,
36.6, 38.4, 38.9, 38.4, 38.4, 37.7, 39.1, 37.7, 37.8, 39.4, 36.25,
35.57029343, 35.57416534), x11 = c(653.86191565, 383.1, 457.1,
591.4, 549.2, 475.2, 626.4, 308.8, 652.4, 77, 380.9, 530.5, 393,
712.1, 623.4, 515.7, 706.4, 713.4, 343.7, 559.5, 630.1, 292.3,
578.6, 628.88904574, 480.96959685, 591.35600287, 804.8, 419.6,
403.7, 361.2, 515.07101438, 434.66682808, 299.9531298), x12 = c(163.9793854,
167.9, 135, 215.8, 213, 188.4, 260.6, 191.8, 337.6, 55, 147.6,
200.1, 140.7, 315.8, 189.6, 225.6, 469.3, 201.8, 140, 297.2,
204.6, 142.5, 234.9, 275.494751, 153.7796173, 147.6174622, 433.6,
207.4, 184.4, 162.8, 219.9721832, 174.8355713, 106.8163605),
x13 = c(92, 67, 67, 50, 70, 87, 68, 86, 70, 11, 66, 79, 70,
61, 75, 78, 78, 77, 69, 35, 72, 76, 69, 84, 93, 73, 81, 99,
80, 76, 101, 86, 80), x14 = c(70, 42, 46, 34, 55, 60, 51,
65, 49, 1, 40, 56, 54, 41, 48, 57, 46, 50, 41, 22, 47, 47,
49, 57, 70, 52, 56, 70, 48, 50, 74, 66, 47), x15 = c(21,
12, 13, 10, 14, 16, 10, 13, 10, 0, 9, 14, 16, 20, 14, 14,
13, 15, 10, 7, 17, 8, 14, 14, 14, 11, 17, 19, 12, 11, 17,
17, 9), x16 = c(1076.8, 783.7, 711.8, 1041.9, 957.4, 939.3,
662.9, 768.1, 770.3, 0, 399.2, 606.2, 724.1, 960.8, 943.8,
737.8, 1477.4, 1191.7, 371.3, 956.4, 1251.7, 345.7, 1210.7,
845, 598.1, 821.7, 1310.6, 940.1, 581, 520, 313.5, 606.8,
201.2), x17 = c(163.9793854, 167.9, 128.4, 215.8, 213, 188.4,
260.6, 191.8, 337.6, 55, 147.6, 200.1, 140.7, 315.8, 189.6,
225.6, 469.3, 201.8, 140, 297.2, 204.6, 142.5, 234.9, 157.7472534,
153.7796173, 147.6174622, 133.1873627, 150.2, 184.4, 162.8,
219.9721832, 174.8355713, 106.8163605)), row.names = c(NA,
33L), class = "data.frame")
CodePudding user response:
You can use purrr::map
to loop through every x.
library(dplyr)
library(purrr)
thresholds <- df %>%
select(-y) %>%
map_dbl(function(x){
lm.model <- lm(df$y ~ x)
cc <- coef(lm.model)
f <- function(x) cc[2]*x cc[1]
plot(x, df$y)
abline(coef(lm.model))
abline(h=0, col="blue")
threshold <- tryCatch(uniroot(f, interval = c(0, 100))$root, error = function(cond){NA})
abline(v=threshold, col="blue")
return(threshold)})
For some x's, uniroot(f, interval = c(0, 100))$root
yields an error: Error
in uniroot(f, interval = c(0, 100)) : f() values at end points not of opposite sign
So the tryCatch
is used to return NA
for the threshold associated with that x, instead of breaking the code.
Result:
> thresholds
x1 x2 x3 x4 x5 x6 x7 x8 x9
9.023314 NA NA 15.459841 28.727293 10.514728 10.493577 9.669244 25.522480
x10 x11 x12 x13 x14 x15 x16 x17
37.370852 NA NA 73.398380 50.239522 13.022176 NA NA
Edit: binding the graphs together
graphs <- df %>%
select(-y) %>%
imap(function(x, name){
lm.model <- lm(df$y ~ x)
cc <- coef(lm.model)
f <- function(x) cc[2]*x cc[1]
threshold <- tryCatch(uniroot(f, interval = c(0, 100))$root, error = function(cond){NA})
g = ggplot(mapping = aes(x))
geom_point(aes(y = df$y))
geom_line(aes(y = cc[2]*x cc[1]))
geom_hline(yintercept = 0, color = "blue")
labs(title = name, y = "y", x = "x")
if(!is.na(threshold)) {g = g geom_vline(xintercept = threshold, color = "blue")}
return(g)})
ggpubr::ggarrange(plotlist = graphs)
Result:
Obs2: i assumed that you don't need the thresholds
vector defined in the first attempt, if you still need it, it's easy to add it back to the answer
Obs1: let me know if you want any aesthetic change on the graphs
Edit 2: graph with common axis
To use a common axis is better to use facets instead of ggarrange
. In order to do that, we need to first save the fitted data for all variables, then plot, so the ggplot
expression goes out of the map
. Also, we now save the treshold
info.
graphs <- df %>%
select(-y) %>%
imap(function(x, name){
lm.model <- lm(df$y ~ x)
cc <- coef(lm.model)
f <- function(x) cc[2]*x cc[1]
threshold <- tryCatch(uniroot(f, interval = c(0, 100))$root, error = function(cond){NA})
list(threshold = threshold,
data = tibble(y = df$y, "name" = name, "x" = x, "fitted" = cc[2]*x cc[1]))})
Now we use the purrr::transpose()
function to build a dataset for the data
and other for the treshold
. This functions does something like:
list(x1 = list(treshold, data), x2 = ...) >>> list(treshold = list(x1, x2, ...), data = list(x1, x2, ...))
df2 = graphs %>%
transpose() %>%
`$`(data) %>%
bind_rows() %>%
mutate(name = factor(name, paste0("x", 1:17)))
thresholds = graphs %>%
transpose() %>%
`$`(threshold) %>%
{tibble(int = as.numeric(.), name = names(.))} #both datasets have the name column, to be used inside `facet_wrap()`
ggplot(df2, aes(x))
geom_point(aes(y = y))
geom_line(aes(y = fitted))
facet_wrap(vars(name), scales = "free_x")
geom_hline(yintercept = 0, color = "blue")
geom_vline(aes(xintercept = int), thresholds, color = "blue", linetype = 2)
geom_label(aes(label = round(int, 2), x = int*1, y = min(df$y)), thresholds, size = 4)
Result:
Obs1: the labels position and size can be easily altered. Another option is using the thresholds as a axis break
Obs2: this method can be slow for large datasets. A more efficient option is to save only threshold
and cc
inside map
, and then building the dataset after it.