Home > Software engineering >  Identifying threshold for multiple independent variables in R
Identifying threshold for multiple independent variables in R

Time:10-27

I am trying to calculate the value of x where y = 0. I could able to do it for single x using the following code

lm.model <- lm(y ~ x)
cc <- coef(lm.model)

f <- function(x) cc[2]*x   cc[1] 
plot(x, y)
abline(coef(lm.model))
abline(h=0, col="blue")

(threshold <- uniroot(f, interval = c(0, 100))$root)
abline(v=threshold, col="blue")

enter image description here

x = c(33.05, 14.22, 15.35, 13.52, 8.7, 13.73, 8.28, 21.02, 9.97, 
11.98, 12.87, 5.05, 11.23, 11.65, 10.05, 12.58, 13.88, 9.66, 
4.62, 4.56, 5.35, 3.7, 3.29, 4.87, 3.75, 6.55, 4.51, 7.77, 4.7, 
4.18, 25.14, 18.08, 10.41)
y = c(16.22699279, 15.78620732, 9.656361014, -17.32805679, -20.85685895, 
7.601993251, -4.776053714, 10.50972236, 3.853479771, 7.713563136, 
8.579366561, 14.16989395, 7.484692081, -1.2807472, -12.13759458, 
-0.29138513, -5.238157067, -2.033194068, -38.12157566, -33.61912493, 
-9.763657548, -0.240863712, 9.090638907, 7.345492608, 6.949676888, 
-19.94866471, 0.995659732, -1.162616185, 5.497998429, 1.656653092, 
2.116687436, 22.23175649, 10.33039543)

But I have multiple x variables. Now how can I apply it for multiple x variables at a time? Here is an example data

df = structure(list(y = c(16.2269927925813, 15.7862073196372, 9.65636101412767, 
-17.3280567922775, -20.8568589521297, 7.6019932507973, -4.77605371404423, 
10.5097223644541, 3.85347977129367, 7.71356313645697, 8.57936656085966, 
14.1698939499927, 7.4846920807874, -1.28074719969249, -12.1375945758837, 
-0.291385130176774, -5.23815706681139, -2.03319406769161, -38.1215756639013, 
-33.6191249261727, -9.76365754821171, -0.240863712421707, 9.09063890677045, 
7.34549260800693, 6.94967688778232, -19.9486647079697, 0.995659731521127, 
-1.16261618452931, 5.49799842947493, 1.65665309209479, 2.11668743610013, 
22.2317564898722, 10.3303954315884), x1 = c(8.56, 8.66, 9.09, 
8.36, 8.3, 8.63, 8.78, 8.44, 8.34, 8.46, 8.33, 8.19, 8.58, 8.65, 
8.75, 8.34, 8.77, 9.06, 9.31, 9.11, 9.26, 9.81, 9.68, 9.79, 9.26, 
9.53, 8.89, 8.89, 10.37, 9.58, 10.27, 10.16, 10.27), x2 = c(164, 
328.3, 0, 590.2, 406.6, 188.4, 423.8, 355.3, 337.6, 0, 0, 200.1, 
0, 315.8, 547.5, 225.6, 655.7, 387.2, 0, 487.4, 400.4, 0, 234.9, 
275.5, 0, 0, 613.2, 207.4, 184.4, 162.8, 220, 174.8, 0), x3 = c(4517.7, 
2953.4, 2899.3, 2573.8, 3310.7, 3880.3, 3016.8, 3552.3, 2960.1, 
323, 2638.5, 3343.1, 3274.7, 3218, 3268.3, 3507.9, 3709.2, 3537.5, 
2634.4, 1964.6, 3333.7, 2809.7, 3326.8, 3524.5, 3893.9, 3166.7, 
3992.1, 4324.7, 3077.9, 3069.9, 4218.9, 3897.4, 2693.9), x4 = c(14.7, 
14.5, 15.5, 17, 16.2, 15.9, 15.7, 15.3, 13.5, 14, 15.4, 16.2, 
15.6, 15.7, 15.1, 15.8, 15.3, 14.9, 15.7, 16.3, 15.21000004, 
16.7, 15.6, 16.2, 15.7, 16.3, 17.3, 16.9, 15.7, 14.9, 13.81999969, 
14.90754509, 12.42847157), x5 = c(28.3, 29.1, 28.3, 29.1, 28.7, 
29.3, 28.9, 28.4, 29.3, 29.3, 29.1, 29, 29.9, 29.5, 28.4, 30.3, 
29.1, 29.1, 29, 29.5, 29.3, 28.5, 29, 28.7, 29.4, 28.8, 29.2, 
30.1, 28.3, 28.7, 24.96999931, 25.79496384, 25.3072052), x6 = c(33.05, 
14.22, 15.35, 13.52, 8.7, 13.73, 8.28, 21.02, 9.97, 11.98, 12.87, 
5.05, 11.23, 11.65, 10.05, 12.58, 13.88, 9.66, 4.62, 4.56, 5.35, 
3.7, 3.29, 4.87, 3.75, 6.55, 4.51, 7.77, 4.7, 4.18, 25.14, 18.08, 
10.41), x7 = c(13.8425, 11.1175, 8.95, 13.5375, 5.4025, 13.5625, 
13.735, 14.14, 8.0875, 5.565, 12.255, 3.3075, 6.345, 4.8125, 
4.0325, 11.475, 10.32, 17.71, 2.3375, 3.92, 5.7, 2.42, 8.3075, 
7.4725, 7.7925, 10.8725, 8.005, 11.7475, 13.405, 8.425, 47.155, 
26.1, 6.6675), x8 = c(0.95, 3.01, 1.92, 1.51, 2.61, 1.32, 3.55, 
1.21, 2.14, 1.1, 1.32, 0.76, 1.34, 5.41, 9.38, 6.55, 4.44, 7.37, 
9.84, 12.68, 15.52, 23.01, 18.59, 21.64, 19.69, 25.22, 22.38, 
25.03, 37.42, 22.26, 2.1, 3.01, 0.82), x9 = c(26.2, 25.8, 25.8, 
25.5, 26, 24.7, 22.9, 25.3, 26.3, 26.1, 22.5, 25.9, 26.4, 25.2, 
25.8, 25.4, 25, 23.2, 26.4, 25.8, 26.6, 26.2, 25.8, 26.8, 25, 
25.4, 25.6, 26.1, 25.7, 25.8, 24.78000069, 24.98148918, 26.39899826
), x10 = c(35.4, 39, 37.5, 36.4, 37.1, 36.2, 37.3, 36.4, 37.5, 
36, 36.6, 35.6, 37.3, 38.3, 37, 37.5, 37.5, 39.6, 37.8, 36.8, 
36.6, 38.4, 38.9, 38.4, 38.4, 37.7, 39.1, 37.7, 37.8, 39.4, 36.25, 
35.57029343, 35.57416534), x11 = c(653.86191565, 383.1, 457.1, 
591.4, 549.2, 475.2, 626.4, 308.8, 652.4, 77, 380.9, 530.5, 393, 
712.1, 623.4, 515.7, 706.4, 713.4, 343.7, 559.5, 630.1, 292.3, 
578.6, 628.88904574, 480.96959685, 591.35600287, 804.8, 419.6, 
403.7, 361.2, 515.07101438, 434.66682808, 299.9531298), x12 = c(163.9793854, 
167.9, 135, 215.8, 213, 188.4, 260.6, 191.8, 337.6, 55, 147.6, 
200.1, 140.7, 315.8, 189.6, 225.6, 469.3, 201.8, 140, 297.2, 
204.6, 142.5, 234.9, 275.494751, 153.7796173, 147.6174622, 433.6, 
207.4, 184.4, 162.8, 219.9721832, 174.8355713, 106.8163605), 
    x13 = c(92, 67, 67, 50, 70, 87, 68, 86, 70, 11, 66, 79, 70, 
    61, 75, 78, 78, 77, 69, 35, 72, 76, 69, 84, 93, 73, 81, 99, 
    80, 76, 101, 86, 80), x14 = c(70, 42, 46, 34, 55, 60, 51, 
    65, 49, 1, 40, 56, 54, 41, 48, 57, 46, 50, 41, 22, 47, 47, 
    49, 57, 70, 52, 56, 70, 48, 50, 74, 66, 47), x15 = c(21, 
    12, 13, 10, 14, 16, 10, 13, 10, 0, 9, 14, 16, 20, 14, 14, 
    13, 15, 10, 7, 17, 8, 14, 14, 14, 11, 17, 19, 12, 11, 17, 
    17, 9), x16 = c(1076.8, 783.7, 711.8, 1041.9, 957.4, 939.3, 
    662.9, 768.1, 770.3, 0, 399.2, 606.2, 724.1, 960.8, 943.8, 
    737.8, 1477.4, 1191.7, 371.3, 956.4, 1251.7, 345.7, 1210.7, 
    845, 598.1, 821.7, 1310.6, 940.1, 581, 520, 313.5, 606.8, 
    201.2), x17 = c(163.9793854, 167.9, 128.4, 215.8, 213, 188.4, 
    260.6, 191.8, 337.6, 55, 147.6, 200.1, 140.7, 315.8, 189.6, 
    225.6, 469.3, 201.8, 140, 297.2, 204.6, 142.5, 234.9, 157.7472534, 
    153.7796173, 147.6174622, 133.1873627, 150.2, 184.4, 162.8, 
    219.9721832, 174.8355713, 106.8163605)), row.names = c(NA, 
33L), class = "data.frame")

CodePudding user response:

You can use purrr::map to loop through every x.

library(dplyr)
library(purrr)

thresholds <- df %>%
  select(-y) %>%
  map_dbl(function(x){
    lm.model <- lm(df$y ~ x)
    cc <- coef(lm.model)
    
    f <- function(x) cc[2]*x   cc[1] 
    plot(x, df$y)
    abline(coef(lm.model))
    abline(h=0, col="blue")
    
    threshold <- tryCatch(uniroot(f, interval = c(0, 100))$root, error = function(cond){NA})
    abline(v=threshold, col="blue")
    
    return(threshold)})

For some x's, uniroot(f, interval = c(0, 100))$root yields an error: Error

in uniroot(f, interval = c(0, 100)) : f() values at end points not of opposite sign

So the tryCatch is used to return NA for the threshold associated with that x, instead of breaking the code.

Result:

> thresholds
       x1        x2        x3        x4        x5        x6        x7        x8        x9 
 9.023314        NA        NA 15.459841 28.727293 10.514728 10.493577  9.669244 25.522480 
      x10       x11       x12       x13       x14       x15       x16       x17 
37.370852        NA        NA 73.398380 50.239522 13.022176        NA        NA

Edit: binding the graphs together

graphs <- df %>%
  select(-y) %>%
  imap(function(x, name){
    lm.model <- lm(df$y ~ x)
    cc <- coef(lm.model)
    
    f <- function(x) cc[2]*x   cc[1] 
    threshold <- tryCatch(uniroot(f, interval = c(0, 100))$root, error = function(cond){NA})
    
    g = ggplot(mapping = aes(x))  
      geom_point(aes(y = df$y))  
      geom_line(aes(y = cc[2]*x   cc[1]))  
      geom_hline(yintercept = 0, color = "blue")  
      labs(title = name, y = "y", x = "x")
    
    if(!is.na(threshold)) {g = g   geom_vline(xintercept = threshold, color = "blue")}
    
    return(g)})

ggpubr::ggarrange(plotlist = graphs)

Result:

enter image description here

Obs2: i assumed that you don't need the thresholds vector defined in the first attempt, if you still need it, it's easy to add it back to the answer
Obs1: let me know if you want any aesthetic change on the graphs

  • Related