Is there a shorter way to write many similar variables in lm() formula?-CodePudding

I do have a huge data set and want to do some regression analysis.

This is the data set.

#            Winst Item1 Item2 Item3 Item4 Item5  ...  Item100
# Event1 992.19788    92    91    79    36    71  ...       93
# Event2  43.43687    62    16    58    51    30  ...       71
# Event3 219.52095   NaN   NaN   NaN   NaN   NaN  ...      NaN
# Event4 874.76596    89    69    82    65    56  ...       91
# Event5 602.40975    36    37    27    94    54  ...       52

how can I write the following code shorter?

# Regress Winst on the first 50 item columns. The `...` is a placeholder for
# the omitted terms (Item5 through Item49), not literal R code to run.
lm(Winst ~ Item1 + Item2 + Item3 + Item4 + ... + Item50, data = data1)

In maths we would use the sigma sign, but in R that word has another meaning, and sum() would not work here either.

What are the options?

Data

# Example data (dput output): a data.frame with 5 rows (Event1..Event5) and
# 101 numeric columns: the response `Winst` plus predictors Item1..Item100.
# Row Event3 is NaN in every Item column, while its Winst value is present.
# NOTE(review): Item51..Item100 appear to repeat the values of Item1..Item50.
data1 <- structure(list(Winst = c(992.197884479538, 43.4368695132434, 
219.520953251049, 874.765956075862, 602.409749291837), Item1 = c(92, 
62, NaN, 89, 36), Item2 = c(91, 16, NaN, 69, 37), Item3 = c(79, 
58, NaN, 82, 27), Item4 = c(36, 51, NaN, 65, 94), Item5 = c(71, 
30, NaN, 56, 54), Item6 = c(23, 35, NaN, 67, 59), Item7 = c(44, 
64, NaN, 9, 98), Item8 = c(70, 79, NaN, 75, 57), Item9 = c(91, 
26, NaN, 76, 29), Item10 = c(99, 74, NaN, 70, 78), Item11 = c(27, 
72, NaN, 74, 16), Item12 = c(10, 48, NaN, 2, 60), Item13 = c(54, 
95, NaN, 10, 17), Item14 = c(58, 82, NaN, 83, 61), Item15 = c(21, 
88, NaN, 1, 62), Item16 = c(61, 4, NaN, 23, 8), Item17 = c(7, 
46, NaN, 18, 20), Item18 = c(67, 24, NaN, 32, 92), Item19 = c(83, 
73, NaN, 42, 41), Item20 = c(42, 65, NaN, 51, 30), Item21 = c(77, 
49, NaN, 85, 85), Item22 = c(83, 19, NaN, 71, 50), Item23 = c(25, 
42, NaN, 20, 81), Item24 = c(74, 93, NaN, 17, 58), Item25 = c(23, 
84, NaN, 64, 7), Item26 = c(22, 43, NaN, 49, 65), Item27 = c(69, 
32, NaN, 45, 42), Item28 = c(92, 20, NaN, 33, 3), Item29 = c(32, 
55, NaN, 40, 22), Item30 = c(68, 76, NaN, 38, 46), Item31 = c(28, 
91, NaN, 80, 97), Item32 = c(100, 61, NaN, 35, 91), Item33 = c(86, 
36, NaN, 63, 69), Item34 = c(54, 60, NaN, 8, 34), Item35 = c(11, 
63, NaN, 22, 53), Item36 = c(63, 80, NaN, 26, 96), Item37 = c(41, 
66, NaN, 100, 2), Item38 = c(24, 98, NaN, 24, 47), Item39 = c(82, 
31, NaN, 97, 87), Item40 = c(34, 8, NaN, 95, 32), Item41 = c(60, 
27, NaN, 14, 68), Item42 = c(48, 44, NaN, 19, 56), Item43 = c(45, 
57, NaN, 57, 40), Item44 = c(72, 75, NaN, 28, 72), Item45 = c(11, 
12, NaN, 37, 88), Item46 = c(21, 70, NaN, 15, 5), Item47 = c(46, 
22, NaN, 92, 43), Item48 = c(68, 52, NaN, 7, 48), Item49 = c(6, 
5, NaN, 16, 49), Item50 = c(93, 71, NaN, 91, 52), Item51 = c(92, 
62, NaN, 89, 36), Item52 = c(91, 16, NaN, 69, 37), Item53 = c(79, 
58, NaN, 82, 27), Item54 = c(36, 51, NaN, 65, 94), Item55 = c(71, 
30, NaN, 56, 54), Item56 = c(23, 35, NaN, 67, 59), Item57 = c(44, 
64, NaN, 9, 98), Item58 = c(70, 79, NaN, 75, 57), Item59 = c(91, 
26, NaN, 76, 29), Item60 = c(99, 74, NaN, 70, 78), Item61 = c(27, 
72, NaN, 74, 16), Item62 = c(10, 48, NaN, 2, 60), Item63 = c(54, 
95, NaN, 10, 17), Item64 = c(58, 82, NaN, 83, 61), Item65 = c(21, 
88, NaN, 1, 62), Item66 = c(61, 4, NaN, 23, 8), Item67 = c(7, 
46, NaN, 18, 20), Item68 = c(67, 24, NaN, 32, 92), Item69 = c(83, 
73, NaN, 42, 41), Item70 = c(42, 65, NaN, 51, 30), Item71 = c(77, 
49, NaN, 85, 85), Item72 = c(83, 19, NaN, 71, 50), Item73 = c(25, 
42, NaN, 20, 81), Item74 = c(74, 93, NaN, 17, 58), Item75 = c(23, 
84, NaN, 64, 7), Item76 = c(22, 43, NaN, 49, 65), Item77 = c(69, 
32, NaN, 45, 42), Item78 = c(92, 20, NaN, 33, 3), Item79 = c(32, 
55, NaN, 40, 22), Item80 = c(68, 76, NaN, 38, 46), Item81 = c(28, 
91, NaN, 80, 97), Item82 = c(100, 61, NaN, 35, 91), Item83 = c(86, 
36, NaN, 63, 69), Item84 = c(54, 60, NaN, 8, 34), Item85 = c(11, 
63, NaN, 22, 53), Item86 = c(63, 80, NaN, 26, 96), Item87 = c(41, 
66, NaN, 100, 2), Item88 = c(24, 98, NaN, 24, 47), Item89 = c(82, 
31, NaN, 97, 87), Item90 = c(34, 8, NaN, 95, 32), Item91 = c(60, 
27, NaN, 14, 68), Item92 = c(48, 44, NaN, 19, 56), Item93 = c(45, 
57, NaN, 57, 40), Item94 = c(72, 75, NaN, 28, 72), Item95 = c(11, 
12, NaN, 37, 88), Item96 = c(21, 70, NaN, 15, 5), Item97 = c(46, 
22, NaN, 92, 43), Item98 = c(68, 52, NaN, 7, 48), Item99 = c(6, 
5, NaN, 16, 49), Item100 = c(93, 71, NaN, 91, 52)), class = "data.frame", row.names = c("Event1", 
"Event2", "Event3", "Event4", "Event5"))

CodePudding user response：

You could use reformulate and grep the terms from column names with regular expressions.

# Pick the predictor columns by name: the anchored regex matches exactly
# Item1..Item9, Item10..Item49 and Item50, so Item51..Item100 are left out.
item_terms <- grep("^Item([1-9]|[1-4][0-9]|50)$", names(data1), value = TRUE)

# Assemble `Winst ~ <term 1> + <term 2> + ...` from the selected names.
fo <- reformulate(termlabels = item_terms, response = "Winst")

# Print the formula to check it.
fo
# Winst ~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 +
#   Item8 + Item9 + Item10 + Item11 + Item12 + Item13 + Item14 +
#   Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 +
#   Item22 + Item23 + Item24 + Item25 + Item26 + Item27 + Item28 +
#   Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 +
#   Item36 + Item37 + Item38 + Item39 + Item40 + Item41 + Item42 +
#   Item43 + Item44 + Item45 + Item46 + Item47 + Item48 + Item49 +
#   Item50

See demo there.

And then just

# Plain call: the fitted model records the call with the symbol `fo` as its
# formula argument.
lm(formula = fo, data = data1)

# Alternative: do.call() evaluates `fo` before the call is built, so the
# recorded call contains the expanded formula; quote(data1) keeps the data
# argument as the symbol `data1` rather than the whole data frame.
do.call("lm", list(formula = fo, data = quote(data1)))

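Use the second form, with do.call(), if you want the "Call:" line of the printed model to show the full formula instead of just `fo`.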

CodePudding user response：

My take on this issue:

# Names of the predictors to include: Item1, Item2, ..., Item50.
items <- paste0("Item", 1:50)

# Build the model formula from text. The terms must be joined with " + ";
# joining them with a bare space gives "Winst ~ Item1 Item2 ...", which is
# not valid R syntax and makes formula() fail.
model <- formula(paste("Winst ~", paste0(items, collapse = " + ")))

# Print the formula to check it.
model

Winst ~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 +
    Item8 + Item9 + Item10 + Item11 + Item12 + Item13 + Item14 +
    Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 +
    Item22 + Item23 + Item24 + Item25 + Item26 + Item27 + Item28 +
    Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 +
    Item36 + Item37 + Item38 + Item39 + Item40 + Item41 + Item42 +
    Item43 + Item44 + Item45 + Item46 + Item47 + Item48 + Item49 +
    Item50

Then pass the model formula to lm():

# Fit the linear model described by the formula object `model` to data1.
lm(formula = model, data = data1)

CodePudding user response：

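Alternatively, to fit a separate simple regression of Winst on each item (rather than one model containing all items), we could bring the data into long format with pivot_longer() from the tidyr package, group by name, and then use either group_map() or group_modify() together with broom's tidy():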

library(dplyr)
library(tidyr)
library(broom)

# Fit one simple regression per item column (Winst ~ that item's values) and
# stack the tidied coefficient tables; the result stays grouped by `name`.
# Swapping group_modify() for group_map() is the other option, e.g.
#   group_map(~ broom::tidy(lm(Winst ~ value, data = .x)))
data1 %>%
  # Long format: every column except Winst is stacked into name/value pairs.
  pivot_longer(cols = -Winst) %>%
  group_by(name) %>%
  group_modify(
    function(item_rows, item_key) {
      broom::tidy(lm(Winst ~ value, data = item_rows))
    }
  )

Groups:   name [100]
   name    term        estimate std.error statistic p.value
   <chr>   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
 1 Item1   (Intercept)   174.     1029.       0.169   0.881
 2 Item1   value           2.35     16.7      0.141   0.901
 3 Item10  (Intercept)   164.      449.       0.364   0.751
 4 Item10  value           2.29      5.96     0.384   0.738
 5 Item100 (Intercept)   -99.5     646.      -0.154   0.892
 6 Item100 value           5.46      8.08     0.676   0.569
 7 Item11  (Intercept)   462.      280.       1.65    0.241
 8 Item11  value          -4.56      6.18    -0.739   0.537
 9 Item12  (Intercept)  -281.      227.      -1.23    0.342
10 Item12  value           9.39      3.24     2.90    0.101
# ... with 190 more rows