I have a huge data set and want to run a regression analysis on it.
This is the data set.
# Winst Item1 Item2 Item3 Item4 Item5 ... Item100
# Event1 992.19788 92 91 79 36 71 ... 93
# Event2 43.43687 62 16 58 51 30 ... 71
# Event3 219.52095 NaN NaN NaN NaN NaN ... NaN
# Event4 874.76596 89 69 82 65 56 ... 91
# Event5 602.40975 36 37 27 94 54 ... 52
How can I write the following code more concisely?
lm(Winst ~ Item1 + Item2 + Item3 + Item4 + ... + Item50, data=data1)
In maths we would use the sigma (summation) sign, but in R `sigma` has another meaning, and sum()
does not work inside a formula either.
What are the options?
Data
# Reproducible sample data: 5 rows (Event1..Event5) holding the response
# `Winst` and 100 predictor columns Item1..Item100. Row Event3 is NaN in
# every Item column. Item51..Item100 repeat the values of Item1..Item50.
data1 <- structure(list(Winst = c(992.197884479538, 43.4368695132434,
219.520953251049, 874.765956075862, 602.409749291837), Item1 = c(92,
62, NaN, 89, 36), Item2 = c(91, 16, NaN, 69, 37), Item3 = c(79,
58, NaN, 82, 27), Item4 = c(36, 51, NaN, 65, 94), Item5 = c(71,
30, NaN, 56, 54), Item6 = c(23, 35, NaN, 67, 59), Item7 = c(44,
64, NaN, 9, 98), Item8 = c(70, 79, NaN, 75, 57), Item9 = c(91,
26, NaN, 76, 29), Item10 = c(99, 74, NaN, 70, 78), Item11 = c(27,
72, NaN, 74, 16), Item12 = c(10, 48, NaN, 2, 60), Item13 = c(54,
95, NaN, 10, 17), Item14 = c(58, 82, NaN, 83, 61), Item15 = c(21,
88, NaN, 1, 62), Item16 = c(61, 4, NaN, 23, 8), Item17 = c(7,
46, NaN, 18, 20), Item18 = c(67, 24, NaN, 32, 92), Item19 = c(83,
73, NaN, 42, 41), Item20 = c(42, 65, NaN, 51, 30), Item21 = c(77,
49, NaN, 85, 85), Item22 = c(83, 19, NaN, 71, 50), Item23 = c(25,
42, NaN, 20, 81), Item24 = c(74, 93, NaN, 17, 58), Item25 = c(23,
84, NaN, 64, 7), Item26 = c(22, 43, NaN, 49, 65), Item27 = c(69,
32, NaN, 45, 42), Item28 = c(92, 20, NaN, 33, 3), Item29 = c(32,
55, NaN, 40, 22), Item30 = c(68, 76, NaN, 38, 46), Item31 = c(28,
91, NaN, 80, 97), Item32 = c(100, 61, NaN, 35, 91), Item33 = c(86,
36, NaN, 63, 69), Item34 = c(54, 60, NaN, 8, 34), Item35 = c(11,
63, NaN, 22, 53), Item36 = c(63, 80, NaN, 26, 96), Item37 = c(41,
66, NaN, 100, 2), Item38 = c(24, 98, NaN, 24, 47), Item39 = c(82,
31, NaN, 97, 87), Item40 = c(34, 8, NaN, 95, 32), Item41 = c(60,
27, NaN, 14, 68), Item42 = c(48, 44, NaN, 19, 56), Item43 = c(45,
57, NaN, 57, 40), Item44 = c(72, 75, NaN, 28, 72), Item45 = c(11,
12, NaN, 37, 88), Item46 = c(21, 70, NaN, 15, 5), Item47 = c(46,
22, NaN, 92, 43), Item48 = c(68, 52, NaN, 7, 48), Item49 = c(6,
5, NaN, 16, 49), Item50 = c(93, 71, NaN, 91, 52), Item51 = c(92,
62, NaN, 89, 36), Item52 = c(91, 16, NaN, 69, 37), Item53 = c(79,
58, NaN, 82, 27), Item54 = c(36, 51, NaN, 65, 94), Item55 = c(71,
30, NaN, 56, 54), Item56 = c(23, 35, NaN, 67, 59), Item57 = c(44,
64, NaN, 9, 98), Item58 = c(70, 79, NaN, 75, 57), Item59 = c(91,
26, NaN, 76, 29), Item60 = c(99, 74, NaN, 70, 78), Item61 = c(27,
72, NaN, 74, 16), Item62 = c(10, 48, NaN, 2, 60), Item63 = c(54,
95, NaN, 10, 17), Item64 = c(58, 82, NaN, 83, 61), Item65 = c(21,
88, NaN, 1, 62), Item66 = c(61, 4, NaN, 23, 8), Item67 = c(7,
46, NaN, 18, 20), Item68 = c(67, 24, NaN, 32, 92), Item69 = c(83,
73, NaN, 42, 41), Item70 = c(42, 65, NaN, 51, 30), Item71 = c(77,
49, NaN, 85, 85), Item72 = c(83, 19, NaN, 71, 50), Item73 = c(25,
42, NaN, 20, 81), Item74 = c(74, 93, NaN, 17, 58), Item75 = c(23,
84, NaN, 64, 7), Item76 = c(22, 43, NaN, 49, 65), Item77 = c(69,
32, NaN, 45, 42), Item78 = c(92, 20, NaN, 33, 3), Item79 = c(32,
55, NaN, 40, 22), Item80 = c(68, 76, NaN, 38, 46), Item81 = c(28,
91, NaN, 80, 97), Item82 = c(100, 61, NaN, 35, 91), Item83 = c(86,
36, NaN, 63, 69), Item84 = c(54, 60, NaN, 8, 34), Item85 = c(11,
63, NaN, 22, 53), Item86 = c(63, 80, NaN, 26, 96), Item87 = c(41,
66, NaN, 100, 2), Item88 = c(24, 98, NaN, 24, 47), Item89 = c(82,
31, NaN, 97, 87), Item90 = c(34, 8, NaN, 95, 32), Item91 = c(60,
27, NaN, 14, 68), Item92 = c(48, 44, NaN, 19, 56), Item93 = c(45,
57, NaN, 57, 40), Item94 = c(72, 75, NaN, 28, 72), Item95 = c(11,
12, NaN, 37, 88), Item96 = c(21, 70, NaN, 15, 5), Item97 = c(46,
22, NaN, 92, 43), Item98 = c(68, 52, NaN, 7, 48), Item99 = c(6,
5, NaN, 16, 49), Item100 = c(93, 71, NaN, 91, 52)), class = "data.frame", row.names = c("Event1",
"Event2", "Event3", "Event4", "Event5"))
CodePudding user response:
You could use `reformulate` together with `grep` to pick the terms out of the
column names with a regular expression.
# Build `Winst ~ Item1 + ... + Item50` from the column names of `data1`.
# The pattern matches exactly Item1-Item9, Item10-Item49 and Item50, so
# Item51..Item100 are left out of the formula.
fo <- reformulate(
  termlabels = grep("^Item([1-9]|[1-4][0-9]|50)$", names(data1), value = TRUE),
  response = "Winst"
)
fo
# Winst ~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 +
#     Item8 + Item9 + Item10 + Item11 + Item12 + Item13 + Item14 +
#     Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 +
#     Item22 + Item23 + Item24 + Item25 + Item26 + Item27 + Item28 +
#     Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 +
#     Item36 + Item37 + Item38 + Item39 + Item40 + Item41 + Item42 +
#     Item43 + Item44 + Item45 + Item46 + Item47 + Item48 + Item49 +
#     Item50
See demo there.
And then just
# Fit the linear model described by the formula object `fo` on `data1`.
lm(formula = fo, data = data1)
or
# do.call() evaluates `fo` before the call to lm() is built, so the printed
# "Call:" shows the formula itself rather than the name `fo`; quote() keeps
# `data1` as a symbol so the data frame is not expanded into the call.
do.call("lm", list(formula = fo, data = quote(data1)))
so that the "Call:" in the printed output shows the actual formula instead of `fo`.
CodePudding user response:
My take on this issue:
# Names of the predictors to include: Item1 ... Item50.
items <- paste0("Item", 1:50)
# Join the predictor names with " + " so the text is a valid formula
# ("Winst ~ Item1 + Item2 + ... + Item50"); joining with a bare space yields
# "Winst ~ Item1 Item2 ...", which cannot be parsed as a formula.
model <- formula(paste("Winst ~", paste(items, collapse = " + ")))
model
Winst ~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 +
    Item8 + Item9 + Item10 + Item11 + Item12 + Item13 + Item14 +
    Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 +
    Item22 + Item23 + Item24 + Item25 + Item26 + Item27 + Item28 +
    Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 +
    Item36 + Item37 + Item38 + Item39 + Item40 + Item41 + Item42 +
    Item43 + Item44 + Item45 + Item46 + Item47 + Item48 + Item49 +
    Item50
Then, you just call the model from lm()
# Fit the linear model described by the formula object `model` on `data1`.
lm(formula = model, data = data1)
CodePudding user response:
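Alternatively, to fit a separate simple regression of `Winst` on each item
(rather than one model with all items), we could use either `group_map`
or `group_modify`
together with `broom`'s `tidy`,
after bringing the data into long format with `pivot_longer`
from the `tidyr`
package and grouping by `name`: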
library(dplyr)
library(tidyr)
library(broom)
# Reshape to long format (one row per event/item pair), then fit a separate
# regression `Winst ~ value` for each item and collect the tidied coefficient
# tables into a single tibble grouped by `name`.
# Swapping group_modify() for group_map() would return a list with one
# tibble per item instead.
data1 %>%
  pivot_longer(cols = -Winst, names_to = "name", values_to = "value") %>%
  group_by(name) %>%
  group_modify(function(item_rows, item_key) {
    item_fit <- lm(Winst ~ value, data = item_rows)
    broom::tidy(item_fit)
  })
Groups: name [100]
name term estimate std.error statistic p.value
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 Item1 (Intercept) 174. 1029. 0.169 0.881
2 Item1 value 2.35 16.7 0.141 0.901
3 Item10 (Intercept) 164. 449. 0.364 0.751
4 Item10 value 2.29 5.96 0.384 0.738
5 Item100 (Intercept) -99.5 646. -0.154 0.892
6 Item100 value 5.46 8.08 0.676 0.569
7 Item11 (Intercept) 462. 280. 1.65 0.241
8 Item11 value -4.56 6.18 -0.739 0.537
9 Item12 (Intercept) -281. 227. -1.23 0.342
10 Item12 value 9.39 3.24 2.90 0.101
# ... with 190 more rows