# The outlier quantity that should be removed.
# A row is counted when either column is at or above its upper fence,
# Q3 + 1.5 * IQR, where IQR = Q3 - Q1 of that same column.
filter(
  HF2,
  serum_creatinine >=
    quantile(serum_creatinine, probs = .75) +
      (quantile(serum_creatinine, probs = .75) -
        quantile(serum_creatinine, probs = .25)) * 1.5 |
    creatinine_phosphokinase >=
      quantile(creatinine_phosphokinase, probs = .75) +
        (quantile(creatinine_phosphokinase, probs = .75) -
          quantile(creatinine_phosphokinase, probs = .25)) * 1.5
) %>%
  nrow()
[1] 56
# The code being used to remove them (the attempt from the question).
# NOTE(review): `|` keeps a row when EITHER column is below its
# Q3 + 1.5 * IQR fence, so only rows at or above the fence in BOTH columns
# are dropped. Dropping rows flagged in either column needs `&` between
# the two `<` comparisons (or a negation of the `>=` / `|` condition).
filter(
  HF2,
  serum_creatinine <
    quantile(serum_creatinine, probs = .75) +
      (quantile(serum_creatinine, probs = .75) -
        quantile(serum_creatinine, probs = .25)) * 1.5 |
    creatinine_phosphokinase <
      quantile(creatinine_phosphokinase, probs = .75) +
        (quantile(creatinine_phosphokinase, probs = .75) -
          quantile(creatinine_phosphokinase, probs = .25)) * 1.5
) %>%
  nrow()
[1] 297
Why am I not able to filter out the outliers as I intend? My dataset has 299 observations.
I've tried changing the signs but I don't think that is what is wrong. Perhaps it could be my parentheses?
What I am trying to remove are all outliers above [Q3 + (1.5)IQR] within serum_creatinine
and creatinine_phosphokinase.
Data
structure(list(age = c(75, 55, 65, 50, 65, 90, 75, 60, 65, 80,
75, 62, 45, 50, 49, 82, 87, 45, 70, 48, 65, 65, 68, 53, 75, 80,
95, 70, 58, 82, 94, 85, 50, 50, 65, 69, 90, 82, 60, 60, 70, 50,
70, 72, 60, 50, 51, 60, 80, 57, 68, 53, 60, 70, 60, 95, 70, 60,
49, 72, 45, 50, 55, 45, 45, 60, 42, 72, 70, 65, 41, 58, 85, 65,
69, 60, 70, 42, 75, 55, 70, 67, 60, 79, 59, 51, 55, 65, 44, 57,
70, 60, 42, 60, 58, 58, 63, 70, 60, 63, 65, 75, 80, 42, 60, 72,
55, 45, 63, 45, 85, 55, 50, 70, 60, 58, 60, 85, 65, 86, 60, 66,
60, 60, 60, 43, 46, 58, 61, 53, 53, 60, 46, 63, 81, 75, 65, 68,
62, 50, 80, 46, 50, 61, 72, 50, 52, 64, 75, 60, 72, 62, 50, 50,
65, 60, 52, 50, 85, 59, 66, 45, 63, 50, 45, 80, 53, 59, 65, 70,
51, 52, 70, 50, 65, 60, 69, 49, 63, 55, 40, 59, 65, 75, 58, 60.667,
50, 60, 60.667, 40, 80, 64, 50, 73, 45, 77, 45, 65, 50, 60, 63,
45, 70, 60, 78, 50, 40, 85, 60, 49, 70, 50, 78, 48, 65, 73, 70,
54, 68, 55, 73, 65, 42, 47, 58, 75, 58, 55, 65, 72, 60, 70, 40,
53, 53, 77, 75, 70, 65, 55, 70, 65, 40, 73, 54, 61, 55, 64, 40,
53, 50, 55, 50, 70, 53, 52, 65, 58, 45, 53, 55, 62, 65, 68, 61,
50, 55, 56, 45, 40, 44, 51, 67, 42, 60, 45, 70, 70, 50, 55, 70,
70, 42, 65, 50, 55, 60, 45, 65, 90, 45, 60, 52, 63, 62, 55, 45,
45, 50), anaemia = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L), levels = c("0", "1"), class = "factor"), creatinine_phosphokinase = c(582L,
7861L, 146L, 111L, 160L, 47L, 246L, 315L, 157L, 123L, 81L, 231L,
981L, 168L, 80L, 379L, 149L, 582L, 125L, 582L, 52L, 128L, 220L,
63L, 582L, 148L, 112L, 122L, 60L, 70L, 582L, 23L, 249L, 159L,
94L, 582L, 60L, 855L, 2656L, 235L, 582L, 124L, 571L, 127L, 588L,
582L, 1380L, 582L, 553L, 129L, 577L, 91L, 3964L, 69L, 260L, 371L,
75L, 607L, 789L, 364L, 7702L, 318L, 109L, 582L, 582L, 68L, 250L,
110L, 161L, 113L, 148L, 582L, 5882L, 224L, 582L, 47L, 92L, 102L,
203L, 336L, 69L, 582L, 76L, 55L, 280L, 78L, 47L, 68L, 84L, 115L,
66L, 897L, 582L, 154L, 144L, 133L, 514L, 59L, 156L, 61L, 305L,
582L, 898L, 5209L, 53L, 328L, 748L, 1876L, 936L, 292L, 129L,
60L, 369L, 143L, 754L, 400L, 96L, 102L, 113L, 582L, 737L, 68L,
96L, 582L, 582L, 358L, 168L, 200L, 248L, 270L, 1808L, 1082L,
719L, 193L, 4540L, 582L, 59L, 646L, 281L, 1548L, 805L, 291L,
482L, 84L, 943L, 185L, 132L, 1610L, 582L, 2261L, 233L, 30L, 115L,
1846L, 335L, 231L, 58L, 250L, 910L, 129L, 72L, 130L, 582L, 2334L,
2442L, 776L, 196L, 66L, 582L, 835L, 582L, 3966L, 171L, 115L,
198L, 95L, 1419L, 69L, 122L, 835L, 478L, 176L, 395L, 99L, 145L,
104L, 582L, 1896L, 151L, 244L, 582L, 62L, 121L, 231L, 582L, 418L,
582L, 167L, 582L, 1211L, 1767L, 308L, 97L, 59L, 64L, 167L, 101L,
212L, 2281L, 972L, 212L, 582L, 224L, 131L, 135L, 582L, 1202L,
427L, 1021L, 582L, 582L, 118L, 86L, 582L, 582L, 675L, 57L, 2794L,
56L, 211L, 166L, 93L, 129L, 707L, 582L, 109L, 119L, 232L, 720L,
180L, 81L, 582L, 90L, 1185L, 582L, 80L, 2017L, 143L, 624L, 207L,
2522L, 572L, 245L, 88L, 446L, 191L, 326L, 132L, 66L, 56L, 66L,
655L, 258L, 157L, 582L, 298L, 1199L, 135L, 582L, 582L, 582L,
582L, 213L, 64L, 257L, 582L, 618L, 582L, 1051L, 84L, 2695L, 582L,
64L, 1688L, 54L, 170L, 253L, 582L, 892L, 337L, 615L, 320L, 190L,
103L, 61L, 1820L, 2060L, 2413L, 196L), diabetes = structure(c(1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L), levels = c("Absent",
"Present"), class = "factor"), ejection_fraction = c(20L, 38L,
20L, 20L, 20L, 40L, 15L, 60L, 65L, 35L, 38L, 25L, 30L, 38L, 30L,
50L, 38L, 14L, 25L, 55L, 25L, 30L, 35L, 60L, 30L, 38L, 40L, 45L,
38L, 30L, 38L, 45L, 35L, 30L, 50L, 35L, 50L, 50L, 30L, 38L, 20L,
30L, 45L, 50L, 60L, 38L, 25L, 38L, 20L, 30L, 25L, 20L, 62L, 50L,
38L, 30L, 35L, 40L, 20L, 20L, 25L, 40L, 35L, 35L, 80L, 20L, 15L,
25L, 25L, 25L, 40L, 35L, 35L, 50L, 20L, 20L, 60L, 40L, 38L, 45L,
40L, 50L, 25L, 50L, 25L, 50L, 35L, 60L, 40L, 25L, 45L, 45L, 60L,
25L, 38L, 60L, 25L, 60L, 25L, 40L, 25L, 45L, 25L, 30L, 50L, 30L,
45L, 35L, 38L, 35L, 60L, 35L, 25L, 60L, 40L, 40L, 60L, 60L, 60L,
38L, 60L, 38L, 38L, 30L, 40L, 50L, 17L, 60L, 30L, 35L, 60L, 45L,
40L, 60L, 35L, 40L, 60L, 25L, 35L, 30L, 38L, 35L, 30L, 40L, 25L,
30L, 30L, 60L, 30L, 35L, 45L, 60L, 45L, 35L, 35L, 25L, 35L, 25L,
50L, 45L, 40L, 35L, 40L, 35L, 30L, 38L, 60L, 20L, 40L, 35L, 35L,
40L, 60L, 20L, 35L, 60L, 40L, 50L, 60L, 40L, 30L, 25L, 25L, 38L,
25L, 30L, 50L, 25L, 40L, 45L, 35L, 60L, 40L, 30L, 20L, 45L, 38L,
30L, 20L, 35L, 45L, 60L, 60L, 25L, 40L, 45L, 40L, 38L, 40L, 35L,
17L, 62L, 50L, 30L, 35L, 35L, 50L, 70L, 35L, 35L, 20L, 50L, 35L,
25L, 25L, 60L, 25L, 35L, 25L, 25L, 30L, 35L, 35L, 38L, 45L, 50L,
50L, 30L, 40L, 45L, 35L, 30L, 35L, 40L, 38L, 38L, 25L, 25L, 35L,
40L, 30L, 35L, 45L, 35L, 60L, 30L, 38L, 38L, 25L, 50L, 40L, 40L,
25L, 60L, 38L, 35L, 20L, 38L, 38L, 35L, 30L, 40L, 38L, 40L, 30L,
38L, 35L, 38L, 30L, 38L, 40L, 40L, 30L, 38L, 40L, 40L, 35L, 55L,
35L, 38L, 55L, 35L, 38L, 35L, 38L, 38L, 60L, 38L, 45L), platelets = c(265000,
263358.03, 162000, 210000, 327000, 204000, 127000, 454000, 263358.03,
388000, 368000, 253000, 136000, 276000, 427000, 47000, 262000,
166000, 237000, 87000, 276000, 297000, 289000, 368000, 263358.03,
149000, 196000, 284000, 153000, 2e+05, 263358.03, 360000, 319000,
302000, 188000, 228000, 226000, 321000, 305000, 329000, 263358.03,
153000, 185000, 218000, 194000, 310000, 271000, 451000, 140000,
395000, 166000, 418000, 263358.03, 351000, 255000, 461000, 223000,
216000, 319000, 254000, 390000, 216000, 254000, 385000, 263358.03,
119000, 213000, 274000, 244000, 497000, 374000, 122000, 243000,
149000, 266000, 204000, 317000, 237000, 283000, 324000, 293000,
263358.03, 196000, 172000, 302000, 406000, 173000, 304000, 235000,
181000, 249000, 297000, 263358.03, 210000, 327000, 219000, 254000,
255000, 318000, 221000, 298000, 263358.03, 149000, 226000, 286000,
621000, 263000, 226000, 304000, 850000, 306000, 228000, 252000,
351000, 328000, 164000, 271000, 507000, 203000, 263358.03, 210000,
162000, 228000, 127000, 217000, 237000, 271000, 3e+05, 267000,
227000, 249000, 250000, 263358.03, 295000, 231000, 263358.03,
172000, 305000, 221000, 211000, 263358.03, 348000, 329000, 229000,
338000, 266000, 218000, 242000, 225000, 228000, 235000, 244000,
184000, 263358.03, 235000, 194000, 277000, 262000, 235000, 362000,
242000, 174000, 448000, 75000, 334000, 192000, 220000, 70000,
270000, 305000, 263358.03, 325000, 176000, 189000, 281000, 337000,
105000, 132000, 267000, 279000, 303000, 221000, 265000, 224000,
219000, 389000, 153000, 365000, 201000, 275000, 350000, 309000,
260000, 160000, 126000, 223000, 263358.03, 259000, 279000, 263358.03,
73000, 377000, 220000, 212000, 277000, 362000, 226000, 186000,
283000, 268000, 389000, 147000, 481000, 244000, 290000, 203000,
358000, 151000, 271000, 371000, 263358.03, 194000, 365000, 130000,
504000, 265000, 189000, 141000, 237000, 274000, 62000, 185000,
255000, 330000, 305000, 406000, 248000, 173000, 257000, 263358.03,
533000, 249000, 255000, 220000, 264000, 282000, 314000, 246000,
301000, 223000, 404000, 231000, 274000, 236000, 263358.03, 334000,
294000, 253000, 233000, 308000, 203000, 283000, 198000, 208000,
147000, 362000, 263358.03, 133000, 302000, 222000, 263358.03,
221000, 215000, 189000, 150000, 422000, 327000, 25100, 232000,
451000, 241000, 51000, 215000, 263358.03, 279000, 336000, 279000,
543000, 263358.03, 390000, 222000, 133000, 382000, 179000, 155000,
270000, 742000, 140000, 395000), serum_creatinine = c(1.9, 1.1,
1.3, 1.9, 2.7, 2.1, 1.2, 1.1, 1.5, 9.4, 4, 0.9, 1.1, 1.1, 1,
1.3, 0.9, 0.8, 1, 1.9, 1.3, 1.6, 0.9, 0.8, 1.83, 1.9, 1, 1.3,
5.8, 1.2, 1.83, 3, 1, 1.2, 1, 3.5, 1, 1, 2.3, 3, 1.83, 1.2, 1.2,
1, 1.1, 1.9, 0.9, 0.6, 4.4, 1, 1, 1.4, 6.8, 1, 2.2, 2, 2.7, 0.6,
1.1, 1.3, 1, 2.3, 1.1, 1, 1.18, 2.9, 1.3, 1, 1.2, 1.83, 0.8,
0.9, 1, 1.3, 1.2, 0.7, 0.8, 1.2, 0.6, 0.9, 1.7, 1.18, 2.5, 1.8,
1, 0.7, 1.1, 0.8, 0.7, 1.1, 0.8, 1, 1.18, 1.7, 0.7, 1, 1.3, 1.1,
1.2, 1.1, 1.1, 1.18, 1.1, 1, 2.3, 1.7, 1.3, 0.9, 1.1, 1.3, 1.2,
1.2, 1.6, 1.3, 1.2, 1, 0.7, 3.2, 0.9, 1.83, 1.5, 1, 0.75, 0.9,
3.7, 1.3, 2.1, 0.8, 0.7, 3.4, 0.7, 6.1, 1.18, 1.3, 1.18, 1.18,
0.9, 2.1, 1, 0.8, 1.1, 0.9, 0.9, 0.9, 1.7, 0.7, 0.7, 1, 1.83,
0.9, 2.5, 0.9, 0.9, 1.18, 0.8, 1.7, 1.4, 1, 1.3, 1.1, 1.2, 0.8,
0.9, 0.9, 1.1, 1.3, 0.7, 2.4, 1, 0.8, 1.5, 0.9, 1.1, 0.8, 0.9,
1, 1, 1, 1.2, 0.7, 0.9, 1, 1.2, 2.5, 1.2, 1.5, 0.6, 2.1, 1, 0.9,
2.1, 1.5, 0.7, 1.18, 1.6, 1.8, 1.18, 0.8, 1, 1.8, 0.7, 1, 0.9,
3.5, 0.7, 1, 0.8, 0.9, 1, 0.8, 1, 0.8, 1.4, 1.6, 0.8, 1.3, 0.9,
9, 1.1, 0.7, 1.83, 1.1, 1.1, 0.8, 1, 1.4, 1.3, 1, 5, 1.2, 1.7,
1.1, 0.9, 1.4, 1.1, 1.1, 1.1, 1.2, 1, 1.18, 1.3, 1.3, 1.1, 0.9,
1.8, 1.4, 1.1, 2.4, 1, 1.2, 0.5, 0.8, 1, 1.2, 1, 1, 1.7, 1, 0.8,
0.7, 1, 0.7, 1.4, 1, 1.2, 0.9, 1.83, 1.7, 0.9, 1, 1.6, 0.9, 1.2,
0.7, 1, 0.8, 1.1, 1.1, 0.7, 1.3, 1, 2.7, 3.8, 1.1, 0.8, 1.2,
1.7, 1, 1.1, 0.9, 0.8, 1.4, 1, 0.9, 1.1, 1.2, 0.8, 1.4, 1.6),
serum_sodium = c(130L, 136L, 129L, 137L, 116L, 132L, 137L,
131L, 138L, 133L, 131L, 140L, 137L, 137L, 138L, 136L, 140L,
127L, 140L, 121L, 137L, 136L, 140L, 135L, 134L, 144L, 138L,
136L, 134L, 132L, 134L, 132L, 128L, 138L, 140L, 134L, 134L,
145L, 137L, 142L, 134L, 136L, 139L, 134L, 142L, 135L, 130L,
138L, 133L, 140L, 138L, 139L, 146L, 134L, 132L, 132L, 138L,
138L, 136L, 136L, 139L, 131L, 139L, 145L, 137L, 127L, 136L,
140L, 142L, 135L, 140L, 139L, 132L, 137L, 134L, 139L, 140L,
140L, 131L, 140L, 136L, 137L, 132L, 133L, 141L, 140L, 137L,
140L, 139L, 144L, 136L, 133L, 137L, 135L, 142L, 141L, 134L,
136L, 137L, 140L, 141L, 137L, 144L, 140L, 143L, 138L, 137L,
138L, 133L, 142L, 132L, 135L, 136L, 137L, 126L, 139L, 136L,
138L, 140L, 134L, 135L, 136L, 140L, 145L, 134L, 135L, 124L,
137L, 136L, 145L, 138L, 131L, 137L, 145L, 137L, 137L, 137L,
130L, 136L, 138L, 134L, 140L, 132L, 141L, 139L, 141L, 136L,
137L, 134L, 136L, 135L, 139L, 134L, 137L, 136L, 140L, 136L,
136L, 134L, 139L, 134L, 139L, 137L, 142L, 139L, 135L, 133L,
134L, 138L, 133L, 136L, 140L, 145L, 139L, 137L, 138L, 135L,
140L, 145L, 140L, 136L, 136L, 136L, 134L, 137L, 136L, 134L,
144L, 136L, 140L, 134L, 135L, 130L, 142L, 135L, 145L, 137L,
138L, 134L, 113L, 137L, 136L, 138L, 136L, 137L, 136L, 141L,
136L, 141L, 130L, 136L, 140L, 138L, 130L, 134L, 134L, 141L,
137L, 134L, 140L, 134L, 145L, 139L, 134L, 138L, 125L, 132L,
140L, 130L, 134L, 127L, 134L, 137L, 137L, 137L, 137L, 148L,
132L, 136L, 137L, 139L, 136L, 136L, 141L, 134L, 137L, 138L,
135L, 142L, 130L, 139L, 143L, 133L, 132L, 139L, 142L, 139L,
139L, 135L, 135L, 138L, 133L, 129L, 140L, 141L, 140L, 134L,
140L, 140L, 132L, 130L, 134L, 133L, 140L, 137L, 137L, 142L,
140L, 136L, 136L, 137L, 136L, 128L, 138L, 141L, 135L, 140L,
132L, 142L, 144L, 141L, 139L, 140L, 136L, 143L, 139L, 138L,
140L, 136L), sex = structure(c(2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 2L), levels = c("Female", "Male"), class = "factor"),
smoking = structure(c(1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 2L, 2L), levels = c("No", "Yes"), class = "factor"),
time = c(4L, 6L, 7L, 7L, 8L, 8L, 10L, 10L, 10L, 10L, 10L,
10L, 11L, 11L, 12L, 13L, 14L, 14L, 15L, 15L, 16L, 20L, 20L,
22L, 23L, 23L, 24L, 26L, 26L, 26L, 27L, 28L, 28L, 29L, 29L,
30L, 30L, 30L, 30L, 30L, 31L, 32L, 33L, 33L, 33L, 35L, 38L,
40L, 41L, 42L, 43L, 43L, 43L, 44L, 45L, 50L, 54L, 54L, 55L,
59L, 60L, 60L, 60L, 61L, 63L, 64L, 65L, 65L, 66L, 67L, 68L,
71L, 72L, 72L, 73L, 73L, 74L, 74L, 74L, 74L, 75L, 76L, 77L,
78L, 78L, 79L, 79L, 79L, 79L, 79L, 80L, 80L, 82L, 82L, 83L,
83L, 83L, 85L, 85L, 86L, 87L, 87L, 87L, 87L, 87L, 88L, 88L,
88L, 88L, 88L, 90L, 90L, 90L, 90L, 91L, 91L, 94L, 94L, 94L,
95L, 95L, 95L, 95L, 95L, 96L, 97L, 100L, 104L, 104L, 105L,
106L, 107L, 107L, 107L, 107L, 107L, 107L, 108L, 108L, 108L,
109L, 109L, 109L, 110L, 111L, 112L, 112L, 113L, 113L, 115L,
115L, 117L, 118L, 119L, 120L, 120L, 120L, 120L, 121L, 121L,
121L, 121L, 123L, 126L, 129L, 130L, 134L, 135L, 140L, 145L,
145L, 146L, 146L, 146L, 146L, 146L, 147L, 147L, 147L, 147L,
148L, 150L, 154L, 162L, 170L, 171L, 172L, 172L, 172L, 174L,
174L, 174L, 175L, 180L, 180L, 180L, 185L, 186L, 186L, 186L,
186L, 186L, 186L, 187L, 187L, 187L, 187L, 187L, 187L, 187L,
188L, 192L, 192L, 193L, 194L, 195L, 196L, 196L, 197L, 197L,
198L, 200L, 201L, 201L, 205L, 205L, 205L, 206L, 207L, 207L,
207L, 208L, 209L, 209L, 209L, 209L, 209L, 210L, 210L, 211L,
212L, 212L, 212L, 213L, 213L, 213L, 214L, 214L, 214L, 214L,
214L, 215L, 215L, 215L, 215L, 216L, 220L, 230L, 230L, 231L,
233L, 233L, 235L, 237L, 237L, 240L, 241L, 244L, 244L, 244L,
244L, 244L, 245L, 245L, 245L, 245L, 245L, 246L, 246L, 246L,
247L, 250L, 250L, 250L, 250L, 250L, 250L, 250L, 256L, 256L,
257L, 258L, 258L, 270L, 270L, 271L, 278L, 280L, 285L), DEATH_EVENT = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), hypertension = structure(c(2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L), levels = c("Absent",
"Present"), class = "factor")), row.names = c(NA, -299L), class = "data.frame")
CodePudding user response:
To correct your code, apply the `!` operator to the whole outlier
condition (the `>=` version joined with `|`), like this:
# Keep only rows that are NOT at or above the upper fence
# (Q3 + 1.5 * IQR) in either column, then count them.
HF2 %>%
  filter(!(
    serum_creatinine >=
      quantile(serum_creatinine, probs = .75) +
        (quantile(serum_creatinine, probs = .75) -
          quantile(serum_creatinine, probs = .25)) * 1.5 |
      creatinine_phosphokinase >=
        quantile(creatinine_phosphokinase, probs = .75) +
          (quantile(creatinine_phosphokinase, probs = .75) -
            quantile(creatinine_phosphokinase, probs = .25)) * 1.5
  )) %>%
  nrow()
[1] 243
Now, if you want to write cleaner code, I suggest creating a function like:
#' Flag upper-tail outliers with the Q3 + k * IQR rule
#'
#' @param x A numeric vector.
#' @param k Multiplier applied to the interquartile range; defaults to 1.5.
#' @return A logical vector the same length as `x`: `TRUE` where the value is
#'   at or above the upper fence, `FALSE` where it is below, and `NA` where
#'   `x` is `NA`.
is_outlier <- function(x, k = 1.5) {
  # Missing values are ignored when computing the fence; `names = FALSE`
  # drops the "75%" label so it cannot leak into the result.
  upper_fence <- quantile(x, probs = 0.75, na.rm = TRUE, names = FALSE) +
    k * IQR(x, na.rm = TRUE)
  x >= upper_fence
}
And then
# Count the rows that are not flagged by is_outlier() in either column.
# filter() combines comma-separated conditions with AND, which is the
# De Morgan equivalent of negating the OR of the two flags.
HF2 %>%
  filter(
    !is_outlier(serum_creatinine),
    !is_outlier(creatinine_phosphokinase)
  ) %>%
  nrow()
[1] 243