I'm trying to remove outliers from this nested dataset
df_join
# A tibble: 12 x 2
# Groups: signals [12]
signals data
<chr> <list>
1 P3FCz <tibble [75 x 5]>
2 P3Cz <tibble [75 x 5]>
3 P3Pz <tibble [75 x 5]>
4 LPPearlyFCz <tibble [75 x 5]>
5 LPPearlyCz <tibble [75 x 5]>
6 LPPearlyPz <tibble [75 x 5]>
7 LPP1FCz <tibble [75 x 5]>
8 LPP1Cz <tibble [75 x 5]>
9 LPP1Pz <tibble [75 x 5]>
10 LPP2FCz <tibble [75 x 5]>
11 LPP2Cz <tibble [75 x 5]>
12 LPP2Pz <tibble [75 x 5]>
For instance, its first element contains the following variables:
df_join[[2]][[1]]
# A tibble: 75 x 5
ID GR SES COND value
<fct> <fct> <fct> <fct> <dbl>
1 01 RP V NEG-CTR -11.6
2 01 RP V NEG-NOC -11.1
3 01 RP V NEU-NOC -4.00
4 04 RP V NEG-CTR -0.314
5 04 RP V NEG-NOC 0.239
6 04 RP V NEU-NOC 5.04
7 06 RP V NEG-CTR -0.214
8 06 RP V NEG-NOC -2.96
9 06 RP V NEU-NOC -1.97
10 07 RP V NEG-CTR -2.83
the entire content of it is the following one:
> dput(head(df_join))
structure(list(signals = c("P3FCz", "P3Cz", "P3Pz", "LPPearlyFCz",
"LPPearlyCz", "LPPearlyPz"), data = list(structure(list(ID = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L,
6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L,
11L, 12L, 12L, 12L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L,
16L, 16L, 16L, 17L, 17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L,
20L, 20L, 21L, 21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L,
24L, 25L, 25L, 25L), .Label = c("01", "04", "06", "07", "08",
"09", "10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-11.6312151716924, -11.1438413285935,
-3.99591470944713, -0.314155675382471, 0.238885648959708,
5.03749946898385, -0.213621915029167, -2.96032491743069,
-1.97168681693488, -2.83109425298642, 1.09291198163802, -6.692991645215,
4.23849942428043, 2.9898889629932, 3.5510699900835, 9.57481668808606,
5.4167795618285, 1.7067607715475, -6.13036076093477, -2.82955734597919,
-2.50672211111696, 0.528517585832501, 8.16418133488309, 1.88777321897925,
-7.73588468896919, -9.83058052401056, -6.97442700196932,
1.27327945355082, 2.11962397764132, 0.524299677616254, -1.83310726842883,
0.658810483381172, -0.261373488428192, 4.37524298634374,
0.625555654900511, 3.19617639836154, 0.0405517582137798,
-3.29357103412113, -0.381435057304614, -5.73445509910268,
-6.1129152355645, -2.45744234877604, 2.95352732001065, 0.527721249096473,
1.91803490989119, -3.46703346467546, -2.40438419043702, -5.35374408162217,
-7.27028665849262, -7.1532211375959, -5.39955520296854, 2.65765002364624,
0.372495441513391, 6.24433066412776, 1.85698518142405, -0.564454675803529,
-0.068523080368053, -7.04782633579147, -4.52263283590558,
-6.62134671432544, 4.56661945182626, 3.05859761335498, 2.02997952225347,
-6.10523962206958, -0.521871236969702, -3.97851995684846,
-2.61258020387919, -4.13974828699279, -3.9210032516844, -4.63162466544638,
-4.36762718685405, -6.71005969834916, -4.22719611676328,
-0.229916506217565, -5.69725200870146)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-5.16524399006139, -5.53112490175437,
0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441,
-1.30019374375434, -3.59899040898949, -1.92340529575071,
2.19344184533265, 5.87900720863083, -5.92378937757888, 2.44958531767688,
3.10043497883256, 1.65779442628225, 13.7118233181713, 6.86178446511352,
5.31481098188172, -4.13240668697805, 0.162182285588285, 0.142083484505352,
5.42592103255673, 14.5496375672716, 4.52018125654081, -2.40677805475299,
-5.3832670295207, -1.55736964635117, 3.48359241788107, 4.23167123533126,
2.00051785325202, 1.48755216347718, 2.37269462739372, 1.30346907198835,
3.89476490634811, 1.87516303240986, 4.36353100770575, 1.9413417416824,
-2.22114447555529, -0.015852062711641, -2.76146409940467,
-3.51627712447581, 1.01799377568815, 1.74783962328435, 1.1303870721987,
2.16398550183836, -3.31557794753334, -1.83920975041768, -6.06703163736936,
-8.1566939611461, -9.23030396302541, -4.35545141573936, 0.906302081219897,
0.45401759063429, 3.80236232314171, 4.0336657306528, 2.0185967445137,
0.835589319243251, -4.6805488231028, -1.20746167339041, -5.50475999427345,
4.96594373869991, 4.1349308440931, 3.00187233307059, -5.61465293602653,
0.544596077279702, -5.20450410570445, -0.0325220589039272,
-2.28038421035601, -2.01375702882255, -1.6547144697087, -0.619979893871085,
-4.48258340054462, -1.42281778522059, 2.62315679073783, -4.13736508533355
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-75L)), structure(list(ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L,
8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L,
13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L,
21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L
), .Label = c("01", "04", "06", "07", "08", "09", "10", "11",
"12", "13", "15", "16", "17", "18", "19", "21", "22", "23", "25",
"27", "28", "30", "44", "46", "49"), class = "factor"), GR = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"),
SES = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), COND = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), value = c(11.8802266972569,
12.1053426662461, 12.955441582096, 15.0981004360619, 15.4046229884164,
16.671036999147, 3.13771453335467, -0.0892565159000666, 2.15365554736525,
13.6778924406572, 14.3862738306396, 6.86762877785576, 7.47946451329025,
8.93405130318593, 8.45962311067909, 23.4166601996042, 15.1868092142896,
9.97183712753913, 6.267521071803, 10.142198458411, 10.6320358418368,
12.9998037913548, 20.7052065690674, 11.8852179570666, 15.7899796085713,
7.50729833890206, 14.3076172484818, 9.93797956768228, 10.7693238464384,
5.04681800218272, 5.16656503460515, 7.87875085817396, 2.29899409536951,
10.0135486953849, 5.48278706243332, 7.81908431468528, 8.64382513728869,
3.35777109534179, 3.47474629234488, 4.35678644331281, 3.47085321062162,
6.56231512354717, 4.93825547529124, 7.33985613752315, 6.81966900599588,
6.54487921689425, 7.25872117706077, 1.10301223694429, -0.856423579793706,
-0.887835692028378, -0.931653372049331, 5.6617683754256,
2.29939831067085, 5.1554825066748, 6.59026080217083, 3.0741733363644,
1.80359068950898, 1.63892755704177, 3.857933716935, 0.769316188513939,
10.7031907391191, 9.53278894637555, 8.01071628743378, 6.04891324234645,
11.1964453850602, 3.46633322373091, 14.4393884282958, 11.2339563353478,
7.74933708914689, 7.1182095475238, 7.39260082121406, 0.627435381320771,
9.15473202689768, 13.6559037433263, 7.14786907480758)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-11.7785042972793, -9.14927207125904,
-7.58190508537766, -4.01515836011381, -6.60165385653499,
-2.02861964460179, 4.46729570509601, 2.54036572774646, 2.22923889930115,
-0.883620011106743, -2.63569087592267, -2.0629672230873,
1.14544537612393, 2.08056674659401, 0.0422658298956365, 13.2986259796748,
5.06669915366333, 3.93467692474742, 0.0229069420708053, 4.31923128857779,
0.237726051904304, 1.89972383690448, 3.2371880079134, 0.318100791495115,
-8.08292381883298, -5.73174008540523, -15.7998485301436,
1.75469999857951, 0.677370118816266, -1.8397955509895, 2.55445787016256,
-0.380810453692585, 0.62462329496673, 2.61316333850434, 2.68202480583985,
1.76690658846479, 0.148635887703097, -0.958853757041888,
-3.17305964093897, -7.82526758429289, -6.58557573679886,
-4.39207076049089, 2.36752476749952, 0.594715760553033, -0.29794568443312,
-4.5365387390683, 0.196832250811775, -2.70852853745588, 0.498995124872827,
0.165171574219401, 0.269498974991661, 0.901948386281446,
-2.45955661653299, 1.63525170542944, 0.155897732673534, 1.8491735212703,
-0.856727109535223, -1.16182571974245, 1.07658425742917,
-2.21433585407388, 4.3385479368043, 4.40588599635354, 0.127710423625772,
-6.26956613362656, -1.17658595005389, -7.25886366924741,
-0.888293709383838, -2.14177059335841, -2.42141595261389,
-2.958120275175, -5.1274001953303, -5.32347488769128, -4.41290818553442,
-1.21404719262173, -4.23649270310915)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L,
13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L,
21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L,
25L, 25L), .Label = c("01", "04", "06", "07", "08", "09",
"10", "11", "12", "13", "15", "16", "17", "18", "19", "21",
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"),
GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"),
COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
), class = "factor"), value = c(-5.96429031525769, -5.10918437158799,
-2.81732229625975, -1.43557366487622, -3.14872157912645,
0.160393685024631, 3.52155765271648, 2.10437989449921, 2.70693992810407,
5.49897156207812, 5.81171180245335, -1.37301251388987, -0.434363848460157,
2.87987510596148, -1.27152670283348, 17.2093269365993, 7.79412746755931,
8.11964589961276, 4.95253363860044, 9.50695673265293, 4.15235381401148,
6.1294488368639, 8.01447499455337, 0.783414018677801, -1.24197194087055,
-0.487178595894761, -9.79031812534203, 4.22150266269492,
4.20139847550095, 0.208005397351335, 4.19096721581768, 0.815283302847055,
1.48137456347872, 2.0809543999959, 4.35199943309111, 2.84860039832237,
3.05879540677983, 2.11976068962167, -0.269002712326028, -2.77155065610474,
-2.59002218694999, 0.17928456999128, 2.24515223348079, 1.88805943988563,
-0.0920286086411814, -2.00968595029144, 2.59427260100332,
-1.27622011197768, 0.588399071755827, -1.43982473126936,
1.96978732491278, -0.338674980283045, -1.86484698930706,
-0.0154791822607025, 2.55036185373462, 4.42520405730058,
-0.599156247027551, 1.60091251589958, 4.7367320574401, -0.192490723623988,
4.8452288234686, 5.71745745981867, 1.02554478706585, -4.5951256708181,
1.1704842909792, -7.42770276334892, 3.15655538248828, -0.639830772856786,
-0.345116641695513, -0.0391030568720636, -2.61585906518491,
-2.71685194532693, -1.7348388034111, 1.00287124847525, -2.4844653851482
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-75L)), structure(list(ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L,
8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L,
13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L,
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L,
21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L
), .Label = c("01", "04", "06", "07", "08", "09", "10", "11",
"12", "13", "15", "16", "17", "18", "19", "21", "22", "23", "25",
"27", "28", "30", "44", "46", "49"), class = "factor"), GR = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"),
SES = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), COND = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), value = c(8.23981597718437,
9.51261484648731, 9.42367409925817, 5.06332653216481, 5.02619159395405,
9.07903916629231, 7.56089165217984, 5.49719893790597, 4.91476855238182,
13.0320953572069, 10.8414516494484, 5.86927622259489, 3.25309970442897,
4.6847880297099, 2.71096740085175, 25.567439566524, 16.3241813617706,
13.0990192799703, 11.9200281736866, 14.6901305277101, 9.67397418905514,
10.2974302220899, 12.0768070828642, 5.9401530589224, 12.4817579327688,
12.419526465857, 1.00612108990875, 9.63063375751153, 10.5631237176538,
3.08031473770521, 3.35694102903017, 4.28046277054405, -0.133592200169464,
6.9103658689166, 7.64737651416791, 6.75669517393108, 8.5369185279747,
7.08645126073423, 4.47409706618326, 4.39617687043259, 3.27924738047746,
6.06169418872804, 5.34939694712468, 5.58288092654703, 4.85729686493463,
7.38032829587839, 11.7259526759912, 4.95764559864061, 6.24066579989613,
3.49843659402445, 4.07498375647916, 3.55732294589389, 1.33918111568512,
0.956782967443242, 2.32002496709926, 3.15289777246607, -0.832211906889126,
6.39254974438057, 7.0533787627062, 2.97245026797807, 6.23573445580928,
7.6052386193207, 2.98791225155534, 3.10850022259445, 8.12060882554471,
-0.00459651443883508, 13.5899217198075, 9.93070913311253,
8.10285456644801, 5.04464304009428, 2.02262615478956, 1.0510618938653,
5.62233873107127, 10.1193593084848, 5.87476640145049)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -75L)))), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
signals = c("LPPearlyCz", "LPPearlyFCz", "LPPearlyPz", "P3Cz",
"P3FCz", "P3Pz"), .rows = structure(list(5L, 4L, 6L, 2L,
1L, 3L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), .drop = TRUE))
>
I've tried to check for the presence of outliers as follows:
# Flag outliers in `value` within each COND, pooling all signals together.
outliers_table <- df_join %>%
  # Name the list column explicitly: calling unnest() without `cols` is
  # deprecated in tidyr >= 1.0 and emits a warning.
  unnest(cols = data) %>%
  dplyr::select(COND, signals, value) %>%
  group_by(COND) %>% # grouping by COND is equivalent to grouping by time
  identify_outliers(value)
That returns
A tibble: 30 x 5
COND signals value is.outlier is.extreme
<fct> <chr> <dbl> <lgl> <lgl>
1 NEG-CTR P3FCz -11.6 TRUE FALSE
2 NEG-CTR P3Cz 13.7 TRUE FALSE
3 NEG-CTR P3Pz 15.1 TRUE FALSE
4 NEG-CTR P3Pz 13.7 TRUE FALSE
5 NEG-CTR P3Pz 23.4 TRUE TRUE
6 NEG-CTR P3Pz 15.8 TRUE FALSE
7 NEG-CTR P3Pz 14.4 TRUE FALSE
8 NEG-CTR LPPearlyFCz -11.8 TRUE FALSE
9 NEG-CTR LPPearlyCz 17.2 TRUE FALSE
10 NEG-CTR LPPearlyPz 25.6 TRUE TRUE
If I'm interested in deleting all of the values that are TRULY EXTREME, how could I do that using some iterative function or an if statement? Please also consider other alternatives if they are easier (for example, extending the command I've written with another %>% step) than writing a for loop or some other function.
Since I'm just a beginner, here is the failing code I've written:
# Attempt to keep only the rows flagged as both outlier and extreme.
# NOTE(review): the result is stored in `outliers_bale`, but the lookup below
# reads `outliers_table`, so this filtered table is never used.
outliers_bale <- df_join %>%
unnest() %>%
dplyr::select(COND, signals, value) %>%
group_by(COND) %>% #it is the equivalent to use as grouping variable the time
identify_outliers(value) %>%
filter(is.outlier & is.extreme)
# Values are taken from `outliers_table` (the earlier table holding every
# flagged outlier), not from the extreme-only `outliers_bale` built above.
values <- outliers_table$value
# NOTE(review): `df_join$data` is a list column of tibbles, so `%in%` compares
# whole tibbles against numbers rather than individual `value` entries;
# presumably no row is ever dropped here -- verify.
df_join[!(df_join$data %in% values), ]
And I am not able to figure out whether it worked or not.
Thanks in advance
CodePudding user response:
All right. Let's do it together step by step. As I understand it, you are concerned that your data (which I keep in the variable df) contains outliers and even extreme values. First, we will extract just one of the nested tibbles from your data and filter it for COND == "NEG-NOC"
library(tidyverse)
library(rstatix)
library(outliers)
# Keep only the NEG-NOC rows of the first nested tibble.
data <- filter(df$data[[1]], COND == "NEG-NOC")
Now let's consider what method of outlier identification we will use.
We can use the boxplot.stats function for this.
boxplot.stats(data$value)$out
#[1] 8.164181
This is fine, but it only gives us outliers in vector form. The second way is to use identify_outliers
. This gives us a tibble
but still only with those lines that have these outlier values.
data %>% identify_outliers(variable = "value")
# # A tibble: 1 x 7
# ID GR SES COND value is.outlier is.extreme
# <fct> <fct> <fct> <fct> <dbl> <lgl> <lgl>
# 1 11 RP V NEG-NOC 8.16 TRUE FALSE
Well, let's use the outlier function from the outliers package. This can give us a logical vector.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
outlier(data$value, opposite = TRUE)
#[1] 8.164181
# With `logical = TRUE` the same call returns a logical vector instead of the value.
outlier(data$value, opposite = TRUE, logical = TRUE)
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#[22] FALSE FALSE FALSE FALSE
However, neither of these methods will assist you in deciding what to do with these outliers. Please read this carefully . As you can see, you have three options to choose from: Imputation, Capping, Prediction. Which one will you choose? I chose Capping. So I wrote a tiny function that identifies outliers, extreme values and additionally returns your values after Capping.
# Flag outliers and extreme values in `data$value` and cap the outliers.
#
# A value is an outlier when it lies more than 1.5 * IQR below the first or
# above the third quartile, and extreme when it lies more than 3 * IQR beyond
# them. In `cap.value`, low outliers are replaced by the 5th percentile and
# high outliers by the 95th percentile; the `value` column is left untouched.
#
# data: a data frame (or tibble) with a numeric column `value`.
# Returns `data` with three added columns: `is.outlier`, `is.extreme` and
# `cap.value`.
#
# No grouping is done here: the function treats all rows of `data` as one
# sample, so it also works on the per-group chunks handed over by
# dplyr::group_modify(), which do not contain the grouping column.
fOutCapp <- function(data) {
  x <- data$value

  # unname() keeps the quantile labels ("25%", ...) out of the results.
  qnt <- unname(quantile(x, probs = c(0.25, 0.75), na.rm = TRUE))
  caps <- unname(quantile(x, probs = c(0.05, 0.95), na.rm = TRUE))
  iqr <- IQR(x, na.rm = TRUE)

  lower_fence <- qnt[1] - 1.5 * iqr
  upper_fence <- qnt[2] + 1.5 * iqr
  lower_extreme <- qnt[1] - 3 * iqr
  upper_extreme <- qnt[2] + 3 * iqr

  # Both flags are derived from the ORIGINAL values, before any capping;
  # testing the capped values would hide every extreme observation.
  is_outlier <- (x < lower_fence) | (x > upper_fence)
  is_extreme <- (x < lower_extreme) | (x > upper_extreme)

  # which() skips NA comparisons, so missing values stay missing.
  capped <- x
  capped[which(x < lower_fence)] <- caps[1]
  capped[which(x > upper_fence)] <- caps[2]

  data$is.outlier <- is_outlier
  data$is.extreme <- is_extreme
  data$cap.value <- capped
  data
}
Let's see if it works
data %>% fOutCapp() %>% filter(is.outlier)
# A tibble: 1 x 8
# ID GR SES COND value is.outlier is.extreme cap.value
# <fct> <fct> <fct> <fct> <dbl> <lgl> <lgl> <dbl>
# 1 11 RP V NEG-NOC 8.16 TRUE FALSE 4.95
data %>% fOutCapp()
# A tibble: 25 x 8
# ID GR SES COND value is.outlier is.extreme cap.value
# <fct> <fct> <fct> <fct> <dbl> <lgl> <lgl> <dbl>
# 1 01 RP V NEG-NOC -11.1 FALSE FALSE -11.1
# 2 04 RP V NEG-NOC 0.239 FALSE FALSE 0.239
# 3 06 RP V NEG-NOC -2.96 FALSE FALSE -2.96
# 4 07 RP V NEG-NOC 1.09 FALSE FALSE 1.09
# 5 08 RP V NEG-NOC 2.99 FALSE FALSE 2.99
# 6 09 RP V NEG-NOC 5.42 FALSE FALSE 5.42
# 7 10 RP V NEG-NOC -2.83 FALSE FALSE -2.83
# 8 11 RP V NEG-NOC 8.16 TRUE FALSE 4.95
# 9 12 RP V NEG-NOC -9.83 FALSE FALSE -9.83
# 10 13 RP V NEG-NOC 2.12 FALSE FALSE 2.12
# ... with 15 more rows
Note, however, that the data inside the data column is grouped by the variable COND. So let's write one more tiny function that applies our fOutCapp to each of the groups.
# Run fOutCapp() separately on each COND group of `data`.
# The returned tibble is still grouped by COND.
fOutCappGroup <- function(data) {
  by_cond <- group_by(data, COND)
  group_modify(by_cond, function(.x, .y) fOutCapp(.x))
}
df$data[[1]] %>% fOutCappGroup()
# # A tibble: 75 x 8
# # Groups: COND [3]
# COND ID GR SES value is.outlier is.extreme cap.value
# <fct> <fct> <fct> <fct> <dbl> <lgl> <lgl> <dbl>
# 1 NEG-CTR 01 RP V -11.6 FALSE FALSE -11.6
# 2 NEG-CTR 04 RP V -0.314 FALSE FALSE -0.314
# 3 NEG-CTR 06 RP V -0.214 FALSE FALSE -0.214
# 4 NEG-CTR 07 RP V -2.83 FALSE FALSE -2.83
# 5 NEG-CTR 08 RP V 4.24 FALSE FALSE 4.24
# 6 NEG-CTR 09 RP V 9.57 FALSE FALSE 9.57
# 7 NEG-CTR 10 RP V -6.13 FALSE FALSE -6.13
# 8 NEG-CTR 11 RP V 0.529 FALSE FALSE 0.529
# 9 NEG-CTR 12 RP V -7.74 FALSE FALSE -7.74
# 10 NEG-CTR 13 RP V 1.27 FALSE FALSE 1.27
# # ... with 65 more rows
Bingo. Everything works great. Now we only need to do one simple mutation.
# Apply the per-COND capping to every signal's nested tibble, then flatten
# the list column back into ordinary rows.
df %>%
  group_by(signals) %>%
  mutate(data = map(data, fOutCappGroup)) %>%
  unnest(cols = data)
output
# A tibble: 450 x 9
# Groups: signals [6]
signals COND ID GR SES value is.outlier is.extreme cap.value
<chr> <fct> <fct> <fct> <fct> <dbl> <lgl> <lgl> <dbl>
1 P3FCz NEG-CTR 01 RP V -11.6 FALSE FALSE -11.6
2 P3FCz NEG-CTR 04 RP V -0.314 FALSE FALSE -0.314
3 P3FCz NEG-CTR 06 RP V -0.214 FALSE FALSE -0.214
4 P3FCz NEG-CTR 07 RP V -2.83 FALSE FALSE -2.83
5 P3FCz NEG-CTR 08 RP V 4.24 FALSE FALSE 4.24
6 P3FCz NEG-CTR 09 RP V 9.57 FALSE FALSE 9.57
7 P3FCz NEG-CTR 10 RP V -6.13 FALSE FALSE -6.13
8 P3FCz NEG-CTR 11 RP V 0.529 FALSE FALSE 0.529
9 P3FCz NEG-CTR 12 RP V -7.74 FALSE FALSE -7.74
10 P3FCz NEG-CTR 13 RP V 1.27 FALSE FALSE 1.27
# ... with 440 more rows
This is how your task has been completed. Not only did we identify the outliers, but we also applied capping to them. Now decide whether to use the value variable or the cap.value variable for further analysis. The decision is yours.
CodePudding user response:
If your function identify_outliers
returns TRUE
/FALSE
, based on whether or not a given value
is an outlier, then you can use filter(identify_outliers(value))
and move on.