Home > Enterprise >  How to find the nearest point of the reference points in ggplot scatterplot
How to find the nearest point of the reference points in ggplot scatterplot

Time:03-11

I have a dataframe called X. I can plot this with the code:

# Scatterplot of PC2 against PC4 with points coloured by ADGC_COHORT.
# ggplot2 layers must be joined with `+`; without it the layers are never
# added to the plot.
# NOTE(review): the sample data contains the level "Reported_AA", which has no
# entry in `values` below (only Reported_Asian / Reported_African) -- confirm
# the intended colour mapping for those points.
ggplot(X, aes(x = PC2, y = PC4, color = ADGC_COHORT)) +
  geom_point() +
  xlab("PC2") +
  ylab("PC4") +
  ggtitle("ADGC_Selected_Hispanic") +
  scale_color_manual(values = c(Reported_NHW = 'black',
                                Reported_Hispanic = 'red',
                                Reported_Asian = 'violet',
                                Reported_African = 'blue'))

I need to find the nearest black dot for each red dot. How do I do it? Can I use the dist function?

X <- structure(list(FID = c("0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0"), IID = c("10AD23195", "10AD31971", "10AD32127", "10AD32289", 
"10AD32644", "10AD33438", "11AD34642", "11AD35811", "10267", 
"11003004", "22200552", "22201305", "22201431", "41948357", "43556969", 
"51000837", "51002675", "51002686", "61000195", "70671651", "96760004", 
"259506102", "273301207", "A0000336", "ACT150285", "B0000041", 
"NACC001959", "NACC020841", "NACC022771", "NACC024092", "NACC024498", 
"NACC030255", "NACC038815", "NACC053944", "NACC075374", "NACC079985", 
"NACC084450", "NACC087090", "NACC087133", "NACC154574", "NACC165306", 
"NACC167216", "NACC167723", "NACC175958", "NACC181584", "NACC219993", 
"NACC226012", "NACC226739", "NACC229219", "NACC229318", "NACC272965", 
"NACC275260", "NACC275998", "NACC280668", "NACC281764", "NACC322676", 
"NACC335048", "NACC352403", "NACC361877", "NACC365120", "NACC383575", 
"NACC396238", "NACC401953", "NACC433042", "NACC433486", "NACC436835", 
"NACC450485", "NACC457252", "NACC460993", "NACC470988", "NACC475092", 
"NACC481835", "NACC486601", "NACC490238", "NACC496710", "NACC497363", 
"NACC497643", "NACC497766", "NACC504122", "NACC521787", "NACC536405", 
"NACC541311", "NACC553751", "NACC571297", "NACC592868", "NACC608826", 
"NACC612796", "NACC614149", "NACC628344", "NACC631119", "NACC639120", 
"NACC639291", "NACC646552", "NACC651614", "NACC658435", "NACC662008", 
"NACC684540", "NACC702552", "NACC731950", "NACC745118"), PC1 = c(-0.00500635, 
-0.0071419, -0.00720665, -0.00828047, -0.00249927, -0.0113208, 
-0.0025202, -0.0039718, 0.00105025, -0.000230377, -0.00394814, 
-0.00999936, -0.00308066, -0.00243508, -0.000537416, 0.000840247, 
-0.000313625, 5.40352e-05, -0.00144882, 0.000162621, -0.00128288, 
0.00109402, -5.58069e-05, 9.20493e-05, 0.00163322, 0.00192847, 
-7.99048e-05, -0.000924237, -0.000311757, 0.000756335, -0.00175941, 
0.00075175, -0.000787873, -0.00108393, -0.000577144, 0.000100956, 
0.000876609, 0.00136654, 0.00131008, 0.000749385, 0.000769559, 
0.000140682, -0.00060905, -0.00248822, -0.000845463, -0.00290189, 
-0.000707708, -0.00248295, -0.00130689, 0.00184288, -0.000660284, 
-0.00196331, 0.000810278, -0.00124366, -0.00200878, -0.000200944, 
-0.000378608, -0.0057622, -0.00238826, -0.00170189, -0.000959599, 
-0.00127934, 0.000382537, 0.000957577, 0.00050995, -0.000573417, 
-0.000780025, 0.000133208, -0.00101257, 0.000219465, 0.000479164, 
-0.00154787, -0.000448425, -0.00152911, -0.000188187, -0.00311675, 
0.000150829, 0.00100682, 0.00141525, -0.000741198, -0.0023244, 
0.000596799, 0.000296812, 0.000891851, -0.000172286, 0.000381284, 
0.00109817, 0.00136706, 0.00144102, -0.000605082, -0.000292632, 
0.000782823, 0.000515744, -0.00106719, -0.00101844, -3.82677e-05, 
-0.00185289, -0.00265205, -0.00299918, -0.00158714), PC2 = c(0.000237796, 
-0.00303068, -0.00213157, -0.00289113, -0.000117426, -0.00251395, 
0.00714161, -0.000175241, 1.97294e-05, 0.00273247, -0.00184243, 
-0.00298054, -0.00098264, 0.00703866, 0.00296748, 0.00171164, 
0.00241205, 0.00207537, 0.00478611, 0.00334661, 0.00463375, 0.00244878, 
0.00187759, 0.00126081, -8.87074e-05, 0.00139764, 0.000500133, 
0.00569144, 0.000470599, 0.000307262, 0.00425025, 0.000905096, 
0.0052914, 0.00531263, 0.000656693, 0.00462525, 0.00144841, -0.000385966, 
0.00174328, 0.0025986, 0.00271254, -3.76444e-06, 0.00257943, 
0.0074846, 0.00405286, 0.00627885, 0.00388089, 0.00602721, 0.00434971, 
0.00173626, 0.00105399, 0.00486757, 2.92981e-05, 0.0016996, 0.00677392, 
0.00278593, 0.00223796, -0.00187797, 0.00826741, 0.00822235, 
0.00444225, 0.00447007, 0.00057967, -0.000235233, 0.00182423, 
0.00286262, 0.00450772, 0.00139543, 0.00442209, 0.00293874, 0.00115984, 
0.00449674, 0.00400239, 0.00602032, 0.00395752, 0.00947724, 0.00147931, 
0.000499464, 0.000418398, 0.00229546, 0.00599384, 0.00318457, 
0.00104123, 0.00155526, 0.00413344, 0.00308039, 0.00050665, 0.00207024, 
0.000398551, 0.00102246, 0.00242452, 0.00231291, -8.04707e-06, 
0.00376894, 0.00332509, 0.00139932, 0.00352587, 0.00688049, 0.00469337, 
0.00351454), PC3 = c(0.00225999, 0.000944204, 0.00173629, 0.000806402, 
0.000935327, 0.000972322, 0.00588377, 0.000240548, 0.00106101, 
0.00058504, 0.00229321, 0.00128701, 0.000380554, 0.00541664, 
-0.000236188, 0.000180451, 0.000416481, -0.000106913, 0.000614304, 
-0.000725011, 0.00292305, -0.000966784, -0.000908365, 0.0046179, 
-0.000539883, 0.000358605, -0.00080447, -0.00034989, 0.000361821, 
-0.00101722, 0.00392033, -0.00233073, 0.00241678, 0.0050311, 
-0.00112005, 0.000171456, 0.0018776, -0.00323287, -0.00204191, 
-0.000982061, 0.00224032, -0.000510143, 0.00121356, 0.00381627, 
0.000673827, 0.00302078, -0.000783529, 0.0028697, 0.00145124, 
0.00034878, 0.00179317, 0.00347337, -0.0017134, 0.000156688, 
0.00250998, 0.000273484, -0.00280404, -0.000786118, 0.00669574, 
0.00439216, -0.000631726, 0.00252103, -0.00160461, -0.000876934, 
-0.000417027, 0.00208479, 0.00120976, -0.00226125, 0.00210568, 
0.00329855, 0.00220495, 0.00444699, 0.00100191, 0.00394448, 0.00271569, 
0.00774704, 0.000301963, 0.00098307, 0.0019597, 0.000310705, 
0.00213371, 0.000988491, -0.00180521, -0.00143613, 0.000829638, 
-0.00051507, 0.000607852, -0.00206475, 0.00231497, -0.000739553, 
-4.20342e-05, 0.0020624, -0.000715921, 0.000610361, 0.000213057, 
-0.00104639, 0.000355547, 0.00362908, 0.00290723, -0.000352263
), PC4 = c(-0.00706244, -0.00614951, -0.00608959, -0.00517222, 
-0.0080459, -0.00537751, -0.0346335, -0.0079784, -0.00868999, 
-0.0125225, -0.00610861, -0.0069192, -0.00619574, -0.0327712, 
-0.0144122, -0.0114191, -0.0125767, -0.0116823, -0.024806, -0.0188518, 
-0.0254271, -0.0118229, -0.00955652, -0.00918322, -0.00546761, 
-0.011626, -0.00526092, -0.0248463, -0.005643, -0.00776498, -0.022394, 
-0.0099742, -0.0233134, -0.0183221, -0.00721167, -0.018248, -0.0094726, 
-0.00643869, -0.00921738, -0.0152759, -0.00752335, -0.00788289, 
-0.0183863, -0.0294224, -0.022569, -0.0290879, -0.0172247, -0.030695, 
-0.0246169, -0.00616516, -0.00958478, -0.0272321, -0.00530374, 
-0.0118055, -0.0325487, -0.0171497, -0.011303, -0.0054941, -0.0319148, 
-0.0295316, -0.0185755, -0.0205746, -0.00984303, -0.00863454, 
-0.00977416, -0.017416, -0.0242596, -0.0056123, -0.022826, -0.0183456, 
-0.0100485, -0.0271615, -0.0150789, -0.0303827, -0.0198175, -0.0416398, 
-0.0101774, -0.00588486, -0.00579681, -0.0163451, -0.0265722, 
-0.0204538, -0.010178, -0.0094466, -0.0217378, -0.0181495, -0.00562416, 
-0.0135586, -0.00544501, -0.0069821, -0.0173774, -0.0114546, 
-0.00596798, -0.0189899, -0.0150524, -0.00660429, -0.0169662, 
-0.0297313, -0.0225819, -0.0174871), PC5 = c(-0.00405824, 0.00166995, 
0.00207271, 0.000659781, 0.0014358, 0.000465039, 0.00059709, 
0.00262627, -0.00380052, 0.0029557, 0.00223558, 0.00205493, 0.00276749, 
0.000733308, 0.00215299, 0.00273405, 0.00219488, 0.00423049, 
-0.00413065, -0.00565354, 0.000854486, -0.00174069, -0.00360244, 
-0.00168002, 0.00291469, -0.000977519, 0.00111854, 0.00172896, 
-0.00339644, 0.00243256, 0.00125672, -0.00464645, 0.00178787, 
0.00169768, 0.00285395, -0.00465064, -0.000902313, 0.00256212, 
0.00274758, 0.000625498, -0.000734492, 0.00295225, 0.00144833, 
-0.00532999, 0.00219553, 0.000831507, 0.00301484, -0.00604156, 
0.00220097, 0.00230262, 0.000214473, 0.000462045, 0.00304645, 
0.00196043, 0.000407369, -0.00552926, 0.00182272, 0.000349243, 
-0.00724846, 0.000623578, 0.00301894, 0.00105975, 0.00255941, 
0.00252931, 0.00445429, -0.00534611, 0.00143791, 0.00143812, 
0.00174097, 0.00123442, -0.00515319, 0.00179283, -0.00340608, 
0.00114487, -0.00197399, 0.000194605, 0.00280975, -0.000934036, 
0.00228153, -0.00471003, 0.00115577, -0.00475763, 0.00268732, 
0.0032134, 0.000601558, -0.00561957, 0.00136914, -0.00156705, 
0.00251303, -0.00546758, -0.0121511, -0.00142926, 0.00172791, 
0.00218994, 0.00237938, 0.00316171, -0.00370905, 0.00153046, 
0.000337708, 0.00271551), PC6 = c(-0.00284765, -0.00283895, 0.00527764, 
-0.0026476, 0.00375259, 0.00499592, -4.77904e-05, -0.00222164, 
-0.00188066, -0.00246881, -0.00519103, 0.000944638, -7.51577e-05, 
0.00166054, -0.00113528, -0.00336485, -0.00493939, -0.00443414, 
0.00246634, 0.000692459, 0.0048257, -0.000416408, 0.00303809, 
-0.000306585, 0.000545971, -0.000673914, 0.00024636, 0.00220557, 
0.000560706, 0.0013399, -0.00753716, -0.000332443, 0.00120966, 
-0.00241685, -0.00226413, -0.00121462, -0.00253501, 0.00134274, 
0.00426563, 0.00337122, 0.00104513, -0.00154901, 0.00134593, 
-0.00355423, -0.00288068, 0.000103676, -0.00461957, 0.000799467, 
0.000343996, -4.21211e-05, -0.00309365, 0.00111251, 0.00270762, 
0.00153617, -0.000373304, -0.00117106, -0.000887805, 0.000624426, 
0.00217074, -0.000613308, -0.00175413, -0.00300287, -0.00093576, 
-0.00339223, 0.00235271, -0.00154638, -0.000361306, -0.000166313, 
-0.00132198, 0.00521665, -0.000862368, -0.00247073, 0.000610055, 
-0.00905174, 0.00166697, -0.000189474, 0.000834589, -0.0036722, 
0.00443142, 0.00241834, 0.00220121, 0.000853215, -0.00909188, 
-0.00265009, 9.33136e-05, 0.000810262, 0.00290696, -0.00105542, 
0.00373403, -0.00152446, 0.00100027, -0.0010031, -3.29579e-05, 
0.0021066, -0.00469546, -0.00377332, -0.00582785, 0.00152219, 
-0.000646666, -0.00338471), PC7 = c(-0.000966784, 0.000858472, 
-0.000731002, 0.000208545, 0.00152989, 0.00198066, -0.000535221, 
-0.00122511, 0.00196948, -0.00581567, -0.000408847, -0.00194034, 
-0.00182141, 0.000903603, 0.00396622, -0.000453125, 7.8716e-05, 
0.000296362, -0.000310084, -0.00683725, 0.00556656, -0.00576127, 
0.00098513, -0.0059674, -0.00457095, 0.000133349, 3.1212e-05, 
-3.35463e-05, 0.00679432, 0.00618758, 0.00584437, 0.000245108, 
0.00465516, -0.000800927, 0.000407999, -0.00019604, 0.00574862, 
-0.00347399, -0.00399713, -0.00408003, -0.00563817, 0.00614369, 
-0.000498816, 0.00400406, 0.00344057, -0.00726582, 0.00618692, 
0.00523823, 0.0059971, 0.00645581, -0.00315803, 0.00230628, 0.000556851, 
-0.000454469, -0.000895915, 0.00375606, -0.00407484, 0.00304841, 
-0.00149239, -0.00201436, -0.00658857, -0.00651249, 0.00464553, 
-0.00606149, -0.00407884, 0.00363839, -0.00751041, -0.00181031, 
0.0030093, 0.00481183, -0.00340948, -0.00188252, -0.00607603, 
0.00391179, 0.0048066, -0.00410118, -0.000240318, -0.000578176, 
0.0011836, -0.00217002, -0.000885254, -0.00119436, 4.86889e-05, 
0.00185881, -0.00277575, 0.000804656, -0.00618109, 0.00572166, 
0.00572875, 0.00693037, 0.000416884, 0.0017671, 0.00550398, 0.000410082, 
0.000904955, -0.00502355, 0.00040453, 0.00357145, -0.000443468, 
-0.007186), PC8 = c(0.00207626, -0.000777909, 0.00123836, 0.00240049, 
0.00309608, 0.00142598, -0.000846854, -0.00273043, 0.00245558, 
7.87495e-05, 0.00108764, 0.00363284, 0.00372123, 0.00375252, 
0.00373071, 0.00357798, -0.00201885, -0.00242323, 0.0029049, 
0.00136961, 0.00056879, -0.00535726, 0.00598565, 0.00204834, 
0.0033111, 0.00416105, -0.00324434, 0.00432905, 0.00497544, -3.66053e-05, 
-0.000834625, 0.00531072, 0.00253484, 0.0020734, 0.000272664, 
0.00485057, -0.00339794, 5.68565e-05, 0.00348046, -0.00350781, 
0.00364018, 0.00502046, -0.0018104, 0.00227955, -0.00624284, 
0.00332553, 0.00263908, 0.000825162, 0.000234857, -0.000366634, 
-0.00110272, -0.00237979, 0.00361058, 0.00242855, 0.00359062, 
0.00400025, -0.0026744, -0.000584628, 0.0045323, 0.000209633, 
0.00112061, 0.00241239, 0.00461344, -0.00326694, 0.00834395, 
0.00226968, 0.00198815, -0.0009646, -0.00153965, -0.00636332, 
-0.00112789, -0.00365031, 0.00412531, -0.00572533, -0.000483452, 
-0.00339615, 0.00247309, -0.00267566, 0.00225668, 0.00310274, 
0.00150271, -0.00233343, 0.00397858, 0.00178316, 0.000200166, 
0.0027995, -0.00364627, 0.00201606, 0.00572085, 0.00127903, 0.00550745, 
0.00378463, -0.00378493, 0.0058174, -0.00063385, 0.00470976, 
0.00414126, 0.00669577, 0.00361024, 3.30171e-05), PC9 = c(-0.00413579, 
0.00558255, 0.00272567, 0.00345609, 0.00149536, 0.00178221, -0.00301513, 
0.00320935, -0.00120161, -0.00273661, 0.00412995, -0.00144162, 
-0.00276852, -0.00121214, -0.0031418, -0.00293077, 0.00163583, 
-8.58703e-05, -0.00253966, 0.00348989, -0.000887324, -0.00268176, 
0.00128175, 0.000377872, 0.00626353, 0.0054778, -0.00192926, 
0.000403404, 0.00142964, -0.00360545, 0.00763599, 0.00161319, 
-1.52309e-05, -0.00331918, -0.0060337, -0.0045956, -0.00161098, 
-0.000397117, 0.000443053, 0.0015468, 0.00573104, -0.00445638, 
0.00114611, -0.00230803, -0.00556554, -0.00222919, 0.000618817, 
-0.00128647, -0.00512245, 0.00145164, 0.000935877, -0.00273432, 
-0.0008535, -0.00201693, -0.000715877, 0.000102212, 0.00293262, 
0.000558846, -0.00290544, -0.00114504, -0.00518739, -0.00646145, 
0.00470529, 0.00112699, 0.00602447, -0.00506389, -0.0055939, 
-0.00446697, 0.00121267, -0.0024236, -0.00357483, -0.00114964, 
-0.00333506, -0.00386698, 0.00150182, 0.000725408, 0.0019953, 
0.00154262, -0.00193988, 0.00124157, -0.00500225, -0.0041103, 
-0.000204015, -0.00624489, 0.00448099, -0.00378036, -0.00430964, 
0.00166735, 0.00647164, -0.000921242, 0.00399003, -1.86807e-05, 
0.00791428, -0.000326779, -0.00595342, -0.00332109, 4.11595e-05, 
0.000529987, -0.00593942, -0.00186746), PC10 = c(0.0013762, -0.00151124, 
-0.00535836, -0.00474789, 0.00165198, 0.000771432, 0.00172573, 
0.00339571, -0.00014711, 0.00772729, 0.00240275, -0.00106593, 
-0.00259736, -0.00144892, 0.00303319, 0.00939072, 0.00466454, 
0.00815695, 0.00255319, 0.00200561, -0.0052339, -0.00281621, 
0.00568939, -0.00841715, 0.00571457, -0.00108134, 0.00840258, 
0.00262997, 0.00580448, 0.00689939, -0.00237231, 0.00308316, 
-0.00377734, -0.00386487, 0.00822291, 0.00135683, 0.00420492, 
0.00687831, 0.0029626, 0.000104389, -0.00224762, 0.00250943, 
0.00275259, -0.0031458, -0.00588663, -0.00371983, 0.00166524, 
-0.00920249, -0.00299459, 0.00551048, 0.00231961, -0.00394275, 
0.00186957, 0.00864174, -0.00197336, -0.002543, 0.00702364, 0.00286504, 
-0.00332131, -0.00402947, 0.00373151, -0.0017673, 0.00858158, 
0.00194678, 0.00472241, 0.00153001, -0.00084134, 0.0038617, -0.00584107, 
0.00175432, 0.000150018, -0.000355714, -0.00235193, -0.00466048, 
-0.00278861, -0.0058688, 0.00570999, 0.00742921, 0.00301807, 
0.000718667, -0.00197903, -0.000859995, -0.00288219, 0.00234434, 
0.00351312, 0.00291462, 0.00346978, 0.00109873, -0.000341392, 
0.00488295, 0.00703475, 0.00328033, -7.2989e-05, 0.00289667, 
-0.00298768, 0.00421138, 0.00181077, -0.00253433, -0.00583558, 
0.0016199), COHORT = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("JPT", 
"YRI", "CEU", "ADGC"), class = "factor"), KEY = c("0:10AD23195", 
"0:10AD31971", "0:10AD32127", "0:10AD32289", "0:10AD32644", "0:10AD33438", 
"0:11AD34642", "0:11AD35811", "0:10267", "0:11003004", "0:22200552", 
"0:22201305", "0:22201431", "0:41948357", "0:43556969", "0:51000837", 
"0:51002675", "0:51002686", "0:61000195", "0:70671651", "0:96760004", 
"0:259506102", "0:273301207", "0:A0000336", "0:ACT150285", "0:B0000041", 
"0:NACC001959", "0:NACC020841", "0:NACC022771", "0:NACC024092", 
"0:NACC024498", "0:NACC030255", "0:NACC038815", "0:NACC053944", 
"0:NACC075374", "0:NACC079985", "0:NACC084450", "0:NACC087090", 
"0:NACC087133", "0:NACC154574", "0:NACC165306", "0:NACC167216", 
"0:NACC167723", "0:NACC175958", "0:NACC181584", "0:NACC219993", 
"0:NACC226012", "0:NACC226739", "0:NACC229219", "0:NACC229318", 
"0:NACC272965", "0:NACC275260", "0:NACC275998", "0:NACC280668", 
"0:NACC281764", "0:NACC322676", "0:NACC335048", "0:NACC352403", 
"0:NACC361877", "0:NACC365120", "0:NACC383575", "0:NACC396238", 
"0:NACC401953", "0:NACC433042", "0:NACC433486", "0:NACC436835", 
"0:NACC450485", "0:NACC457252", "0:NACC460993", "0:NACC470988", 
"0:NACC475092", "0:NACC481835", "0:NACC486601", "0:NACC490238", 
"0:NACC496710", "0:NACC497363", "0:NACC497643", "0:NACC497766", 
"0:NACC504122", "0:NACC521787", "0:NACC536405", "0:NACC541311", 
"0:NACC553751", "0:NACC571297", "0:NACC592868", "0:NACC608826", 
"0:NACC612796", "0:NACC614149", "0:NACC628344", "0:NACC631119", 
"0:NACC639120", "0:NACC639291", "0:NACC646552", "0:NACC651614", 
"0:NACC658435", "0:NACC662008", "0:NACC684540", "0:NACC702552", 
"0:NACC731950", "0:NACC745118"), ADGC_COHORT = c("Reported_AA", 
"Reported_AA", "Reported_AA", "Reported_AA", "Reported_AA", "Reported_AA", 
"Reported_AA", "Reported_AA", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_Hispanic", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_Hispanic", 
"Reported_NHW", "Reported_Hispanic", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_Hispanic", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_Hispanic", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_Hispanic", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_Hispanic", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_NHW", 
"Reported_NHW", "Reported_NHW", "Reported_NHW", "Reported_Hispanic", 
"Reported_NHW", "Reported_NHW")), row.names = c(925L, 1432L, 
1570L, 1700L, 1921L, 2382L, 2686L, 3339L, 3608L, 4469L, 4685L, 
4687L, 4688L, 5160L, 5167L, 5233L, 5392L, 5399L, 5563L, 5613L, 
5729L, 8489L, 8505L, 9227L, 11464L, 11663L, 11693L, 11942L, 11961L, 
11983L, 11989L, 12057L, 12157L, 12365L, 12640L, 12692L, 12741L, 
12763L, 12764L, 13564L, 13686L, 13708L, 13715L, 13821L, 13903L, 
14391L, 14464L, 14471L, 14496L, 14498L, 15018L, 15046L, 15053L, 
15119L, 15131L, 15616L, 15783L, 15986L, 16114L, 16153L, 16382L, 
16518L, 16583L, 16959L, 16965L, 17004L, 17181L, 17259L, 17302L, 
17436L, 17482L, 17568L, 17640L, 17688L, 17785L, 17797L, 17802L, 
17803L, 17884L, 18129L, 18293L, 18352L, 18491L, 18710L, 18967L, 
19184L, 19238L, 19251L, 19445L, 19485L, 19585L, 19587L, 19673L, 
19725L, 19802L, 19837L, 20109L, 20335L, 20693L, 20862L), class = "data.frame")

CodePudding user response:

Yes, you could use dist. For a 100 row data frame it will create a 100 x 100 matrix of distances between all the points if you do:

# Pairwise distances between every pair of rows of X in the PC2/PC4 plane
# (dist() defaults to Euclidean distance), expanded from the compact "dist"
# object into a full square matrix with one row and one column per row of X.
distances <- as.matrix(
  dist(X[, c("PC2", "PC4")], diag = TRUE, upper = TRUE)
)

Since we are only interested in the distances from the red dots, we can subset the matrix rows:

# Keep only the rows that belong to the red (Reported_Hispanic) points.
# drop = FALSE keeps the result a matrix even when there is a single red
# point; otherwise it would collapse to a plain vector and the later
# column/row operations would fail.
distances <- distances[X$ADGC_COHORT == "Reported_Hispanic", , drop = FALSE]

But since we don't want to know the distances between red points, we remove all the "red point" columns

# Discard the columns that belong to red (Reported_Hispanic) points so that
# only distances to non-red points remain.
# drop = FALSE keeps the result a matrix even if only one row or one column
# is left, which apply(distances, 1, ...) requires.
distances <- distances[, X$ADGC_COHORT != "Reported_Hispanic", drop = FALSE]

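If we run which.min on each row, we get, for each red point, the position of its nearest non-red point. That position is relative to the data frame containing only the non-red points, so we first subset our original data frame to the non-red points and then index it like this: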

# Rows of X that are not red; the columns of `distances` are in this order.
non_hispanic <- X[X$ADGC_COHORT != "Reported_Hispanic", ]
# For each row of `distances` (one per red point), the column position of the
# smallest distance, i.e. the position of its nearest non-red point.
nearest_idx <- apply(distances, 1, which.min)
# One row per red point: the non-red point closest to it.
X2 <- non_hispanic[nearest_idx, ]

So if we take the original plot:

# Build the scatterplot of PC2 against PC4, coloured by ADGC_COHORT, and keep
# it in `p` so that further layers can be added to it later.
# The layers must be chained with `+`; without it `p` would hold only the
# empty base plot and the remaining calls would be evaluated on their own.
# NOTE(review): the sample data contains the level "Reported_AA", which has no
# entry in `values` below (only Reported_Asian / Reported_African) -- confirm
# the intended colour mapping for those points.
p <- ggplot(X, aes(x = PC2, y = PC4, color = ADGC_COHORT)) +
  geom_point() +
  xlab("PC2") +
  ylab("PC4") +
  ggtitle("ADGC_Selected_Hispanic") +
  scale_color_manual(values = c(Reported_NHW = 'black',
                                Reported_Hispanic = 'red',
                                Reported_Asian = 'violet',
                                Reported_African = 'blue'))

# Printing the plot object renders it.
p

enter image description here

We can draw circles around the nearest points like this:

# Overlay the nearest-neighbour rows (X2) as large hollow circles (shape 21)
# on top of the existing plot; the layer is added with `+`.
p + geom_point(data = X2, shape = 21, size = 5)

enter image description here

It's important to realise that one or two of these points might not look like the closest because PC2 and PC4 are scaled differently on the plot. If you draw with coord_equal you will be able to confirm visually that these are indeed the closest points.

# Same overlay, drawn with equal x/y scaling so that distances on screen
# correspond to the distances that were actually computed.
p +
  geom_point(data = X2, shape = 21, size = 5) +
  coord_equal()

enter image description here

Note there is also one case where two red points share the same closest black neighbour.

  • Related