Home > Blockchain >  How do I make ggrepel move (some) labels outside US map boundaries?
How do I make ggrepel move (some) labels outside US map boundaries?

Time:04-01

I'm trying to create my first map using ggrepel, but as you can see I've instead created a dumpster fire of overlapping labels. Most of the locations I'm mapping and labelling are clustered in the northeast, so the labels overlap. How do I get some of the labels to slide over beyond the map boundaries (in the ocean, so to speak)? Here's the code I used to create this monster:

# Original attempt: US base map with repelled institution labels,
# enrollment-sized points, titles and legend placement.
# The layers must be chained with `+`; without it every call is evaluated as a
# separate top-level expression and nothing is added to the map.
# NOTE(review): assumes usmap and ggplot2 are attached and that
# `top_18_2_transformed` carries the projected `x`/`y` columns -- confirm.
plot_usmap(fill = "light blue", alpha = 0.5) +
  ggrepel::geom_label_repel(data = top_18_2_transformed, aes(x=x, y=y, label=INSTNM),
                            size=3,
                            label.padding = unit(.75,"mm"),
                            nudge_y = 20,
                            nudge_x = 20,
                            box.padding=0.3,
                            max.overlaps=30,
                            point.padding=NA,
                            family="Avenir Next",
                            fill="gray99",
                            alpha=1.0,
                            label.r=unit(0.2,"lines"),
                            min.segment.length = 0.1,
                            label.size=unit(.15,"mm"),
                            segment.color="black",
                            segment.size=1,seed=1000) +
  # One red point per institution, sized by undergraduate enrollment.
  geom_point(data = top_18_2_transformed, aes(x = x, y = y, size = UGDS),
             color = "red",
             alpha = 0.75) +
  labs(title = "Select Colleges",
       size = "Undergrad Enrollment") +
  theme(legend.position = "right")

And here's a picture of my problematic map: enter image description here

Thanks in advance for any corrections you may be able to offer.

UPDATE 31 March 2022: here's the dput(top_18_2_transformed):

structure(list(lon = c(-74.659365, -122.167359, -78.937624, -75.19391, 
-71.093226, -77.073463, -118.125878, -117.709837, -71.222839, 
-79.941993, -72.926688, -76.483084, -73.961885, -71.169242, -74.025334, 
-75.380236, -70.624084, -71.118313), lat = c(40.348732, 37.429434, 
36.001135, 39.950929, 42.359243, 38.908809, 34.137349, 34.106515, 
42.385995, 40.44357, 41.311158, 42.4472, 40.808286, 42.336213, 
40.744776, 40.606822, 41.739072, 42.374471), UNITID = c(186131, 
243744, 198419, 215062, 166683, 131496, 110404, 115409, 164739, 
211440, 130794, 190415, 190150, 164924, 186867, 213543, 166692, 
166027), OPEID = c(262700, 130500, 292000, 337800, 217800, 144500, 
113100, 117100, 212400, 324200, 142600, 271100, 270700, 212800, 
263900, 328900, 218100, 215500), OPEID6 = c(2627, 1305, 2920, 
3378, 2178, 1445, 1131, 1171, 2124, 3242, 1426, 2711, 2707, 2128, 
2639, 3289, 2181, 2155), INSTNM = c("Princeton University", "Stanford University", 
"Duke University", "University of Pennsylvania", "Massachusetts Institute of Technology", 
"Georgetown University", "California Institute of Technology", 
"Harvey Mudd College", "Bentley University", "Carnegie Mellon University", 
"Yale University", "Cornell University", "Columbia University in the City of New York", 
"Boston College", "Stevens Institute of Technology", "Lehigh University", 
"Massachusetts Maritime Academy", "Harvard University"), CITY = c("Princeton", 
"Stanford", "Durham", "Philadelphia", "Cambridge", "Washington", 
"Pasadena", "Claremont", "Waltham", "Pittsburgh", "New Haven", 
"Ithaca", "New York", "Chestnut Hill", "Hoboken", "Bethlehem", 
"Buzzards Bay", "Cambridge"), STABBR = c("NJ", "CA", "NC", "PA", 
"MA", "DC", "CA", "CA", "MA", "PA", "CT", "NY", "NY", "MA", "NJ", 
"PA", "MA", "MA"), ZIP = c("08544-0070", "94305", "27708", "19104-6303", 
"02139-4307", "20057-0001", "91125", "91711", "02452-4705", "15213-3890", 
"6520", "14853", "10027", "2467", "07030-5991", "18015", "02532-1803", 
"2138"), ACCREDAGENCY = c("Middle States Commission on Higher Education", 
"Western Association of Schools and Colleges Senior Colleges and University Commission", 
"Southern Association of Colleges and Schools Commission on Colleges", 
"Middle States Commission on Higher Education", "New England Commission on Higher Education", 
"Middle States Commission on Higher Education", "Western Association of Schools and Colleges Senior Colleges and University Commission", 
"Western Association of Schools and Colleges Senior Colleges and University Commission", 
"New England Commission on Higher Education", "Middle States Commission on Higher Education", 
"New England Commission on Higher Education", "Middle States Commission on Higher Education", 
"Middle States Commission on Higher Education", "New England Commission on Higher Education", 
"Middle States Commission on Higher Education", "Middle States Commission on Higher Education", 
"New England Commission on Higher Education", "New England Commission on Higher Education"
), INSTURL = c("www.princeton.edu/", "www.stanford.edu/", "www.duke.edu/", 
"www.upenn.edu/", "web.mit.edu/", "www.georgetown.edu/", "www.caltech.edu/", 
"https://www.hmc.edu/", "www.bentley.edu/", "www.cmu.edu/", "https://www.yale.edu/", 
"www.cornell.edu/", "www.columbia.edu/", "www.bc.edu/", "www.stevens.edu/", 
"www.lehigh.edu/", "https://www.maritime.edu/", "www.harvard.edu/"
), SCH_DEG = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3), PREDDEG = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3), HIGHDEG = c(4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4), REGION = c(2, 8, 5, 2, 1, 2, 8, 8, 1, 2, 1, 
2, 2, 1, 2, 2, 1, 1), CCBASIC = c(15, 15, 15, 15, 15, 15, 15, 
21, 18, 15, 15, 15, 15, 15, 16, 16, 22, 15), ADM_RATE = c(0.0578, 
0.0434, 0.076, 0.0766, 0.067, 0.1436, 0.0642, 0.1367, 0.4672, 
0.1544, 0.0608, 0.1085, 0.0545, 0.2722, 0.3996, 0.321, 0.9146, 
0.0464), ACTCM25 = c(33, 32, 33, 33, 34, 31, 35, 33, 27, 33, 
33, 32, 33, 31, 31, 29, 19, 33), ACTCM75 = c(35, 35, 35, 35, 
36, 35, 36, 35, 31, 35, 35, 35, 35, 34, 34, 33, 24, 35), SAT_AVG = c(1517, 
1503, 1522, 1511, 1547, 1473, 1557, 1526, 1327, 1513, 1517, 1487, 
1511, 1437, 1429, 1380, 1100, 1517), DISTANCEONLY = c(0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), UGDS = c(5308, 
6994, 6546, 10774, 4516, 7141, 938, 893, 4157, 6535, 6089, 14976, 
8221, 9637, 3641, 5164, 1654, 7547), CURROPER = c(1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), COSTT4_A = c(70900, 
71587, 75105, 75303, 70240, 73840, 72084, 76953, 68577, 72265, 
73900, 73879, 76907, 73053, 68734, 68383, 27858, 73485), COSTT4_P = c("NULL", 
"NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
"NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
"NULL"), TUITIONFEE_IN = c(52800, 53529, 58031, 57770, 53790, 
56058, 54600, 58660, 51830, 57119, 55500, 57222, 61788, 57910, 
54014, 55240, 10018, 51925), TUITIONFEE_OUT = c(52800, 53529, 
58031, 57770, 53790, 56058, 54600, 58660, 51830, 57119, 55500, 
57222, 61788, 57910, 54014, 55240, 25752, 51925), AVGFACSAL = c(20724, 
20865, 16863, 18277, 19624, 15798, 20595, 14397, 14592, 12296, 
19830, 15574, 19431, 15599, 15318, 13763, 8928, 20988), PFTFAC = c("0.835", 
"0.9881", "0.9364", "0.7779", "0.9885", "0.4815", "0.9289", "0.8992", 
"0.6696", "0.9161", "0.717", "0.9074", "0.4521", "0.6662", "1", 
"0.8392", "0.5867", "0.862"), C150_4 = c(0.979, 0.9432, 0.9462, 
0.96, 0.954, 0.9491, 0.9357, 0.9167, 0.8952, 0.9049, 0.972, 0.9453, 
0.9549, 0.9404, 0.8473, 0.8981, 0.7629, 0.971), RET_FT4 = c(0.9768, 
0.9876, 0.9827, 0.9808, 0.9946, 0.9679, 0.9826, 0.9744, 0.9201, 
0.9732, 0.9892, 0.9748, 0.9853, 0.9467, 0.9394, 0.9349, 0.8672, 
0.9722), RET_PT4 = c("NULL", "NULL", "NULL", "0.9245", "NULL", 
"0.6667", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "0.95", 
"NULL", "NULL", "NULL", "NULL", "NULL"), MD_EARN_WNE_P10 = c("95689", 
"97798", "93115", "103246", "111222", "96375", "112166", "108988", 
"107974", "99998", "88655", "91176", "89871", "93021", "98159", 
"95033", "91668", "84918"), PCT25_EARN_WNE_P10 = c("52729", "61965", 
"61558", "65218", "67120", "61372", "67501", "69466", "73117", 
"62003", "60311", "59566", "56005", "62006", "72669", "65644", 
"68187", "56301"), PCT75_EARN_WNE_P10 = c("167686", "172245", 
"151838", "174907", "169465", "147685", "175675", "173725", "146079", 
"159483", "146102", "147189", "141158", "147010", "127298", "134075", 
"129421", "153746"), MD_EARN_WNE_P6 = c("84713", "88873", "77260", 
"80445", "112623", "71107", "129420", "112059", "78514", "87824", 
"72046", "78779", "79434", "70858", "82237", "79832", "79354", 
"77816"), GRAD_DEBT_MDN_SUPP = c("10450", "12000", "13500", "16763", 
"13418", "16500", "PrivacySuppressed", "22089", "25000", "22014", 
"13142", "14500", "21500", "18000", "27000", "23000", "26000", 
"12665"), GRAD_DEBT_MDN10YR_SUPP = c("104.4654099", "119.9602793", 
"134.9553142", "167.5745134", "134.1355856", "164.945384", "PrivacySuppressed", 
"220.8168841", "249.9172485", "220.0671323", "131.3764992", "144.9520041", 
"214.9288337", "179.9404189", "269.9106283", "229.9238686", "259.9139384", 
"126.6080781"), C100_4 = c(0.898, 0.7288, 0.8831, 0.8571, 0.8691, 
0.9076, 0.8434, 0.8565, 0.8479, 0.7599, 0.8777, 0.8694, 0.8635, 
0.9003, 0.4566, 0.8003, 0.6322, 0.8476), ICLEVEL = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), OPENADMP = c(2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), GRADS = c("2997", 
"10253", "10037", "14803", "6990", "12080", "1299", "NULL", "1086", 
"7562", "7517", "8984", "23235", "4846", "3624", "1775", "97", 
"21592"), ACCREDCODE = c("MSACHE", "WASCSR", "SACSCC", "MSACHE", 
"NECHE", "MSACHE", "WASCSR", "WASCSR", "NECHE", "MSACHE", "NECHE", 
"MSACHE", "MSACHE", "NECHE", "MSACHE", "MSACHE", "NECHE", "NECHE"
), RET_FT4_POOLED = c(0.9788, 0.9879, 0.9793, 0.9821, 0.9909, 
0.9651, 0.9806, 0.9716, 0.9262, 0.97, 0.9892, 0.9741, 0.9825, 
0.9479, 0.9423, 0.9378, 0.8633, 0.9817), C100_4_POOLED = c(0.8856, 
0.739, 0.8788, 0.8546, 0.8602, 0.9009, 0.8242, 0.8551, 0.8326, 
0.7546, 0.8772, 0.8766, 0.8677, 0.8918, 0.4515, 0.7621, 0.5955, 
0.8573), BOOKSUPPLY = c("1050", "1245", "1434", "1358", "820", 
"1200", "1428", "800", "1300", "1000", "1050", "970", "1294", 
"1250", "1200", "1000", "1500", "1000"), ADMCON7 = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1), MDCOMP_ALL = c(0.5845, 
0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 
0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 
0.5845), MDCOST_ALL = c(15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 
15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 
15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5), MDEARN_ALL = c(37078, 
37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078, 
37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078), PPTUG_EF = c(0, 
0, 0.0031, 0.0537, 0.0064, 0.0214, 0, 0.0011, 0.0118, 0.017, 
2e-04, 3e-04, 0.0633, 0.0127, 0, 0.0128, 0.023, 0.0745), INEXPFTE = c(60048, 
113338, 68756, 56874, 80756, 31693, 105185, 34419, 15842, 28167, 
57231, 29893, 96463, 23266, 12504, 24995, 9687, 46272), C150_4_POOLED = c(0.9712, 
0.9435, 0.9512, 0.9574, 0.9477, 0.9452, 0.9278, 0.9179, 0.8917, 
0.8968, 0.969, 0.9452, 0.9566, 0.9297, 0.8608, 0.886, 0.7484, 
0.974), GRAD_DEBT_MDN = c("10450", "12000", "13500", "16763", 
"13418", "16500", "17747", "22089", "25000", "22014", "13142", 
"14500", "21500", "18000", "27000", "23000", "26000", "12665"
), x = c(2107384.76948701, -1933340.27810509, 1876178.25472949, 
2077243.02501463, 2314261.77712267, 1955381.08673633, -1660141.85673732, 
-1623368.30493136, 2303424.70345276, 1678023.03854027, 2211596.23078863, 
1896995.53745184, 2147624.50302849, 2309370.68277906, 2144734.86774305, 
2041573.64168227, 2373567.48443726, 2311783.20749272), y = c(-188894.792987744, 
-582296.149881856, -762721.806918975, -245389.810253038, 123275.753360416, 
-404107.357328073, -1027748.36033576, -1039201.65863312, 122405.777575308, 
-300870.762534603, -39714.5927185968, -7748.73302456512, -121333.925485063, 
118650.586978148, -129820.607837031, -179439.260821836, 71069.0976923304, 
124173.1993115)), class = "data.frame", row.names = c(NA, -18L
))

CodePudding user response:

With a little data manipulation, you could move the labels out to either side of the country and draw segments to connect the labels to the universities:

# Sort institutions from top to bottom of the map (descending y) so that the
# stacked labels follow the same vertical order as the points they refer to.
top_18_2_transformed <- top_18_2_transformed[order(-top_18_2_transformed$y), ]

# Split by sign of the projected x coordinate: positive x goes to the
# right-hand label column, negative x to the left-hand one.
colleges_east <- top_18_2_transformed[top_18_2_transformed$x > 0, ]
colleges_west <- top_18_2_transformed[top_18_2_transformed$x < 0, ]

# Label anchors: a fixed x per side and y values stepping down by 250000 map
# units from a fixed starting height. The sequence length is taken from the
# number of rows, so the assignment works for any number of institutions
# (for 3 western / 15 eastern rows it reproduces -1000000..-1500000 and
# 1000000..-2500000 exactly).
colleges_west$lab_x <- rep(-2300000, nrow(colleges_west))
colleges_west$lab_y <- seq(-1000000, by = -250000,
                           length.out = nrow(colleges_west))
colleges_east$lab_x <- rep(2800000, nrow(colleges_east))
colleges_east$lab_y <- seq(1000000, by = -250000,
                           length.out = nrow(colleges_east))


# US base map with institution names stacked in a column on either side of the
# country and connected to their points by straight segments.
# Layers are chained with `+`; without it each call is evaluated on its own and
# nothing is added to the map.
# NOTE(review): assumes usmap and ggplot2 are attached and that
# `colleges_west` / `colleges_east` carry `lab_x` / `lab_y` columns -- confirm.
plot_usmap(fill = "light blue", alpha = 0.5) +
  # Western labels are right-aligned (hjust = 1) so the text grows leftwards,
  # away from the map.
  geom_text(data = colleges_west,
            aes(x = lab_x, y = lab_y, label = stringr::str_wrap(INSTNM, 25)),
            hjust = 1, size = 3, lineheight = 0.8) +
  # Eastern labels are left-aligned (hjust = 0) so the text grows rightwards.
  geom_text(data = colleges_east,
            aes(x = lab_x, y = lab_y, label = stringr::str_wrap(INSTNM, 25)),
            hjust = 0, size = 3, lineheight = 0.8) +
  geom_point(data = top_18_2_transformed, aes(x = x, y = y, size = UGDS),
             color = "red",
             alpha = 0.75) +
  # Segments stop 100000 map units short of the label anchor, on the side
  # facing the map, so the line does not run into the text.
  geom_segment(data = colleges_east,
               aes(x, y, xend = lab_x - 100000, yend = lab_y)) +
  geom_segment(data = colleges_west,
               aes(x, y, xend = lab_x + 100000, yend = lab_y)) +
  labs(title = "Select Colleges",
       size = "Undergrad Enrollment") +
  theme(legend.position = c(0.35, 0),
        legend.direction = 'horizontal') +
  # Widen the panel beyond the map outline to make room for the label columns.
  coord_cartesian(xlim = c( -3500000,  4000000),
                  ylim = c(-3000000,   1500000))

enter image description here

  • Related