Home > Software design >  ggplot with arrows to connect different variables
ggplot with arrows to connect different variables

Time:12-19

i have three variables in my dataset:

  • school (School)
  • actual score (actual_score)
  • expected score (expected_score)

and need to do this graph

enter image description here

So far I have

data%>%
  mutate(School=fct_reorder(School, actual_score)
  )%>%
  ggplot(aes(x=School)) 
  geom_point(aes(y=actual_score), colour="red") 
  geom_point(aes(y= expected_score), colour="blue")

But they are just points... how to connect them?

structure(list(School = structure(c(9L, 
6L, 8L, 2L, 1L), levels = c("11278", "11274", "11285", "11289", 
"11280", "01424", "11290", "11272", "01206", "11286"), class = "factor"), 
    actual_score = c(453.4875, 423.375757575758, 441.481481481482, 
    375.103846153846, 363.621428571429), expected_score = c(452.489150512886, 
    428.002515274828, 439.209772701724, 384.917346549729, 382.216349569884
    )), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .rows = structure(list(
        1:5), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))

CodePudding user response:

To connect your points you could use a geom_segment. And to get the different shapes map on the shape aesthetic. Also do the same for color to get a legend reflecting both shape and color. The rest is some styling plus some additional geom_text layers for the labels.

library(dplyr)
library(ggplot2)
library(forcats)

data %>%
  mutate(School = fct_reorder(School, actual_score)) %>%
  ggplot(aes(x = School))  
  geom_segment(aes(xend = School, y = actual_score, yend = expected_score),
    colour = "grey80", linewidth = 1
  )  
  geom_point(aes(y = actual_score, colour = "Actual", shape = "Actual"), size = 3)  
  geom_point(aes(y = expected_score, colour = "Expected", shape = "Expected"), size = 3)  
  geom_label(aes(
    y = actual_score, label = round(actual_score),
    vjust = ifelse(actual_score > expected_score, 0, 1)
  ), label.size = NA, label.padding = unit(10, "pt"), fill = NA)  
  geom_label(aes(
    y = expected_score, label = round(expected_score),
    vjust = ifelse(expected_score > actual_score, 0, 1)
  ), label.size = NA, label.padding = unit(10, "pt"), fill = NA)  
  scale_color_manual(values = c("red", "blue"))  
  scale_shape_manual(values = c(16, 17))  
  scale_y_continuous(breaks = seq(320, 480, 40), limits = c(320, 480))  
  labs(color = NULL, shape = NULL, x = NULL, y = "Average NAPLAN Score")  
  theme_minimal()  
  theme(
    legend.position = "bottom",
    axis.title.y = element_text(face = "bold"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

enter image description here

DATA

data <- structure(list(
  School = structure(c(9L, 6L, 8L, 2L, 1L), levels = c(
    "11278", "11274", "11285", "11289",
    "11280", "01424", "11290", "11272", "01206", "11286"
  ), class = "factor"),
  actual_score = c(
    453.4875, 423.375757575758, 441.481481481482,
    375.103846153846, 363.621428571429
  ), expected_score = c(
    452.489150512886,
    428.002515274828, 439.209772701724, 384.917346549729, 382.216349569884
  )
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -5L), .rows = structure(list(1:5), ptype = integer(0), class = c(
  "vctrs_list_of",
  "vctrs_vctr", "list"
)))

CodePudding user response:

Your dput result is slightly corrupt, so I slightly modified it. You can use geom_linerange to connect the points. I also included the rest of the graph as placing the labels is a bit tricky.

library(tidyverse)

data <- tibble(
  School = structure(
    c(9L, 6L, 8L, 2L, 1L), 
    levels = c("11278", "11274", "11285", "11289", "11280", "01424", "11290", "11272", "01206", "11286"), 
    class = "factor"), 
  actual_score = c(453.4875, 423.375757575758, 441.481481481482, 375.103846153846, 363.621428571429), 
  expected_score = c(452.489150512886, 428.002515274828, 439.209772701724, 384.917346549729, 382.216349569884))


data%>%
  mutate(School = fct_reorder(fct_relabel(School, ~ paste("School", LETTERS[1:(length(.))])), actual_score)) %>%
  ggplot(aes(x = School))  
  geom_linerange(aes(ymin = actual_score, ymax = expected_score))   
  geom_point(aes(y = actual_score, color = "Actual", shape = "Acutal"), size = 3)  
  geom_text(aes(y = actual_score - 5   10 * (actual_score > expected_score), label = round(actual_score)))   
  geom_point(aes(y = expected_score, color = "Expected", shape = "Expected"), size = 3)   
  geom_text(aes(y = expected_score - 5   10 * (actual_score < expected_score), label = round(expected_score)))   
  scale_color_manual(name = NULL, 
                     labels = c("Acutal", "Expected"), 
                     values = c("blue", "red"))   
  scale_shape_manual(name = NULL, 
                     labels = c("Acutal", "Expected"), 
                     values = c(16, 17))   
  labs(y = "Average NAPLAN score", x = NULL)   
  theme_minimal()   
  theme(legend.position = "bottom", 
        panel.grid.major.x = element_blank())

Created on 2022-12-19 with reprex v2.0.2

  • Related