Home > database >  How to add a new (custom) variable to a ggplot legend
How to add a new (custom) variable to a ggplot legend

Time:08-02

I've run a number of models with two estimated parameters per model, across five groups and two treatments. I'm trying to graph the confidence intervals of these estimates in a large panel plot. Since I've simulated these data sets, I would like to include a dashed line for the "true value" of each parameter, which I set at the beginning of the exercise, so we can see how well the confidence interval of each model estimate covers the true value. I can do this just fine, but I'd also like to add another entry to the legend showing that "dashed black line" = True Value.

Here's an example of the code. The first set of code works and does not include the dashed black line in the legend.

# Example data: 5 groups x 2 treatments x 2 estimated parameters = 20 rows,
# one row per plotted confidence interval.
group <- c("group1", "group2", "group3", "group4", "group5")
treatment <- c("treatment1", "treatment2")
estimates <- c("estim1", "estim2")
parameters <- c("param1", "param2")

# Centre of each interval (plotted as the points).
means <- c(0, 0, 5, 0, -5, 0, 0, 7, -5, 10, -5, 0, 0, 0, 0, 0, -5, 0, 0, 10)

# Every interval is the centre +/- 0.5. The limits are derived from `means`
# rather than typed by hand: the hand-typed lower limits were equal to the
# upper limits (-4.5) at positions 9, 11 and 17, i.e. above the centre (-5).
UL <- means + 0.5
LL <- means - 0.5

# Reference values drawn as the dotted black segments in the plots.
values <- c(
  0.2, -0.2, 5.2, -0.3, -4.7, -0.1, -0.2, 6.9, -5.3, 10.1,
  -4.4, 0.1, 0.2, 0.3, 0.1, -0.1, -4.9, -0.2, -0.2, 9.9
)

# Row order: treatment varies slowest, then group, then estimate type.
df <- data.frame(
  group = rep(rep(group, each = 2), times = 2),
  treatment = rep(treatment, each = 10),
  estimates = rep(estimates, times = 10),
  LL = LL,
  means = means,
  UL = UL,
  parameters = rep(parameters, times = 10),
  values = values
)

library(ggplot2)

# Baseline plot: one panel per treatment (rows) x group (columns), showing the
# interval centre and error bar for each estimate plus a dotted black
# reference segment. The segment is NOT listed in the legend here.
ggplot(
  data = df,
  mapping = aes(x = as.factor(estimates), y = means, color = estimates)
) +
  geom_point() +
  geom_errorbar(aes(ymin = LL, ymax = UL), width = 0.1,
                position = position_dodge(0.1)) +
  # The segment coordinates are plain vectors of length 20 (one per row of
  # `df`), passed outside aes(); x positions 1 and 2 are the two discrete
  # estimate levels, so each segment spans +/- 0.4 around its tick.
  # Because `linetype` is a constant here, it produces no legend entry.
  geom_segment(x = rep(c(0.6, 1.6), 10), xend = rep(c(1.4, 2.4), 10),
               y = values, yend = values, color = "black",
               linetype = 3) +
  scale_x_discrete(labels = c(expression(beta[1]), expression(beta[2]))) +
  xlab("Beta coefficient type") +
  ylab("Confidence Interval of Estimate") +
  ggtitle("Coefficient Estimates") +
  facet_grid(rows = vars(treatment), cols = vars(group)) +
  scale_color_manual(name = "Symbols",
                     values = c("estim1" = "#F8766D", "estim2" = "#00BFC4"),
                     labels = c(expression(beta[1]),
                                expression(beta[2]))) +
  # No layer maps `shape` or `linetype` inside aes(), so these two scales
  # have nothing to act on and do not change the plot.
  scale_shape_manual(values = c("b1" = 16,
                                "b2" = 16)) +
  scale_linetype_manual(values = c("b1" = 1,
                                   "b2" = 1))

The second set of code does not work, but it is my best attempt at what I think I should do to get the dashed black line into the legend.

library(ggplot2)

# Attempt at adding a "True Value" legend entry by extending the manual
# scales. It draws the same plot as the baseline but still does NOT add the
# entry: a legend is only built for aesthetics that are mapped inside aes(),
# and here `linetype` is a constant on geom_segment() while `shape` is never
# mapped at all, so the extra scale values are never used.
ggplot(
  data = df,
  mapping = aes(x = as.factor(estimates), y = means, color = estimates)
) +
  geom_point() +
  geom_errorbar(aes(ymin = LL, ymax = UL), width = 0.1,
                position = position_dodge(0.1)) +
  # Segment coordinates are plain length-20 vectors (one per row of `df`).
  geom_segment(x = rep(c(0.6, 1.6), 10), xend = rep(c(1.4, 2.4), 10),
               y = values, yend = values, color = "black",
               linetype = 3) +
  scale_x_discrete(labels = c(expression(beta[1]), expression(beta[2]))) +
  xlab("Beta coefficient type") +
  ylab("Confidence Interval of Estimate") +
  ggtitle("Coefficient Estimates") +
  facet_grid(rows = vars(treatment), cols = vars(group)) +
  scale_color_manual(name = "Symbols",
                     values = c("estim1" = "#F8766D", "estim2" = "#00BFC4"),
                     # Disabled attempt at a third colour entry:
                     #   "" = "#00000"
                     labels = c(expression(beta[1]),
                                expression(beta[2]))) +
                     # Disabled attempt at a third label:
                     #   "True Value"
  # The third entry was originally named "" (a zero-length name), which R
  # rejects; it is named "b3" here to match the linetype scale below.
  scale_shape_manual(values = c("b1" = 16,
                                "b2" = 16,
                                "b3" = 0)) +
  scale_linetype_manual(values = c("b1" = 1,
                                   "b2" = 1,
                                   "b3" = 3))

GGplot with new variable added to legend

I've also thought that maybe I could relevel the df$estimates column to include three levels: the existing "estim1" and "estim2", plus a dummy "True Value" level with no observations. But I'm worried that this would just add an empty "True Value" tick to the x-axis of each of my 10 panels.

Thanks for your help.

CodePudding user response:

Map the linetype of your geom_segment to a string called "True value" inside aes, then add a scale_linetype_manual call. This will create a separate legend entry that matches the appearance of your segment and has the correct label.

library(ggplot2)

# Working version: the reference segment's linetype is mapped inside aes() to
# the constant string "True value". That gives the layer a linetype scale, so
# ggplot2 builds a separate legend entry labelled "True value".
ggplot(
  data = df,
  mapping = aes(x = as.factor(estimates), y = means, color = estimates)
) +
  geom_point() +
  geom_errorbar(aes(ymin = LL, ymax = UL), width = 0.1,
                position = position_dodge(0.1)) +
  # Coordinates and colour stay outside aes() as plain length-20 vectors (one
  # per row of `df`); only `linetype` is mapped, so only it gets a legend.
  geom_segment(mapping = aes(linetype = "True value"),
               x = rep(c(0.6, 1.6), 10), xend = rep(c(1.4, 2.4), 10),
               y = values, yend = values, color = "black") +
  scale_x_discrete(labels = c(expression(beta[1]), expression(beta[2]))) +
  xlab("Beta coefficient type") +
  ylab("Confidence Interval of Estimate") +
  ggtitle("Coefficient Estimates") +
  facet_grid(rows = vars(treatment), cols = vars(group)) +
  scale_color_manual(name = "Symbols",
                     values = c("estim1" = "#F8766D", "estim2" = "#00BFC4"),
                     labels = c(expression(beta[1]),
                                expression(beta[2]))) +
  # Linetype 3 is the dotted style; name = NULL removes the legend title so
  # the entry reads simply "True value".
  scale_linetype_manual(values = 3, name = NULL)

enter image description here

  • Related