Home > database >  change legend shape of only one level of a variable in ggplot2
change legend shape of only one level of a variable in ggplot2

Time:07-08

I have a dataframe as such:

# Example data: 12 overlap thresholds for each of the two levels of `peaks`.
# `N` is only filled for "eprint_peaks" rows and `pct_sd` only for
# "mean_random_peaks" rows; the other half of each column is NA.
dat <- data.table::data.table(
  overlaps = rep(1:12, times = 2L),
  N = c(
    4157L, 2396L, 1591L, 1166L, 829L, 572L,
    447L, 297L, 238L, 184L, 120L, 90L,
    rep(NA, 12L)
  ),
  pct = c(
    10.0007217263695, 5.76418793754661,
    3.82755551278659, 2.80510982269589, 1.99437053431809, 1.37609161113383,
    1.07537229051892, 0.714509105781028, 0.57256958645079,
    0.442658839945149, 0.288690547790314, 0.216517910842736,
    5.90055623577914, 2.87095152657789, 1.75885982427862, 1.22275641228556,
    0.866198262032255, 0.638031434348857, 0.504322128003869,
    0.364155269931993, 0.298313566049992, 0.222848843908313,
    0.195119357081085, 0.110664709986287
  ),
  cm_pct = c(
    29.8409796232588, 19.8402578968894,
    14.0760699593427, 10.2485144465562, 7.44340462386027, 5.44903408954218,
    4.07294247840835, 2.99757018788943, 2.2830610821084,
    1.71049149565761, 1.26783265571246, 0.979142107922151, 15.4040998051144,
    9.50354356933526, 6.63259204275737, 4.87373221847875,
    3.65097580619319, 2.78477754416094, 2.14674610981208, 1.64242398180821,
    1.27826871187622, 0.979955145826224, 0.757106301917911,
    0.561986944836827
  ),
  pct_sd = c(
    rep(NA, 12L),
    0.130441927889185, 0.0834272862417102, 0.0665490386787199,
    0.0546826013702531, 0.0526981297641897, 0.0533898011751216,
    0.0250810797874368, 0.0320688128894919, 0.0310478034379932,
    0.0191658535302748, 0.0211240341308091, 0.0162573956365332
  ),
  # Factor levels sort alphabetically: "eprint_peaks", "mean_random_peaks".
  peaks = as.factor(rep(c("eprint_peaks", "mean_random_peaks"), each = 12L))
)

It looks like this:

   overlaps    N        pct     cm_pct     pct_sd             peaks
 1:        1 4157 10.0007217 29.8409796         NA      eprint_peaks
 2:        2 2396  5.7641879 19.8402579         NA      eprint_peaks
 3:        3 1591  3.8275555 14.0760700         NA      eprint_peaks
 4:        4 1166  2.8051098 10.2485144         NA      eprint_peaks
 5:        5  829  1.9943705  7.4434046         NA      eprint_peaks
 6:        6  572  1.3760916  5.4490341         NA      eprint_peaks
 7:        7  447  1.0753723  4.0729425         NA      eprint_peaks
 8:        8  297  0.7145091  2.9975702         NA      eprint_peaks
 9:        9  238  0.5725696  2.2830611         NA      eprint_peaks
10:       10  184  0.4426588  1.7104915         NA      eprint_peaks
11:       11  120  0.2886905  1.2678327         NA      eprint_peaks
12:       12   90  0.2165179  0.9791421         NA      eprint_peaks
13:        1   NA  5.9005562 15.4040998 0.13044193 mean_random_peaks
14:        2   NA  2.8709515  9.5035436 0.08342729 mean_random_peaks
15:        3   NA  1.7588598  6.6325920 0.06654904 mean_random_peaks
16:        4   NA  1.2227564  4.8737322 0.05468260 mean_random_peaks
17:        5   NA  0.8661983  3.6509758 0.05269813 mean_random_peaks
18:        6   NA  0.6380314  2.7847775 0.05338980 mean_random_peaks
19:        7   NA  0.5043221  2.1467461 0.02508108 mean_random_peaks
20:        8   NA  0.3641553  1.6424240 0.03206881 mean_random_peaks
21:        9   NA  0.2983136  1.2782687 0.03104780 mean_random_peaks
22:       10   NA  0.2228488  0.9799551 0.01916585 mean_random_peaks
23:       11   NA  0.1951194  0.7571063 0.02112403 mean_random_peaks
24:       12   NA  0.1106647  0.5619869 0.01625740 mean_random_peaks

As one may notice, the pct_sd column is only available for the level mean_random_peaks of the variable peaks. I tried to plot a graph using a dotplot, but I found it quite difficult to add error bars using geom_errorbar():

Here is my first attempt:

# First attempt: dot plot of the cumulative percentage per overlap threshold,
# filled by `peaks`, with error bars of +/- pct_sd.
# Requires ggplot2 to be attached and `dat` to exist.
ggplot(mapping = aes(x = factor(overlaps), y = cm_pct), data = dat) +
  geom_dotplot(
    aes(fill = peaks),
    colour = "NA",
    binaxis = "y",
    stackdir = "centerwhole",
    binwidth = 1.2
  ) +
  # pct_sd is NA for the eprint_peaks rows, so those rows have no error bar.
  geom_errorbar(aes(ymin = cm_pct - pct_sd, ymax = cm_pct + pct_sd)) +
  scale_x_discrete(
    name = "overlaps",
    breaks = 1:12,
    labels = paste0('\u2265', 1:12)
  ) +
  theme_classic(base_size = 13) +
  scale_fill_grey(
    labels = c(
      "eprint_peaks" = "eprint\npeaks",
      "mean_random_peaks" = "random\npeaks"
    )
  ) +
  labs(y = 'cumulative percentage', fill = NULL) +
  theme(
    legend.position = "top",
    legend.key.size = unit(1, 'cm')
  )

enter image description here

That works, but the error bars are too small because the errors in the data are small, and the circles are too big. So overall this graph looks strange to me.

My second attempt improves things but now the legend is awkward for eprint_peaks because there is no line at all to be drawn but nonetheless the legend prints the line.

# Second attempt: point ranges coloured by `peaks`. The range is widened by
# 0.5 on each side of +/- pct_sd so the bars are visible.
# Requires ggplot2 to be attached and `dat` to exist.
ggplot(
  mapping = aes(x = factor(overlaps), y = cm_pct, colour = peaks),
  data = dat
) +
  # pct_sd is NA for the eprint_peaks rows, so ymin/ymax are NA there.
  geom_pointrange(
    aes(ymin = cm_pct - pct_sd - .5, ymax = cm_pct + pct_sd + .5)
  ) +
  scale_x_discrete(
    name = "overlaps",
    breaks = 1:12,
    labels = paste0('\u2265', 1:12)
  ) +
  theme_classic(base_size = 13) +
  scale_colour_grey(
    labels = c(
      "eprint_peaks" = "eprint\npeaks",
      "mean_random_peaks" = "random\npeaks"
    )
  ) +
  labs(y = 'cumulative percentage', fill = NULL) +
  theme(
    legend.position = "top",
    legend.key.size = unit(1, 'cm')
  )

I tried removing the line with this command to override the shape of the legend labels but this edits both labels not only one.

 # Override the legend key shape; this applies to every key of the colour
 # legend, not just one level.
 guides(
   color = guide_legend(override.aes = list(shape = 19))
 )

Is it possible to have only a dot for the legend entry in black and a dot with a line for the legend entry in grey? Thanks.

enter image description here

CodePudding user response:

As you want to remove the line, you have to override the `linetype` aes:

library(ggplot2)

# Same point-range plot as in the question, with the legend line removed for
# the first key only. Requires `dat` to exist.
ggplot(
  mapping = aes(x = factor(overlaps), y = cm_pct, colour = peaks),
  data = dat
) +
  geom_pointrange(
    aes(ymin = cm_pct - pct_sd - .5, ymax = cm_pct + pct_sd + .5)
  ) +
  scale_x_discrete(
    name = "overlaps",
    breaks = 1:12,
    labels = paste0('\u2265', 1:12)
  ) +
  theme_classic(base_size = 13) +
  scale_colour_grey(
    labels = c(
      "eprint_peaks" = "eprint\npeaks",
      "mean_random_peaks" = "random\npeaks"
    )
  ) +
  labs(y = 'cumulative percentage', fill = NULL) +
  theme(
    legend.position = "top",
    legend.key.size = unit(1, 'cm')
  ) +
  # One linetype per legend key, in level order: "blank" hides the line for
  # eprint_peaks, "solid" keeps it for mean_random_peaks.
  guides(
    color = guide_legend(override.aes = list(linetype = c("blank", "solid")))
  )

enter image description here

  • Related