Home > database >  Omit SOME data labels from ggplot bar plot
Omit SOME data labels from ggplot bar plot

Time:07-21

I have some code that downloads COVID vaccination data from the CDC's website, does some manipulation, and eventually produces a bar plot like this:

enter image description here

You'll notice the zero at the base of MA's column. The number of unvaccinated people in MA has been getting smaller and smaller and today eventually hit 0. Leaving aside whether you believe there are actually NO people in MA that are unvaccinated, I can believe the number is quite small....

Is there any way that I can tell ggplot to NOT include that 0 data label?

Here's the data

# Long-format plot data: 50 rows, one per State x vaccination category.
# Rows are grouped by category (10 states each); within a category the states
# run CA, FL, GA, MA, NM, OH, OK, TN, US, UT, while "US" is the first factor
# level. `value` looks like a resident count (the plot divides it by 1e6 for
# its labels) -- confirm against the upstream CDC processing.
finalBarPlotData <- data.frame(
  State = factor(
    rep(c("CA", "FL", "GA", "MA", "NM", "OH", "OK", "TN", "US", "UT"),
        times = 5L),
    levels = c("US", "CA", "FL", "GA", "MA", "NM", "OH", "OK", "TN", "UT")
  ),
  variable = factor(
    rep(c("Boosted", "Eligible", "Ineligible", "Incomplete", "Unvax"),
        each = 10L),
    levels = c("Boosted", "Eligible", "Ineligible", "Incomplete", "Unvax")
  ),
  value = c(
    # Boosted
    10811037.5, 5225168.9, 1825544.6, 2168196.8, 581220.4,
    2941639.4, 761232.2, 1397638.8, 80651731.4, 624473.9,
    # Eligible
    16751023.5, 8810766.1, 3818756.4, 3144199.2, 864186.6,
    3745298.6, 1439699.8, 2217344.2, 133078622.6, 1382887.1,
    # Ineligible
    1257568, 470455, 236940, 211389, 64755,
    192495, 95483, 127987, 8775787, 72985,
    # Incomplete
    4080930, 2745163, 1151722, 1382887, 363931,
    592129, 553536, 548320, 38221889, 258139,
    # Unvax (the MA entry is exactly 0)
    6611664, 4226184, 3584460, 0, 222736,
    4217538, 1107020, 2537884, 67511493, 867473
  )
)

and the code for the plot

# Fill colour for each vaccination category, keyed by the factor level name.
stackColors <- c(
  Boosted    = "green",
  Eligible   = "blue",
  Ineligible = "yellow2",
  Incomplete = "orange",
  Unvax      = "red3"
)


# Percent-stacked bar chart of vaccination status per state.
#
# Every scale/layer/theme component has to be chained onto the plot with `+`.
# Without it, R treats each call as a separate statement and only the bare
# ggplot() object ends up in boosterBar. The labs() call must also be closed
# before the commented-out subtitle, otherwise the script does not parse.
boosterBar <- ggplot(
  finalBarPlotData,
  aes(
    x = State, y = value,
    # Fix the stacking/legend order of the categories explicitly.
    fill = factor(variable,
                  levels = c("Boosted", "Eligible",
                             "Ineligible", "Incomplete",
                             "Unvax"))
  )
) +
  geom_bar(stat = "identity", position = "fill") + # percent stacked
  labs(
    x = "", y = "% of Population", fill = "",
    caption = "Data: CDC\nNumber in bars represent millions of residents"
    # subtitle = paste("Data as of", format(newestDate, "%A, %B %e, %Y"))
  ) +
  scale_y_continuous(labels = function(x) paste0(x*100, "%")) + # Multiply by 100 & add %
  theme(plot.title = element_text(size = rel(1), face = "bold"),
        plot.subtitle = element_text(size = rel(0.7)),
        plot.caption = element_text(size = rel(1)),
        axis.text.y = element_text(color = 'black'),
        axis.title.y = element_text(color = 'black'),
        axis.text.x = element_text(size = rel(1.25), angle = 45, hjust = 1)) +
  scale_fill_manual(values = stackColors,
                    labels = c("Received Booster",
                               "Fully Vax, Booster Eligible",
                               "Fully Vax, Booster Ineligible",
                               "First Dose Only",
                               "Unvaccinated")) +
  # Labels are in millions, centred in each segment. A segment whose value is
  # 0 (MA / Unvax in this data) is still labelled "0" -- this is the label the
  # question is asking how to suppress.
  geom_text(aes(label = round(value/1000000, digits = 2)),
            position = position_fill(vjust = 0.5), color = "black")


print(boosterBar)

Thanks in advance....

CodePudding user response:

There's a lot going on in your plot; perhaps the 'easiest' way is to only plot text if it's > 1, e.g.

library(tidyverse)

# Same 50-row long-format data as in the question, bound to both `df` and
# `finalBarPlotData`. Rows are grouped by vaccination category (10 states
# each); "US" is the first level of the State factor even though it appears
# ninth within each group.
df <- finalBarPlotData <- data.frame(
  State = factor(
    rep(c("CA", "FL", "GA", "MA", "NM", "OH", "OK", "TN", "US", "UT"),
        times = 5L),
    levels = c("US", "CA", "FL", "GA", "MA", "NM", "OH", "OK", "TN", "UT")
  ),
  variable = factor(
    rep(c("Boosted", "Eligible", "Ineligible", "Incomplete", "Unvax"),
        each = 10L),
    levels = c("Boosted", "Eligible", "Ineligible", "Incomplete", "Unvax")
  ),
  value = c(
    # Boosted
    10811037.5, 5225168.9, 1825544.6, 2168196.8, 581220.4,
    2941639.4, 761232.2, 1397638.8, 80651731.4, 624473.9,
    # Eligible
    16751023.5, 8810766.1, 3818756.4, 3144199.2, 864186.6,
    3745298.6, 1439699.8, 2217344.2, 133078622.6, 1382887.1,
    # Ineligible
    1257568, 470455, 236940, 211389, 64755,
    192495, 95483, 127987, 8775787, 72985,
    # Incomplete
    4080930, 2745163, 1151722, 1382887, 363931,
    592129, 553536, 548320, 38221889, 258139,
    # Unvax (the MA entry is exactly 0)
    6611664, 4226184, 3584460, 0, 222736,
    4217538, 1107020, 2537884, 67511493, 867473
  )
)

# Named colour palette: names match the levels of `variable`, so
# scale_fill_manual() can look colours up by category.
stackColors <- c(
  Boosted    = "green",
  Eligible   = "blue",
  Ineligible = "yellow2",
  Incomplete = "orange",
  Unvax      = "red3"
)


# Percent-stacked bar chart of vaccination status per state, with the data
# label suppressed wherever the underlying value is not greater than 1.
#
# Each component is chained onto the plot with `+`. Without the operators the
# calls are evaluated as independent statements and boosterBar would hold only
# the empty ggplot() canvas.
boosterBar <- ggplot(
  finalBarPlotData,
  aes(
    x = State, y = value,
    # Fix the stacking/legend order of the categories explicitly.
    fill = factor(variable,
                  levels = c("Boosted", "Eligible",
                             "Ineligible", "Incomplete",
                             "Unvax"))
  )
) +
  geom_bar(stat = "identity", position = "fill") + # percent stacked
  labs(x = "", y = "% of Population", fill = "",
       caption = "Data: CDC\nNumber in bars represent millions of residents") +
  # subtitle = paste("Data as of", format(newestDate, "%A, %B %e, %Y"))
  scale_y_continuous(labels = function(x) paste0(x*100, "%")) + # Multiply by 100 & add %
  theme(plot.title = element_text(size = rel(1), face = "bold"),
        plot.subtitle = element_text(size = rel(0.7)),
        plot.caption = element_text(size = rel(1)),
        axis.text.y = element_text(color = 'black'),
        axis.title.y = element_text(color = 'black'),
        axis.text.x = element_text(size = rel(1.25), angle = 45, hjust = 1)) +
  scale_fill_manual(values = stackColors,
                    labels = c("Received Booster",
                               "Fully Vax, Booster Eligible",
                               "Fully Vax, Booster Ineligible",
                               "First Dose Only",
                               "Unvaccinated")) +
  # ifelse() is vectorised over the rows: rows with value > 1 get the rounded
  # millions figure, every other row (here the single 0 for MA / Unvax) gets
  # an empty string, so no text is drawn for it.
  geom_text(aes(label = ifelse(value > 1, round(value/1000000, digits = 2), "")),
            position = position_fill(vjust = 0.5), color = "black")


print(boosterBar)

Created on 2022-07-21 by the reprex package (v2.0.1)

  • Related