Home > Net >  Overlapping bar plot in ggplot2
Overlapping bar plot in ggplot2

Time:03-28

I'm trying to hide a variable in a stacked bar chart in a dataset.
Below is the popByAgeEthnicity dataset:

# dput() output of popByAgeEthnicity: a grouped tibble with 48 rows
# (12 age groups x 4 ethnic groups) and columns Age.group, Ethnic.group, Pop.
# The last age-group label is restored to "65+"; the "+" had been lost
# during text extraction, leaving "65 ".
# Ethnic.group keeps an "Unknown" level that no row actually uses.
structure(list(Age.group = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L), .Label = c("12-15",
"16-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49",
"50-54", "55-59", "60-64", "65+"), class = "factor"), Ethnic.group = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("Asian",
"European or Other", "Maori", "Pacific Peoples", "Unknown"), class = "factor"),
    Pop = c(32356, 140870, 59582, 26122, 28226, 135046, 52901,
    25847, 41494, 176345, 63537, 32940, 70703, 195133, 60414,
    31390, 85640, 193590, 52224, 26999, 79430, 181746, 42045,
    23791, 55518, 183038, 39977, 21992, 43659, 215424, 42804,
    20844, 36416, 220480, 40053, 20228, 33508, 229498, 38121,
    17104, 30389, 212340, 29545, 13351, 61105, 646932, 49290,
    25989)), row.names = c(NA, -48L), groups = structure(list(
    Age.group = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
    3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
    7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
    10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L), .Label = c("12-15",
    "16-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49",
    "50-54", "55-59", "60-64", "65+"), class = "factor"), Ethnic.group = structure(c(1L,
    2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
    1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
    4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
    3L, 4L), .Label = c("Asian", "European or Other", "Maori",
    "Pacific Peoples", "Unknown"), class = "factor"), .rows = structure(list(
        1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
        14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
        25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
        36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
        47L, 48L), ptype = integer(0), class = c("vctrs_list_of",
    "vctrs_vctr", "list"))), row.names = c(NA, -48L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))

Following is the code to produce this stacked bar chart:

# Total population for every Age.group x Ethnic.group combination in `vacc`,
# excluding rows whose ethnicity is recorded as "Unknown".
# `.groups = "keep"` leaves the result grouped by both columns.
popByAgeEthnicity <-
  vacc %>%
  filter(Ethnic.group != "Unknown") %>%
  group_by(Age.group, Ethnic.group) %>%
  summarise(Pop = sum(Pop), .groups = "keep")

# Stacked bar chart: one bar per age group, filled by ethnic group.
# stat = "identity" uses Pop itself as the bar height instead of counting rows.
# The `+` after the ggplot() call is required to add the layer; without it
# geom_bar() is evaluated as a separate statement and the plot has no bars.
ggplot(
  data = popByAgeEthnicity,
  mapping = aes(x = Age.group, y = Pop, fill = Ethnic.group)
) +
  geom_bar(stat = "identity")

Here is the outcome of this code:


Now I need to write code to produce the chart below:


I've tried several methods like changing the scales but that didn't help me.
Any help will be appreciated.

CodePudding user response:

First convert Ethnic.group to a factor, then filter the data frame to only contain the entries where Ethnic.group != "Asian". This will mean that Ethnic.group "remembers" that there is a category called "Asian", even though it contains no actual "Asian" values.

To use this "memory" in your plot, add scale_fill_discrete(drop = FALSE) to include all factor levels even when some of the levels are not represented in the data.

These two steps will keep "Asian" in the legend as a possible value, but without any actual entries on the plot, and maintain the colours that were present in your original plot.

# Plot every ethnic group except "Asian" while keeping "Asian" in the legend,
# so the remaining groups keep the fill colours they had in the full plot.
popByAgeEthnicity %>%
  # Rebuild the factor first, while "Asian" rows are still present, so that
  # level survives the filter below. factor() keeps only the levels that
  # occur in the data, which also drops the unused "Unknown" level.
  mutate(Ethnic.group = factor(Ethnic.group)) %>%
  filter(Ethnic.group != "Asian") %>%
  ggplot(aes(Age.group, Pop, fill = Ethnic.group)) +
  geom_col() +
  # drop = FALSE keeps factor levels with no rows (here "Asian") in the scale.
  scale_fill_discrete(drop = FALSE)

enter image description here

As a side note, writing geom_bar(stat = "identity") is just a long way of writing geom_col()

CodePudding user response:

Thank you, Allan, for your answer. This code produces the desired outcome:

# Overlapping (non-stacked) bar chart: one bar per ethnic group in each
# age group. position = "identity" draws every bar from zero instead of
# stacking, so the bars overlap.
# The `+` after the ggplot() call is required to add the layer.
ggplot(
  data = popByAgeEthnicity,
  aes(x = Age.group)
) +
  geom_bar(
    aes(y = Pop, fill = Ethnic.group),
    stat = "identity", position = "identity"
  )

enter image description here

  • Related