Home > Mobile >  How to separate a stacked bar in geom_col and reorder bins?
How to separate a stacked bar in geom_col and reorder bins?

Time:10-23

I'm trying to create a bar chart showing employment by firm size for two different years, where I want the years to have separate bars. Right now I use a fill argument just to show the year distinction. The bins are also not in the correct order, and I'm having trouble fixing that. I've attached my code, a photo of how the data is arranged, and the current output.

# Bar chart of employment by firm size, with the bars filled by `year`.
# geom_col() stacks bars that share an x value by default, which is why both
# years end up in a single bar per bin.
# ggplot2 layers must be joined with a trailing `+`; without it each call is
# evaluated as a separate statement and never added to the plot.
firm_size %>%
  ggplot(aes(`firm size`, total, fill = year)) +
  geom_col() +
  theme_minimal() +
  labs(title = "Firm Size Distribution",
       subtitle = "2021",
       x = "Number of Employees at Firm",
       y = "% of Total Santa Barbara Employment")

enter image description here

enter image description here

CodePudding user response:

library(stringr)
library(ggplot2)

# Employment totals per firm-size bin: nine bins for 2021 followed by eight
# bins for 2011 (the data has no "1000+" row for 2011).
firm_size <- data.frame(
  firm.size = c("0-4", "5-9", "10-19", "20-49", "50-99", "100-249",
                "250-499", "500-999", "1000+", "0-4", "5-9", "10-19",
                "20-49", "50-99", "100-249", "250-499", "500-999"),
  total = c(14901L, 15378L, 22271L, 35254L, 23014L, 25752L, 14531L,
            14349L, 19009L, 11468L, 13466L, 19835L, 30597L, 22441L,
            23591L, 12342L, 6660L),
  year = c(2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L,
           2021L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L)
)

# Rank every row by the numeric lower bound of its bin ("10-19" -> 10,
# "1000+" -> 1000) so that bins sort numerically rather than alphabetically.
# order() is deliberately NOT used here: it returns the permutation that would
# sort the rows, not a rank per row, and would place the largest bin first.
lower_bound <- as.numeric(sub("^([0-9]+).*$", "\\1", firm_size$firm.size))
# Dense rank: the same bin gets the same rank in both years.
firm_size$rank <- match(lower_bound, sort(unique(lower_bound)))

# One panel per year, bins ordered along the x axis by the `rank` column.
# Each layer is joined with a trailing `+`; without it the calls are separate
# statements and only an empty plot is drawn.
firm_size %>%
  ggplot(aes(x = reorder(firm.size, rank), y = total, fill = firm.size)) +
  geom_col() +
  theme_minimal() +
  labs(title = "Firm Size Distribution",
       subtitle = "2021",
       x = "Number of Employees at Firm",
       y = "% of Total Santa Barbara Employment") +
  facet_wrap(~year)

# Print the data frame for inspection.
firm_size

CodePudding user response:

To plot the groups side by side, use position = "dodge2". To get the groups in order, convert the column to a factor and specify its levels in the desired order.

Data

# Example data: ten firm-size bins (deliberately not in numeric order), each
# with a total for 2021 (first ten rows) and 2011 (last ten rows).
df <- data.frame(
  firm = rep(c("0-4", "250-", "5-9", "50-", "500-", "10-", "100-", "700-",
               "1000+", "20-"), times = 2),
  total = c(12368L, 15272L, 19289L, 11251L, 18825L, 10355L, 17699L, 13953L,
            19090L, 15402L, 10931L, 19188L, 15636L, 14001L, 19051L, 10258L,
            15433L, 10480L, 17325L, 18490L),
  year = rep(c(2021, 2011), each = 10)
)

Prepare

# Treat year as a discrete value so that `fill = year` yields one colour per
# year instead of a continuous gradient.
df$year <- as.character(df$year)

# Order the bins by their numeric lower bound ("5-9" -> 5, "250-" -> 250).
# The order must be given through factor(levels = ...): assigning to
# levels(df$firm) after as.factor() would only RENAME the alphabetically
# sorted levels and attach the totals to the wrong bins.
firm_levels <- unique(df$firm)
firm_levels <- firm_levels[order(as.numeric(sub("[^0-9].*$", "", firm_levels)))]
df$firm <- factor(df$firm, levels = firm_levels)

Plot

library(ggplot2)

# Side-by-side bars per bin: position = "dodge2" places the two years next to
# each other instead of stacking them. The layer is added with `+`.
ggplot(df) +
  geom_col(aes(firm, total, fill = year), position = "dodge2")

ggplot barplot

CodePudding user response:

If you want to compare categories between years, perhaps you would be better with an alluvial bar plot. This is not only more attractive than a dodged bar plot, but gives a clear and intuitive representation of your data:

library(tidyverse)
library(ggalluvial)

# Alluvial plot comparing the firm-size bins between the two years.
firm_size %>%
  # Keep the bins in their order of first appearance in the data.
  mutate(year = factor(year),
         firm.size = factor(firm.size, unique(firm.size))) %>%
  # Add a zero row for any bin/year combination missing from the data.
  complete(expand(., firm.size, year), fill = list(total = 0)) %>%
  ggplot(aes(x = year, y = total,
             stratum = firm.size, alluvium = firm.size,
             fill = firm.size)) +
  geom_flow(color = "gray85") +
  geom_stratum(color = "gray85") +
  # Label each stratum with its total; zero-filled rows get no label.
  geom_text(stat = "stratum", aes(color = firm.size,
            label = ifelse(total == 0, "", scales::comma(total)))) +
  scale_fill_brewer(name = "Firm Size") +
  # Black text for the first six bins, white for the last three.
  scale_color_manual(values = rep(c("black", "white"), times = c(6, 3)),
                     guide = "none") +
  theme_minimal(base_size = 16) +
  scale_y_continuous(labels = scales::comma) +
  theme(panel.grid.major.x = element_blank())

enter image description here

  • Related