Home > Software engineering >  Difference or variance bars between stacked bars in ggplot
Difference or variance bars between stacked bars in ggplot

Time:11-18

I am trying to insert difference or variance bars between stacked bar totals in ggplot. I would like the result to look something like the image in this link: Stacked bar graph with difference between bars

I would like to label these variance bars with the difference between the stacked bars on either side.

Could somebody show me how I can achieve this?

Here is my sample data:

# Sample data in long format: one row per phase / grade combination
# (4 phases x 2 grades = 8 rows).
df <- data.frame(
  x.axis.Var = rep(paste0("Phase", 1:4), times = 2),
  cat.Var = rep(c("High Grade", "Low Grade"), each = 4),
  values = c(
    500, 200, 600, 300, # High Grade, Phase1..Phase4
    150, 500, 600, 200  # Low Grade,  Phase1..Phase4
  )
)

Arranging data:

# Prepare the plotting data from `df`: fix the factor level order, sort the
# rows, and attach helper columns. Requires dplyr to be attached
# (`%>%`, mutate(), arrange(), group_by(), cur_group_id(), select()).
df.tmp <- df %>%
  mutate(
    x.axis.Var = factor(
      x.axis.Var,
      levels = c("Phase1", "Phase2", "Phase3", "Phase4")
    ),
    cat.Var = factor(
      cat.Var,
      levels = c("High Grade", "Low Grade")
    )
  ) %>%
  arrange(x.axis.Var, desc(cat.Var)) %>%
  # Running total over ALL rows (computed before grouping), with start.Bar
  # being the previous row's end.Bar and 0 for the first row.
  mutate(
    end.Bar = cumsum(values),
    start.Bar = c(0, head(end.Bar, -1))
  ) %>%
  group_by(x.axis.Var) %>%
  mutate(
    # cur_group_id() replaces the deprecated `group_indices(., x.axis.Var)`;
    # it yields the integer index of the current x.axis.Var group.
    group.id = cur_group_id(),
    total.by.x = sum(values)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per phase.
  ungroup() %>%
  select(x.axis.Var, cat.Var, group.id, start.Bar, values, end.Bar, total.by.x)

Plotting data using ggplot:

# Stacked bar chart of `values` per phase, drawn on a continuous x axis
# (group.id) so that extra positions between the bars are available.
# Layers and theme elements are combined with `+`; each `+` must end the line
# so R keeps reading the expression.
ggplot(df.tmp, aes(x = group.id, y = values, fill = cat.Var)) +
  geom_bar(stat = "identity", position = "stack", width = 0.3) +
  scale_fill_manual(values = c('#ff0000', '#c8f464')) +
  scale_x_continuous(
    expand = c(0, 0),
    # Leave half a unit of space on either side of the first and last bar.
    limits = c(min(df.tmp$group.id) - 0.5, max(df.tmp$group.id) + 0.5),
    # Breaks: left edge, one per bar, and one half a unit right of each bar.
    breaks = c(
      min(df.tmp$group.id) - 0.5,
      unique(df.tmp$group.id),
      unique(df.tmp$group.id) + 0.5
    ),
    # Only the breaks under the bars carry a label; the others are blank.
    labels = c(
      "",
      as.character(unique(df.tmp$x.axis.Var)),
      rep(c(""), length(unique(df.tmp$x.axis.Var)))
    )
  ) +
  theme(
    text = element_text(size = 5, color = "#4e4d47"),
    axis.text = element_text(
      size = 8, angle = 90, vjust = 0.5, hjust = 1,
      color = "#4e4d47", face = "bold"
    ),
    axis.text.y = element_text(margin = margin(r = 0.3, unit = "cm")),
    # Per-tick colours: NA for the ticks under the bar labels, black elsewhere.
    # NOTE(review): this vector has one entry fewer than `breaks` above
    # (1 + n + (n - 1) vs 1 + n + n) -- confirm the intended tick pattern.
    axis.ticks.x = element_line(
      color = c(
        "black",
        rep(NA, length(unique(df.tmp$x.axis.Var))),
        rep("black", length(unique(df.tmp$x.axis.Var)) - 1)
      )
    ),
    axis.line = element_line(colour = "#4e4d47", size = 0.5),
    axis.ticks.length = unit(.15, "cm"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_blank(),
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    legend.text = element_text(
      size = 10,
      color = "#4e4d47",
      face = "bold",
      margin = margin(l = 0.25, unit = "cm")
    ),
    legend.title = element_blank()
  )

CodePudding user response:

I've taken the liberty to strip your code of much irrelevant stuff.

You will hardly get around some manual computation before plotting. One way is to create a separate data frame with your summary values and plot them as geom_rect (because geom_col starts at 0 by definition)

You will need to convert your categorical x variable into integers - then you can "interpose" the summary columns at the half-way positions between the bars. You will also need to set the axis breaks and labels manually so that the axis still looks categorical.

If needed, add a text layer (e.g. geom_text) for your annotation, and if you want a different fill scale for the difference bars, you could use the {ggnewscale} package.

library(tidyverse)

# Long-format input data: 4 phases x 2 grades, with cat.Var stored as a factor
# whose level order is "High Grade", then "Low Grade".
df1 <- data.frame(
  x.axis.Var = rep(paste0("Phase", 1:4), times = 2),
  cat.Var = rep(c("High Grade", "Low Grade"), each = 4),
  values = c(
    500, 200, 600, 300, # High Grade, Phase1..Phase4
    150, 500, 600, 200  # Low Grade,  Phase1..Phase4
  )
) %>%
  mutate(
    cat.Var = factor(cat.Var, levels = c("High Grade", "Low Grade"))
  )

# Half-width of every column; the stacked bars are drawn with a full width of
# col_width * 2 and the difference rectangles span x +/- col_width.
col_width <- .2

# One row per phase with the stacked total (`sum`) and the geometry of the
# "difference" rectangle that sits between this phase and the previous one.
# The first row has no predecessor, so diff / ymin / ymax are NA there.
df2 <-
  df1 %>%
  group_by(x.axis.Var) %>%
  summarise(sum = sum(values)) %>%
  ungroup() %>%
  mutate(
    # Change in total relative to the previous phase.
    diff = c(NA, diff(sum)),
    # Centre of the rectangle: half a unit left of this phase's integer
    # position (0.5, 1.5, ...), derived from the row count so it follows the
    # number of phases.
    x = seq_len(n()) - 0.5,
    xmin = x - col_width,
    xmax = x + col_width,
    # The rectangle runs from the previous total to the current total.
    ymin = lag(sum),
    ymax = ymin + diff
  )

# Stacked bars (df1) on integer x positions, with semi-transparent difference
# rectangles (df2) interposed at the half-way positions and labelled with the
# change in total. Layers are chained with `+`, which must end each line.
ggplot() +
  geom_col(
    data = df1,
    aes(x = as.integer(as.factor(x.axis.Var)), y = values, fill = cat.Var),
    width = col_width * 2
  ) +
  scale_fill_manual(NULL, values = c('#ff0000', '#c8f464')) +
  # Start a second, independent fill scale for the difference rectangles.
  ggnewscale::new_scale_fill() +
  geom_rect(
    data = df2,
    # Fill by direction of change: TRUE when the total increased.
    aes(
      xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
      fill = (ymax - ymin) > 0
    ),
    alpha = .5, show.legend = FALSE
  ) +
  # Label each rectangle at its vertical midpoint with the difference.
  geom_text(data = df2, aes(x, y = ymin + (ymax - ymin) / 2, label = diff)) +
  scale_fill_brewer(NULL, palette = "Set1") +
  # Relabel the integer positions so the axis reads as categorical.
  scale_x_continuous(NULL, breaks = 1:4, labels = paste0("Phase", 1:4))
#> Warning: Removed 1 rows containing missing values (geom_rect).
#> Warning: Removed 1 rows containing missing values (geom_text).

Created on 2021-11-17 by the reprex package (v2.0.1)

  • Related