Stacked Barplot with percentages of total, divided into groups-CodePudding

I am trying to plot a stacked barplot using the following df. My goal is to show the differential distribution of the "Marker"s in two different "Group"s (IPN, INV), so that the sum of all 4 subgroups (WT, MUT-i, MUT-p, MUT-d) equals a 100%.

What is the best approach to do this?

structure(list(Marker = c("p16", "p16", "p16", "p16", "p16", 
"p16", "p16", "p16", "p53", "p53", "p53", "p53", "p53", "p53", 
"p53", "p53", "c-MET", "c-MET", "c-MET", "c-MET", "c-MET", "c-MET", 
"c-MET", "c-MET", "c-MYC", "c-MYC", "c-MYC", "c-MYC", "c-MYC", 
"c-MYC", "c-MYC", "c-MYC", "EGFR", "EGFR", "EGFR", "EGFR", "EGFR", 
"EGFR", "EGFR", "EGFR", "HER2-CISH", "HER2-CISH", "HER2-CISH", 
"HER2-CISH", "HER2-CISH", "HER2-CISH", "HER2-CISH", "HER2-CISH", 
"PD-L1 IC1%", "PD-L1 IC1%", "PD-L1 IC1%", "PD-L1 IC1%", "PD-L1 IC1%", 
"PD-L1 IC1%", "PD-L1 IC1%", "PD-L1 IC1%", "PD-L1 TPS1%", "PD-L1 TPS1%", 
"PD-L1 TPS1%", "PD-L1 TPS1%", "PD-L1 TPS1%", "PD-L1 TPS1%", "PD-L1 TPS1%", 
"PD-L1 TPS1%", "PD-L1 CPS1%", "PD-L1 CPS1%", "PD-L1 CPS1%", "PD-L1 CPS1%", 
"PD-L1 CPS1%", "PD-L1 CPS1%", "PD-L1 CPS1%", "PD-L1 CPS1%"), 
    Group = c("IPN", "IPN", "IPN", "IPN", "INV", "INV", "INV", 
    "INV", "IPN", "IPN", "IPN", "IPN", "INV", "INV", "INV", "INV", 
    "IPN", "IPN", "IPN", "IPN", "INV", "INV", "INV", "INV", "IPN", 
    "IPN", "IPN", "IPN", "INV", "INV", "INV", "INV", "IPN", "IPN", 
    "IPN", "IPN", "INV", "INV", "INV", "INV", "IPN", "IPN", "IPN", 
    "IPN", "INV", "INV", "INV", "INV", "IPN", "IPN", "IPN", "IPN", 
    "INV", "INV", "INV", "INV", "IPN", "IPN", "IPN", "IPN", "INV", 
    "INV", "INV", "INV", "IPN", "IPN", "IPN", "IPN", "INV", "INV", 
    "INV", "INV"), Subgroup = c("WT", "MUT-i", "MUT-p", "MUT-d", 
    "WT", "MUT-i", "MUT-p", "MUT-d", "WT", "MUT-i", "MUT-p", 
    "MUT-d", "WT", "MUT-i", "MUT-p", "MUT-d", "WT", "MUT-i", 
    "MUT-p", "MUT-d", "WT", "MUT-i", "MUT-p", "MUT-d", "WT", 
    "MUT-i", "MUT-p", "MUT-d", "WT", "MUT-i", "MUT-p", "MUT-d", 
    "WT", "MUT-i", "MUT-p", "MUT-d", "WT", "MUT-i", "MUT-p", 
    "MUT-d", "WT", "MUT-i", "MUT-p", "MUT-d", "WT", "MUT-i", 
    "MUT-p", "MUT-d", "WT", "MUT-i", "MUT-p", "MUT-d", "WT", 
    "MUT-i", "MUT-p", "MUT-d", "WT", "MUT-i", "MUT-p", "MUT-d", 
    "WT", "MUT-i", "MUT-p", "MUT-d", "WT", "MUT-i", "MUT-p", 
    "MUT-d", "WT", "MUT-i", "MUT-p", "MUT-d"), `Number of Cases` = c(59, 
    0, 1, 5, 42, 0, 0, 1, 42, 2, 3, 18, 27, 1, 2, 12, 7, 15, 
    11, 23, 14, 9, 10, 12, 56, 0, 1, 8, 41, 1, 0, 3, 17, 16, 
    11, 20, 18, 12, 10, 6, 60, 0, 0, 4, 44, 0, 0, 2, 60, 1, 1, 
    4, 42, 0, 0, 0, 63, 0, 0, 2, 39, 1, 0, 2, 48, 4, 4, 9, 31, 
    3, 1, 7)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-72L))type here

Objective, or something similar to it... Whichever you think is a good representation of the data

Thanks a lot!

CodePudding user response：

Update: request removing 0%: removed first solution:

One way to remove the 0% is to subset the data for labeling in ggtext.

library(tidyverse)

df1 <- df %>% 
  mutate(Marker = fct_relevel(Marker, c("p16", "p53", "c-MET", "c-MYC")),
         Subgroup = fct_relevel(Subgroup, c("WT", "MUT-i", "MUT-p", "MUT-d"))) %>% 
  group_by(Group, Marker, Subgroup) %>% 
  summarise(sum=sum(`Number of Cases`)) %>% 
  # group_by(Subgroup) %>% #depends on what percent you want
  mutate(pct= prop.table(sum) * 100) 
  
ggplot(df1, aes(Marker, pct, fill=Subgroup))  
  geom_col(position = position_stack())  
  ylab("Percentage")  
  geom_text(data = df1 %>% filter(pct != 0),
            aes(label=paste0(sprintf("%1.1f", pct),"%")),
            position=position_stack(vjust=0.5))  
  facet_wrap(. ~  Group) 
  ggtitle("My title")  
  theme_bw()