Home > Mobile >  How to add categorical variables labels from group names (fill argument) while removing redundant la
How to add categorical variable labels from group names (fill argument) while removing redundant labels

Time:08-31

I would like to add labels on top of my bar plot (ggplot) so that my groups are easier to tell apart visually.

Here is a condensed version of my data (this is `dput()` output; assign it to an object named `example` to run the code below):

structure(list(prey_name = c("Empty", "Dreissena", "Amphipod", 
                             "Chiro.Pupae", "Goby", "Byths", "Chiro.Larvae", "Byths", "Empty", 
                             "Empty", "Byths", "Amphipod", "Eurycercidae", "Empty", "Chiro.Pupae", 
                             "Empty", "Amphipod", "Goby", "Daphnia", "Chiro.Larvae", "Byths", 
                             "Byths", "Byths", "Empty", "Sphaeriidae", "Empty", "Empty", "Chiro.Pupae", 
                             "Amphipod", "Empty", "Byths", "Chiro.Pupae", "Isopod", "Byths", 
                             "Byths", "Empty", "Chiro.Adult", "Hemimysis", "Chiro.Pupae", 
                             "Byths", "Empty", "Empty", "Empty", "Amphipod", "Empty", "Empty", 
                             "Empty", "Empty", "Goby", "Empty", "Byths", "Chiro.Larvae", "Amphipod", 
                             "Empty", "Chironomidae", "Goby", "Amphipod", "Empty", "Byths", 
                             "Empty", "Chiro.Larvae", "Chiro.Larvae", "Byths", "Empty", "Dreissena", 
                             "Chironomidae", "Chiro.Pupae", "Empty", "Isopod", "Dreissena", 
                             "Byths", "Copepoda", "Goby", "Empty", "Empty", "EggMass", "Empty", 
                             "Empty", "Amphipod", "Empty", "Amphipod", "Byths", "Eurycercidae", 
                             "Sphaeriidae", "Empty", "Chiro.Pupae", "Chiro.Pupae", "Empty", 
                             "Amphipod", "Empty", "Byths", "Byths", "Byths", "Goby", "Empty", 
                             "Empty", "Empty", "Empty", "Empty", "Empty"), id = c("708_1", 
                                                                                  "824_29", "723_80", "604_2", "1031_2", "719_34", "824_39", "619_25", 
                                                                                  "802_5", "822_2", "602_10", "803_2", "1024_33", "630_13", "619_27", 
                                                                                  "701_2", "627_3", "701_6", "647_16", "824_31", "643_9", "807_13", 
                                                                                  "601_6", "601_7", "616_6", "712_3", "639_4", "652_1", "604_2", 
                                                                                  "701_7", "643_8", "627_2", "616_6", "712_6", "824_30", "643_3", 
                                                                                  "614_3", "803_6", "614_1", "629_2", "712_5", "703_1", "712_4", 
                                                                                  "622_5", "824_21", "822_5", "630_13", "801_10", "1031_1", "807_11", 
                                                                                  "1004_18", "652_1", "719_32", "807_1", "716_1", "1031_1", "713_1", 
                                                                                  "1003_26", "1010_1", "822_6", "824_28", "647_17", "601_5", "630_11", 
                                                                                  "824_35", "723_80", "813_3", "708_2", "1024_33", "824_26", "813_3", 
                                                                                  "627_2", "803_6", "801_15", "1030_38", "643_7", "617_3", "605_4", 
                                                                                  "813_3", "643_1", "817_6", "630_10", "1024_44", "617_1", "701_5", 
                                                                                  "616_3", "635_4", "807_2", "614_1", "801_6", "808_1", "642_7", 
                                                                                  "643_2", "804_1", "701_3", "617_4", "635_2", "801_3", "619_20", 
                                                                                  "1029_6"), CRN = c(708L, 824L, 723L, 604L, 1031L, 719L, 824L, 
                                                                                                     619L, 802L, 822L, 602L, 803L, 1024L, 630L, 619L, 701L, 627L, 
                                                                                                     701L, 647L, 824L, 643L, 807L, 601L, 601L, 616L, 712L, 639L, 652L, 
                                                                                                     604L, 701L, 643L, 627L, 616L, 712L, 824L, 643L, 614L, 803L, 614L, 
                                                                                                     629L, 712L, 703L, 712L, 622L, 824L, 822L, 630L, 801L, 1031L, 
                                                                                                     807L, 1004L, 652L, 719L, 807L, 716L, 1031L, 713L, 1003L, 1010L, 
                                                                                                     822L, 824L, 647L, 601L, 630L, 824L, 723L, 813L, 708L, 1024L, 
                                                                                                     824L, 813L, 627L, 803L, 801L, 1030L, 643L, 617L, 605L, 813L, 
                                                                                                     643L, 817L, 630L, 1024L, 617L, 701L, 616L, 635L, 807L, 614L, 
                                                                                                     801L, 808L, 642L, 643L, 804L, 701L, 617L, 635L, 801L, 619L, 1029L
                                                                                  ), FID = c(1L, 29L, 80L, 2L, 2L, 34L, 39L, 25L, 5L, 2L, 10L, 
                                                                                             2L, 33L, 13L, 27L, 2L, 3L, 6L, 16L, 31L, 9L, 13L, 6L, 7L, 6L, 
                                                                                             3L, 4L, 1L, 2L, 7L, 8L, 2L, 6L, 6L, 30L, 3L, 3L, 6L, 1L, 2L, 
                                                                                             5L, 1L, 4L, 5L, 21L, 5L, 13L, 10L, 1L, 11L, 18L, 1L, 32L, 1L, 
                                                                                             1L, 1L, 1L, 26L, 1L, 6L, 28L, 17L, 5L, 11L, 35L, 80L, 3L, 2L, 
                                                                                             33L, 26L, 3L, 2L, 6L, 15L, 38L, 7L, 3L, 4L, 3L, 1L, 6L, 10L, 
                                                                                             44L, 1L, 5L, 3L, 4L, 2L, 1L, 6L, 1L, 7L, 2L, 1L, 3L, 4L, 2L, 
                                                                                             3L, 20L, 6L), number = c(0L, 1L, 99L, 1216L, 5L, 239L, 4L, 30L, 
                                                                                                                      0L, 0L, 20L, 40L, 931L, 0L, 1L, 0L, 173L, 1L, 363L, 9L, 608L, 
                                                                                                                      68L, 251L, 0L, 1L, 0L, 0L, 32L, 1L, 0L, 256L, 50L, 5L, 194L, 
                                                                                                                      8L, 0L, 74L, 6L, 255L, 8L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 3L, 
                                                                                                                      0L, 333L, 2L, 3L, 0L, 23L, 3L, 1L, 0L, 107L, 0L, 5L, 3L, 20L, 
                                                                                                                      0L, 1L, 67L, 1L, 0L, 17L, 1L, 68L, 8L, 1L, 0L, 0L, 3L, 0L, 0L, 
                                                                                                                      1L, 0L, 32L, 213L, 1056L, 3L, 0L, 576L, 55L, 0L, 6L, 0L, 330L, 
                                                                                                                      6L, 72L, 2L, 0L, 0L, 0L, 0L, 0L, 0L), MONTH = c(7L, 8L, 7L, 6L, 
                                                                                                                                                                      11L, 7L, 8L, 6L, 8L, 8L, 6L, 8L, 11L, 6L, 6L, 7L, 6L, 7L, 6L, 
                                                                                                                                                                      8L, 6L, 8L, 6L, 6L, 6L, 7L, 6L, 6L, 6L, 7L, 6L, 6L, 6L, 7L, 8L, 
                                                                                                                                                                      6L, 6L, 8L, 6L, 6L, 7L, 7L, 7L, 6L, 8L, 8L, 6L, 8L, 11L, 8L, 
                                                                                                                                                                      11L, 6L, 7L, 8L, 7L, 11L, 7L, 11L, 11L, 8L, 8L, 6L, 6L, 6L, 8L, 
                                                                                                                                                                      7L, 8L, 7L, 11L, 8L, 8L, 6L, 8L, 8L, 11L, 6L, 6L, 6L, 8L, 6L, 
                                                                                                                                                                      8L, 6L, 11L, 6L, 7L, 6L, 6L, 8L, 6L, 8L, 8L, 6L, 6L, 8L, 7L, 
                                                                                                                                                                      6L, 6L, 8L, 6L, 11L), adult = c("Y", "Y", "Y", "Y", "Y", "Y", 
                                                                                                                                                                                                      "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                                                                                                                                                                                                      "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                                                                                                                                                                                                      "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                                                                                                                                                                                                      "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                                                                                                                                                                                                      "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                                                                                                                                                                                                      "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                                                                                                                                                                                                      "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                                                                                                                                                                                                      "Y", "Y", "Y"), month_name = structure(c(7L, 8L, 7L, 6L, 11L, 
                                                                                                                                                                                                                                               7L, 8L, 6L, 8L, 8L, 6L, 8L, 11L, 6L, 6L, 7L, 6L, 7L, 6L, 8L, 
                                                                                                                                                                                                                                               6L, 8L, 6L, 6L, 6L, 7L, 6L, 6L, 6L, 7L, 6L, 6L, 6L, 7L, 8L, 6L, 
                                                                                                                                                                                                                                               6L, 8L, 6L, 6L, 7L, 7L, 7L, 6L, 8L, 8L, 6L, 8L, 11L, 8L, 11L, 
                                                                                                                                                                                                                                               6L, 7L, 8L, 7L, 11L, 7L, 11L, 11L, 8L, 8L, 6L, 6L, 6L, 8L, 7L, 
                                                                                                                                                                                                                                               8L, 7L, 11L, 8L, 8L, 6L, 8L, 8L, 11L, 6L, 6L, 6L, 8L, 6L, 8L, 
                                                                                                                                                                                                                                               6L, 11L, 6L, 7L, 6L, 6L, 8L, 6L, 8L, 8L, 6L, 6L, 8L, 7L, 6L, 
                                                                                                                                                                                                                                               6L, 8L, 6L, 11L), levels = c("January", "February", "March", 
                                                                                                                                                                                                                                                                            "April", "May", "June", "July", "August", "September", "October", 
                                                                                                                                                                                                                                                                            "November", "December"), class = "factor"), BMASSprop = c(NaN, 
                                                                                                                                                                                                                                                                                                                                      0.0174639459714402, 0.09975608226961, 0.999816612301896, 0.2, 
                                                                                                                                                                                                                                                                                                                                      1, 0.99369653669798, 0.0119834315937307, NaN, NaN, 1, 1, 0.0318128167117478, 
                                                                                                                                                                                                                                                                                                                                      NaN, 0.991957112535848, NaN, 0.199222346114873, 0.941324937580778, 
                                                                                                                                                                                                                                                                                                                                      1, 0.971438368782348, 1, 1, 0.261050876294526, NaN, 0.00113648663275246, 
                                                                                                                                                                                                                                                                                                                                      NaN, NaN, 0.953445065176909, 5.49161967291152e-05, NaN, 0.000590014543772203, 
                                                                                                                                                                                                                                                                                                                                      0.999982700299285, 0.00135296027708626, 1, 0.0409008184478508, 
                                                                                                                                                                                                                                                                                                                                      NaN, 0.891996142719383, 0.00572915878256989, 0.995872043743129, 
                                                                                                                                                                                                                                                                                                                                      1, NaN, NaN, NaN, 0.993813065639553, NaN, NaN, NaN, NaN, 0.333333333333333, 
                                                                                                                                                                                                                                                                                                                                      NaN, 0.462998223209947, 0.0465549348230912, 0.797194659694581, 
                                                                                                                                                                                                                                                                                                                                      NaN, 0.433386754658534, 0.333333333333333, 0.0340277269556374, 
                                                                                                                                                                                                                                                                                                                                      NaN, 1, NaN, 0.945830148625487, 0.975005294668752, 1, NaN, 0.827886840316451, 
                                                                                                                                                                                                                                                                                                                                      0.90024391773039, 0.794054544390783, NaN, 0.436583079997355, 
                                                                                                                                                                                                                                                                                                                                      0.912449278688986, 0.152910279633107, 1.72997007151776e-05, 0.852983770022535, 
                                                                                                                                                                                                                                                                                                                                      NaN, NaN, 0.144497135806673, NaN, NaN, 0.05303517597611, NaN, 
                                                                                                                                                                                                                                                                                                                                      0.245671533948344, 0.204684590999682, 1, 0.00196488163927268, 
                                                                                                                                                                                                                                                                                                                                      NaN, 1, 0.994350282485876, NaN, 0.00156505026194397, NaN, 1, 
                                                                                                                                                                                                                                                                                                                                      0.1767290479113, 1, 0.5, NaN, NaN, NaN, NaN, NaN, NaN)), row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                                                             -100L), class = c("data.table", "data.frame"))

In case this makes things easier here are the packages I am using for this:

library(ggplot2)
library(tidyverse)
library(scales)
library("RColorBrewer")
library(ggsci)

In my original data frame (the full data is too long to post here) there are many groups, so it gets hard to distinguish the colors between them. These plots are just for data exploration, so I would like to add prey name labels on top of my bar plots to be able to identify the groups more easily.

This is my original plot using ggplot:

# Baseline plot: one stacked bar per month, filled by prey_name.
# position = "fill" rescales every month's stack so that it sums to 1.
# Layers must be chained with `+`; without it only the bare ggplot() call
# is evaluated as a plot.
ggplot(example, aes(x = factor(MONTH), y = BMASSprop, fill = prey_name)) +
  geom_bar(stat = "identity", position = "fill") +
  ggtitle("Proportion of Prey Biomass in Adult YP Stomachs") +
  labs(x = "Month", fill = "Prey Name") +
  labs(y = "Prey Biomass Consumed",
       caption = "Source: DNR Diet Data") +
  # Labels are matched by position to the MONTH levels present in the
  # data (6, 7, 8 and 11).
  scale_x_discrete(labels = c("June", "July", "August", "November")) +
  theme_bw() +
  theme(legend.position = "right",
        plot.title = element_text(hjust = 0.5),
        legend.background = element_rect(fill = "white", color = 1),
        axis.ticks.length = unit(0.2, "cm")) +
  scale_fill_igv(palette = "default")

Main Plot

Again, in this plot the different groups (prey_names) are not that challenging to distinguish but with my additional groups some of the colors get too similar and hard to tell apart. Is there a way I can add labels (the prey names) on top of each unique color on the bar plot?

I have tried the following:

# Attempt 1: try to label the fills through the y scale.
# The layers are chained with `+`, as ggplot2 requires.
ggplot(example, aes(x = factor(MONTH), y = BMASSprop, fill = prey_name)) +
  geom_bar(stat = "identity", position = "fill") +
  ggtitle("Proportion of Prey Biomass in Adult YP Stomachs") +
  labs(x = "Month", fill = "Prey Name") +
  labs(y = "Prey Biomass Consumed",
       caption = "Source: DNR Diet Data") +
  scale_x_discrete(labels = c("June", "July", "August", "November")) +
  # This is the line that fails: `prey_name` is used outside aes(), so it
  # is looked up as an ordinary R object rather than a column of `example`
  # ("object 'prey_name' not found").
  scale_y_continuous(labels = prey_name) +
  theme_bw() +
  theme(legend.position = "right",
        plot.title = element_text(hjust = 0.5),
        legend.background = element_rect(fill = "white", color = 1),
        axis.ticks.length = unit(0.2, "cm")) +
  scale_fill_igv(palette = "default")

This gives the following error:

#Error in check_breaks_labels(breaks, labels) : object 'prey_name' not found

I also tried this:

# Attempt 2: add the prey names with a plain geom_text() layer.
# The layers are chained with `+`, as ggplot2 requires.
ggplot(example, aes(x = factor(MONTH), y = BMASSprop, fill = prey_name)) +
  geom_bar(stat = "identity", position = "fill") +
  ggtitle("Proportion of Prey Biomass in Adult YP Stomachs") +
  labs(x = "Month", fill = "Prey Name") +
  labs(y = "Prey Biomass Consumed",
       caption = "Source: DNR Diet Data") +
  scale_x_discrete(labels = c("June", "July", "August", "November")) +
  # One label is drawn per row of `example`, at that row's own BMASSprop
  # value, so a prey that occurs several times in a month is labelled
  # several times and labels can land on top of each other.
  geom_text(aes(label = prey_name)) +
  theme_bw() +
  theme(legend.position = "right",
        plot.title = element_text(hjust = 0.5),
        legend.background = element_rect(fill = "white", color = 1),
        axis.ticks.length = unit(0.2, "cm")) +
  scale_fill_igv(palette = "default")

Try 2

This one gets me closer to what I want but there are duplicate labels and some are also stacked on top of each other.

So then I tried this:

# Attempt 3: stack the text labels.
# The layers are chained with `+`, as ggplot2 requires.
ggplot(example, aes(x = factor(MONTH), y = BMASSprop, fill = prey_name)) +
  geom_bar(stat = "identity", position = "fill") +
  ggtitle("Proportion of Prey Biomass in Adult YP Stomachs") +
  labs(x = "Month", fill = "Prey Name") +
  labs(y = "Prey Biomass Consumed",
       caption = "Source: DNR Diet Data") +
  scale_x_discrete(labels = c("June", "July", "August", "November")) +
  # The bars use position = "fill" (each month rescaled to 1) while the
  # labels use position = "stack" (raw cumulative BMASSprop), so the two
  # layers do not share the same y positions.
  geom_text(aes(label = prey_name), position = "stack") +
  theme_bw() +
  theme(legend.position = "right",
        plot.title = element_text(hjust = 0.5),
        legend.background = element_rect(fill = "white", color = 1),
        axis.ticks.length = unit(0.2, "cm")) +
  scale_fill_igv(palette = "default")

Try 3

This completely messes up my plot.

Then I tried this:

# Attempt 4: dodge the text labels.
# The layers are chained with `+`, as ggplot2 requires.
ggplot(example, aes(x = factor(MONTH), y = BMASSprop, fill = prey_name)) +
  geom_bar(stat = "identity", position = "fill") +
  ggtitle("Proportion of Prey Biomass in Adult YP Stomachs") +
  labs(x = "Month", fill = "Prey Name") +
  labs(y = "Prey Biomass Consumed",
       caption = "Source: DNR Diet Data") +
  scale_x_discrete(labels = c("June", "July", "August", "November")) +
  # Dodging only shifts the labels sideways within each month; there is
  # still one label per data row, so duplicates remain.
  geom_text(aes(label = prey_name), position = position_dodge(0.9)) +
  theme_bw() +
  theme(legend.position = "right",
        plot.title = element_text(hjust = 0.5),
        legend.background = element_rect(fill = "white", color = 1),
        axis.ticks.length = unit(0.2, "cm")) +
  scale_fill_igv(palette = "default")

Try 4

This still results in duplicate and stacked labeling.

Is there a way to add a single label for each prey name by month?

I would like to achieve something like this if possible:

Goal Plot

Any suggestions on how to achieve this? Thank you for your time!

CodePudding user response:

One way to achieve this is to create an intermediate data frame that summarizes your data per prey and month, which removes the repeated prey names and lets you compute their frequencies:

# Collapse the data to one row per (MONTH, prey_name) and compute, within
# each month, the share of every prey and the y position of its label.
clean_df <- example %>%
  group_by(MONTH, prey_name) %>%
  # Drop rows containing NA/NaN (e.g. "Empty" stomachs have BMASSprop = NaN).
  na.omit() %>%
  # "drop_last" keeps the result grouped by MONTH, which the per-month
  # totals computed in mutate() below rely on.
  summarize(prey_tot = sum(BMASSprop), .groups = "drop_last") %>%
  mutate(
    month_tot = sum(prey_tot, na.rm = TRUE),
    # Share of the month's total, in percent.
    prey_frq = prey_tot * 100 / month_tot,
    # Midpoint of each prey's segment, counted down from the top of the
    # bar. NOTE(review): assumes the row order within a month matches the
    # order in which ggplot stacks the fills (first prey on top) - confirm
    # if prey_name is turned into a factor with custom levels.
    pos_label = sum(prey_frq, na.rm = TRUE) -
      (cumsum(prey_frq) - 0.5 * prey_frq)
  ) %>%
  ungroup()

In the summarize you collapse all the same prey, then create the frequency in % of each prey per month and finally add the "pos_label" column which computes the y value at which you want your label.

Then you can generate the plot:

# Stacked bar chart of prey share per month, with one label per prey
# placed at the precomputed `pos_label` height.
# The layers are chained with `+`, as ggplot2 requires.
ggplot(clean_df, aes(as.factor(MONTH), prey_frq, fill = prey_name)) +
  geom_bar(stat = "identity") +
  # Only label prey that make up more than 5% of a month's total, so that
  # thin segments do not get overlapping labels.
  geom_text(data = subset(clean_df, prey_frq > 5),
            aes(y = pos_label, label = prey_name)) +
  ggtitle("Proportion of Prey Biomass in Adult YP Stomachs") +
  labs(x = "Month", fill = "Prey Name",
       y = "Prey Biomass Consumed (%)",
       caption = "Source: DNR Diet Data") +
  scale_x_discrete(labels = c("June", "July", "August", "November")) +
  scale_fill_igv(palette = "default") +
  theme_bw() +
  theme(legend.position = "right",
        plot.title = element_text(hjust = 0.5),
        legend.background = element_rect(fill = "white", color = 1),
        axis.ticks.length = unit(0.2, "cm"))

You can change the threshold used to decide which labels appear on the graph (here, only prey that account for more than 5% of a month's total biomass are labelled).

Stacked histograms with labels centered

  • Related