How to group my x axis values into 3 years each and include years with no data, ggplot2 R?-CodePudding

I am trying to plot some data that is associated with a year, and the years are not consecutive (e.g. some data was recorded in 1984, 1999, 2000, 2001 etc.). I want to bin my data into groups of 3 years (e.g. 1984-1986, 1987-1989 etc.), but I also want the binned years that have no data to show on the x axis. So if there is no data for 1987-1989, I still want that bin on the x axis to show that there was no data.

Here is my dataframe

# Example data set from the question: 33 rows, 4 columns.
taxa_diversity <- structure(
  list(
    Year_Publication = c(
      1994L, 2001L, 2002L, 2002L, 2004L, 2006L, 2007L, 2009L, 2011L,
      2012L, 2013L, 2014L, 2015L, 2016L, 2017L, 2017L, 2017L, 2018L,
      2018L, 2018L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2020L,
      2021L, 2021L, 2022L, 2022L, 2022L, 2022L
    ),
    Taxa = c(
      "Teleostei", "Chondrichtyes", "Chondrichtyes", "Gastropoda",
      "Teleostei", "Malacostraca", "Teleostei", "Teleostei",
      "Teleostei", "Teleostei", "Teleostei", "Teleostei",
      "Teleostei", "Teleostei", "Chondrichtyes", "Teleostei",
      "Chondrostei", "Teleostei", "Chondrichtyes", "Decapoda",
      "Teleostei", "Gastropoda", "Chondrichtyes", "Chondrostei",
      "Teleostei", "Bivalvia", "Tetrapoda", "Teleostei",
      "Orthoptera", "Chondrichtyes", "Teleostei", "Reptilia",
      "Bovidae"
    ),
    Total_Species_Per_Taxa = c(
      2L, 1L, 1L, 3L, 1L, 1L, 1L, 2L, 4L, 2L, 1L,
      1L, 2L, 4L, 2L, 4L, 1L, 7L, 3L, 1L, 6L, 1L,
      3L, 1L, 7L, 2L, 1L, 1L, 1L, 3L, 4L, 1L, 1L
    ),
    Total_Species_Per_Pub = c(
      2L, 1L, 4L, 4L, 1L, 1L, 1L, 2L, 4L, 2L, 1L,
      1L, 2L, 4L, 7L, 7L, 7L, 11L, 11L, 11L, 10L, 10L,
      10L, 11L, 11L, 11L, 11L, 2L, 2L, 9L, 9L, 9L, 9L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -33L)
)

This is the code I'm using to make the graph I currently have:

# Stacked bar chart: one bar per publication year (treated as a discrete
# factor), bar height = Total_Species_Per_Taxa, stacked and coloured by Taxa.
# Requires ggplot2 and viridis (for scale_fill_viridis) to be attached.
# ggplot layers have to be chained with `+`; without it only the first call
# would be assigned to Taxa_Plot.
Taxa_Plot <- ggplot(
  data = taxa_diversity,
  aes(
    x = factor(Year_Publication),
    y = Total_Species_Per_Taxa,
    fill = Taxa
  )
) +
  geom_col() +
  scale_fill_viridis(discrete = TRUE) +
  theme_bw() +
  theme(
    # get rid of grey grid marks in the back
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),

    # set horizontal grid marks to follow y-values across
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # lines; `size` is kept so the snippet also runs on older versions.
    panel.grid.major.y = element_line(size = .5, color = "black"),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_text(
      size = 20, face = "bold",
      margin = margin(30, 0, 0, 0)
    ),
    axis.title.y = element_text(
      size = 20, face = "bold",
      margin = margin(0, 30, 0, 0)
    ),

    # increase the text size on x and y axis
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.text = element_text(size = 20),
    legend.title = element_text(size = 25)
  ) +

  # change axis titles
  xlab("Year of Publication") +
  ylab("Total Number of Unique Species") +

  # fix the y range to 0-14 and remove the padding around it
  scale_y_continuous(
    limits = c(0, 14),
    expand = expansion(mult = c(0, 0))
  )
Taxa_Plot

I was hoping there might be a way to call for the binning in the ggplot functions....

Any ideas?

CodePudding user response：

Using cut to create your bins and setting drop=FALSE in scale_x_discrete you could do:

library(ggplot2)
library(viridis)

# Bin the publication years into consecutive, left-closed 3-year intervals
# labelled "start-end" (e.g. "1994-1996"). The break points are derived from
# the data instead of being hard-coded, so the code keeps working when the
# range of years changes. For the example data this yields the same breaks
# as seq(1994, 2024, by = 3).
bin_width <- 3
first_year <- min(taxa_diversity$Year_Publication, na.rm = TRUE)
last_year <- max(taxa_diversity$Year_Publication, na.rm = TRUE)

# Going up to `last_year + bin_width` guarantees that the final break lies
# strictly above the latest year, which is needed because the intervals are
# open on the right (right = FALSE).
breaks <- seq(first_year, last_year + bin_width, by = bin_width)
labels <- paste(breaks[-length(breaks)], breaks[-1] - 1, sep = "-")

# cut() keeps every interval as a factor level, including intervals that
# contain no observations.
taxa_diversity$Year_Publication_Cut <- cut(
  taxa_diversity$Year_Publication,
  breaks = breaks, labels = labels, right = FALSE
)

# Stacked bar chart over the 3-year bins in Year_Publication_Cut, coloured by
# Taxa. Layers are chained with `+`; scale_x_discrete(drop = FALSE) is what
# keeps bins without any rows on the x axis.
ggplot(
  data = taxa_diversity,
  aes(
    x = Year_Publication_Cut,
    y = Total_Species_Per_Taxa,
    fill = Taxa
  )
) +
  geom_col() +
  scale_x_discrete(drop = FALSE) +
  scale_y_continuous(
    # the fixed upper limit is disabled here (left commented out)
    #limits = c(0, 14),
    expand = expansion(mult = c(0, 0))
  ) +
  scale_fill_viridis(discrete = TRUE) +
  theme_bw() +
  theme(
    # get rid of grey grid marks in the back
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    # set horizontal grid marks to follow y-values across
    panel.grid.major.y = element_line(size = .5, color = "black"),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_text(
      size = 20, face = "bold",
      margin = margin(30, 0, 0, 0)
    ),
    axis.title.y = element_text(
      size = 20, face = "bold",
      margin = margin(0, 30, 0, 0)
    ),
    # increase the text size on x and y axis
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.text = element_text(size = 20),
    legend.title = element_text(size = 25)
  ) +

  # change axis titles
  xlab("Year of Publication") +
  ylab("Total Number of Unique Species")

CodePudding user response：

You can transform your data this way:

# Overwrite the year column with a factor of intervals. The second argument
# of cut() is a single number here (the year range divided by 3), i.e. an
# interval count rather than explicit break points -- see ?cut.
taxa_diversity$Year_Publication <- with(
  taxa_diversity,
  cut(Year_Publication, diff(range(Year_Publication)) / 3)
)

add a discrete scale:

  # drop = FALSE keeps factor levels that have no rows (empty bins) on the axis
  scale_x_discrete(drop=FALSE)

For clarity, you can rotate the axis labels by setting their angle:

    ...
    # angle = 90 rotates the x-axis tick labels by 90 degrees
    axis.text.x = element_text(size = 15, angle = 90),
    ...

The plot will be shown as follows:

CodePudding user response：

I am not sure, maybe this is also an option for you:

library(viridis)
library(tidyverse)

# Give every publication year its own 3-year window label
# ("<year>-<year + 2>") and draw dodged bars per window, coloured by Taxa.
# Requires the tidyverse (dplyr, tidyr, ggplot2) and viridis to be attached.
taxa_diversity %>%
  arrange(Year_Publication) %>%
  mutate(id = row_number()) %>%
  # start a new group each time the (sorted) year changes
  group_by(
    group = cumsum(
      Year_Publication !=
        lag(Year_Publication, default = first(Year_Publication))
    )
  ) %>%
  # last year of the window that starts at Year_Publication
  mutate(Year_Publication1 = Year_Publication + 2) %>%
  # stack window start and end into one `value` column
  pivot_longer(c(Year_Publication, Year_Publication1)) %>%
  arrange(name, .by_group = TRUE) %>%
  # add rows for the years between window start and end (per group)
  tidyr::complete(value = seq(min(value), max(value))) %>%
  mutate(labelx = paste0(min(value), "-", max(value))) %>%
  #-------------------------------------------------------
  ggplot(
    aes(
      x = factor(labelx),
      y = Total_Species_Per_Taxa,
      fill = Taxa
    )
  ) +
  geom_col(position = position_dodge()) + # added
  scale_fill_viridis(discrete = TRUE) +
  theme_bw() +
  theme(
    # get rid of grey grid marks in the back
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),

    # set horizontal grid marks to follow y-values across
    panel.grid.major.y = element_line(size = .5, color = "black"),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_text(
      size = 20, face = "bold",
      margin = margin(30, 0, 0, 0)
    ),
    axis.title.y = element_text(
      size = 20, face = "bold",
      margin = margin(0, 30, 0, 0)
    ),

    # increase the text size on x and y axis
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.text = element_text(size = 20),
    legend.title = element_text(size = 25)
  ) +

  # change axis titles
  xlab("Year of Publication") +
  ylab("Total Number of Unique Species") +

  scale_y_continuous(
    limits = c(0, 14),
    expand = expansion(mult = c(0, 0))
  ) +
  # spread the x-axis labels over two rows
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) # added