I have a dataset ("data_for_plot.csv") that looks like this:
Sample,Cyanobacteria,Rhodophyta,Unknown_phylum,Chlorophyta,Bacillariophyta,Cercozoa,Apicomplexa,Unclassified,Streptophyta,species
S047424,18.8176,10.2027,21.6216,28.8514,0,16.3851,0,4.12162,0,SpeciesName1
S047423,26.3215,22.8883,17.4932,22.8883,1.3624,5.61308,0,3.43324,0,SpeciesName1
S047422,41.305,1.91199,14.5372,39.5751,0,0.971168,0,1.69954,0,SpeciesName1
S047420,98.2477,0,1.75232,0,0,0,0,0,0,SpeciesName2
S047419,19.6175,0,42.3893,37.9932,0,0,0,0,0,SpeciesName2
S047418,45.9008,0,0,0,0,54.0992,0,0,0,SpeciesName2
S047417,0.801603,8.01603,67.4349,3.00601,19.2385,1.50301,0,0,0,SpeciesName2
S047404,10.0267,58.2888,23.262,2.31729,2.94118,3.16399,0,0,0,SpeciesName3
S047402,1.67364,70.9044,11.2649,0,11.0074,0,0,5.14966,0,SpeciesName3
S047400,25.9087,27.959,31.8733,0,9.13327,5.12582,0,0,0,SpeciesName3
I need to build a stacked bar plot that is split (faceted) by the species column. There are 3 different species, so the stacked bars should appear in 3 separate panels, each labelled with the corresponding species (SpeciesName1/2/3) and with any underscores removed from the labels. I tried the code below, but I got the error message: "At least one layer must contain all faceting variables: species".
library(ggplot2)
library(reshape2)
library(tidyverse)

# Load the relative-abundance table: one row per sample, one column per
# phylum, plus a `species` column.
pc <- read.csv("data_for_plot.csv", header = TRUE)

# Replace underscores with spaces in the column names so the taxon names read
# naturally in the plot legend (e.g. "Unknown_phylum" -> "Unknown phylum").
pc <- pc %>% rename_all(~ gsub("_", " ", .))

# Reshape to long format.
# NOTE: only `Sample` is declared as an id column here, so `species` is melted
# into the variable/value pairs like any phylum column and does not exist as a
# column of `pcm`. This is what makes facet_wrap(~species) below fail with
# "At least one layer must contain all faceting variables".
pcm <- melt(pc, id = c("Sample"))

# Fix the sample order to the order of first appearance so ggplot2 does not
# sort the x axis alphabetically.
pcm$Sample <- factor(pcm$Sample, levels = unique(pcm$Sample))

# ggplot2 layers must be chained with `+`; without it each call is evaluated
# on its own and discarded, leaving `mx` as an empty ggplot() object.
mx <- ggplot(pcm, aes(x = Sample, fill = variable, y = value)) +
  geom_bar(stat = "identity", colour = "black") +
  theme(
    axis.text.x = element_text(
      angle = 90, size = 8, colour = "black",
      vjust = 0.5, hjust = 1, face = "bold"
    ),
    axis.title.y = element_text(size = 16, face = "bold"),
    legend.title = element_text(size = 16, face = "bold"),
    legend.text = element_text(size = 12, face = "bold", colour = "black"),
    axis.text.y = element_text(colour = "black", size = 12, face = "bold")
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "", y = "Relative Abundance (%)", fill = "Phylum") +
  geom_col() +
  facet_wrap(~species)

mx
Any help here would be appreciated
CodePudding user response:
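I think the problem is that your pivoting hasn't worked out properly - you will see this if you look at `pcm`. Because only `Sample` was given as an id column, the `species` column was melted along with the phyla, so there is no longer a `species` column to facet by. You should only pivot columns 2:10. The modern way to do this would be with `pivot_longer`: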
library(tidyverse)

# Reproducible copy of the example data: one row per sample, one column per
# phylum (relative abundance), plus the `species` grouping column.
pc <- structure(list(Sample = c("S047424", "S047423", "S047422", "S047420",
"S047419", "S047418", "S047417", "S047404", "S047402", "S047400"
), Cyanobacteria = c(18.8176, 26.3215, 41.305, 98.2477, 19.6175,
45.9008, 0.801603, 10.0267, 1.67364, 25.9087), Rhodophyta = c(10.2027,
22.8883, 1.91199, 0, 0, 0, 8.01603, 58.2888, 70.9044, 27.959),
    Unknown_phylum = c(21.6216, 17.4932, 14.5372, 1.75232, 42.3893,
    0, 67.4349, 23.262, 11.2649, 31.8733), Chlorophyta = c(28.8514,
    22.8883, 39.5751, 0, 37.9932, 0, 3.00601, 2.31729, 0, 0),
    Bacillariophyta = c(0, 1.3624, 0, 0, 0, 0, 19.2385, 2.94118,
    11.0074, 9.13327), Cercozoa = c(16.3851, 5.61308, 0.971168,
    0, 0, 54.0992, 1.50301, 3.16399, 0, 5.12582), Apicomplexa = c(0L,
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Unclassified = c(4.12162,
    3.43324, 1.69954, 0, 0, 0, 0, 0, 5.14966, 0), Streptophyta = c(0L,
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), species = c("Species_Name_1",
    "Species_Name_1", "Species_Name_1", "Species_Name_2", "Species_Name_2",
    "Species_Name_2", "Species_Name_2", "Species_Name_3", "Species_Name_3",
    "Species_Name_3")), row.names = c(NA, -10L), class = "data.frame")

pc %>%
  # Keep samples in their original order on the x axis and strip underscores
  # from the species labels shown in the facet strips.
  mutate(
    Sample = factor(Sample, unique(Sample)),
    species = gsub("_", " ", species)
  ) %>%
  # Strip underscores from the column (phylum) names shown in the legend.
  rename_with(~ gsub("_", " ", .x)) %>%
  # Pivot only the phylum columns; `Sample` and `species` stay as id columns
  # so that `species` is still available for faceting.
  pivot_longer(-c(Sample, species)) %>%
  # Preserve the original column order of the phyla in the legend and stacks.
  mutate(name = factor(name, unique(name))) %>%
  # From here on the plot is built by adding layers with `+`, not the pipe.
  ggplot(aes(x = Sample, y = value, fill = name)) +
  geom_col(color = "black", size = 0.3) +
  # scales = "free_x" drops samples that do not belong to a panel's species.
  facet_grid(~species, scales = "free_x") +
  scale_fill_brewer(palette = "Spectral", direction = -1) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "", y = "Relative Abundance (%)", fill = "Phylum")