I am trying to make a stacked bar graph with error bars split into three graphs by time. Here is the raw data:
Replicate Sample Time Stage Percent
1 1 WT Veh 24h G1 85.67
2 2 WT Veh 24h G1 82.77
3 3 WT Veh 24h G1 83.28
4 1 WT Veh 24h G2 9.14
5 2 WT Veh 24h G2 10.37
6 3 WT Veh 24h G2 11.60
7 1 WT Veh 24h S 5.20
8 2 WT Veh 24h S 6.87
9 3 WT Veh 24h S 5.12
10 1 WT E2 24h G1 61.78
11 2 WT E2 24h G1 54.48
12 3 WT E2 24h G1 58.55
13 1 WT E2 24h G2 8.10
14 2 WT E2 24h G2 12.35
15 3 WT E2 24h G2 10.43
16 1 WT E2 24h S 30.12
17 2 WT E2 24h S 33.17
18 3 WT E2 24h S 31.02
19 1 KO Veh 24h G1 79.86
20 2 KO Veh 24h G1 75.95
21 3 KO Veh 24h G1 80.96
22 1 KO Veh 24h G2 11.39
23 2 KO Veh 24h G2 12.90
24 3 KO Veh 24h G2 9.93
25 1 KO Veh 24h S 8.75
26 2 KO Veh 24h S 11.15
27 3 KO Veh 24h S 9.11
28 1 KO E2 24h G1 56.98
29 2 KO E2 24h G1 49.49
30 3 KO E2 24h G1 51.66
31 1 KO E2 24h G2 16.65
32 2 KO E2 24h G2 17.39
33 3 KO E2 24h G2 15.28
34 1 KO E2 24h S 26.37
35 2 KO E2 24h S 33.12
36 3 KO E2 24h S 33.06
37 1 WT Veh 48h G1 86.57
38 2 WT Veh 48h G1 88.80
39 3 WT Veh 48h G1 84.36
40 1 WT Veh 48h G2 7.27
41 2 WT Veh 48h G2 7.27
42 3 WT Veh 48h G2 9.91
43 1 WT Veh 48h S 6.16
44 2 WT Veh 48h S 3.94
45 3 WT Veh 48h S 5.73
46 1 WT E2 48h G1 65.06
47 2 WT E2 48h G1 67.54
48 3 WT E2 48h G1 71.00
49 1 WT E2 48h G2 6.55
50 2 WT E2 48h G2 8.06
51 3 WT E2 48h G2 7.06
52 1 WT E2 48h S 28.39
53 2 WT E2 48h S 24.40
54 3 WT E2 48h S 21.94
55 1 KO Veh 48h G1 79.23
56 2 KO Veh 48h G1 80.89
57 3 KO Veh 48h G1 82.71
58 1 KO Veh 48h G2 12.76
59 2 KO Veh 48h G2 11.20
60 3 KO Veh 48h G2 8.64
61 1 KO Veh 48h S 8.00
62 2 KO Veh 48h S 7.91
63 3 KO Veh 48h S 8.66
64 1 KO E2 48h G1 62.71
65 2 KO E2 48h G1 56.54
66 3 KO E2 48h G1 60.17
67 1 KO E2 48h G2 12.16
68 2 KO E2 48h G2 15.71
69 3 KO E2 48h G2 13.68
70 1 KO E2 48h S 25.13
71 2 KO E2 48h S 27.75
72 3 KO E2 48h S 26.16
73 1 WT Veh 72h G1 88.56
74 2 WT Veh 72h G1 86.13
75 3 WT Veh 72h G1 86.63
76 1 WT Veh 72h G2 6.42
77 2 WT Veh 72h G2 7.21
78 3 WT Veh 72h G2 9.17
79 1 WT Veh 72h S 5.02
80 2 WT Veh 72h S 6.66
81 3 WT Veh 72h S 4.21
82 1 WT E2 72h G1 75.00
83 2 WT E2 72h G1 68.69
84 3 WT E2 72h G1 72.46
85 1 WT E2 72h G2 7.53
86 2 WT E2 72h G2 12.33
87 3 WT E2 72h G2 8.15
88 1 WT E2 72h S 17.46
89 2 WT E2 72h S 18.98
90 3 WT E2 72h S 19.39
91 1 KO Veh 72h G1 78.14
92 2 KO Veh 72h G1 82.40
93 3 KO Veh 72h G1 84.60
94 1 KO Veh 72h G2 14.35
95 2 KO Veh 72h G2 11.35
96 3 KO Veh 72h G2 8.41
97 1 KO Veh 72h S 7.52
98 2 KO Veh 72h S 6.25
99 3 KO Veh 72h S 6.98
100 1 KO E2 72h G1 63.55
101 2 KO E2 72h G1 64.26
102 3 KO E2 72h G1 72.20
103 1 KO E2 72h G2 11.89
104 2 KO E2 72h G2 12.98
105 3 KO E2 72h G2 10.57
106 1 KO E2 72h S 24.56
107 2 KO E2 72h S 22.77
108 3 KO E2 72h S 17.23
At first glance, it seems like I am able to make the graph I want successfully using the following:
library(ggpubr)
# Load the raw data and fix the display order of each grouping variable.
df <- read.csv("raw-cell-cycle-data.csv")
df$Time <- factor(df$Time, levels = c("24h", "48h", "72h"), ordered = TRUE)
df$Stage <- factor(df$Stage, levels = c("S", "G2", "G1"), ordered = TRUE)
# `ordered` is an argument of factor(); placed inside c() it would instead
# become an extra, unused level "TRUE".
df$Sample <- factor(
  df$Sample,
  levels = c("WT Veh", "WT E2", "KO Veh", "KO E2"),
  ordered = TRUE
)
colors <- c("#8c8c8c", "#f2f2f2", "#1a1a1a")

# Layers have to be chained with `+`; written as separate statements they are
# evaluated on their own and never modify the plot.
ggbarplot(
  df,
  x = "Sample", y = "Percent", add = "mean_se",
  color = "black", fill = "Stage", palette = colors
) +
  scale_y_continuous(
    limits = c(0, 105),
    breaks = c(0, 25, 50, 75, 100),
    expand = expansion(mult = c(0, .1))
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) +
  theme(axis.text.y = element_text(size = 12)) +
  theme(axis.title = element_text(size = 16)) +
  theme(text = element_text(size = 16)) +
  # NOTE(review): this is the facetting approach the question is about; the
  # bars come out identical in every panel, presumably because the mean/SE
  # summary is computed without Time -- confirm against the ggpubr source.
  facet_grid(~ Time)
Resulting plot: stacked bar plot attempt 1
However, if you examine the plot carefully you'll notice that the bar plots are exactly the same for each time point. Plotting the three time points individually, you will see that this most certainly is not the case:
library(dplyr)
# Build the stacked mean +/- SE bar chart for the rows of a single time point.
# `data` needs the Sample, Stage and Percent columns; the fill palette is taken
# from the script-level `colors` vector.
plot_time_point <- function(data) {
  # Layers must be joined with `+`, otherwise the scale and theme calls are
  # evaluated separately and never applied to the plot.
  ggbarplot(
    data,
    x = "Sample", y = "Percent", add = "mean_se",
    color = "black", fill = "Stage", palette = colors
  ) +
    scale_y_continuous(
      limits = c(0, 105),
      breaks = c(0, 25, 50, 75, 100),
      expand = expansion(mult = c(0, .1))
    ) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) +
    theme(axis.text.y = element_text(size = 12)) +
    theme(axis.title = element_text(size = 16)) +
    theme(text = element_text(size = 16))
}

# One subset per time point, so each plot summarises only its own rows.
data1 <- filter(df, Time == "24h")
data2 <- filter(df, Time == "48h")
data3 <- filter(df, Time == "72h")

p <- plot_time_point(data1)
p
q <- plot_time_point(data2)
q
r <- plot_time_point(data3)
r

# Place the three per-time-point plots side by side in a single row.
arranged_graph <- ggarrange(p, q, r, ncol = 3, nrow = 1)
arranged_graph
Resulting plot: Arranged graph of individual time points
The differences between the time points may be subtle, but there are differences nonetheless!
What is going on here that is causing facet_grid to repeat the data multiple times? Worst case I can make something work using the individual plots, but it would be great to understand what is going on with facet_grid!
CodePudding user response:
Update: Figured it out! The solution is to add facet.by = "Time" in the ggbarplot call. So in summary:
# Facet inside ggbarplot() via `facet.by` so that the mean/SE summary is
# computed separately for each time point. The remaining layers must be
# chained with `+` to be applied to the plot.
ggbarplot(
  df,
  x = "Sample", y = "Percent", add = "mean_se",
  color = "black", fill = "Stage", palette = colors,
  facet.by = "Time"
) +
  scale_y_continuous(
    limits = c(0, 105),
    breaks = c(0, 25, 50, 75, 100),
    expand = expansion(mult = c(0, .1))
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) +
  theme(axis.text.y = element_text(size = 12)) +
  theme(axis.title = element_text(size = 16)) +
  theme(text = element_text(size = 16))
Which produces the following: New plot
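I'm guessing facet_grid doesn't play well with ggpubr: ggbarplot appears to compute the mean_se summary itself, grouped only by the variables passed to it, so a facet_grid added afterwards has no Time column in the summarised data and repeats the same bars in every panel. Passing facet.by = "Time" includes Time in that summary.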
CodePudding user response:
If you use the `facet.by =` option in the `ggbarplot()` function, I think you'll get what you want:
# Inline copy of the question's raw data, so the example runs without the CSV.
# 108 rows: 3 replicates x 4 samples (WT/KO, Veh/E2) x 3 time points
# (24h, 48h, 72h) x 3 stages (G1, G2, S).
# NOTE(review): Percent is presumably the share of cells in each stage for a
# given replicate -- confirm with the data owner.
df <- tibble::tribble(
~Replicate, ~Sample, ~Time, ~Stage, ~Percent,
1, "WT Veh", "24h", "G1", 85.67,
2, "WT Veh", "24h", "G1", 82.77,
3, "WT Veh", "24h", "G1", 83.28,
1, "WT Veh", "24h", "G2", 9.14,
2, "WT Veh", "24h", "G2", 10.37,
3, "WT Veh", "24h", "G2", 11.60,
1, "WT Veh", "24h", "S", 5.20,
2, "WT Veh", "24h", "S", 6.87,
3, "WT Veh", "24h", "S", 5.12,
1, "WT E2", "24h", "G1", 61.78,
2, "WT E2", "24h", "G1", 54.48,
3, "WT E2", "24h", "G1", 58.55,
1, "WT E2", "24h", "G2", 8.10,
2, "WT E2", "24h", "G2", 12.35,
3, "WT E2", "24h", "G2", 10.43,
1, "WT E2", "24h", "S", 30.12,
2, "WT E2", "24h", "S", 33.17,
3, "WT E2", "24h", "S", 31.02,
1, "KO Veh", "24h", "G1", 79.86,
2, "KO Veh", "24h", "G1", 75.95,
3, "KO Veh", "24h", "G1", 80.96,
1, "KO Veh", "24h", "G2", 11.39,
2, "KO Veh", "24h", "G2", 12.90,
3, "KO Veh", "24h", "G2", 9.93,
1, "KO Veh", "24h", "S", 8.75,
2, "KO Veh", "24h", "S", 11.15,
3, "KO Veh", "24h", "S", 9.11,
1, "KO E2", "24h", "G1", 56.98,
2, "KO E2", "24h", "G1", 49.49,
3, "KO E2", "24h", "G1", 51.66,
1, "KO E2", "24h", "G2", 16.65,
2, "KO E2", "24h", "G2", 17.39,
3, "KO E2", "24h", "G2", 15.28,
1, "KO E2", "24h", "S", 26.37,
2, "KO E2", "24h", "S", 33.12,
3, "KO E2", "24h", "S", 33.06,
1, "WT Veh", "48h", "G1", 86.57,
2, "WT Veh", "48h", "G1", 88.80,
3, "WT Veh", "48h", "G1", 84.36,
1, "WT Veh", "48h", "G2", 7.27,
2, "WT Veh", "48h", "G2", 7.27,
3, "WT Veh", "48h", "G2", 9.91,
1, "WT Veh", "48h", "S", 6.16,
2, "WT Veh", "48h", "S", 3.94,
3, "WT Veh", "48h", "S", 5.73,
1, "WT E2", "48h", "G1", 65.06,
2, "WT E2", "48h", "G1", 67.54,
3, "WT E2", "48h", "G1", 71.00,
1, "WT E2", "48h", "G2", 6.55,
2, "WT E2", "48h", "G2", 8.06,
3, "WT E2", "48h", "G2", 7.06,
1, "WT E2", "48h", "S", 28.39,
2, "WT E2", "48h", "S", 24.40,
3, "WT E2", "48h", "S", 21.94,
1, "KO Veh", "48h", "G1", 79.23,
2, "KO Veh", "48h", "G1", 80.89,
3, "KO Veh", "48h", "G1", 82.71,
1, "KO Veh", "48h", "G2", 12.76,
2, "KO Veh", "48h", "G2", 11.20,
3, "KO Veh", "48h", "G2", 8.64,
1, "KO Veh", "48h", "S", 8.00,
2, "KO Veh", "48h", "S", 7.91,
3, "KO Veh", "48h", "S", 8.66,
1, "KO E2", "48h", "G1", 62.71,
2, "KO E2", "48h", "G1", 56.54,
3, "KO E2", "48h", "G1", 60.17,
1, "KO E2", "48h", "G2", 12.16,
2, "KO E2", "48h", "G2", 15.71,
3, "KO E2", "48h", "G2", 13.68,
1, "KO E2", "48h", "S", 25.13,
2, "KO E2", "48h", "S", 27.75,
3, "KO E2", "48h", "S", 26.16,
1, "WT Veh", "72h", "G1", 88.56,
2, "WT Veh", "72h", "G1", 86.13,
3, "WT Veh", "72h", "G1", 86.63,
1, "WT Veh", "72h", "G2", 6.42,
2, "WT Veh", "72h", "G2", 7.21,
3, "WT Veh", "72h", "G2", 9.17,
1, "WT Veh", "72h", "S", 5.02,
2, "WT Veh", "72h", "S", 6.66,
3, "WT Veh", "72h", "S", 4.21,
1, "WT E2", "72h", "G1", 75.00,
2, "WT E2", "72h", "G1", 68.69,
3, "WT E2", "72h", "G1", 72.46,
1, "WT E2", "72h", "G2", 7.53,
2, "WT E2", "72h", "G2", 12.33,
3, "WT E2", "72h", "G2", 8.15,
1, "WT E2", "72h", "S", 17.46,
2, "WT E2", "72h", "S", 18.98,
3, "WT E2", "72h", "S", 19.39,
1, "KO Veh", "72h", "G1", 78.14,
2, "KO Veh", "72h", "G1", 82.40,
3, "KO Veh", "72h", "G1", 84.60,
1, "KO Veh", "72h", "G2", 14.35,
2, "KO Veh", "72h", "G2", 11.35,
3, "KO Veh", "72h", "G2", 8.41,
1, "KO Veh", "72h", "S", 7.52,
2, "KO Veh", "72h", "S", 6.25,
3, "KO Veh", "72h", "S", 6.98,
1, "KO E2", "72h", "G1", 63.55,
2, "KO E2", "72h", "G1", 64.26,
3, "KO E2", "72h", "G1", 72.20,
1, "KO E2", "72h", "G2", 11.89,
2, "KO E2", "72h", "G2", 12.98,
3, "KO E2", "72h", "G2", 10.57,
1, "KO E2", "72h", "S", 24.56,
2, "KO E2", "72h", "S", 22.77,
3, "KO E2", "72h", "S", 17.23)
library(ggpubr)
#> Loading required package: ggplot2
# Fix the display order of each grouping variable.
df$Time <- factor(df$Time, levels = c("24h", "48h", "72h"), ordered = TRUE)
df$Stage <- factor(df$Stage, levels = c("S", "G2", "G1"), ordered = TRUE)
# `ordered` is an argument of factor(); placed inside c() it would instead
# become an extra, unused level "TRUE".
df$Sample <- factor(
  df$Sample,
  levels = c("WT Veh", "WT E2", "KO Veh", "KO E2"),
  ordered = TRUE
)
colors <- c("#8c8c8c", "#f2f2f2", "#1a1a1a")

# Facet through `facet.by` so the mean/SE summary is computed per time point,
# and chain the remaining layers with `+` so they are applied to the plot.
ggbarplot(
  df,
  x = "Sample", y = "Percent", add = "mean_se",
  color = "black", fill = "Stage", palette = colors,
  facet.by = "Time"
) +
  scale_y_continuous(
    limits = c(0, 105),
    breaks = c(0, 25, 50, 75, 100),
    expand = expansion(mult = c(0, .1))
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) +
  theme(axis.text.y = element_text(size = 12)) +
  theme(axis.title = element_text(size = 16)) +
  theme(text = element_text(size = 16))
Created on 2022-04-07 by the reprex package (v2.0.1)