Can see there are similar questions about actually plotting it but I’m really struggling to get the data organised correctly. I have two vectors storing goals scored from 100,000 simulated football matches for two teams (Home and Away). My end goal is a side by side bar plot showing the frequency for each number of goals.
I’ve used table() to get the frequencies and then merged the two tables, replacing NA with 0 so that they end up the same length. But when I try to plot the result with ggplot2 I run into a lot of issues, because the merged data ends up with HomeGoals (as in 0, 1, 2, 3, 4, 5), Freq.x and Freq.y (the frequencies for Home/Away) as column headings.
Is there a better way to do this? Any help appreciated!
CodePudding user response:
Try using position_dodge() with ggplot2.
Keep in mind that the data has to be in a long format like in the example data.
# Side-by-side (dodged) bar chart of how often each goal count occurs.
# `df1` is the long-format example data defined under "Data" below:
# `values` is the goal count and `ind` the team. geom_bar() counts the rows
# for each value itself, so no pre-computed frequency table is needed.
library(ggplot2)

ggplot(df1) +
  geom_bar(aes(values, fill = ind), position = position_dodge())
Data
# Example data in long format (one row per observation, 400 rows in total):
#   values - integer count for that observation (0 to 10 in this sample)
#   ind    - factor with levels "home" and "away" saying which team the
#            observation belongs to
# NOTE(review): this looks like dput() output of a stacked data frame;
# confirm before editing it by hand.
df1 <- structure(list(values = c(3L, 7L, 4L, 8L, 10L, 5L, 7L, 9L, 1L,
8L, 7L, 0L, 8L, 7L, 0L, 10L, 6L, 9L, 9L, 2L, 3L, 10L, 9L, 8L,
5L, 4L, 1L, 6L, 0L, 2L, 5L, 7L, 2L, 9L, 10L, 9L, 2L, 8L, 9L,
4L, 4L, 3L, 8L, 0L, 5L, 10L, 9L, 9L, 7L, 4L, 10L, 1L, 2L, 7L,
1L, 4L, 5L, 10L, 5L, 8L, 8L, 2L, 0L, 9L, 1L, 7L, 3L, 5L, 3L,
10L, 6L, 8L, 6L, 1L, 3L, 7L, 4L, 10L, 0L, 9L, 5L, 0L, 0L, 10L,
9L, 0L, 5L, 1L, 4L, 9L, 3L, 8L, 4L, 6L, 4L, 8L, 9L, 1L, 6L, 8L,
3L, 2L, 5L, 5L, 5L, 0L, 0L, 0L, 10L, 7L, 0L, 3L, 3L, 10L, 4L,
8L, 6L, 3L, 0L, 10L, 1L, 2L, 4L, 5L, 7L, 10L, 1L, 9L, 7L, 4L,
9L, 2L, 5L, 9L, 0L, 5L, 9L, 0L, 8L, 6L, 10L, 5L, 0L, 4L, 6L,
2L, 0L, 2L, 9L, 7L, 9L, 4L, 9L, 9L, 0L, 9L, 2L, 9L, 5L, 0L, 10L,
0L, 3L, 0L, 7L, 3L, 3L, 1L, 6L, 0L, 4L, 6L, 2L, 3L, 4L, 1L, 7L,
10L, 6L, 1L, 9L, 7L, 2L, 3L, 1L, 7L, 3L, 10L, 10L, 1L, 5L, 2L,
1L, 3L, 8L, 0L, 8L, 6L, 1L, 8L, 7L, 4L, 4L, 5L, 2L, 2L, 7L, 4L,
8L, 4L, 4L, 7L, 3L, 8L, 8L, 4L, 7L, 4L, 10L, 2L, 4L, 1L, 0L,
8L, 5L, 3L, 2L, 0L, 0L, 5L, 8L, 6L, 6L, 9L, 7L, 1L, 1L, 10L,
10L, 5L, 8L, 10L, 2L, 0L, 2L, 10L, 3L, 10L, 4L, 7L, 1L, 1L, 7L,
1L, 8L, 8L, 4L, 0L, 9L, 3L, 2L, 3L, 3L, 10L, 3L, 5L, 0L, 2L,
2L, 2L, 10L, 2L, 7L, 8L, 4L, 10L, 4L, 6L, 3L, 9L, 0L, 9L, 6L,
5L, 5L, 8L, 3L, 1L, 7L, 4L, 3L, 9L, 6L, 10L, 6L, 8L, 1L, 9L,
10L, 0L, 1L, 6L, 6L, 8L, 10L, 2L, 8L, 5L, 3L, 8L, 4L, 9L, 10L,
1L, 8L, 4L, 10L, 5L, 10L, 0L, 6L, 1L, 7L, 5L, 5L, 10L, 8L, 8L,
7L, 10L, 4L, 4L, 7L, 10L, 10L, 7L, 7L, 8L, 6L, 3L, 5L, 3L, 5L,
10L, 1L, 5L, 10L, 3L, 4L, 0L, 9L, 7L, 2L, 9L, 1L, 3L, 10L, 9L,
3L, 4L, 9L, 0L, 2L, 3L, 1L, 10L, 9L, 10L, 0L, 0L, 2L, 8L, 10L,
10L, 5L, 4L, 1L, 10L, 10L, 5L, 0L, 8L, 6L, 8L, 7L, 1L, 6L, 7L,
5L, 1L, 3L, 2L, 2L, 8L, 7L, 9L, 9L, 5L, 0L, 9L), ind = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), class = "factor", .Label = c("home",
"away"))), class = "data.frame", row.names = c(NA, -400L))
CodePudding user response:
It's hard to understand what kind of issues you are running into. Here's an attempt with simulated data. You may want to consider how you organize your data. I'm assuming you have a data frame (or two vectors).
# Simulate goals for 1000 matches and draw one histogram per team, placed
# next to each other with gridExtra.
#
# library() is used rather than require() so that a missing package stops
# the script immediately instead of silently returning FALSE.
library(ggplot2)
library(gridExtra)

# Fixed seed so the simulated scores are reproducible.
set.seed(1)
df <- data.frame(
  home = sample(0:8, size = 1000, replace = TRUE),
  away = sample(0:6, size = 1000, replace = TRUE)
)

# Goal counts are integers, so use one bin per goal value rather than the
# default of 30 bins (which leaves gaps and triggers a stat_bin() message).
p1 <- ggplot(df) +
  geom_histogram(aes(x = home), binwidth = 1) +
  ggtitle("Home") +
  xlab("Goals")

p2 <- ggplot(df) +
  geom_histogram(aes(x = away), binwidth = 1) +
  ggtitle("Away") +
  xlab("Goals")

# Lay the two plots out in a single row.
grid.arrange(p1, p2, ncol = 2)
To get bars side by side:
# Reshape the wide `df` (columns `home` and `away`, one row per match) into
# long format: one row per team per match, with the team in `team` and the
# goals in `score`. Note the argument is `variable.name` (singular).
df2 <- reshape2::melt(
  df,
  measure.vars = c("home", "away"),
  variable.name = "team",
  value.name = "score"
)

# Cross-tabulate score by team. Naming the table dimensions gives readable
# column names (`score`, `team`, `Freq`) instead of `Var1` / `Var2`, and
# score/team combinations that never occur are kept with a frequency of 0.
df3 <- as.data.frame(table(score = df2$score, team = df2$team))

# The frequencies are pre-computed, so plot them as-is (stat = "identity")
# and dodge the bars so the two teams sit side by side for each score.
ggplot(df3, aes(x = score, y = Freq, fill = team)) +
  geom_bar(position = "dodge", stat = "identity")