I would like to create a bar graph using data in long format.
Here is my code:
library(data.table)
library(dplyr)
library(ggplot2)
# Simulate three dependent variables, each uniform on [1, 7], for 100 cases.
dv1 <- runif(n = 100, min = 1, max = 7)
dv2 <- runif(n = 100, min = 1, max = 7)
dv3 <- runif(n = 100, min = 1, max = 7)
# Cycle the three country labels across the 100 cases.
country <- rep(c("India", "US", "Poland"), length.out = 100)
df <- data.frame(country, dv1, dv2, dv3)
df$casenum <- seq.int(nrow(df))
df2 <- df %>% select(casenum, country, dv1, dv2, dv3)
# Reshape to long format: one row per case and dv column. setDT() converts
# df2 to a data.table by reference. Only column 1 (casenum) is used as an id
# and columns 3-5 (dv1, dv2, dv3) as measures.
df.melt <- data.table::melt(setDT(df2), id.vars = 1L,
                            measure.vars = list(c(3, 4, 5)),
                            value.name = c("dv"))
# Look-up table of case number -> country, joined back onto the long data.
df.melt2 <- df2 %>%
  select(casenum, country)
df.melt.final <- dplyr::left_join(df.melt, df.melt2, by = "casenum")
# The `+` is required to add the bar layer to the plot; without it the two
# calls are evaluated as separate statements and no bars are drawn.
# stat = "identity" uses the raw dv value of each row as the bar height;
# no averaging is performed by this layer.
ggplot(df.melt.final, aes(fill = variable, y = dv, x = country)) +
  geom_bar(position = "dodge", stat = "identity")
The bar graph looks like this, but the bar heights do not correspond to the actual means in the data. How can I make the bars show the mean of dv for each country and variable?
CodePudding user response:
You'll want to calculate the mean before plotting. E.g.
library(ggplot2)
library(dplyr)
# Average dv within each country/variable pair first, so every bar is drawn
# from a single pre-computed mean rather than from the raw rows.
df.melt.final |>
  group_by(country, variable) |>
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(dv = mean(dv), .groups = "drop") |>
  # The `+` adds the bar layer to the plot; without it geom_bar() is
  # evaluated as a separate statement and the plot has no bars.
  ggplot(aes(fill = variable, y = dv, x = country)) +
  geom_bar(position = "dodge", stat = "identity")
Output:
Check means:
# A tibble: 9 × 3
country variable dv
<chr> <fct> <dbl>
1 India dv1 3.97
2 India dv2 3.97
3 India dv3 3.91
4 Poland dv1 3.97
5 Poland dv2 3.73
6 Poland dv3 4.18
7 US dv1 4.16
8 US dv2 4.16
9 US dv3 4.02
CodePudding user response:
Another option is to set `stat = "summary"` and `fun = "mean"` in your `geom_bar()` call, like this:
library(data.table)
library(dplyr)
library(ggplot2)
# Fix the RNG seed so the simulated data and the means printed below are
# reproducible.
set.seed(7)
# Simulate three dependent variables, each uniform on [1, 7], for 100 cases.
dv1 <- runif(n = 100, min = 1, max = 7)
dv2 <- runif(n = 100, min = 1, max = 7)
dv3 <- runif(n = 100, min = 1, max = 7)
# Cycle the three country labels across the 100 cases.
country <- rep(c("India", "US", "Poland"), length.out = 100)
df <- data.frame(country, dv1, dv2, dv3)
df$casenum <- seq.int(nrow(df))
df2 <- df %>% select(casenum, country, dv1, dv2, dv3)
# Reshape to long format: one row per case and dv column. setDT() converts
# df2 to a data.table by reference. Only column 1 (casenum) is used as an id
# and columns 3-5 (dv1, dv2, dv3) as measures.
df.melt <- data.table::melt(setDT(df2), id.vars = 1L,
                            measure.vars = list(c(3, 4, 5)),
                            value.name = c("dv"))
# Look-up table of case number -> country, joined back onto the long data.
df.melt2 <- df2 %>%
  select(casenum, country)
df.melt.final <- dplyr::left_join(df.melt, df.melt2, by = "casenum")
# The `+` is required to add the bar layer to the plot. stat = "summary"
# with fun = "mean" makes ggplot2 compute the mean of dv for each bar, so no
# separate aggregation step is needed before plotting.
ggplot(df.melt.final, aes(fill = variable, y = dv, x = country)) +
  geom_bar(position = "dodge", stat = "summary", fun = "mean")

# These are the means the bars should show
df.melt.final %>%
  group_by(country, variable) %>%
  summarise(dv = mean(dv))
#> `summarise()` has grouped output by 'country'. You can override using the
#> `.groups` argument.
#> # A tibble: 9 × 3
#> # Groups: country [3]
#> country variable dv
#> <chr> <fct> <dbl>
#> 1 India dv1 4.18
#> 2 India dv2 3.97
#> 3 India dv3 4.34
#> 4 Poland dv1 4.14
#> 5 Poland dv2 4.25
#> 6 Poland dv3 4.28
#> 7 US dv1 3.84
#> 8 US dv2 4.66
#> 9 US dv3 3.66
Created on 2022-08-26 with reprex v2.0.2