Home > Software engineering >  R: Bar chart with one bar for every answer alternative
R: Bar chart with one bar for every answer alternative

Time:12-18

This code produces a stacked bar chart, which is fine. But I'd also like to make a regular bar chart, and I don't know how to modify this code for that purpose. In that regular bar chart there would be three bars, one for each answer alternative: "yes", "no" and "maybe". I find this surprisingly difficult to do. What would be a smart way to do that?

# Stacked (100 %) horizontal bar chart: one bar per item in `variable`,
# split into segments by the answer stored in `value`.
mydata %>%
  # Replace the raw item code with a readable name for the axis.
  mutate(
    variable = recode(
      variable,
      'item3' = 'item name here')) %>%
  # Remove rows that contain missing values before plotting.
  drop_na() %>%
  ggplot(mapping = aes(x = variable)) +
  # position = 'fill' stacks the answers and rescales every bar to 0-1.
  geom_bar(aes(fill = value),
           position = 'fill',
           width = 0.30) +
  # The proportions 0-1 are labelled as 0-100 on the axis.
  scale_y_continuous(name = 'Percentage',
                     breaks = seq(from = 0, to = 1, by = 0.2),
                     minor_breaks = seq(from = 0, to = 1, by = 0.1),
                     labels = seq(from = 0, to = 100, by = 20),
                     expand = c(0, 0)) +
  xlab(element_blank()) +
  scale_fill_manual(
    values = c(colour = "red"), # if this was a stacked bar chart, here would be a color palette
    labels = c(
      'yes',
      'no',
      'maybe'
    ),
    drop = FALSE
  ) +
  guides(
    fill = guide_legend(title = 'Answer')
  ) +
  theme(panel.background = element_blank(),
        axis.ticks.y = element_blank(),
        panel.grid.major = element_line(colour = 'grey'),
        axis.ticks.x = element_line(colour = 'grey'),
        panel.grid.minor = element_line(colour = 'lightgrey'),
        axis.text.y = element_text(
          size = 11,
          face = 'bold',
          hjust = 0
        )) +
  # Swap the axes so the bars run horizontally.
  coord_flip()

Here's the data I used

# dput() output of the example data: a tibble with 147 rows and two columns.
#   variable - character, "item3" in every row
#   value    - ordered factor with levels "1" < "2" < "3"
#              (presumably 1 = yes, 2 = no, 3 = maybe, following the fill
#              labels used in the plotting code; confirm with the data owner)
# Assign the result to `mydata` to run the plotting code.
structure(list(variable = c("item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3", "item3", "item3", "item3", "item3", 
"item3", "item3", "item3"), value = structure(c(1L, 1L, 2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("1", 
"2", "3"), class = c("ordered", "factor"))), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -147L))

CodePudding user response:

Maybe this is what you are looking for. To get a bar for each of your values you have to map value on x or y and drop position = "fill". That works fine if you want to show the counts. If you want to show percentages instead, one approach is to compute the counts and percentages manually outside of ggplot:

library(dplyr)
library(tidyr)
library(ggplot2)

# Summarise the answers: one row per item/answer pair holding its count (n)
# and its share of that item's answers (pct, a proportion between 0 and 1).
# Note: this overwrites `mydata` with the summary table.
mydata <- mydata %>%
  mutate(
    variable = recode(
      variable,
      "item3" = "item name here"
    )
  ) %>%
  drop_na() %>%
  # Turn the coded factor levels into readable answer labels.
  mutate(value = recode(value, "1" = "yes", "2" = "no", "3" = "maybe")) %>%
  count(variable, value) %>%
  # Percentages are computed within each item, not across the whole table.
  group_by(variable) %>%
  mutate(pct = n / sum(n)) %>%
  # Drop the grouping so later operations on `mydata` are not silently grouped.
  ungroup()

# Regular bar chart: one bar per answer, bar height = share of answers.
ggplot(mydata, mapping = aes(x = value, y = pct)) +
  # geom_col() draws the precomputed `pct` values as they are.
  geom_col(aes(fill = value),
    width = 0.30
  ) +
  # The proportions 0-1 are labelled as 0-100 on the axis.
  scale_y_continuous(
    breaks = seq(from = 0, to = 1, by = 0.2),
    minor_breaks = seq(from = 0, to = 1, by = 0.1),
    labels = seq(from = 0, to = 100, by = 20),
    limits = c(0, 1),
    expand = c(0, 0)
  ) +
  # Named values tie each colour to its answer regardless of level order.
  scale_fill_manual(
    values = c(yes = "green", no = "red", maybe = "grey80"),
    drop = FALSE
  ) +
  labs(x = "Answer", y = "Percentage") +
  # The x axis already names each bar, so the fill legend is redundant.
  guides(
    fill = "none"
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "grey"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_line(colour = "lightgrey"),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_line(colour = "grey"),
    axis.text.x = element_text(
      size = 11,
      face = "bold",
      hjust = 0
    )
  )

enter image description here

  • Related