Home > Blockchain >  Why does scale_x_discrete recycle data?
Why does scale_x_discrete recycle data?

Time:01-31

Given this data:

library(tidyverse)
library(ggplot2)
library(ggtext)


# Example data: 12 rows, one per combination of `variable` (var1..var4) and
# `gruppe` (three of the 16 declared factor levels). The object carries the
# `grouped_df` / `tbl_df` classes plus a `groups` attribute that lists every
# row as its own (variable, gruppe) group.
df <- local({
  # Each variable occurs three times in a row: once per group.
  variable_names <- rep(c("var1", "var2", "var3", "var4"), each = 3L)

  # Factor with 16 levels ("AAAA" .. "OOOO", "PPPP GGGG"); only the codes
  # 1, 7 and 16 are used, repeated once per variable.
  group_factor <- structure(
    rep(c(1L, 7L, 16L), times = 4L),
    levels = c(strrep(LETTERS[1:15], 4L), "PPPP GGGG"),
    class = "factor"
  )

  columns <- list(
    variable = variable_names,
    gruppe = group_factor,
    auspraegung = rep("lorem ipsum", 12L),
    n = c(
      148L, 32L, 89L,
      135L, 31L, 93L,
      123L, 28L, 76L,
      115L, 26L, 67L
    ),
    prozent = c(
      88.1, 84.2, 85,
      81.3, 81.6, 89,
      74.1, 73.7, 72,
      69.7, 68.4, 65
    ),
    gruppen_variablen_summe = c(
      168L, 38L, 105L,
      166L, 38L, 105L,
      166L, 38L, 105L,
      165L, 38L, 103L
    ),
    auspr_n = c(
      "88.1% \nn = 148", "84.2% \nn = 32", "85% \nn = 89",
      "81.3% \nn = 135", "81.6% \nn = 31", "89% \nn = 93",
      "74.1% \nn = 123", "73.7% \nn = 28", "72% \nn = 76",
      "69.7% \nn = 115", "68.4% \nn = 26", "65% \nn = 67"
    ),
    gruppe_n = c(
      "AAAA \nn = 168", "GGGG \nn = 38", "PPPP GGGG \nn = 105",
      "AAAA \nn = 166", "GGGG \nn = 38", "PPPP GGGG \nn = 105",
      "AAAA \nn = 166", "GGGG \nn = 38", "PPPP GGGG \nn = 105",
      "AAAA \nn = 165", "GGGG \nn = 38", "PPPP GGGG \nn = 103"
    )
  )

  # Grouping metadata: the grouping columns plus `.rows`, which maps each
  # group to the single row index it contains.
  group_index <- structure(
    list(
      variable = variable_names,
      gruppe = group_factor,
      .rows = structure(
        as.list(1:12),
        ptype = integer(0),
        class = c("vctrs_list_of", "vctrs_vctr", "list")
      )
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -12L),
    .drop = TRUE
  )

  structure(
    columns,
    class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -12L),
    groups = group_index
  )
})

# Facet strip labels: maps each `variable` key ("var1".."var4") to its
# display text ("Text var 1".."Text var 4").
varlabels <- structure(
  paste("Text var", 1:4),
  names = paste0("var", 1:4)
)

Because I want to have the bars in the plot in a certain order, I assign an id:

# Sort key for the bars: within every block of three rows the groups get the
# ids 2, 1, 3 (the pattern is repeated for each of the four variables).
df[["gruppen_id"]] <- rep(c(2, 1, 3), times = 4L)

Now I create a plot, which mostly looks fine. Unfortunately, the values for n (total) are not displayed correctly in the axis labels: the values seem to be recycled.

    # Horizontal, faceted bar chart: one facet per `variable`, one bar per
    # `gruppe`, bar length = `prozent`. Bars are ordered by `gruppen_id`
    # (descending, hence the minus sign) and annotated with percent and n.
    df %>%
      ggplot(aes(
        x = fct_reorder(str_wrap(gruppe, width = 30), -gruppen_id),
        y = prozent,
        fill = gruppe
      )) +
      geom_bar(
        na.rm = TRUE,
        stat = "identity",
        position = position_dodge2(preserve = "total"),
        show.legend = FALSE
      ) +
      # Value labels drawn inside the bars (white text, shifted left of the
      # bar end via hjust > 1 because the coordinates are flipped).
      geom_text(
        aes(label = paste(sprintf("%1.1f%%", prozent), "\nn =", n)),
        size = 3,
        hjust = 1.3,
        vjust = 0.5,
        position = position_dodge2(.9),
        colour = "white"
      ) +
      scale_fill_manual(values = c("#6DBE99", "#004B93", "#389DC3")) +
      labs(title = "Text title\n") +
      labs(caption = "Text caption") +
      coord_flip() +
      facet_wrap(
        ~ variable,
        nrow = 4,
        as.table = TRUE,
        labeller = as_labeller(varlabels)
      ) +
      xlab("") +
      ylab("") +
      guides(fill = guide_legend(title = "")) +
      expand_limits(x = 2, y = 100) +
      # NOTE: this is the call the question is about. `labels` receives one
      # string per row of `df` (12 values), while the x scale has only three
      # levels that are shared by all facets, so the per-facet totals in
      # `gruppen_variablen_summe` cannot all be shown.
      scale_x_discrete(labels = paste(
        str_wrap(df$gruppe, 30),
        "\nn (total) =",
        df$gruppen_variablen_summe
      )) +
      scale_y_continuous(
        expand = c(NA, 100),
        breaks = c(0, 25, 50, 75, 100),
        labels = c("0%", "25%", "50%", "75%", "100%\n")
      ) +
      theme(
        panel.background = element_rect(fill = "white", colour = NA),
        panel.grid.major.x = element_line(color = "#EAEAEA"),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.ticks.y = element_blank(),
        # Left-aligned facet titles without a strip box.
        strip.text.x = element_text(
          lineheight = 1,
          size = 10,
          angle = 0,
          hjust = 0
        ),
        strip.background = element_rect(color = NA, fill = NA),
        plot.background = element_rect(fill = "white", color = "white"),
        panel.spacing.y = unit(2, "lines"),
        plot.title = element_text(
          size = 11,
          lineheight = 1,
          face = "bold",
          hjust = -0.5,
          vjust = -2
        ),
        plot.margin = margin(0.3, 0.5, 0.5, 0.3, "cm"),
        plot.caption.position = "plot",
        plot.caption = element_text(
          hjust = 0,
          vjust = 0.5,
          margin = margin(0, 0, 0, 0.1, "cm")
        )
      )

enter image description here

CodePudding user response:

IMHO, assigning vectors from a data frame directly to the labels or ... argument of a scale is always error-prone: the labels are matched by position to the breaks of the scale, not to the rows of your data, and with fixed scales all facets share the same axis labels. In most cases there are easier approaches to achieve the desired result. In your case, add a column to your data which contains your x axis labels, then map this column on x and set scales = "free_y" in facet_wrap():

Note: I have stripped down your code to a more minimal example.

library(ggplot2)

# Build the complete axis label (group name + total n) as a data column, so
# each row carries the label that belongs to its own facet.
df$x <- paste(
  str_wrap(df$gruppe, 30),
  "\nn (total) =",
  df$gruppen_variablen_summe
)

# Same chart as in the question, reduced to the essentials: the label column
# is mapped to x (ordered by `gruppen_id`, descending) instead of overriding
# the scale labels.
ggplot(df, aes(
  x = fct_reorder(x, -gruppen_id),
  y = prozent,
  fill = gruppe
)) +
  geom_bar(
    na.rm = TRUE,
    stat = "identity",
    position = position_dodge2(preserve = "total"),
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c("#6DBE99", "#004B93", "#389DC3")) +
  labs(title = "Text title\n", x = NULL, y = NULL) +
  labs(caption = "Text caption") +
  coord_flip() +
  # scales = "free_y" lets every facet keep only its own labels on the
  # discrete axis instead of sharing one axis across all facets.
  facet_wrap(
    ~ variable,
    nrow = 4,
    as.table = TRUE,
    labeller = as_labeller(varlabels),
    scales = "free_y"
  ) +
  guides(fill = guide_legend(title = "")) +
  expand_limits(x = 2, y = 100) +
  scale_y_continuous(
    expand = c(NA, 100),
    breaks = c(0, 25, 50, 75, 100),
    labels = c("0%", "25%", "50%", "75%", "100%\n")
  )

enter image description here

  • Related