Home > Software design >  R: mean displayed wrong in ggplot
R: mean displayed wrong in ggplot

Time:08-16

I am trying to show a connection between strategic voting and age using a linear regression. To that end, I have created the following data frame showing the mean age (Q3) of the people who did and did not vote strategically.

# Two-row summary tibble: one row per StrategischeWahl2021 value ("0", "1") with a numeric Q3.
# NOTE(review): presumably this is the Alter_Strat2021 object used by the plot code -- confirm.
structure(list(StrategischeWahl2021 = c("0", "1"), Q3 = c(26.8603351955307, 27.6375)),class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -2L))

I then proceeded to plot this graph, but as you can see, my geom_point markers are way off. The real means are 26.8 and 27.6, but in the graph they appear somewhere around 48-51. I have no clue what is causing this, and would appreciate any insight as to what's causing it and how it can be fixed. Thank you very much in advance ;)

Code used for graph:

    # Plot that reproduces the problem: box plot and jittered points of Q3 per
    # StrategischeWahl2021 group, with the values from Alter_Strat2021 overlaid
    # as red triangles joined by a red line.
    # NOTE(review): Q3 is a character column in Deskriptive_Statistik (see its
    # dput below), so the y axis is presumably discrete here -- that looks like
    # the reason the numeric means land in the wrong place; confirm.
    Deskriptive_Statistik %>%
      ggplot() +
      aes(x = StrategischeWahl2021, y = Q3) +
      geom_boxplot(width = .1) +
      geom_jitter(width = .1, alpha = .1) +
      geom_point(
        data = Alter_Strat2021,
        color = "red",
        size = 5,
        shape = 17
      ) +
      # group = 1 joins the two group means with a single line
      geom_line(
        data = Alter_Strat2021,
        group = 1,
        color = "red"
      )

Data Deskriptive_Statistik:

    # Dump of the raw data frame used in the plot above.
    dput(Deskriptive_Statistik)
structure(list(Q3 = c("24", "20", "20", "19", "21", "33", "27", 
"20", "53", "31", "21", "22", "21", "20", "25", "21", "24", "29", 
"53 ", "20", "21", "22", "48", "28", "20", "23", "29", "29", 
"23", "41", "29", "21", "29", "47", "23", "53", "34", "19", "23", 
"24", "29", "29", "20", "22", "29", "25", "21", "22", "29", "20", 
"30", "21", "23", "19", "23", "18", "25", "22", "28", "25", "22", 
"21", "24", "24", "29", "55", "20", "20", "21", "20", "28", "22", 
"21", "22", "20", "31", "22", "20", "31", "22", "22", "30", "20", 
"22", "18", "23", "55", "22", "25", "25", "21", "39", "22", "20", 
"49", "58", "20", "19", "21", "22", "29", "23", "32", "35", "20", 
"20", "21", "28", "24", "28", "60", "70", "43", "21", "25", "60", 
"34", "54", "24", "25", "23", "21", "48", "20", "25", "24", "21", 
"25", "22", "24", "21", "22", "21", "18", "22", "21", "22", "18", 
"19", "71", "23", "26", "18", "24", "21", "51", "37", "41", "23", 
"25", "22", "35", "21", "18", "22", "29", "26", "21", "22", "23", 
"43", "22", "23", "22", "21", "69", "20", "25", "54", "20", "26", 
"28", "23", "28", "38", "21", "22", "78", "23", "25", "25", "63", 
"32", "33", "20", "21", "20", "23", "21", "24", "19", "24", "37", 
"21", "26", "24", "21", "23", "21", "19", "22", "22", "25", "20", 
"22", "22", "19", "30", "19", "22", "19", "26", "23", "25", "21", 
"36", "25", "22", "23", "22", "23", "22", "20", "21", "29", "22", 
"19", "22", "22", "60", "29", "21", "20", "21", "23", "21", "23", 
"19", "60", "59", "20", "23", "60", "23", "24", "22", "22", "27", 
"23", "19", "22", "18", "21", "22", "19", "68", "26", "21", "20"
), StrategischeWahl2021 = c("0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "1", "0", "0", 
"0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", 
"1", "1", "0", "1", "1", "0", "1", "0", "1", "0", "0", "0", "0", 
"0", "0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "1", "0", "1", 
"0", "0", "1", "0", "1", "1", "0", "0", "0", "1", "1", "0", "1", 
"0", "1", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", 
"0", "1", "1", "1", "1", "0", "1", "1", "0", "0", "0", "0", "1", 
"0", "1", "0", "0", "0", "1", "0", "0", "0", "1", "1", "0", "0", 
"1", "0", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", 
"1", "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "1", "0", 
"1", "0", "0", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", 
"0", "0", "1", "1", "0", "1", "0", "1", "0", "0", "0", "0", "0", 
"1", "0", "1", "0", "1", "0", "0", "1", "0", "0", "0", "0", "0", 
"0", "0", "1", "1", "0", "0", "0", "1", "0", "0", "1", "0", "1", 
"0", "0", "0", "0", "0", "1", "0", "1", "1", "0", "1", "0", "0", 
"1", "0", "0", "0", "0", "1", "0", "1", "0", "1", "1", "1", "0", 
"0", "0", "0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "1", 
"1", "0", "0", "0", "0")), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -259L), groups = structure(list(
    StrategischeWahl2021 = c("0", "1"), .rows = structure(list(
        c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
        14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 27L, 
        28L, 29L, 30L, 32L, 33L, 34L, 35L, 36L, 37L, 39L, 40L, 
        41L, 42L, 43L, 44L, 46L, 49L, 52L, 54L, 56L, 57L, 58L, 
        59L, 60L, 61L, 63L, 64L, 65L, 68L, 69L, 70L, 71L, 72L, 
        73L, 74L, 75L, 76L, 77L, 78L, 79L, 81L, 82L, 84L, 86L, 
        87L, 89L, 92L, 93L, 94L, 97L, 99L, 102L, 103L, 104L, 
        105L, 106L, 108L, 109L, 110L, 112L, 117L, 120L, 121L, 
        122L, 123L, 125L, 127L, 128L, 129L, 131L, 132L, 133L, 
        136L, 137L, 139L, 141L, 142L, 143L, 144L, 145L, 147L, 
        148L, 149L, 152L, 153L, 154L, 155L, 157L, 159L, 160L, 
        161L, 163L, 165L, 166L, 168L, 169L, 172L, 173L, 174L, 
        175L, 176L, 177L, 178L, 181L, 183L, 185L, 186L, 187L, 
        188L, 189L, 191L, 193L, 195L, 196L, 198L, 199L, 200L, 
        201L, 202L, 203L, 204L, 207L, 208L, 209L, 211L, 212L, 
        214L, 216L, 217L, 218L, 219L, 220L, 222L, 225L, 227L, 
        228L, 230L, 231L, 232L, 233L, 235L, 237L, 241L, 242L, 
        243L, 244L, 246L, 247L, 248L, 251L, 252L, 253L, 256L, 
        257L, 258L, 259L), c(13L, 24L, 25L, 26L, 31L, 38L, 45L, 
        47L, 48L, 50L, 51L, 53L, 55L, 62L, 66L, 67L, 80L, 83L, 
        85L, 88L, 90L, 91L, 95L, 96L, 98L, 100L, 101L, 107L, 
        111L, 113L, 114L, 115L, 116L, 118L, 119L, 124L, 126L, 
        130L, 134L, 135L, 138L, 140L, 146L, 150L, 151L, 156L, 
        158L, 162L, 164L, 167L, 170L, 171L, 179L, 180L, 182L, 
        184L, 190L, 192L, 194L, 197L, 205L, 206L, 210L, 213L, 
        215L, 221L, 223L, 224L, 226L, 229L, 234L, 236L, 238L, 
        239L, 240L, 245L, 249L, 250L, 254L, 255L)), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))

CodePudding user response:

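We need to convert the column 'Q3' to numeric. In `Deskriptive_Statistik` it is stored as character, so ggplot2 treats the y axis as discrete, and the numeric means from `Alter_Strat2021` are then placed by position among the categories rather than by their actual value.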

library(dplyr)
library(ggplot2)

# Box plot and jittered points of Q3 per StrategischeWahl2021 group, with the
# values from Alter_Strat2021 overlaid as red triangles joined by a red line.
Deskriptive_Statistik %>%
  # The data is a grouped_df; drop the grouping before transforming it.
  ungroup() %>%
  # Q3 is stored as character; convert it so the y axis is continuous and the
  # numeric Q3 values in Alter_Strat2021 share the same scale.
  mutate(Q3 = as.numeric(Q3)) %>%
  ggplot() +
  aes(x = StrategischeWahl2021, y = Q3) +
  geom_boxplot(width = .1) +
  geom_jitter(width = .1, alpha = .1) +
  geom_point(
    data = Alter_Strat2021,
    color = "red",
    size = 5,
    shape = 17
  ) +
  # x is discrete, so the points must be put in one group to be connected.
  geom_line(
    data = Alter_Strat2021,
    aes(group = 1),
    color = "red"
  )

-output

enter image description here

  • Related