So I am trying to plot a linear regression between income and whether someone voted strategically, but my code does not quite work. When I plotted the linear regression between age and strategic voting, it worked quite well, as you can see here:
Using the same code for my income variable however does not work - I have the boxplots but no connecting line
I would appreciate any insight into why it is not working for my income variable. I have the feeling it might have something to do with the NAs introduced in the income variable, but I couldn't fix it. Thank you very much in advance ;)
The code used for age and strategic voting:
# Mean age (Q3) per strategic-voting group. Q3 is stored as character,
# so it is converted with as.numeric() before averaging.
Alter_Strat2021 <- Deskriptive_Statistik %>%
  select(Q3, StrategischeWahl2021) %>%
  ungroup() %>%
  group_by(StrategischeWahl2021) %>%
  summarise(Q3 = mean(as.numeric(Q3)))

# Overall mean age across all respondents.
MeanAlter2021 <- mean(Desk_NumericQ3)

# Boxplot of age by strategic-voting group, with the raw observations
# jittered on top and the two group means (red triangles) joined by a line.
# ggplot2 layers must be chained with `+`; without it each call is evaluated
# on its own and never added to the plot.
Deskriptive_Statistik %>%
  ungroup() %>%
  mutate(Q3 = as.numeric(Q3)) %>%
  ggplot() +
  aes(x = StrategischeWahl2021, y = Q3) +
  geom_boxplot(width = .1) +
  geom_jitter(width = .1, alpha = .1) +
  # Group means come from the summary table, not the raw data.
  geom_point(
    data = Alter_Strat2021,
    color = "red",
    size = 5,
    shape = 17
  ) +
  # group = 1 puts both means in one group so a line can connect them
  # across the discrete x axis.
  geom_line(
    data = Alter_Strat2021,
    group = 1,
    color = "red"
  ) +
  scale_y_continuous(breaks = seq(0, 80, 5)) +
  xlab("Strategische Wahl 2021") +
  ylab("Alter") +
  ggtitle("Lineare Regression Zusammenhang zwischen Alter und Strategische Wahl 2021") +
  scale_x_discrete(labels = c("Nein", "Ja")) +
  theme_minimal()
The Code used for income and strategic voting:
# Mean income (Q5) per strategic-voting group.
# NOTE: this is the version that reproduces the problem described in the
# question. Q5 contains non-numeric answers (e.g. "-"), which as.numeric()
# turns into NA; mean() without na.rm = TRUE then returns NA for both groups,
# so the red points and the connecting line have nothing to draw.
Einkommen_Strat2021 <- Deskriptive_Statistik %>%
  select(Q5, StrategischeWahl2021) %>%
  ungroup() %>%
  group_by(StrategischeWahl2021) %>%
  summarise(Q5 = mean(as.numeric(Q5)))

# Overall mean income, computed on the vector that already has NAs removed.
MeanEinkommen <- mean(Desk_NumericQ5)

# Boxplot of income by strategic-voting group, with jittered raw observations
# and the group means (red triangles) joined by a line.
# ggplot2 layers must be chained with `+`; without it each call is evaluated
# on its own and never added to the plot.
Deskriptive_Statistik %>%
  ungroup() %>%
  mutate(Q5 = as.numeric(Q5)) %>%
  ggplot() +
  aes(x = StrategischeWahl2021, y = Q5) +
  geom_boxplot(width = .1) +
  geom_jitter(width = .1, alpha = .1) +
  scale_y_continuous(breaks = seq(0, 20000, 5000)) +
  # Group means come from the summary table, not the raw data.
  geom_point(
    data = Einkommen_Strat2021,
    color = "red",
    size = 5,
    shape = 17
  ) +
  # group = 1 puts both means in one group so a line can connect them
  # across the discrete x axis.
  geom_line(
    data = Einkommen_Strat2021,
    group = 1,
    color = "red"
  ) +
  xlab("Strategische Wahl 2021") +
  ylab("Einkommen") +
  ggtitle("Lineare Regression Zusammenhang zwischen Einkommen und Strategische Wahl 2021")
Data for age and strategic voting:
dput(Alter_Strat2021)
structure(list(StrategischeWahl2021 = c("0", "1"), Q3 = c(26.8603351955307,
27.6375)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-2L))
> dput(Desk_NumericQ3)
c(24, 20, 20, 19, 21, 33, 27, 20, 53, 31, 21, 22, 21, 20, 25,
21, 24, 29, 53, 20, 21, 22, 48, 28, 20, 23, 29, 29, 23, 41, 29,
21, 29, 47, 23, 53, 34, 19, 23, 24, 29, 29, 20, 22, 29, 25, 21,
22, 29, 20, 30, 21, 23, 19, 23, 18, 25, 22, 28, 25, 22, 21, 24,
24, 29, 55, 20, 20, 21, 20, 28, 22, 21, 22, 20, 31, 22, 20, 31,
22, 22, 30, 20, 22, 18, 23, 55, 22, 25, 25, 21, 39, 22, 20, 49,
58, 20, 19, 21, 22, 29, 23, 32, 35, 20, 20, 21, 28, 24, 28, 60,
70, 43, 21, 25, 60, 34, 54, 24, 25, 23, 21, 48, 20, 25, 24, 21,
25, 22, 24, 21, 22, 21, 18, 22, 21, 22, 18, 19, 71, 23, 26, 18,
24, 21, 51, 37, 41, 23, 25, 22, 35, 21, 18, 22, 29, 26, 21, 22,
23, 43, 22, 23, 22, 21, 69, 20, 25, 54, 20, 26, 28, 23, 28, 38,
21, 22, 78, 23, 25, 25, 63, 32, 33, 20, 21, 20, 23, 21, 24, 19,
24, 37, 21, 26, 24, 21, 23, 21, 19, 22, 22, 25, 20, 22, 22, 19,
30, 19, 22, 19, 26, 23, 25, 21, 36, 25, 22, 23, 22, 23, 22, 20,
21, 29, 22, 19, 22, 22, 60, 29, 21, 20, 21, 23, 21, 23, 19, 60,
59, 20, 23, 60, 23, 24, 22, 22, 27, 23, 19, 22, 18, 21, 22, 19,
68, 26, 21, 20)
Data used for income and strategic voting:
dput(Einkommen_Strat2021)
structure(list(StrategischeWahl2021 = c("0", "1"), Q5 = c(NA_real_,
NA_real_)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-2L))
> dput(Desk_NumericQ5)
structure(c(900, 400, 6000, 4600, 3700, 800, 10000, 1300, 2300,
0, 670, 2500, 0, 8500, 2700, 2000, 1000, 1500, 180, 0, 1300,
450, 4000, 1100, 8000, 3000, 861, 5000, 1250, 2600, 6000, 450,
1400, 450, 4800, 4900, 0, 500, 2500, 1, 2400, 2500, 1700, 0,
750, 450, 3400, 1300, 13000, 1400, 1400, 2700, 150, 2100, 8000,
0, 12000, 600, 450, 4000, 1000, 0, 2000, 600, 0, 2531, 800, 1200,
500, 1100, 0, 2950, 4000, 1500, 1800, 450, 8600, 7000, 750, 0,
5000, 900, 4000, 2000, 6000, 800, 3500, 4000, 3000, 4500, 400,
450, 4000, 1600, 3300, 2500, 1500, 815, 2800, 3500, 100, 2500,
300, 500, 1749, 700, 1250, 450, 1200, 700, 426, 900, 0, 0, 1500,
0, 1250, 1700, 700, 200, 4000, 5500, 3200, 0, 600, 1389, 5000,
900, 600, 3100, 2000, 850, 1535, 1400, 2500, 850, 0, 2700, 777,
700, 5500, 2350, 6000, 219, 3000, 3000, 0, 1500, 1800, 0, 1900,
1600, 2600, 1200, 1000, 2700, 5600, 650, 1200, 450, 15000, 800,
2600, 200, 2300, 2400, 600, 0, 0, 1300, 450, 800, 800, 2000,
0, 2500, 200, 3500, 500, 1600, 20000, 0, 2800, 10000, 700, 4500,
1100, 1200, 8000, 3500, 860, 1000, 1800, 5000, 1000, 600, 950,
0, 3000, 2400, 1600, 1500, 900, 2500, 2300, 1700, 4500, 250,
0, 450, 0, 600, 2800, 1200, 1600), na.action = structure(c(2L,
3L, 10L, 17L, 24L, 32L, 43L, 47L, 53L, 61L, 62L, 73L, 75L, 76L,
79L, 80L, 85L, 91L, 94L, 98L, 104L, 105L, 107L, 143L, 144L, 153L,
160L, 165L, 170L, 179L, 184L, 185L, 197L, 200L, 204L, 211L, 219L,
220L, 221L, 228L, 240L, 241L, 246L, 251L), class = "omit"))
Edit: Data Deskriptive_Statistik
# Keep only the three columns used in the plots: age (Q3), income (Q5)
# and the strategic-voting indicator.
Deskriptive_Statistik <- select(
  Deskriptive_Statistik,
  Q3, Q5, StrategischeWahl2021
)
> dput(Deskriptive_Statistik)
structure(list(Q3 = c("24", "20", "20", "19", "21", "33", "27",
"20", "53", "31", "21", "22", "21", "20", "25", "21", "24", "29",
"53 ", "20", "21", "22", "48", "28", "20", "23", "29", "29",
"23", "41", "29", "21", "29", "47", "23", "53", "34", "19", "23",
"24", "29", "29", "20", "22", "29", "25", "21", "22", "29", "20",
"30", "21", "23", "19", "23", "18", "25", "22", "28", "25", "22",
"21", "24", "24", "29", "55", "20", "20", "21", "20", "28", "22",
"21", "22", "20", "31", "22", "20", "31", "22", "22", "30", "20",
"22", "18", "23", "55", "22", "25", "25", "21", "39", "22", "20",
"49", "58", "20", "19", "21", "22", "29", "23", "32", "35", "20",
"20", "21", "28", "24", "28", "60", "70", "43", "21", "25", "60",
"34", "54", "24", "25", "23", "21", "48", "20", "25", "24", "21",
"25", "22", "24", "21", "22", "21", "18", "22", "21", "22", "18",
"19", "71", "23", "26", "18", "24", "21", "51", "37", "41", "23",
"25", "22", "35", "21", "18", "22", "29", "26", "21", "22", "23",
"43", "22", "23", "22", "21", "69", "20", "25", "54", "20", "26",
"28", "23", "28", "38", "21", "22", "78", "23", "25", "25", "63",
"32", "33", "20", "21", "20", "23", "21", "24", "19", "24", "37",
"21", "26", "24", "21", "23", "21", "19", "22", "22", "25", "20",
"22", "22", "19", "30", "19", "22", "19", "26", "23", "25", "21",
"36", "25", "22", "23", "22", "23", "22", "20", "21", "29", "22",
"19", "22", "22", "60", "29", "21", "20", "21", "23", "21", "23",
"19", "60", "59", "20", "23", "60", "23", "24", "22", "22", "27",
"23", "19", "22", "18", "21", "22", "19", "68", "26", "21", "20"
), Q5 = c("900", "800", "Verstehe die Frage nicht ", "400", "6000",
"4600", "3700", "800", "10000", "-", "1300", "2300", "0", "670",
"2500", "0", "-", "8500", "2700 ", "2000", "1000", "1500", "180",
"4300", "0.00", "1300", "450", "4000", "1100", "8000", "3000",
"2000", "861", "5000", "1250 ", "2600", "6000", "450", "1400",
"450", "4800", "4900", "-", "0", "500", "2500", "1000", "1",
"2400", "2500", "1700", "0", "1700", "750", "450", "3400", "1300",
"13000", "1400", "1400", "-", "800", "2700", "150", "2100", "8000",
"0", "12000", "600", "450", "4000", "1000", "1500", "0", "-",
"-", "2000", "600", "-", "0", "0", "2531", "800", "1200", "-",
"500", "1100", "0", "2950", "4000", "4500", "1500", "1800", "350",
"450", "8600", "7000", "-", "750", "0", "5000", "900", "4000",
"-", "420", "2000", "-", "6000", "800", "3500", "4000", "3000",
"4500", "400", "450", "4000", "1600", "3300", "2500", "1500",
"815", "2800", "3500", "100", "2500", "300", "500", "1749", "700",
"1250", "450", "1200", "700", "426", "900", "0", "0", "1500",
"0", "1250", "1700", "700", "-", "-", "200", "4000", "5500",
"3200", "0", "600", "1389", "5000", "-", "900", "600", "3100",
"2000", "850", "1535", "450", "1400", "2500", "850", "0", "250",
"2700", "777", "700", "5500", "-", "2350", "6000", "219", "3000",
"3000", "0 ", "1500", "1800", "-", "0", "1900", "1600", "2600",
"3600", "900", "1200", "1000", "2700", "5600", "650", "1200",
"450", "15000", "800", "2600", "200", "2400", "2300", "2400",
"-", "600", "0", "0", "1900", "1300", "450", "800", "800", "2000",
"0", "Keine Ahnung ", "2500", "200", "3500", "500", "1600", "20000",
"0", "-", "3750", "2400", "2800", "10000", "700", "4500", "1100",
"1200", "860", "8000", "3500", "860", "1000", "1800", "5000",
"1000", "600", "950", "0", "3000", "4000", "0", "2400", "1600",
"1500", "900", "300", "2500", "2300", "1700", "4500", "40", "250",
"0", "450", "0", "600", "2800", "1200", "1600"), StrategischeWahl2021 = c("0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0",
"0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0",
"0", "0", "0", "0", "1", "0", "1", "1", "0", "1", "1", "0", "1",
"0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1",
"1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"1", "0", "0", "1", "0", "1", "0", "0", "1", "0", "1", "1", "0",
"0", "0", "1", "1", "0", "1", "0", "1", "1", "0", "0", "0", "0",
"0", "1", "0", "0", "0", "1", "0", "1", "1", "1", "1", "0", "1",
"1", "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "1", "0",
"0", "0", "1", "1", "0", "0", "1", "0", "1", "0", "0", "0", "0",
"0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0", "1", "0",
"1", "0", "0", "0", "1", "0", "1", "0", "0", "1", "0", "0", "1",
"1", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "1", "0",
"1", "0", "0", "0", "0", "0", "1", "0", "1", "0", "1", "0", "0",
"1", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0",
"1", "0", "0", "1", "0", "1", "0", "0", "0", "0", "0", "1", "0",
"1", "1", "0", "1", "0", "0", "1", "0", "0", "0", "0", "1", "0",
"1", "0", "1", "1", "1", "0", "0", "0", "0", "1", "0", "0", "0",
"1", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0")), row.names = 3:261, class = "data.frame")
CodePudding user response:
Not all of the values in the Q5 column are numeric, so when you call as.numeric(Q5)
it generates a few NAs in the column.
The mean of a vector containing NA is NA.
To fix it, use: summarize(Q5 = mean(as.numeric(Q5), na.rm=TRUE))
# Mean income (Q5) per strategic-voting group. na.rm = TRUE drops the NAs
# that as.numeric() produces for the non-numeric answers before averaging.
Einkommen_Strat2021 <- Deskriptive_Statistik %>%
  ungroup() %>%
  select(Q5, StrategischeWahl2021) %>%
  group_by(StrategischeWahl2021) %>%
  summarise(
    Q5 = mean(as.numeric(Q5), na.rm = TRUE)
  )

# Print the summary to check that both group means are now numeric.
Einkommen_Strat2021
# A tibble: 2 × 2
StrategischeWahl2021 Q5
<chr> <dbl>
1 0 2229.
2 1 1917.
Now the ggplot function call works as expected.