Home > Mobile >  R: plotting linear regression misses connecting line
R: plotting linear regression misses connecting line

Time:08-24

So I am trying to plot a linear regression between income and whether someone voted strategically, but my code does not quite work. When I plotted the linear regression between age and strategic voting, it worked quite well, as you can see here: plot age

Using the same code for my income variable, however, does not work - I get the boxplots but no connecting line: plot income

I would appreciate any insight into why it is not working for my income variable. I have the feeling it might have something to do with the NAs introduced in the income variable, but I couldn't fix it. Thank you very much in advance ;)

The code used for age and strategic voting:

# Mean age (Q3) for each value of StrategischeWahl2021; these two points are
# drawn as red triangles and joined by the red line in the age plot.
Alter_Strat2021 <- Deskriptive_Statistik %>%
  select(Q3, StrategischeWahl2021) %>%
  ungroup() %>%
  group_by(StrategischeWahl2021) %>%
  summarise(Q3 = mean(as.numeric(Q3)))

# Overall mean age; Desk_NumericQ3 is defined outside this snippet.
MeanAlter2021 <- mean(Desk_NumericQ3)

# Boxplot of age (Q3) by strategic-voting group, with the raw observations
# jittered on top. The red triangles and the red line show the group means
# taken from Alter_Strat2021.
# Every ggplot component is joined with `+` at the end of the line so that R
# reads the whole chain as a single plot expression.
Deskriptive_Statistik %>%
  ungroup() %>%
  mutate(Q3 = as.numeric(Q3)) %>%
  ggplot() +
  aes(x = StrategischeWahl2021, y = Q3) +
  geom_boxplot(width = .1) +
  geom_jitter(width = .1, alpha = .1) +
  # Group means: the layer uses its own data but inherits the x/y mapping.
  geom_point(
    data = Alter_Strat2021,
    color = "red",
    size = 5,
    shape = 17
  ) +
  # group = 1 puts both means into one group so they are connected by a line
  # even though x is discrete.
  geom_line(
    data = Alter_Strat2021,
    group = 1,
    color = "red"
  ) +
  scale_y_continuous(breaks = seq(0, 80, 5)) +
  xlab("Strategische Wahl 2021") +
  ylab("Alter") +
  ggtitle("Lineare Regression Zusammenhang zwischen Alter und Strategische Wahl 2021") +
  scale_x_discrete(labels = c("Nein", "Ja")) +
  theme_minimal()

The code used for income and strategic voting:

# Mean income (Q5) for each value of StrategischeWahl2021, used for the red
# markers and the connecting line in the income plot.
# NOTE: Q5 is stored as text and contains non-numeric answers (e.g. "-"),
# which as.numeric() converts to NA. mean() is called without na.rm = TRUE,
# so a single NA in a group makes that group's mean NA.
Einkommen_Strat2021<- Deskriptive_Statistik %>% 
  select(Q5, StrategischeWahl2021) %>% 
  ungroup %>%
  group_by(StrategischeWahl2021) %>%
  summarise(Q5 = mean(as.numeric(Q5)))

# Overall mean income; Desk_NumericQ5 is defined outside this snippet.
mean(Desk_NumericQ5) -> MeanEinkommen

# Boxplot of income (Q5) by strategic-voting group, with the raw observations
# jittered on top. The red triangles and the red line show the group means
# taken from Einkommen_Strat2021.
# Every ggplot component is joined with `+` at the end of the line so that R
# reads the whole chain as a single plot expression.
Deskriptive_Statistik %>%
  ungroup() %>%
  mutate(Q5 = as.numeric(Q5)) %>%
  ggplot() +
  aes(x = StrategischeWahl2021, y = Q5) +
  geom_boxplot(width = .1) +
  geom_jitter(width = .1, alpha = .1) +
  scale_y_continuous(breaks = seq(0, 20000, 5000)) +
  # Group means: the layer uses its own data but inherits the x/y mapping, so
  # Einkommen_Strat2021 must contain non-missing Q5 values to be drawn.
  geom_point(
    data = Einkommen_Strat2021,
    color = "red",
    size = 5,
    shape = 17
  ) +
  # group = 1 puts both means into one group so they are connected by a line
  # even though x is discrete.
  geom_line(
    data = Einkommen_Strat2021,
    group = 1,
    color = "red"
  ) +
  xlab("Strategische Wahl 2021") +
  ylab("Einkommen") +
  ggtitle("Lineare Regression Zusammenhang zwischen Einkommen und Strategische Wahl 2021")

Data for age and strategic voting:

dput(Alter_Strat2021)
structure(list(StrategischeWahl2021 = c("0", "1"), Q3 = c(26.8603351955307, 
27.6375)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-2L))
> dput(Desk_NumericQ3)
c(24, 20, 20, 19, 21, 33, 27, 20, 53, 31, 21, 22, 21, 20, 25, 
21, 24, 29, 53, 20, 21, 22, 48, 28, 20, 23, 29, 29, 23, 41, 29, 
21, 29, 47, 23, 53, 34, 19, 23, 24, 29, 29, 20, 22, 29, 25, 21, 
22, 29, 20, 30, 21, 23, 19, 23, 18, 25, 22, 28, 25, 22, 21, 24, 
24, 29, 55, 20, 20, 21, 20, 28, 22, 21, 22, 20, 31, 22, 20, 31, 
22, 22, 30, 20, 22, 18, 23, 55, 22, 25, 25, 21, 39, 22, 20, 49, 
58, 20, 19, 21, 22, 29, 23, 32, 35, 20, 20, 21, 28, 24, 28, 60, 
70, 43, 21, 25, 60, 34, 54, 24, 25, 23, 21, 48, 20, 25, 24, 21, 
25, 22, 24, 21, 22, 21, 18, 22, 21, 22, 18, 19, 71, 23, 26, 18, 
24, 21, 51, 37, 41, 23, 25, 22, 35, 21, 18, 22, 29, 26, 21, 22, 
23, 43, 22, 23, 22, 21, 69, 20, 25, 54, 20, 26, 28, 23, 28, 38, 
21, 22, 78, 23, 25, 25, 63, 32, 33, 20, 21, 20, 23, 21, 24, 19, 
24, 37, 21, 26, 24, 21, 23, 21, 19, 22, 22, 25, 20, 22, 22, 19, 
30, 19, 22, 19, 26, 23, 25, 21, 36, 25, 22, 23, 22, 23, 22, 20, 
21, 29, 22, 19, 22, 22, 60, 29, 21, 20, 21, 23, 21, 23, 19, 60, 
59, 20, 23, 60, 23, 24, 22, 22, 27, 23, 19, 22, 18, 21, 22, 19, 
68, 26, 21, 20)

Data used for income and strategic voting:

dput(Einkommen_Strat2021)
structure(list(StrategischeWahl2021 = c("0", "1"), Q5 = c(NA_real_, 
NA_real_)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-2L))
> dput(Desk_NumericQ5)
structure(c(900, 400, 6000, 4600, 3700, 800, 10000, 1300, 2300, 
0, 670, 2500, 0, 8500, 2700, 2000, 1000, 1500, 180, 0, 1300, 
450, 4000, 1100, 8000, 3000, 861, 5000, 1250, 2600, 6000, 450, 
1400, 450, 4800, 4900, 0, 500, 2500, 1, 2400, 2500, 1700, 0, 
750, 450, 3400, 1300, 13000, 1400, 1400, 2700, 150, 2100, 8000, 
0, 12000, 600, 450, 4000, 1000, 0, 2000, 600, 0, 2531, 800, 1200, 
500, 1100, 0, 2950, 4000, 1500, 1800, 450, 8600, 7000, 750, 0, 
5000, 900, 4000, 2000, 6000, 800, 3500, 4000, 3000, 4500, 400, 
450, 4000, 1600, 3300, 2500, 1500, 815, 2800, 3500, 100, 2500, 
300, 500, 1749, 700, 1250, 450, 1200, 700, 426, 900, 0, 0, 1500, 
0, 1250, 1700, 700, 200, 4000, 5500, 3200, 0, 600, 1389, 5000, 
900, 600, 3100, 2000, 850, 1535, 1400, 2500, 850, 0, 2700, 777, 
700, 5500, 2350, 6000, 219, 3000, 3000, 0, 1500, 1800, 0, 1900, 
1600, 2600, 1200, 1000, 2700, 5600, 650, 1200, 450, 15000, 800, 
2600, 200, 2300, 2400, 600, 0, 0, 1300, 450, 800, 800, 2000, 
0, 2500, 200, 3500, 500, 1600, 20000, 0, 2800, 10000, 700, 4500, 
1100, 1200, 8000, 3500, 860, 1000, 1800, 5000, 1000, 600, 950, 
0, 3000, 2400, 1600, 1500, 900, 2500, 2300, 1700, 4500, 250, 
0, 450, 0, 600, 2800, 1200, 1600), na.action = structure(c(2L, 
3L, 10L, 17L, 24L, 32L, 43L, 47L, 53L, 61L, 62L, 73L, 75L, 76L, 
79L, 80L, 85L, 91L, 94L, 98L, 104L, 105L, 107L, 143L, 144L, 153L, 
160L, 165L, 170L, 179L, 184L, 185L, 197L, 200L, 204L, 211L, 219L, 
220L, 221L, 228L, 240L, 241L, 246L, 251L), class = "omit"))

Edit: Data Deskriptive_Statistik

# Keep only the three columns used here: Q3 (age), Q5 (income) and the
# strategic-voting indicator.
Deskriptive_Statistik <- select(
  Deskriptive_Statistik, Q3, Q5, StrategischeWahl2021
)
> dput(Deskriptive_Statistik)
structure(list(Q3 = c("24", "20", "20", "19", "21", "33", "27", 
"20", "53", "31", "21", "22", "21", "20", "25", "21", "24", "29", 
"53 ", "20", "21", "22", "48", "28", "20", "23", "29", "29", 
"23", "41", "29", "21", "29", "47", "23", "53", "34", "19", "23", 
"24", "29", "29", "20", "22", "29", "25", "21", "22", "29", "20", 
"30", "21", "23", "19", "23", "18", "25", "22", "28", "25", "22", 
"21", "24", "24", "29", "55", "20", "20", "21", "20", "28", "22", 
"21", "22", "20", "31", "22", "20", "31", "22", "22", "30", "20", 
"22", "18", "23", "55", "22", "25", "25", "21", "39", "22", "20", 
"49", "58", "20", "19", "21", "22", "29", "23", "32", "35", "20", 
"20", "21", "28", "24", "28", "60", "70", "43", "21", "25", "60", 
"34", "54", "24", "25", "23", "21", "48", "20", "25", "24", "21", 
"25", "22", "24", "21", "22", "21", "18", "22", "21", "22", "18", 
"19", "71", "23", "26", "18", "24", "21", "51", "37", "41", "23", 
"25", "22", "35", "21", "18", "22", "29", "26", "21", "22", "23", 
"43", "22", "23", "22", "21", "69", "20", "25", "54", "20", "26", 
"28", "23", "28", "38", "21", "22", "78", "23", "25", "25", "63", 
"32", "33", "20", "21", "20", "23", "21", "24", "19", "24", "37", 
"21", "26", "24", "21", "23", "21", "19", "22", "22", "25", "20", 
"22", "22", "19", "30", "19", "22", "19", "26", "23", "25", "21", 
"36", "25", "22", "23", "22", "23", "22", "20", "21", "29", "22", 
"19", "22", "22", "60", "29", "21", "20", "21", "23", "21", "23", 
"19", "60", "59", "20", "23", "60", "23", "24", "22", "22", "27", 
"23", "19", "22", "18", "21", "22", "19", "68", "26", "21", "20"
), Q5 = c("900", "800", "Verstehe die Frage nicht ", "400", "6000", 
"4600", "3700", "800", "10000", "-", "1300", "2300", "0", "670", 
"2500", "0", "-", "8500", "2700 ", "2000", "1000", "1500", "180", 
"4300", "0.00", "1300", "450", "4000", "1100", "8000", "3000", 
"2000", "861", "5000", "1250 ", "2600", "6000", "450", "1400", 
"450", "4800", "4900", "-", "0", "500", "2500", "1000", "1", 
"2400", "2500", "1700", "0", "1700", "750", "450", "3400", "1300", 
"13000", "1400", "1400", "-", "800", "2700", "150", "2100", "8000", 
"0", "12000", "600", "450", "4000", "1000", "1500", "0", "-", 
"-", "2000", "600", "-", "0", "0", "2531", "800", "1200", "-", 
"500", "1100", "0", "2950", "4000", "4500", "1500", "1800", "350", 
"450", "8600", "7000", "-", "750", "0", "5000", "900", "4000", 
"-", "420", "2000", "-", "6000", "800", "3500", "4000", "3000", 
"4500", "400", "450", "4000", "1600", "3300", "2500", "1500", 
"815", "2800", "3500", "100", "2500", "300", "500", "1749", "700", 
"1250", "450", "1200", "700", "426", "900", "0", "0", "1500", 
"0", "1250", "1700", "700", "-", "-", "200", "4000", "5500", 
"3200", "0", "600", "1389", "5000", "-", "900", "600", "3100", 
"2000", "850", "1535", "450", "1400", "2500", "850", "0", "250", 
"2700", "777", "700", "5500", "-", "2350", "6000", "219", "3000", 
"3000", "0 ", "1500", "1800", "-", "0", "1900", "1600", "2600", 
"3600", "900", "1200", "1000", "2700", "5600", "650", "1200", 
"450", "15000", "800", "2600", "200", "2400", "2300", "2400", 
"-", "600", "0", "0", "1900", "1300", "450", "800", "800", "2000", 
"0", "Keine Ahnung ", "2500", "200", "3500", "500", "1600", "20000", 
"0", "-", "3750", "2400", "2800", "10000", "700", "4500", "1100", 
"1200", "860", "8000", "3500", "860", "1000", "1800", "5000", 
"1000", "600", "950", "0", "3000", "4000", "0", "2400", "1600", 
"1500", "900", "300", "2500", "2300", "1700", "4500", "40", "250", 
"0", "450", "0", "600", "2800", "1200", "1600"), StrategischeWahl2021 = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", 
"0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", 
"0", "0", "0", "0", "1", "0", "1", "1", "0", "1", "1", "0", "1", 
"0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", 
"1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"1", "0", "0", "1", "0", "1", "0", "0", "1", "0", "1", "1", "0", 
"0", "0", "1", "1", "0", "1", "0", "1", "1", "0", "0", "0", "0", 
"0", "1", "0", "0", "0", "1", "0", "1", "1", "1", "1", "0", "1", 
"1", "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "1", "0", 
"0", "0", "1", "1", "0", "0", "1", "0", "1", "0", "0", "0", "0", 
"0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0", "1", "0", 
"1", "0", "0", "0", "1", "0", "1", "0", "0", "1", "0", "0", "1", 
"1", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "1", "0", 
"1", "0", "0", "0", "0", "0", "1", "0", "1", "0", "1", "0", "0", 
"1", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", 
"1", "0", "0", "1", "0", "1", "0", "0", "0", "0", "0", "1", "0", 
"1", "1", "0", "1", "0", "0", "1", "0", "0", "0", "0", "1", "0", 
"1", "0", "1", "1", "1", "0", "0", "0", "0", "1", "0", "0", "0", 
"1", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0")), row.names = 3:261, class = "data.frame")

CodePudding user response:

Not all of the data in the Q5 column is numeric, so when you call as.numeric(Q5) it generates a few NAs in the column.
The mean of a vector that contains NA is NA.

To fix it, use: summarize(Q5 = mean(as.numeric(Q5), na.rm=TRUE))

# Mean income (Q5) per strategic-voting group. na.rm = TRUE drops the NAs that
# as.numeric() produces for non-numeric answers, so each group gets a finite
# mean.
Einkommen_Strat2021 <- Deskriptive_Statistik %>%
  select(Q5, StrategischeWahl2021) %>%
  ungroup() %>%
  group_by(StrategischeWahl2021) %>%
  summarise(Q5 = mean(as.numeric(Q5), na.rm = TRUE))

Einkommen_Strat2021

# A tibble: 2 × 2
  StrategischeWahl2021    Q5
  <chr>                <dbl>
1 0                    2229.
2 1                    1917.

Now the ggplot function call works as expected.

  • Related