Hi, I would like to ask about aes mapping. I want to plot the mean wage for each age group, but I do not want to modify the data table. Is there a function I can call inside ggplot to compute the mean wage per age group?
# Mean Wage and Expenses for every agegroup / haveKids / educationLevel cell.
# summarise() drops only the last grouping level by default, so the result is
# still grouped by agegroup and haveKids.
ab_final <- ab %>%
  group_by(agegroup, haveKids, educationLevel) %>%
  summarise(
    Wage = mean(Wage),
    Expenses = mean(Expenses)
  ) %>%
  # Sign flips: Wage is negated where haveKids is TRUE, Expenses where it is
  # FALSE.
  mutate(
    Wage = ifelse(haveKids, -Wage, Wage),
    Expenses = ifelse(haveKids, Expenses, -Expenses)
  )

# Preview the first rows of the summary.
head(ab_final)
agegroup | haveKids | educationLevel | Wage | Expenses |
---|---|---|---|---|
18-25 | FALSE | Bachelors | 73428. | 18582. |
18-25 | FALSE | Graduate | 90757. | 21441. |
18-25 | FALSE | HighSchoolOrCollege | 36027. | 15956. |
18-25 | FALSE | Low | 36598. | 19367. |
18-25 | TRUE | Bachelors | -98265. | -24964. |
18-25 | TRUE | Graduate | -111545. | -25002. |
# Diverging bar chart of Wage by age group, filled by haveKids.
# The components must be chained with `+`; without it `p` holds only the empty
# base plot and each following call is evaluated and discarded on its own.
# NOTE: geom_col() stacks the rows that share an agegroup/haveKids pair (one
# row per educationLevel), so each bar shows the sum of those Wage values.
p <- ggplot(ab_final, aes(x = Wage, y = agegroup, fill = haveKids)) +
  geom_col() +
  scale_x_continuous(
    breaks = seq(-60000, 60000, 30000),
    # Label both sides of zero with the magnitude in thousands of dollars.
    labels = paste0("$", abs(seq(-60, 60, 30)), "k")
  ) +
  labs(
    x = "Annual Average Wage (USD)",
    y = "Age Group",
    title = "Ohio Annual Average Wages based on Age Group"
  ) +
  theme_bw() +
  theme(axis.ticks.y = element_blank()) +
  scale_fill_manual(values = c("TRUE" = "lightblue", "FALSE" = "lightpink"))

p
The output shows the sum of wages for each age group instead of the mean.
dput(ab_final)
structure(list(agegroup = c("18-25", "18-25", "18-25", "18-25",
"18-25", "18-25", "18-25", "18-25", "26-30", "26-30", "26-30",
"26-30", "26-30", "26-30", "26-30", "26-30", "31-35", "31-35",
"31-35", "31-35", "31-35", "31-35", "31-35", "31-35", "36-40",
"36-40", "36-40", "36-40", "36-40", "36-40", "36-40", "36-40",
"41-45", "41-45", "41-45", "41-45", "41-45", "41-45", "41-45",
"41-45", "46-50", "46-50", "46-50", "46-50", "46-50", "46-50",
"46-50", "46-50", "51-55", "51-55", "51-55", "51-55", "51-55",
"51-55", "51-55", "51-55", "56-60", "56-60", "56-60", "56-60",
"56-60", "56-60", "56-60", "56-60"), haveKids = c(FALSE, FALSE,
FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE,
FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE,
FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE,
TRUE, TRUE), educationLevel = c("Bachelors", "Graduate", "HighSchoolOrCollege",
"Low", "Bachelors", "Graduate", "HighSchoolOrCollege", "Low",
"Bachelors", "Graduate", "HighSchoolOrCollege", "Low", "Bachelors",
"Graduate", "HighSchoolOrCollege", "Low", "Bachelors", "Graduate",
"HighSchoolOrCollege", "Low", "Bachelors", "Graduate", "HighSchoolOrCollege",
"Low", "Bachelors", "Graduate", "HighSchoolOrCollege", "Low",
"Bachelors", "Graduate", "HighSchoolOrCollege", "Low", "Bachelors",
"Graduate", "HighSchoolOrCollege", "Low", "Bachelors", "Graduate",
"HighSchoolOrCollege", "Low", "Bachelors", "Graduate", "HighSchoolOrCollege",
"Low", "Bachelors", "Graduate", "HighSchoolOrCollege", "Low",
"Bachelors", "Graduate", "HighSchoolOrCollege", "Low", "Bachelors",
"Graduate", "HighSchoolOrCollege", "Low", "Bachelors", "Graduate",
"HighSchoolOrCollege", "Low", "Bachelors", "Graduate", "HighSchoolOrCollege",
"Low"), Wage = c(73427.6242255194, 90756.8740271891, 36027.1045766046,
36597.8823458904, -98265.2264842072, -111544.761238973, -40888.1302113056,
-29690.7404136359, 63434.2899782702, 79826.8839356714, 32912.6351345271,
28951.8407896055, -67638.6009175875, -98570.8320239257, -46688.2105971457,
-2365.18889956123, 73507.9183782092, 83276.4013393718, 35053.1036609163,
35918.5441251045, -105208.124255318, -100419.654285681, -48013.5199894127,
-31465.9994442994, 73863.5692259624, 91219.6688660635, 37944.7293875051,
24295.1828359983, -71489.157887881, -113628.534898322, -40874.9689695586,
-15048.4351165345, 63622.1379383326, 76162.2011422263, 35856.5165542073,
35290.3184801558, -90556.4678989271, -139740.754762728, -47300.5763646887,
-2351.94028134572, 57111.653529917, 88916.5286764648, 34743.1169364354,
33034.2740885343, -102954.526388641, -110730.908830255, -44183.0808505653,
-2431.62242073533, 75520.2374263526, 97118.4509577243, 40206.2010005338,
15303.2183724372, -100459.961036613, -118603.619362369, -47062.636642258,
-18136.0117958843, 68441.752176008, 78569.1358672976, 33696.7694674256,
39621.6228202485, -96083.9762853549, -113037.604308105, -39670.0761714582,
-76544.9368650725), Expenses = c(18581.7882554702, 21441.1145218955,
15955.8190788926, 19366.6794157381, -24963.6038601631, -25001.8628498845,
-18052.2160481047, -12745.725568342, 19825.5493832019, 21067.8133641346,
15513.3625856376, 12853.4842200847, -26688.4009083829, -25557.0157549876,
-19718.5033101881, -152.186005570974, 21576.3976579329, 22632.772851812,
14712.230494066, 20079.6454981138, -24514.3995124845, -31520.0721153124,
-17579.291010834, -15501.7362071054, 20980.2291762055, 21389.5574110701,
15308.3678040099, 16557.8188855836, -24639.7689642704, -26130.2577363506,
-15954.9566546377, -7768.13947033146, 20491.8443246166, 17922.4189300169,
16909.3747309647, 13233.3579986897, -23432.693758128, -22597.7448653988,
-20468.0995939873, -123.331037209483, 22093.8122932499, 19918.1372430818,
16884.6652423487, 15485.6086554647, -23946.2595731495, -22228.0345344589,
-20282.1042419724, -171.43286214832, 19531.7423065772, 20657.0373190312,
16615.5145240842, 6467.10392954871, -26143.4628401692, -22481.8353859449,
-19962.9682370225, -9238.12956845112, 21714.2834145535, 23397.9260820337,
15825.4708571827, 18634.178657809, -23591.149852639, -25458.1674870612,
-16577.2976554664, -24842.1579584659)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -64L), groups = structure(list(
agegroup = c("18-25", "18-25", "26-30", "26-30", "31-35",
"31-35", "36-40", "36-40", "41-45", "41-45", "46-50", "46-50",
"51-55", "51-55", "56-60", "56-60"), haveKids = c(FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE,
FALSE, TRUE, FALSE, TRUE, FALSE, TRUE), .rows = structure(list(
1:4, 5:8, 9:12, 13:16, 17:20, 21:24, 25:28, 29:32, 33:36,
37:40, 41:44, 45:48, 49:52, 53:56, 57:60, 61:64), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -16L), .drop = TRUE))
CodePudding user response:
I am not sure if I understand your question correctly, but you can use `stat_summary()` with its `fun` argument to calculate the mean inside the plot, e.g. `stat_summary(geom = "col", fun = mean)` (the argument was called `fun.y` before ggplot2 3.3.0), using the following code:
# Diverging bar chart where each bar is the mean Wage of the rows sharing an
# agegroup/haveKids pair, computed by stat_summary() instead of being stacked.
# The components must be chained with `+`; without it `p` holds only the empty
# base plot and the summary layer is never added.
# NOTE(review): ab_final already holds one mean per educationLevel, so this is
# an unweighted mean of those means -- confirm that is the intended statistic.
p <- ggplot(ab_final, aes(x = Wage, y = agegroup, fill = haveKids)) +
  # `fun` replaces `fun.y`, which is deprecated since ggplot2 3.3.0.
  stat_summary(geom = "col", fun = mean) +
  scale_x_continuous(
    breaks = seq(-60000, 60000, 30000),
    # Label both sides of zero with the magnitude in thousands of dollars.
    labels = paste0("$", abs(seq(-60, 60, 30)), "k")
  ) +
  labs(
    x = "Annual Average Wage (USD)",
    y = "Age Group",
    title = "Ohio Annual Average Wages based on Age Group"
  ) +
  theme_bw() +
  theme(axis.ticks.y = element_blank()) +
  scale_fill_manual(values = c("TRUE" = "lightblue", "FALSE" = "lightpink"))

p
Output:
CodePudding user response:
This is a stab in the dark, since I do not understand the negative salaries... Perhaps you should consider converting agegroup, haveKids and educationLevel to factors.
# Convert the grouping columns to factors in the data that is actually plotted
# (`ab_final`); converting `ab` instead would leave the plot unaffected.
ab_final$agegroup <- as.factor(ab_final$agegroup)
ab_final$haveKids <- as.factor(ab_final$haveKids)
ab_final$educationLevel <- as.factor(ab_final$educationLevel)

# Jittered points of Wage (in thousands) per age group, coloured by haveKids
# and faceted by education level (rows) and age group (columns).
# The components must be chained with `+`, otherwise `pFinal` is only the
# empty base plot.
pFinal <- ggplot(
  ab_final,
  aes(x = agegroup, y = Wage / 1000, color = haveKids, label = educationLevel)
) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  scale_color_manual(values = c("brown", "steelblue")) +
  # scales = "free": each facet gets its own axis ranges.
  facet_grid(educationLevel ~ agegroup, scales = "free") +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Salaries by age group and educational level", y = "")

# Render the ggplot object as an interactive plotly widget.
ggplotly(pFinal)