Home > Enterprise >  Is there an R function for aes mapping to plot the mean of the X variable instead of the sum?
Is there an R function for aes mapping to plot the mean of the X variable instead of the sum?

Time:04-24

Hi, I would like to ask about aes mapping in ggplot. I want to plot the mean wage for each age group without modifying the data table. Is there a function I can call inside the ggplot code that computes the mean wage per age group?


# Average wage and expenses for every agegroup x haveKids x educationLevel
# combination, then sign-flipped so the two haveKids groups diverge from zero.
ab_final <- ab %>%
  group_by(agegroup, haveKids, educationLevel) %>%
  # "drop_last" is summarise()'s default for this call; stating it keeps the
  # result grouped by agegroup and haveKids and silences the regrouping message.
  summarise(
    Wage = mean(Wage),
    Expenses = mean(Expenses),
    .groups = "drop_last"
  ) %>%
  # Rows with kids become negative so they are drawn left of zero. Expenses
  # follows the same sign convention as Wage, matching the printed output and
  # the dput() dump shown further down.
  mutate(
    Wage = ifelse(haveKids, -Wage, Wage),
    Expenses = ifelse(haveKids, -Expenses, Expenses)
  )
head(ab_final)

agegroup haveKids educationLevel Wage Expenses
18-25 FALSE Bachelors 73428. 18582.
18-25 FALSE Graduate 90757. 21441.
18-25 FALSE HighSchoolOrCollege 36027. 15956.
18-25 FALSE Low 36598. 19367.
18-25 TRUE Bachelors -98265. -24964.
18-25 TRUE Graduate -111545. -25002.

# Diverging horizontal bar chart of wage by age group, filled by haveKids.
# Layers must be joined with `+`; without it only the bare ggplot() call is
# assigned to `p` and the remaining lines are evaluated and discarded.
p <- ggplot(ab_final, aes(x = Wage, y = agegroup, fill = haveKids)) +
  # geom_col() stacks every row that shares an agegroup/haveKids combination,
  # so each bar shows the SUM of the educationLevel rows, not their mean.
  geom_col() +
  scale_x_continuous(
    breaks = seq(-60000, 60000, 30000),
    # Labels are unsigned: $60k, $30k, $0k, $30k, $60k.
    labels = paste0("$", as.character(c(seq(60, 0, -30), seq(30, 60, 30))), "k")
  ) +
  labs(
    x = "Annual Average Wage (USD)",
    y = "Age Group",
    title = "Ohio Annual Average Wages based on Age Group"
  ) +
  theme_bw() +
  theme(axis.ticks.y = element_blank()) +
  scale_fill_manual(values = c("TRUE" = "lightblue", "FALSE" = "lightpink"))

p

The output gives me the sum of wages for each age group instead of the mean.

# Print an R expression that recreates ab_final, so the data can be reproduced.
dput(ab_final)

structure(list(agegroup = c("18-25", "18-25", "18-25", "18-25", 
"18-25", "18-25", "18-25", "18-25", "26-30", "26-30", "26-30", 
"26-30", "26-30", "26-30", "26-30", "26-30", "31-35", "31-35", 
"31-35", "31-35", "31-35", "31-35", "31-35", "31-35", "36-40", 
"36-40", "36-40", "36-40", "36-40", "36-40", "36-40", "36-40", 
"41-45", "41-45", "41-45", "41-45", "41-45", "41-45", "41-45", 
"41-45", "46-50", "46-50", "46-50", "46-50", "46-50", "46-50", 
"46-50", "46-50", "51-55", "51-55", "51-55", "51-55", "51-55", 
"51-55", "51-55", "51-55", "56-60", "56-60", "56-60", "56-60", 
"56-60", "56-60", "56-60", "56-60"), haveKids = c(FALSE, FALSE, 
FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 
TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, 
TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, 
FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, 
FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 
TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, 
TRUE, TRUE), educationLevel = c("Bachelors", "Graduate", "HighSchoolOrCollege", 
"Low", "Bachelors", "Graduate", "HighSchoolOrCollege", "Low", 
"Bachelors", "Graduate", "HighSchoolOrCollege", "Low", "Bachelors", 
"Graduate", "HighSchoolOrCollege", "Low", "Bachelors", "Graduate", 
"HighSchoolOrCollege", "Low", "Bachelors", "Graduate", "HighSchoolOrCollege", 
"Low", "Bachelors", "Graduate", "HighSchoolOrCollege", "Low", 
"Bachelors", "Graduate", "HighSchoolOrCollege", "Low", "Bachelors", 
"Graduate", "HighSchoolOrCollege", "Low", "Bachelors", "Graduate", 
"HighSchoolOrCollege", "Low", "Bachelors", "Graduate", "HighSchoolOrCollege", 
"Low", "Bachelors", "Graduate", "HighSchoolOrCollege", "Low", 
"Bachelors", "Graduate", "HighSchoolOrCollege", "Low", "Bachelors", 
"Graduate", "HighSchoolOrCollege", "Low", "Bachelors", "Graduate", 
"HighSchoolOrCollege", "Low", "Bachelors", "Graduate", "HighSchoolOrCollege", 
"Low"), Wage = c(73427.6242255194, 90756.8740271891, 36027.1045766046, 
36597.8823458904, -98265.2264842072, -111544.761238973, -40888.1302113056, 
-29690.7404136359, 63434.2899782702, 79826.8839356714, 32912.6351345271, 
28951.8407896055, -67638.6009175875, -98570.8320239257, -46688.2105971457, 
-2365.18889956123, 73507.9183782092, 83276.4013393718, 35053.1036609163, 
35918.5441251045, -105208.124255318, -100419.654285681, -48013.5199894127, 
-31465.9994442994, 73863.5692259624, 91219.6688660635, 37944.7293875051, 
24295.1828359983, -71489.157887881, -113628.534898322, -40874.9689695586, 
-15048.4351165345, 63622.1379383326, 76162.2011422263, 35856.5165542073, 
35290.3184801558, -90556.4678989271, -139740.754762728, -47300.5763646887, 
-2351.94028134572, 57111.653529917, 88916.5286764648, 34743.1169364354, 
33034.2740885343, -102954.526388641, -110730.908830255, -44183.0808505653, 
-2431.62242073533, 75520.2374263526, 97118.4509577243, 40206.2010005338, 
15303.2183724372, -100459.961036613, -118603.619362369, -47062.636642258, 
-18136.0117958843, 68441.752176008, 78569.1358672976, 33696.7694674256, 
39621.6228202485, -96083.9762853549, -113037.604308105, -39670.0761714582, 
-76544.9368650725), Expenses = c(18581.7882554702, 21441.1145218955, 
15955.8190788926, 19366.6794157381, -24963.6038601631, -25001.8628498845, 
-18052.2160481047, -12745.725568342, 19825.5493832019, 21067.8133641346, 
15513.3625856376, 12853.4842200847, -26688.4009083829, -25557.0157549876, 
-19718.5033101881, -152.186005570974, 21576.3976579329, 22632.772851812, 
14712.230494066, 20079.6454981138, -24514.3995124845, -31520.0721153124, 
-17579.291010834, -15501.7362071054, 20980.2291762055, 21389.5574110701, 
15308.3678040099, 16557.8188855836, -24639.7689642704, -26130.2577363506, 
-15954.9566546377, -7768.13947033146, 20491.8443246166, 17922.4189300169, 
16909.3747309647, 13233.3579986897, -23432.693758128, -22597.7448653988, 
-20468.0995939873, -123.331037209483, 22093.8122932499, 19918.1372430818, 
16884.6652423487, 15485.6086554647, -23946.2595731495, -22228.0345344589, 
-20282.1042419724, -171.43286214832, 19531.7423065772, 20657.0373190312, 
16615.5145240842, 6467.10392954871, -26143.4628401692, -22481.8353859449, 
-19962.9682370225, -9238.12956845112, 21714.2834145535, 23397.9260820337, 
15825.4708571827, 18634.178657809, -23591.149852639, -25458.1674870612, 
-16577.2976554664, -24842.1579584659)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -64L), groups = structure(list(
    agegroup = c("18-25", "18-25", "26-30", "26-30", "31-35", 
    "31-35", "36-40", "36-40", "41-45", "41-45", "46-50", "46-50", 
    "51-55", "51-55", "56-60", "56-60"), haveKids = c(FALSE, 
    TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, 
    FALSE, TRUE, FALSE, TRUE, FALSE, TRUE), .rows = structure(list(
        1:4, 5:8, 9:12, 13:16, 17:20, 21:24, 25:28, 29:32, 33:36, 
        37:40, 41:44, 45:48, 49:52, 53:56, 57:60, 61:64), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -16L), .drop = TRUE))

CodePudding user response:

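I am not sure I understand your question correctly, but you can use stat_summary() with a summary function to calculate the mean, e.g. stat_summary(geom = "col", fun = mean). The argument is called fun since ggplot2 3.3.0; older versions call it fun.y, which is now deprecated. Use the following code: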

# Same chart as in the question, but each bar is the MEAN of the rows in its
# agegroup/haveKids combination instead of their stacked sum.
p <- ggplot(ab_final, aes(x = Wage, y = agegroup, fill = haveKids)) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  # ab_final holds one row per educationLevel, so this averages those
  # per-level means rather than the raw wages.
  stat_summary(geom = "col", fun = mean) +
  scale_x_continuous(
    breaks = seq(-60000, 60000, 30000),
    # Labels are unsigned: $60k, $30k, $0k, $30k, $60k.
    labels = paste0("$", as.character(c(seq(60, 0, -30), seq(30, 60, 30))), "k")
  ) +
  labs(
    x = "Annual Average Wage (USD)",
    y = "Age Group",
    title = "Ohio Annual Average Wages based on Age Group"
  ) +
  theme_bw() +
  theme(axis.ticks.y = element_blank()) +
  scale_fill_manual(values = c("TRUE" = "lightblue", "FALSE" = "lightpink"))

p

Output:

enter image description here

CodePudding user response:

This is a stab in the dark, since I do not understand the negative salaries. Perhaps you should consider converting agegroup, haveKids and educationLevel to factors.

# Convert the grouping columns of the plotted data to factors so ggplot2
# treats them as discrete variables. The conversion targets ab_final because
# that is the data frame the plot below reads.
ab_final$agegroup <- as.factor(ab_final$agegroup)
ab_final$haveKids <- as.factor(ab_final$haveKids)
ab_final$educationLevel <- as.factor(ab_final$educationLevel)

# One jittered point per row of ab_final, with wages shown in thousands.
# Layers must be joined with `+`, otherwise only ggplot() is stored in pFinal.
pFinal <- ggplot(
  ab_final,
  aes(x = agegroup, y = Wage / 1000, color = haveKids, label = educationLevel)
) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  scale_color_manual(values = c("brown", "steelblue")) +
  # scales = "free" lets every facet panel use its own axis ranges.
  facet_grid(educationLevel ~ agegroup, scales = "free") +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Salaries by age group and educational level", y = "")
ggplotly(pFinal)
  • Related