Home > Enterprise >  Boxplot two variables, color them based on mean of a third variable
Boxplot two variables, color them based on mean of a third variable

Time:09-28

I'm trying to make a boxplot where the manufacturers (mfr) are displayed on the x axis and the rating is on the y axis. However, I want to color each box based on that manufacturer's mean shelf value. (Shelf is a value between 1 and 3.)

I tried this code:

# Asker's attempt: one box of `rating` per manufacturer (`mfr`), with the
# one-letter manufacturer codes replaced by full names through `names`.
# Why the colours do not follow the mean shelf value: the aggregate() call
# computes the per-manufacturer means, but only its row count (the number of
# manufacturers) is handed to brewer.pal(). The means themselves are discarded,
# so the "Blues" palette is just applied to the boxes in plotting order.
boxplot(rating ~ mfr, col= brewer.pal(nrow(aggregate(shelf ~ mfr, FUN=mean, data=Cereals)), "Blues"), 
        names =c("American Home Food Products", "General Mills", "Kellogs", "Nabisco", "Post", "Quaker Oats", "Ralston Purina"), 
        data = Cereals)

My boxplot is colored, but the boxes are simply filled with successive shades of blue from left to right, not according to the mean shelf value per manufacturer. How can I solve this? What do I need to change?

My data, as produced by dput(Cereals):

structure(list(name = c("100% Bran", "100% Natural Bran", "All-Bran", 
"All-Bran with Extra Fiber", "Almond Delight", "Apple Cinnamon Cheerios", 
"Apple Jacks", "Basic 4", "Bran Chex", "Bran Flakes", "Cap'n'Crunch", 
"Cheerios", "Cinnamon Toast Crunch", "Clusters", "Cocoa Puffs", 
"Corn Chex", "Corn Flakes", "Corn Pops", "Count Chocula", "Cracklin' Oat Bran", 
"Cream of Wheat (Quick)", "Crispix", "Crispy Wheat & Raisins", 
"Double Chex", "Froot Loops", "Frosted Flakes", "Frosted Mini-Wheats", 
"Fruit & Fibre Dates; Walnuts; and Oats", "Fruitful Bran", "Fruity Pebbles", 
"Golden Crisp", "Golden Grahams", "Grape Nuts Flakes", "Grape-Nuts", 
"Great Grains Pecan", "Honey Graham Ohs", "Honey Nut Cheerios", 
"Honey-comb", "Just Right Crunchy  Nuggets", "Just Right Fruit & Nut", 
"Kix", "Life", "Lucky Charms", "Maypo", "Muesli Raisins; Dates; & Almonds", 
"Muesli Raisins; Peaches; & Pecans", "Mueslix Crispy Blend", 
"Multi-Grain Cheerios", "Nut&Honey Crunch", "Nutri-Grain Almond-Raisin", 
"Nutri-grain Wheat", "Oatmeal Raisin Crisp", "Post Nat. Raisin Bran", 
"Product 19", "Puffed Rice", "Puffed Wheat", "Quaker Oat Squares", 
"Quaker Oatmeal", "Raisin Bran", "Raisin Nut Bran", "Raisin Squares", 
"Rice Chex", "Rice Krispies", "Shredded Wheat", "Shredded Wheat 'n'Bran", 
"Shredded Wheat spoon size", "Smacks", "Special K", "Strawberry Fruit Wheats", 
"Total Corn Flakes", "Total Raisin Bran", "Total Whole Grain", 
"Triples", "Trix", "Wheat Chex", "Wheaties", "Wheaties Honey Gold"
), mfr = c("N", "Q", "K", "K", "R", "G", "K", "G", "R", "P", 
"Q", "G", "G", "G", "G", "R", "K", "K", "G", "K", "N", "K", "G", 
"R", "K", "K", "K", "P", "K", "P", "P", "G", "P", "P", "P", "Q", 
"G", "P", "K", "K", "G", "Q", "G", "A", "R", "R", "K", "G", "K", 
"K", "K", "G", "P", "K", "Q", "Q", "Q", "Q", "K", "G", "K", "R", 
"K", "N", "N", "N", "K", "K", "N", "G", "G", "G", "G", "G", "R", 
"G", "G"), type = c("C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "H", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "H", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "H", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C"), calories = c(70L, 120L, 70L, 50L, 110L, 110L, 
110L, 130L, 90L, 90L, 120L, 110L, 120L, 110L, 110L, 110L, 100L, 
110L, 110L, 110L, 100L, 110L, 100L, 100L, 110L, 110L, 100L, 120L, 
120L, 110L, 100L, 110L, 100L, 110L, 120L, 120L, 110L, 110L, 110L, 
140L, 110L, 100L, 110L, 100L, 150L, 150L, 160L, 100L, 120L, 140L, 
90L, 130L, 120L, 100L, 50L, 50L, 100L, 100L, 120L, 100L, 90L, 
110L, 110L, 80L, 90L, 90L, 110L, 110L, 90L, 110L, 140L, 100L, 
110L, 110L, 100L, 100L, 110L), protein = c(4L, 3L, 4L, 4L, 2L, 
2L, 2L, 3L, 2L, 3L, 1L, 6L, 1L, 3L, 1L, 2L, 2L, 1L, 1L, 3L, 3L, 
2L, 2L, 2L, 2L, 1L, 3L, 3L, 3L, 1L, 2L, 1L, 3L, 3L, 3L, 1L, 3L, 
1L, 2L, 3L, 2L, 4L, 2L, 4L, 4L, 4L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 1L, 2L, 4L, 5L, 3L, 3L, 2L, 1L, 2L, 2L, 3L, 3L, 2L, 6L, 2L, 
2L, 3L, 3L, 2L, 1L, 3L, 3L, 2L), fat = c(1L, 5L, 1L, 0L, 2L, 
2L, 0L, 2L, 1L, 0L, 2L, 2L, 3L, 2L, 1L, 0L, 0L, 0L, 1L, 3L, 0L, 
0L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 1L, 1L, 0L, 3L, 2L, 1L, 
0L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 2L, 0L, 2L, 1L, 
0L, 0L, 0L, 1L, 2L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), sodium = c(130L, 15L, 260L, 
140L, 200L, 180L, 125L, 210L, 200L, 210L, 220L, 290L, 210L, 140L, 
180L, 280L, 290L, 90L, 180L, 140L, 80L, 220L, 140L, 190L, 125L, 
200L, 0L, 160L, 240L, 135L, 45L, 280L, 140L, 170L, 75L, 220L, 
250L, 180L, 170L, 170L, 260L, 150L, 180L, 0L, 95L, 150L, 150L, 
220L, 190L, 220L, 170L, 170L, 200L, 320L, 0L, 0L, 135L, 0L, 210L, 
140L, 0L, 240L, 290L, 0L, 0L, 0L, 70L, 230L, 15L, 200L, 190L, 
200L, 250L, 140L, 230L, 200L, 200L), fiber = c(10, 2, 9, 14, 
1, 1.5, 1, 2, 4, 5, 0, 2, 0, 2, 0, 0, 1, 1, 0, 4, 1, 1, 2, 1, 
1, 1, 3, 5, 5, 0, 0, 0, 3, 3, 3, 1, 1.5, 0, 1, 2, 0, 2, 0, 0, 
3, 3, 3, 2, 0, 3, 3, 1.5, 6, 1, 0, 1, 2, 2.7, 5, 2.5, 2, 0, 0, 
3, 4, 3, 1, 1, 3, 0, 4, 3, 0, 0, 3, 3, 1), carbo = c(5, 8, 7, 
8, 14, 10.5, 11, 18, 15, 13, 12, 17, 13, 13, 12, 22, 21, 13, 
12, 10, 21, 21, 11, 18, 11, 14, 14, 12, 14, 13, 11, 15, 15, 17, 
13, 12, 11.5, 14, 17, 20, 21, 12, 12, 16, 16, 16, 17, 15, 15, 
21, 18, 13.5, 11, 20, 13, 10, 14, -1, 14, 10.5, 15, 23, 22, 16, 
19, 20, 9, 16, 15, 21, 15, 16, 21, 13, 17, 17, 16), sugars = c(6L, 
8L, 5L, 0L, 8L, 10L, 14L, 8L, 6L, 5L, 12L, 1L, 9L, 7L, 13L, 3L, 
2L, 12L, 13L, 7L, 0L, 3L, 10L, 5L, 13L, 11L, 7L, 10L, 12L, 12L, 
15L, 9L, 5L, 3L, 4L, 11L, 10L, 11L, 6L, 9L, 3L, 6L, 12L, 3L, 
11L, 11L, 13L, 6L, 9L, 7L, 2L, 10L, 14L, 3L, 0L, 0L, 6L, -1L, 
12L, 8L, 6L, 2L, 3L, 0L, 0L, 0L, 15L, 3L, 5L, 3L, 14L, 3L, 3L, 
12L, 3L, 3L, 8L), potass = c(280L, 135L, 320L, 330L, -1L, 70L, 
30L, 100L, 125L, 190L, 35L, 105L, 45L, 105L, 55L, 25L, 35L, 20L, 
65L, 160L, -1L, 30L, 120L, 80L, 30L, 25L, 100L, 200L, 190L, 25L, 
40L, 45L, 85L, 90L, 100L, 45L, 90L, 35L, 60L, 95L, 40L, 95L, 
55L, 95L, 170L, 170L, 160L, 90L, 40L, 130L, 90L, 120L, 260L, 
45L, 15L, 50L, 110L, 110L, 240L, 140L, 110L, 30L, 35L, 95L, 140L, 
120L, 40L, 55L, 90L, 35L, 230L, 110L, 60L, 25L, 115L, 110L, 60L
), vitamins = c(25L, 0L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 0L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 100L, 100L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 100L, 0L, 0L, 25L, 0L, 25L, 25L, 25L, 
25L, 25L, 0L, 0L, 0L, 25L, 25L, 25L, 100L, 100L, 100L, 25L, 25L, 
25L, 25L, 25L), shelf = c(3L, 3L, 3L, 3L, 3L, 1L, 2L, 3L, 1L, 
3L, 2L, 1L, 2L, 3L, 2L, 1L, 1L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 
1L, 2L, 3L, 3L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 1L, 1L, 3L, 3L, 2L, 
2L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
1L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 3L, 3L, 3L, 
2L, 1L, 1L, 1L), weight = c(1, 1, 1, 1, 1, 1, 1, 1.33, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.25, 1.33, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.3, 1, 1, 1, 1, 1, 1, 1.5, 1, 
1, 1.33, 1, 1.25, 1.33, 1, 0.5, 0.5, 1, 1, 1.33, 1, 1, 1, 1, 
0.83, 1, 1, 1, 1, 1, 1, 1.5, 1, 1, 1, 1, 1, 1), cups = c(0.33, 
1, 0.33, 0.5, 0.75, 0.75, 1, 0.75, 0.67, 0.67, 0.75, 1.25, 0.75, 
0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 0.75, 0.75, 1, 0.75, 0.8, 0.67, 
0.67, 0.75, 0.88, 0.75, 0.88, 0.25, 0.33, 1, 0.75, 1.33, 1, 0.75, 
1.5, 0.67, 1, 1, 1, 1, 0.67, 1, 0.67, 0.67, 1, 0.5, 0.67, 1, 
1, 1, 0.5, 0.67, 0.75, 0.5, 0.5, 1.13, 1, 1, 0.67, 0.67, 0.75, 
1, 1, 1, 1, 1, 0.75, 1, 0.67, 1, 0.75), rating = c(68.402973, 
33.983679, 59.425505, 93.704912, 34.384843, 29.509541, 33.174094, 
37.038562, 49.120253, 53.313813, 18.042851, 50.764999, 19.823573, 
40.400208, 22.736446, 41.445019, 45.863324, 35.782791, 22.396513, 
40.448772, 64.533816, 46.895644, 36.176196, 44.330856, 32.207582, 
31.435973, 58.345141, 40.917047, 41.015492, 28.025765, 35.252444, 
23.804043, 52.076897, 53.371007, 45.811716, 21.871292, 31.072217, 
28.742414, 36.523683, 36.471512, 39.241114, 45.328074, 26.734515, 
54.850917, 37.136863, 34.139765, 30.313351, 40.105965, 29.924285, 
40.69232, 59.642837, 30.450843, 37.840594, 41.50354, 60.756112, 
63.005645, 49.511874, 50.828392, 39.259197, 39.7034, 55.333142, 
41.998933, 40.560159, 68.235885, 74.472949, 72.801787, 31.230054, 
53.131324, 59.363993, 38.839746, 28.592785, 46.658844, 39.106174, 
27.753301, 49.787445, 51.592193, 36.187559), sugars1 = c(6, 8, 
5, 0, 8, 10, 14, 8, 6, 5, 12, 1, 9, 7, 13, 3, 2, 12, 13, 7, 0, 
3, 10, 5, 13, 11, 7, 10, 12, 12, 15, 9, 5, 3, 4, 11, 10, 11, 
6, 9, 3, 6, 12, 3, 11, 11, 13, 6, 9, 7, 2, 10, 14, 3, 0, 0, 6, 
6.92207792207792, 12, 8, 6, 2, 3, 0, 0, 0, 15, 3, 5, 3, 14, 3, 
3, 12, 3, 3, 8), carbo1 = c(5, 8, 7, 8, 14, 10.5, 11, 18, 15, 
13, 12, 17, 13, 13, 12, 22, 21, 13, 12, 10, 21, 21, 11, 18, 11, 
14, 14, 12, 14, 13, 11, 15, 15, 17, 13, 12, 11.5, 14, 17, 20, 
21, 12, 12, 16, 16, 16, 17, 15, 15, 21, 18, 13.5, 11, 20, 13, 
10, 14, 14.5974025974026, 14, 10.5, 15, 23, 22, 16, 19, 20, 9, 
16, 15, 21, 15, 16, 21, 13, 17, 17, 16), sugars_per = c(6, 8, 
5, 0, 8, 10, 14, 6.01503759398496, 6, 5, 12, 1, 9, 7, 13, 3, 
2, 12, 13, 7, 0, 3, 10, 5, 13, 11, 7, 8, 9.02255639097744, 12, 
15, 9, 5, 3, 4, 11, 10, 11, 6, 6.92307692307692, 3, 6, 12, 3, 
11, 11, 8.66666666666667, 6, 9, 5.26315789473684, 2, 8, 10.5263157894737, 
3, 0, 0, 6, 6.92207792207792, 9.02255639097744, 8, 6, 2, 3, 0, 
0, 0, 15, 3, 5, 3, 9.33333333333333, 3, 3, 12, 3, 3, 8), protein_per = c(4, 
3, 4, 4, 2, 2, 2, 2.25563909774436, 2, 3, 1, 6, 1, 3, 1, 2, 2, 
1, 1, 3, 3, 2, 2, 2, 2, 1, 3, 2.4, 2.25563909774436, 1, 2, 1, 
3, 3, 3, 1, 3, 1, 2, 2.30769230769231, 2, 4, 2, 4, 4, 4, 2, 2, 
2, 2.25563909774436, 3, 2.4, 2.25563909774436, 3, 2, 4, 4, 5, 
2.25563909774436, 3, 2, 1, 2, 2.40963855421687, 3, 3, 2, 6, 2, 
2, 2, 3, 2, 1, 3, 3, 2), fiber_per = c(10, 2, 9, 14, 1, 1.5, 
1, 1.50375939849624, 4, 5, 0, 2, 0, 2, 0, 0, 1, 1, 0, 4, 1, 1, 
2, 1, 1, 1, 3, 4, 3.7593984962406, 0, 0, 0, 3, 3, 3, 1, 1.5, 
0, 1, 1.53846153846154, 0, 2, 0, 0, 3, 3, 2, 2, 0, 2.25563909774436, 
3, 1.2, 4.51127819548872, 1, 0, 2, 2, 2.7, 3.7593984962406, 2.5, 
2, 0, 0, 3.6144578313253, 4, 3, 1, 1, 3, 0, 2.66666666666667, 
3, 0, 0, 3, 3, 1), fat_per = c(1, 5, 1, 0, 2, 2, 0, 1.50375939849624, 
1, 0, 2, 2, 3, 2, 1, 0, 0, 0, 1, 3, 0, 0, 1, 0, 1, 0, 0, 1.6, 
0, 1, 0, 1, 1, 0, 3, 2, 1, 0, 1, 0.769230769230769, 1, 2, 1, 
1, 3, 3, 1.33333333333333, 1, 1, 1.50375939849624, 0, 1.6, 0.75187969924812, 
0, 0, 0, 1, 2, 0.75187969924812, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
1, 0.666666666666667, 1, 1, 1, 1, 1, 1), carbo_per = c(5, 8, 
7, 8, 14, 10.5, 11, 13.5338345864662, 15, 13, 12, 17, 13, 13, 
12, 22, 21, 13, 12, 10, 21, 21, 11, 18, 11, 14, 14, 9.6, 10.5263157894737, 
13, 11, 15, 15, 17, 13, 12, 11.5, 14, 17, 15.3846153846154, 21, 
12, 12, 16, 16, 16, 11.3333333333333, 15, 15, 15.7894736842105, 
18, 10.8, 8.27067669172932, 20, 26, 20, 14, 14.5974025974026, 
10.5263157894737, 10.5, 15, 23, 22, 19.2771084337349, 19, 20, 
9, 16, 15, 21, 10, 16, 21, 13, 17, 17, 16)), row.names = c(NA, 
-77L), class = "data.frame")

CodePudding user response:

Using ggplot2

library(dplyr)
library(ggplot2)

# Mean shelf position per manufacturer; this value drives the fill colour.
dummy <- Cereals %>%
  group_by(mfr) %>%
  summarise(colo = mean(shelf))

# Attach each manufacturer's mean shelf value to its cereals, then draw one
# box per manufacturer filled on a continuous scale by that mean.
# The `+` after ggplot() is required: without it the geom_boxplot() layer is
# never added to the plot and only an empty canvas is drawn.
Cereals %>%
  select(mfr, shelf, rating) %>%
  left_join(dummy, by = "mfr") %>%
  ggplot() +
  geom_boxplot(aes(x = mfr, y = rating, group = mfr, fill = colo))

enter image description here

Base R way

library(RColorBrewer)

# Mean shelf position per manufacturer (one row per mfr).
dummy <- Cereals %>%
  group_by(mfr) %>%
  summarise(colo = mean(shelf))

# boxplot() draws one box per level of mfr, in factor-level order. Line the
# means up with those levels by name instead of trusting the row order.
mfr_levels <- levels(factor(Cereals$mfr))
shelf_means <- dummy$colo[match(mfr_levels, dummy$mfr)]

# brewer.pal() expects a single colour COUNT, not data values, so passing the
# means to it cannot work. Build a fine-grained "Blues" ramp instead and pick
# one shade per manufacturer: lowest mean -> lightest, highest -> darkest.
shades <- colorRampPalette(brewer.pal(9, "Blues"))(100)
lowest <- min(shelf_means, na.rm = TRUE)
span <- max(shelf_means, na.rm = TRUE) - lowest
shade_index <- if (span > 0) {
  1 + round(99 * (shelf_means - lowest) / span)
} else {
  # All means identical: use a single mid-range shade for every box.
  rep(50, length(shelf_means))
}

boxplot(rating ~ mfr, data = Cereals, col = shades[shade_index])

enter image description here

  • Related