Home > Software engineering >  How to draw boxplot by multiple groups using ggplot2?
How to draw boxplot by multiple groups using ggplot2?

Time:12-16

I am trying to draw boxplots for the following variables: assets and liability.

My data consists of firms' financial statements. Firms are classified as big or small (categorical variable lbg30), and time (years) is split into two periods, pre-crisis and post-crisis (categorical variable postcrisis). I want to draw boxplots of assets and liability comparing small firms vs. big firms and also pre-crisis vs. post-crisis.

structure(list(firmid = structure(c("000020", "000020", "000020", 
"000020", "000020", "000020", "000021", "000021", "000020", "000021"
), label = "거래소코드", format.stata = "%9s"), year = structure(c(1991, 
1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000), format.stata = "%9.0g"), 
    postcrisis = structure(c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1), format.stata = "%9.0g"), 
    firm_kor = structure(c("동화약품(주)", "동화약품(주)", 
    "동화약품(주)", "동화약품(주)", "동화약품(주)", 
    "동화약품(주)", "동화약(주)", "동화약(주)", 
    "동화약품(주)", "동화약(주)"), label = "회사명", format.stata = "Ds"), 
    business_group = structure(c("동화약", "동화약", "동화약", 
    "동화약", "동화약", "동화약", "동화약", "동화약", 
    "동화약", "동화약"), label = "그룹사명", format.stata = "3s"), 
    lbg30 = structure(c(0, 0, 0, 0, 0, 0, 1, 1, 0, 1), format.stata = "%9.0g"), 
    lbg = structure(c(0, 0, 0, 0, 0, 0, 1, 1, 0, 1), label = "기업규모코드", format.stata = ".0gc"), 
    bg = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), format.stata = "%9.0g"), 
    size = structure(c("", "", "", "", "", "", "", "", "", ""
    ), label = "기업규모명", format.stata = "s"), assets = structure(c(12150840320, 
    15652244480, 16448676864, 19630718976, 29004148736, 28329910272, 
    27457734656, 62851514368, 59374006272, 50737635328), format.stata = "%9.0g"), 
    liability = structure(c(54948823040, 66054799360, 81120837632, 
    106961879040, 122920968192, 140161728512, 162787033088, 159752404992, 
    149670641664, 105075081216), format.stata = "%9.0g"), sales = structure(c(88381997056, 
    102572998656, 114394996736, 119775002624, 128408002560, 134840000512, 
    143815000064, 141186007040, 136299003904, 138230005760), format.stata = "%9.0g"), 
    profit = structure(c(44432998400, 50231001088, 55298998272, 
    58389999616, 63920001024, 62578999296, 67171000320, 69623996416, 
    59872002048, 53057998848), format.stata = "%9.0g"), ebit = structure(c(19534999552, 
    19583000576, 21048999936, 21987000320, 25397999616, 23047999488, 
    21745000448, 26130999296, 23641999360, 1.458e+09), label = "Earning before interest and taxes (million won)", format.stata = "%9.0g"), 
    va = structure(c(25720190976, 32258500608, 35595018240, 34623062016, 
    41200451584, 43741118464, 48058458112, 50603368448, 70541492224, 
    22522920960), format.stata = "%9.0g"), va_pw = structure(c(26930000, 
    32920000, 36430000, 34010000, 41870000, 45090000, 49540000, 
    55730000, 88180000, 30440000), format.stata = "%9.0g"), va_ratio = structure(c(29.1000003814697, 
    31.4500007629395, 31.1200008392334, 28.9099998474121, 32.0900001525879, 
    32.439998626709, 33.4199981689453, 35.8400001525879, 51.75, 
    16.2900009155273), format.stata = "%9.0g"), k_productivity = structure(c(819.200012207031, 
    588.530029296875, 744.309997558594, 608.419982910156, 702.099975585938, 
    779.320007324219, 911.700012207031, 991.530029296875, 1964.06994628906, 
    502.309997558594), format.stata = "%9.0g"), k_productivity_gross = structure(c(31.4400005340576, 
    33.1300010681152, 29.6599998474121, 23.4799995422363, 25.0900001525879, 
    23.0799999237061, 22.5599994659424, 19.7700004577637, 26.2700004577637, 
    9.72999954223633), format.stata = "%9.0g"), wb = structure(c(8572080128, 
    9890159616, 10399187968, 12745639936, 14407654400, 15426884608, 
    17462267904, 16719245312, 14328732672, 15299931136), format.stata = "%9.0g"), 
    deprec = structure(c(1540752000, 1781939968, 2044096000, 
    2322487040, 2697072896, 3057124096, 3395273984, 1194128000, 
    1957659008, 2335313920), format.stata = "%9.0g"), cogs = structure(c(43948998656, 
    52342001664, 59095998464, 61384998912, 64488001536, 72260001792, 
    76643999744, 71562002432, 76427001856, 85172002816), format.stata = "%9.0g"), 
    land = structure(c(3962739968, 4533998080, 4673968128, 5412840960, 
    15167494144, 15234676736, 15215484928, 44340424704, 43726028800, 
    34115977216), format.stata = "%9.0g"), facilities = structure(c(5593545216, 
    6132070912, 7142042112, 8962248704, 9655307264, 9766675456, 
    9834147840, 12669279232, 12533188608, 12470169600), format.stata = "%9.0g"), 
    structures = structure(c(428073984, 439003008, 453208992, 
    453208992, 462492992, 487492992, 493648000, 298323008, 309323008, 
    352156992), format.stata = "%9.0g"), machinery = structure(c(7848509952, 
    12346684416, 13176728576, 15883726848, 18024470528, 20001619968, 
    5274594816, 20351035392, 21328994304, 25123174400), format.stata = "%9.0g"), 
    mold_pattern = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g"), 
    machinery_heavy = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0), format.stata = "%9.0g"), equipment = structure(c(0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g"), devices = structure(c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g")), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"))

CodePudding user response:

As lbg30 of your data has only 0, I modified half of those to 1, where df is data you provided.

# Work on a copy so the data frame `df` itself is left unchanged.
df1 <- df
# Set `lbg30` to 1 for rows 6 through 10 so that both groups are populated.
df1[["lbg30"]][6:10] <- 1

Then you may try

library(ggplot2)
library(dplyr)

# Boxplots of `assets` and `liability` by firm-size group (`lbg30`),
# drawn in one panel per `postcrisis` level.
df1 %>%
  # Convert the 0/1 indicators to factors and give the levels text labels.
  mutate(
    postcrisis = as.factor(postcrisis) %>% recode(., "0" = "a", "1" = "b"),
    lbg30 = as.factor(lbg30) %>% recode(., "0" = "aaa", "1" = "bbb")
  ) %>%
  # ggplot layers are combined with `+`; without it each call below would be
  # evaluated as a separate statement and never added to the plot.
  ggplot(aes(x = lbg30, group = lbg30)) +
  # The constant strings inside aes() act as keys that
  # scale_colour_manual() below maps to actual colours and legend labels.
  geom_boxplot(aes(y = assets, color = "red")) +
  geom_boxplot(aes(y = liability, color = "blue")) +
  scale_colour_manual(
    name = "the colour",
    values = c("blue" = "blue", "red" = "red"),
    labels = c("liability", "assets")
  ) +
  facet_wrap(. ~ postcrisis)

enter image description here

  • Related