I am trying to draw boxplots for the following variables: `assets` and `liability`.
My data consists of firms' financial statements. Firms are classified as big or small (categorical variable `lbg30`), and time (years) is split into two periods, pre-crisis and post-crisis (categorical variable `postcrisis`). I want to draw boxplots of assets and liability that compare small vs. big firms and also pre-crisis vs. post-crisis.
# Sample of the firm-level panel (10 rows), as produced by dput().
# Bound to `df` so that code referring to `df` can use it directly.
# The last `ebit` value is written as 1.458e+09: the exponent sign is required,
# "1.458e 09" does not parse.
# NOTE(review): some format.stata values ("Ds", "3s", ".0gc", "s") look like
# Stata display formats that lost their leading "%<width>" -- confirm against
# the original data; they are kept unchanged here.
df <- structure(list(firmid = structure(c("000020", "000020", "000020",
"000020", "000020", "000020", "000021", "000021", "000020", "000021"
), label = "거래소코드", format.stata = "%9s"), year = structure(c(1991,
1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000), format.stata = "%9.0g"),
postcrisis = structure(c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1), format.stata = "%9.0g"),
firm_kor = structure(c("동화약품(주)", "동화약품(주)",
"동화약품(주)", "동화약품(주)", "동화약품(주)",
"동화약품(주)", "동화약(주)", "동화약(주)",
"동화약품(주)", "동화약(주)"), label = "회사명", format.stata = "Ds"),
business_group = structure(c("동화약", "동화약", "동화약",
"동화약", "동화약", "동화약", "동화약", "동화약",
"동화약", "동화약"), label = "그룹사명", format.stata = "3s"),
lbg30 = structure(c(0, 0, 0, 0, 0, 0, 1, 1, 0, 1), format.stata = "%9.0g"),
lbg = structure(c(0, 0, 0, 0, 0, 0, 1, 1, 0, 1), label = "기업규모코드", format.stata = ".0gc"),
bg = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), format.stata = "%9.0g"),
size = structure(c("", "", "", "", "", "", "", "", "", ""
), label = "기업규모명", format.stata = "s"), assets = structure(c(12150840320,
15652244480, 16448676864, 19630718976, 29004148736, 28329910272,
27457734656, 62851514368, 59374006272, 50737635328), format.stata = "%9.0g"),
liability = structure(c(54948823040, 66054799360, 81120837632,
106961879040, 122920968192, 140161728512, 162787033088, 159752404992,
149670641664, 105075081216), format.stata = "%9.0g"), sales = structure(c(88381997056,
102572998656, 114394996736, 119775002624, 128408002560, 134840000512,
143815000064, 141186007040, 136299003904, 138230005760), format.stata = "%9.0g"),
profit = structure(c(44432998400, 50231001088, 55298998272,
58389999616, 63920001024, 62578999296, 67171000320, 69623996416,
59872002048, 53057998848), format.stata = "%9.0g"), ebit = structure(c(19534999552,
19583000576, 21048999936, 21987000320, 25397999616, 23047999488,
21745000448, 26130999296, 23641999360, 1.458e+09), label = "Earning before interest and taxes (million won)", format.stata = "%9.0g"),
va = structure(c(25720190976, 32258500608, 35595018240, 34623062016,
41200451584, 43741118464, 48058458112, 50603368448, 70541492224,
22522920960), format.stata = "%9.0g"), va_pw = structure(c(26930000,
32920000, 36430000, 34010000, 41870000, 45090000, 49540000,
55730000, 88180000, 30440000), format.stata = "%9.0g"), va_ratio = structure(c(29.1000003814697,
31.4500007629395, 31.1200008392334, 28.9099998474121, 32.0900001525879,
32.439998626709, 33.4199981689453, 35.8400001525879, 51.75,
16.2900009155273), format.stata = "%9.0g"), k_productivity = structure(c(819.200012207031,
588.530029296875, 744.309997558594, 608.419982910156, 702.099975585938,
779.320007324219, 911.700012207031, 991.530029296875, 1964.06994628906,
502.309997558594), format.stata = "%9.0g"), k_productivity_gross = structure(c(31.4400005340576,
33.1300010681152, 29.6599998474121, 23.4799995422363, 25.0900001525879,
23.0799999237061, 22.5599994659424, 19.7700004577637, 26.2700004577637,
9.72999954223633), format.stata = "%9.0g"), wb = structure(c(8572080128,
9890159616, 10399187968, 12745639936, 14407654400, 15426884608,
17462267904, 16719245312, 14328732672, 15299931136), format.stata = "%9.0g"),
deprec = structure(c(1540752000, 1781939968, 2044096000,
2322487040, 2697072896, 3057124096, 3395273984, 1194128000,
1957659008, 2335313920), format.stata = "%9.0g"), cogs = structure(c(43948998656,
52342001664, 59095998464, 61384998912, 64488001536, 72260001792,
76643999744, 71562002432, 76427001856, 85172002816), format.stata = "%9.0g"),
land = structure(c(3962739968, 4533998080, 4673968128, 5412840960,
15167494144, 15234676736, 15215484928, 44340424704, 43726028800,
34115977216), format.stata = "%9.0g"), facilities = structure(c(5593545216,
6132070912, 7142042112, 8962248704, 9655307264, 9766675456,
9834147840, 12669279232, 12533188608, 12470169600), format.stata = "%9.0g"),
structures = structure(c(428073984, 439003008, 453208992,
453208992, 462492992, 487492992, 493648000, 298323008, 309323008,
352156992), format.stata = "%9.0g"), machinery = structure(c(7848509952,
12346684416, 13176728576, 15883726848, 18024470528, 20001619968,
5274594816, 20351035392, 21328994304, 25123174400), format.stata = "%9.0g"),
mold_pattern = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g"),
machinery_heavy = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0), format.stata = "%9.0g"), equipment = structure(c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g"), devices = structure(c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g")), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
Since `lbg30` in your data has only `0`, I changed half of the values to `1`. Here `df` is the data you provided.
# Copy the data, then set lbg30 to 1 for rows 6 through 10 of the copy.
df1 <- df
df1$lbg30 <- replace(df1$lbg30, 6:10, 1)
Then you may try
library(ggplot2)
library(dplyr)
# Boxplots of assets (drawn in red) and liability (drawn in blue) for each
# lbg30 group, with one facet per postcrisis value.
# ggplot2 layers have to be joined with `+`; without it each geom/scale/facet
# call is evaluated on its own and never becomes part of the plot.
df1 %>%
  mutate(
    # Turn the 0/1 indicators into labelled factors for the axis and facets.
    postcrisis = as.factor(postcrisis) %>% recode("0" = "a", "1" = "b"),
    lbg30 = as.factor(lbg30) %>% recode("0" = "aaa", "1" = "bbb")
  ) %>%
  ggplot(aes(x = lbg30, group = lbg30)) +
  # The constant strings inside aes() act as legend keys; the manual scale
  # below maps them to actual colours and readable labels.
  geom_boxplot(aes(y = assets, color = "red")) +
  geom_boxplot(aes(y = liability, color = "blue")) +
  scale_colour_manual(
    name = "the colour",
    values = c("blue" = "blue", "red" = "red"),
    labels = c("liability", "assets")
  ) +
  facet_wrap(. ~ postcrisis)