I have a firm dataset and I want to calculate an index called "5 firm ratio", which is computed as the sum of top 5 assets over total sum of assets of every firm in each year. And eventually I want to draw a yearly plot of the index on the plane.
i.e., (a1 a2 ... a5)/sum(a1 ... aN)
in each year
I tried the following and want to correct it.
df_group = df %>% group_by(year) %>%
mutate(fivefirm = sum(tail(asset,5))/sum(asset))
FYI, kiscode is industry classification code and chaebol indicates whether this firm belongs to large business group or not.
structure(list(kiscode = structure(c("350257", "500607", "351253",
"320498", "351156", "360252", "360317", "360228", "380911", "381152",
"310140", "310166", "370274", "310026", "350648", "310123", "350117",
"380806", "320129", "340251", "370118", "370177", "370150", "360287",
"360090", "380784", "351350", "370053", "310190", "381160", "380938",
"380733", "310590", "351067", "310476", "350451", "310174", "340324",
"310433", "350320", "381250", "310131", "320099", "340154", "320218",
"350591", "380989", "380954", "380474", "360155", "320293", "350729",
"320536", "350940", "320242", "380725", "320692", "380377", "370070",
"320730", "350478", "380407", "370088", "350745", "350354", "350893",
"370355", "370266", "350281", "320234", "350826", "351245", "330019",
"350664", "100013", "320102", "350605", "381519", "320234", "320536",
"381497", "350427", "310476", "370169", "350940", "360090", "380377",
"320307", "351067", "310239", "380989", "360341", "350672", "320692",
"381225", "310026", "500607", "380784", "370177", "360279", "350605",
"320129", "350729", "350893", "351253", "380180", "370150", "350478",
"350044", "310247", "370070", "381250", "380725", "380644", "350109",
"350117", "382892", "320242", "351075", "320498", "380938", "360309",
"350354", "350508", "340251", "310166", "350257", "320099", "381519",
"350338", "370274", "310425", "340324", "360228", "350826", "380407",
"320293", "330019", "320684", "351245", "350664", "310433", "310204",
"380954", "350222", "370118", "320730", "310174", "352691", "351350",
"350591", "360244", "320218", "380130", "320722", "310123", "351229",
"320226", "380075", "351202", "380733", "310590", "380911", "350451",
"320102", "351270", "360317", "351237", "370266", "310131", "310140",
"370088", "320706", "350320", "360155", "380458", "310190", "351040",
"380148", "381160", "350648", "100013", "350281", "351156", "381152",
"350745", "370053", "360252", "360287", "340154", "350605", "350109",
"380148", "320684", "350893", "350354", "500607", "350990", "320218",
"310476", "310140", "351350", "340332", "310190", "320498", "370118",
"320099", "383490", "351245", "382647", "380725", "350591", "350257",
"381179", "350672", "380733", "370088", "381098", "382663", "351091",
"360279", "320510", "310425", "350320", "340324", "360252", "380180",
"350184", "360287", "370290", "380911", "310433", "330027", "370177",
"380407", "360201", "381977", "100013", "360066", "320730", "350338",
"350427", "350648", "370169", "380644", "360228", "320706", "351067",
"350281", "350451", "310247", "320102", "380377", "381152", "370053",
"351431", "380938", "340251", "350729", "360317", "350850", "381489",
"350940", "360341", "320170", "350664", "350540", "340510", "310212",
"381497", "350044", "352055", "310590", "370150", "380989", "320722",
"340014", "360180", "360325", "370070", "340243", "320242", "382256",
"360090", "370274", "381128", "370185", "382850", "370266", "351156",
"340081", "351555", "380954", "370142", "351253", "320234", "330019",
"310166", "350060", "370100"), format.stata = "%6s"), year = structure(c(1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982,
1982, 1982), format.stata = "%9.0g"), chaebol = structure(c(0,
0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
1, 1, 0, 0, 0), format.stata = "%8.0g"), asset = structure(c(18285348000,
131784602000, 62865288000, 33838416000, 6360425000, 541330024000,
22984591000, 132764602000, 213239161000, 6541576000, 49222411000,
14139029000, 21186479000, 24339944000, 28434461000, 34758175000,
17158988000, 15005232000, 217819358000, 25009066000, 58045333000,
36533137000, 41099893000, 57162344000, 83874586000, 44872170000,
45377311000, 28931137000, 30997921000, 6330526000, 50391070000,
16471520000, 44011174000, 130526305000, 47629107000, 16930929000,
76886460000, 29793379000, 110228999000, 4901957000, 13125490000,
32708711000, 46811999000, 26297430000, 159598422000, 36722653000,
14065113000, 299788090000, 4559409000, 16989303000, 5749583000,
11320038000, 7566338000, 375368800000, 72924040000, 255463903000,
36229805000, 84495668000, 147094202000, 134657126000, 18782347000,
253086691000, 13385963000, 16256468000, 23786698000, 47589241000,
8763943000, 187501484000, 6170263000, 93930236000, 91398637000,
791687216000, 30833118000, 25927689000, 33738516000, 38081496000,
20336333000, 74751893000, 191479919000, 10653474000, 6876018000,
13974417000, 46282129000, 17031189000, 363957318000, 108913679000,
99681110000, 12349372000, 153999084000, 63909181000, 19277127000,
81424045000, 11976339000, 40404825000, 13215386000, 25694216000,
218146702000, 50509180000, 46809912000, 8186674000, 26102731000,
261698899000, 16279150000, 76399748000, 72255263000, 84773886000,
53674439000, 20085234000, 9232070000, 12270735000, 152733380000,
13605821000, 290913632000, 28622103000, 9080301000, 28835122000,
898727627000, 79616356000, 17597661000, 37693427000, 70049637000,
39413617000, 29126200000, 8434708000, 36338306000, 18034628000,
17198072000, 62211779000, 112464191000, 37197557000, 25438818000,
206704722000, 45275691000, 141347351000, 108806329000, 306891682000,
6972372000, 38738858000, 55206596000, 895706255000, 32565599000,
114178659000, 56217409000, 349187101000, 46372238000, 73244335000,
146703938000, 84758892000, 76188179000, 51280724000, 44309538000,
65066954000, 196115732000, 52854113000, 75028310000, 47956445000,
40110855000, 140101705000, 27473652000, 4121326000, 25702217000,
40537799000, 246498647000, 19932395000, 33344051000, 5409390000,
24963711000, 7037719000, 235262813000, 40815511000, 66612981000,
16495717000, 163887186000, 6185102000, 18941997000, 49019044000,
40537839000, 17564451000, 49208650000, 9704707000, 28922397000,
34829185000, 10982438000, 9184309000, 6185990000, 16737065000,
31329834000, 644676975000, 88048087000, 26251573000, 27469120000,
7539382000, 49168982000, 57080458000, 99468197000, 33985447000,
295995241000, 16875791000, 209381233000, 57214594000, 65376644000,
50187232000, 27010755000, 52298727000, 43373300000, 86486024000,
61934565000, 5454213000, 877343839000, 805364332000, 387369573000,
48176811000, 17128437000, 28948285000, 11612665000, 28047437000,
49375445000, 11628617000, 8682362000, 4047181000, 8839565000,
14301022000, 180493246000, 5770236000, 41816496000, 781158427000,
116189234000, 48910197000, 87905113000, 292031399000, 267092157000,
131779013000, 16964336000, 62884490000, 334764308000, 25365019000,
4516634000, 36124923000, 24881240000, 156283629000, 51135551000,
15585793000, 27925392000, 15710872000, 29351467000, 132157887000,
163214030000, 157749355000, 15887310000, 22510695000, 16378770000,
38660509000, 109948441000, 7883521000, 31927561000, 16596812000,
84148272000, 36350911000, 19811755000, 26648455000, 13662286000,
38018537000, 410847930000, 85228778000, 7427020000, 49662424000,
3784861000, 7163233000, 34358626000, 8669466000, 8562206000,
31810724000, 50925801000, 55307038000, 19619887000, 74557346000,
21591028000, 10828948000, 7466092000, 191497268000, 18034235000,
91374424000, 3689361000, 111727361000, 31199301000, 8270699000,
88922503000, 12778574000, 272883005000, 11747181000, 27675400000,
3093261000, 428329114000, 222219204000, 65271110000, 197553636000,
38142678000, 22150240000, 9740877000, 13121069000), format.stata = ".0g")), row.names = c(NA,
-300L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
A possible solution is:
library(tidyverse)
df %>%
group_by(year) %>%
summarise(ind=(slice_max(df,asset,n = 5, with_ties=F) %>% select(asset) %>% sum)
/ sum(asset), .groups = "drop")
#> # A tibble: 3 × 2
#> year ind
#> <dbl> <dbl>
#> 1 1980 0.696
#> 2 1981 0.436
#> 3 1982 0.427
CodePudding user response:
There isn't a sale
column in you original df. So I guess you want the use asset
instead. You need to arrange df in descending order for your code to get the top 5 assets (you original code use tail()
). Also, I would use summarize()
and not mutate()
.
df_group = tbl %>% group_by(year) %>%
arrange(desc(asset)) %>%
summarize(fivefirm = sum(tail(asset,5))/sum(asset))