Home > Mobile >  How to compute the proportion of sum of top N values over total sum in tidyverse
How to compute the proportion of sum of top N values over total sum in tidyverse

Time:11-29

I have a firm dataset and I want to calculate an index called "5 firm ratio", which is computed as the sum of top 5 assets over total sum of assets of every firm in each year. And eventually I want to draw a yearly plot of the index on the plane.

i.e., (a1 a2 ... a5)/sum(a1 ... aN) in each year

I tried the following and want to correct it.

df_group = df %>% group_by(year) %>%
mutate(fivefirm = sum(tail(asset,5))/sum(asset)) 

FYI, kiscode is industry classification code and chaebol indicates whether this firm belongs to large business group or not.

structure(list(kiscode = structure(c("350257", "500607", "351253", 
"320498", "351156", "360252", "360317", "360228", "380911", "381152", 
"310140", "310166", "370274", "310026", "350648", "310123", "350117", 
"380806", "320129", "340251", "370118", "370177", "370150", "360287", 
"360090", "380784", "351350", "370053", "310190", "381160", "380938", 
"380733", "310590", "351067", "310476", "350451", "310174", "340324", 
"310433", "350320", "381250", "310131", "320099", "340154", "320218", 
"350591", "380989", "380954", "380474", "360155", "320293", "350729", 
"320536", "350940", "320242", "380725", "320692", "380377", "370070", 
"320730", "350478", "380407", "370088", "350745", "350354", "350893", 
"370355", "370266", "350281", "320234", "350826", "351245", "330019", 
"350664", "100013", "320102", "350605", "381519", "320234", "320536", 
"381497", "350427", "310476", "370169", "350940", "360090", "380377", 
"320307", "351067", "310239", "380989", "360341", "350672", "320692", 
"381225", "310026", "500607", "380784", "370177", "360279", "350605", 
"320129", "350729", "350893", "351253", "380180", "370150", "350478", 
"350044", "310247", "370070", "381250", "380725", "380644", "350109", 
"350117", "382892", "320242", "351075", "320498", "380938", "360309", 
"350354", "350508", "340251", "310166", "350257", "320099", "381519", 
"350338", "370274", "310425", "340324", "360228", "350826", "380407", 
"320293", "330019", "320684", "351245", "350664", "310433", "310204", 
"380954", "350222", "370118", "320730", "310174", "352691", "351350", 
"350591", "360244", "320218", "380130", "320722", "310123", "351229", 
"320226", "380075", "351202", "380733", "310590", "380911", "350451", 
"320102", "351270", "360317", "351237", "370266", "310131", "310140", 
"370088", "320706", "350320", "360155", "380458", "310190", "351040", 
"380148", "381160", "350648", "100013", "350281", "351156", "381152", 
"350745", "370053", "360252", "360287", "340154", "350605", "350109", 
"380148", "320684", "350893", "350354", "500607", "350990", "320218", 
"310476", "310140", "351350", "340332", "310190", "320498", "370118", 
"320099", "383490", "351245", "382647", "380725", "350591", "350257", 
"381179", "350672", "380733", "370088", "381098", "382663", "351091", 
"360279", "320510", "310425", "350320", "340324", "360252", "380180", 
"350184", "360287", "370290", "380911", "310433", "330027", "370177", 
"380407", "360201", "381977", "100013", "360066", "320730", "350338", 
"350427", "350648", "370169", "380644", "360228", "320706", "351067", 
"350281", "350451", "310247", "320102", "380377", "381152", "370053", 
"351431", "380938", "340251", "350729", "360317", "350850", "381489", 
"350940", "360341", "320170", "350664", "350540", "340510", "310212", 
"381497", "350044", "352055", "310590", "370150", "380989", "320722", 
"340014", "360180", "360325", "370070", "340243", "320242", "382256", 
"360090", "370274", "381128", "370185", "382850", "370266", "351156", 
"340081", "351555", "380954", "370142", "351253", "320234", "330019", 
"310166", "350060", "370100"), format.stata = "%6s"), year = structure(c(1980, 
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 
1981, 1981, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 
1982, 1982), format.stata = "%9.0g"), chaebol = structure(c(0, 
0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 
1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 
0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 
0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 
0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 
0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 
0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 
1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 
1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 
1, 1, 0, 0, 0), format.stata = "%8.0g"), asset = structure(c(18285348000, 
131784602000, 62865288000, 33838416000, 6360425000, 541330024000, 
22984591000, 132764602000, 213239161000, 6541576000, 49222411000, 
14139029000, 21186479000, 24339944000, 28434461000, 34758175000, 
17158988000, 15005232000, 217819358000, 25009066000, 58045333000, 
36533137000, 41099893000, 57162344000, 83874586000, 44872170000, 
45377311000, 28931137000, 30997921000, 6330526000, 50391070000, 
16471520000, 44011174000, 130526305000, 47629107000, 16930929000, 
76886460000, 29793379000, 110228999000, 4901957000, 13125490000, 
32708711000, 46811999000, 26297430000, 159598422000, 36722653000, 
14065113000, 299788090000, 4559409000, 16989303000, 5749583000, 
11320038000, 7566338000, 375368800000, 72924040000, 255463903000, 
36229805000, 84495668000, 147094202000, 134657126000, 18782347000, 
253086691000, 13385963000, 16256468000, 23786698000, 47589241000, 
8763943000, 187501484000, 6170263000, 93930236000, 91398637000, 
791687216000, 30833118000, 25927689000, 33738516000, 38081496000, 
20336333000, 74751893000, 191479919000, 10653474000, 6876018000, 
13974417000, 46282129000, 17031189000, 363957318000, 108913679000, 
99681110000, 12349372000, 153999084000, 63909181000, 19277127000, 
81424045000, 11976339000, 40404825000, 13215386000, 25694216000, 
218146702000, 50509180000, 46809912000, 8186674000, 26102731000, 
261698899000, 16279150000, 76399748000, 72255263000, 84773886000, 
53674439000, 20085234000, 9232070000, 12270735000, 152733380000, 
13605821000, 290913632000, 28622103000, 9080301000, 28835122000, 
898727627000, 79616356000, 17597661000, 37693427000, 70049637000, 
39413617000, 29126200000, 8434708000, 36338306000, 18034628000, 
17198072000, 62211779000, 112464191000, 37197557000, 25438818000, 
206704722000, 45275691000, 141347351000, 108806329000, 306891682000, 
6972372000, 38738858000, 55206596000, 895706255000, 32565599000, 
114178659000, 56217409000, 349187101000, 46372238000, 73244335000, 
146703938000, 84758892000, 76188179000, 51280724000, 44309538000, 
65066954000, 196115732000, 52854113000, 75028310000, 47956445000, 
40110855000, 140101705000, 27473652000, 4121326000, 25702217000, 
40537799000, 246498647000, 19932395000, 33344051000, 5409390000, 
24963711000, 7037719000, 235262813000, 40815511000, 66612981000, 
16495717000, 163887186000, 6185102000, 18941997000, 49019044000, 
40537839000, 17564451000, 49208650000, 9704707000, 28922397000, 
34829185000, 10982438000, 9184309000, 6185990000, 16737065000, 
31329834000, 644676975000, 88048087000, 26251573000, 27469120000, 
7539382000, 49168982000, 57080458000, 99468197000, 33985447000, 
295995241000, 16875791000, 209381233000, 57214594000, 65376644000, 
50187232000, 27010755000, 52298727000, 43373300000, 86486024000, 
61934565000, 5454213000, 877343839000, 805364332000, 387369573000, 
48176811000, 17128437000, 28948285000, 11612665000, 28047437000, 
49375445000, 11628617000, 8682362000, 4047181000, 8839565000, 
14301022000, 180493246000, 5770236000, 41816496000, 781158427000, 
116189234000, 48910197000, 87905113000, 292031399000, 267092157000, 
131779013000, 16964336000, 62884490000, 334764308000, 25365019000, 
4516634000, 36124923000, 24881240000, 156283629000, 51135551000, 
15585793000, 27925392000, 15710872000, 29351467000, 132157887000, 
163214030000, 157749355000, 15887310000, 22510695000, 16378770000, 
38660509000, 109948441000, 7883521000, 31927561000, 16596812000, 
84148272000, 36350911000, 19811755000, 26648455000, 13662286000, 
38018537000, 410847930000, 85228778000, 7427020000, 49662424000, 
3784861000, 7163233000, 34358626000, 8669466000, 8562206000, 
31810724000, 50925801000, 55307038000, 19619887000, 74557346000, 
21591028000, 10828948000, 7466092000, 191497268000, 18034235000, 
91374424000, 3689361000, 111727361000, 31199301000, 8270699000, 
88922503000, 12778574000, 272883005000, 11747181000, 27675400000, 
3093261000, 428329114000, 222219204000, 65271110000, 197553636000, 
38142678000, 22150240000, 9740877000, 13121069000), format.stata = ".0g")), row.names = c(NA, 
-300L), class = c("tbl_df", "tbl", "data.frame"))

CodePudding user response:

A possible solution is:

library(tidyverse)

df %>% 
  group_by(year) %>% 
  summarise(ind=(slice_max(df,asset,n = 5, with_ties=F) %>% select(asset) %>% sum)
             / sum(asset), .groups = "drop")

#> # A tibble: 3 × 2
#>    year   ind
#>   <dbl> <dbl>
#> 1  1980 0.696
#> 2  1981 0.436
#> 3  1982 0.427

CodePudding user response:

There isn't a sale column in you original df. So I guess you want the use asset instead. You need to arrange df in descending order for your code to get the top 5 assets (you original code use tail()). Also, I would use summarize() and not mutate().

df_group = tbl %>% group_by(year) %>%
        arrange(desc(asset)) %>%
        summarize(fivefirm = sum(tail(asset,5))/sum(asset))
  • Related