I want to compute the index called HHI and plot its time trend. X-axis is year and Y-axis is HHI. On the single plane, I want to put a single trend without considering groups and at the same time plot each trend line depending on groups. My initial try is the following, but it does not work. Any help?
df_new <- df %>%
group_by(year) %>%
mutate(share = (asset/sum(asset)) * 100,
year = as_factor(year)) %>%
summarize(hhi = sum(share^2))
df_new %>%
ggplot()
geom_line(aes(x = year, y = hhi))
structure(list(kiscode = structure(c("350257", "500607", "351253",
"320498", "351156", "360252", "360317", "360228", "380911", "381152",
"310140", "310166", "370274", "310026", "350648", "310123", "350117",
"380806", "320129", "340251", "370118", "370177", "370150", "360287",
"360090", "380784", "351350", "370053", "310190", "381160", "380938",
"380733", "310590", "351067", "310476", "350451", "310174", "340324",
"310433", "350320", "381250", "310131", "320099", "340154", "320218",
"350591", "380989", "380954", "380474", "360155", "320293", "350729",
"320536", "350940", "320242", "380725", "320692", "380377", "370070",
"320730", "350478", "380407", "370088", "350745", "350354", "350893",
"370355", "370266", "350281", "320234", "350826", "351245", "330019",
"350664", "100013", "320102", "350605", "381519", "320234", "320536",
"381497", "350427", "310476", "370169", "350940", "360090", "380377",
"320307", "351067", "310239", "380989", "360341", "350672", "320692",
"381225", "310026", "500607", "380784", "370177", "360279", "350605",
"320129", "350729", "350893", "351253", "380180", "370150", "350478",
"350044", "310247", "370070", "381250", "380725", "380644", "350109",
"350117", "382892", "320242", "351075", "320498", "380938", "360309",
"350354", "350508", "340251", "310166", "350257", "320099", "381519",
"350338", "370274", "310425", "340324", "360228", "350826", "380407",
"320293", "330019", "320684", "351245", "350664", "310433", "310204",
"380954", "350222", "370118", "320730", "310174", "352691", "351350",
"350591", "360244", "320218", "380130", "320722", "310123", "351229",
"320226", "380075", "351202", "380733", "310590", "380911", "350451",
"320102", "351270", "360317", "351237", "370266", "310131", "310140",
"370088", "320706", "350320", "360155", "380458", "310190", "351040",
"380148", "381160", "350648", "100013", "350281", "351156", "381152",
"350745", "370053", "360252", "360287", "340154", "350605", "350109",
"380148", "320684", "350893", "350354", "500607", "350990", "320218",
"310476"), format.stata = "%6s"), year = structure(c(1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,
1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981, 1981,
1981, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982, 1982
), format.stata = "%9.0g"), group = structure(c(0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,
0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0,
0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0,
0, 1, 0, 0, 0, 1, 1), format.stata = "%8.0g"), asset = structure(c(18285348000,
131784602000, 62865288000, 33838416000, 6360425000, 541330024000,
22984591000, 132764602000, 213239161000, 6541576000, 49222411000,
14139029000, 21186479000, 24339944000, 28434461000, 34758175000,
17158988000, 15005232000, 217819358000, 25009066000, 58045333000,
36533137000, 41099893000, 57162344000, 83874586000, 44872170000,
45377311000, 28931137000, 30997921000, 6330526000, 50391070000,
16471520000, 44011174000, 130526305000, 47629107000, 16930929000,
76886460000, 29793379000, 110228999000, 4901957000, 13125490000,
32708711000, 46811999000, 26297430000, 159598422000, 36722653000,
14065113000, 299788090000, 4559409000, 16989303000, 5749583000,
11320038000, 7566338000, 375368800000, 72924040000, 255463903000,
36229805000, 84495668000, 147094202000, 134657126000, 18782347000,
253086691000, 13385963000, 16256468000, 23786698000, 47589241000,
8763943000, 187501484000, 6170263000, 93930236000, 91398637000,
791687216000, 30833118000, 25927689000, 33738516000, 38081496000,
20336333000, 74751893000, 191479919000, 10653474000, 6876018000,
13974417000, 46282129000, 17031189000, 363957318000, 108913679000,
99681110000, 12349372000, 153999084000, 63909181000, 19277127000,
81424045000, 11976339000, 40404825000, 13215386000, 25694216000,
218146702000, 50509180000, 46809912000, 8186674000, 26102731000,
261698899000, 16279150000, 76399748000, 72255263000, 84773886000,
53674439000, 20085234000, 9232070000, 12270735000, 152733380000,
13605821000, 290913632000, 28622103000, 9080301000, 28835122000,
898727627000, 79616356000, 17597661000, 37693427000, 70049637000,
39413617000, 29126200000, 8434708000, 36338306000, 18034628000,
17198072000, 62211779000, 112464191000, 37197557000, 25438818000,
206704722000, 45275691000, 141347351000, 108806329000, 306891682000,
6972372000, 38738858000, 55206596000, 895706255000, 32565599000,
114178659000, 56217409000, 349187101000, 46372238000, 73244335000,
146703938000, 84758892000, 76188179000, 51280724000, 44309538000,
65066954000, 196115732000, 52854113000, 75028310000, 47956445000,
40110855000, 140101705000, 27473652000, 4121326000, 25702217000,
40537799000, 246498647000, 19932395000, 33344051000, 5409390000,
24963711000, 7037719000, 235262813000, 40815511000, 66612981000,
16495717000, 163887186000, 6185102000, 18941997000, 49019044000,
40537839000, 17564451000, 49208650000, 9704707000, 28922397000,
34829185000, 10982438000, 9184309000, 6185990000, 16737065000,
31329834000, 644676975000, 88048087000, 26251573000, 27469120000,
7539382000, 49168982000, 57080458000, 99468197000, 33985447000,
295995241000, 16875791000, 209381233000, 57214594000), format.stata = ".0g")), row.names = c(NA,
-200L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
You cannot set the date as a factor and plot a line through it. Maybe keep it as numeric and set the breaks:
df_group = df %>% group_by(year,group) %>%
mutate(share = (asset/sum(asset)) * 100) %>%
summarize(hhi = sum(share^2)) %>%
mutate(group = factor(group))
ggplot(df_group,aes(x=year,y=hhi,col=group))
geom_line() scale_x_continuous(breaks = unique(df$year))
Then to add the overall:
df_all = df %>% group_by(year) %>%
mutate(share = (asset/sum(asset)) * 100) %>%
summarize(hhi = sum(share^2)) %>%
mutate(group = "all")
ggplot(rbind(df_group,df_all),
aes(x=year,y=hhi,col=factor(group)))
geom_line() scale_x_continuous(breaks = unique(df$year))
You can also consider converting it to date, see something like this link