I have creater a dataframe as example for different stocks (S1-S10) and their returns.
dates S1 S2 S3 S4 S5 S6 S7 S8 S9 S10
1 1990 0.22348250 0.060841072 0.045310892 0.25138670 0.114635775 0.06274749 0.32943865 0.11360111 0.007917013 0.198302156
2 1991 0.03936638 0.038865494 0.131434096 0.03934674 0.230446325 0.03907835 0.06295245 0.16572889 0.076442559 0.294755835
3 1992 0.23761437 0.124962186 0.270411728 0.03052463 0.084085012 0.04361429 0.30395396 0.00547834 0.246511480 0.002167857
4 1993 0.02152955 0.026277704 0.558839796 0.25456037 0.009149054 0.04792050 0.04207311 0.01600126 0.010868678 0.023984114
5 1994 0.20228639 0.128382838 0.021868270 0.25458375 0.053629440 0.03955403 0.07121921 0.04028781 0.096863998 0.029832491
6 1995 0.19152864 0.130068221 0.049160473 0.20355840 0.182559298 0.03596715 0.03641926 0.06129930 0.126346577 0.054713422
7 1996 0.23218175 0.111162658 0.108207594 0.11787836 0.011782798 0.27190237 0.14576307 0.01161600 0.014724020 0.080137804
8 1997 0.21180905 0.041096120 0.110944871 0.12150402 0.162546013 0.07068482 0.15903466 0.25115971 0.066643069 0.077361949
9 1998 0.05481769 0.023742951 0.043425701 0.01497890 0.042790468 0.14766542 0.02918536 0.30227679 0.033345436 0.718680714
10 1999 0.01303581 0.008666384 0.006056669 0.01638333 0.014752510 0.01055621 0.04093158 0.11050919 0.133858318 0.166623380
Additional, I have a list with Scores (example) for each stock and each year. Every year I will sort the stocks (S1-S10) according to their Score. So for the first year, the sorted table looks as follows:
S3 S5 S9 S1 S4 S6 S7 S2 S8 S10
1990 0.1717736 0.1334458 0.06098671 0.05461788 0.05420852 0.04970982 0.04710081 0.04012442 0.04006076 0.02960202
Furthermore, I created a table with the top3 Stocks according to the Score.
ISIN
1 S3
2 S5
3 S9
My aim is now to get the returns for the 3 Stocks (S3,S5,S9) for the first year from the first table with all the returns. How can I do that in a simple way which is also able to include in a loop for all the other years.
Thank your very much for your help!
CodePudding user response:
If you have a table of returns and a table of scores, you can do something like this:
library(data.table)
setDT(returns)
setDT(scores)
merge(
melt(returns,id="dates",variable.name = "stock", value.name = "return"),
melt(scores, id="dates",variable.name = "stock", value.name="score")[order(dates,-score)][, .SD[1:3], by=.(dates)][, score_rank:=rep(1:3,10)],
by=c("dates", "stock")
)
Equivalent tidyverse option
inner_join(
returns %>% pivot_longer(cols = !dates,names_to = "stock",values_to="return"),
scores %>% pivot_longer(cols=!dates,names_to="stock", values_to = "score") %>%
arrange(dates,desc(score)) %>%
group_by(dates) %>%
slice_head(n = 3) %>%
mutate(score_rank=1:3),
by=c("dates", "stock")
)
Output:
dates stock return score score_rank
<int> <fctr> <num> <num> <int>
1: 1990 S3 0.045310892 0.17177360 1
2: 1990 S5 0.114635775 0.13344580 2
3: 1990 S9 0.007917013 0.06098671 3
4: 1991 S1 0.039366380 0.67436839 2
5: 1991 S7 0.062952450 0.64310993 3
6: 1991 S10 0.294755835 0.72593338 1
7: 1992 S2 0.124962186 0.94091606 2
8: 1992 S4 0.030524630 0.91537706 3
9: 1992 S6 0.043614290 0.95210009 1
10: 1993 S3 0.558839796 0.54894898 3
11: 1993 S4 0.254560370 0.88473749 1
12: 1993 S9 0.010868678 0.85222319 2
13: 1994 S3 0.021868270 0.72819013 3
14: 1994 S9 0.096863998 0.88647338 2
15: 1994 S10 0.029832491 0.89224489 1
16: 1995 S1 0.191528640 0.52342368 3
17: 1995 S9 0.126346577 0.85203851 1
18: 1995 S10 0.054713422 0.68035484 2
19: 1996 S3 0.108207594 0.99714671 1
20: 1996 S9 0.014724020 0.95571008 2
21: 1996 S10 0.080137804 0.93393686 3
22: 1997 S6 0.070684820 0.80203993 1
23: 1997 S9 0.066643069 0.78962934 2
24: 1997 S10 0.077361949 0.76333522 3
25: 1998 S2 0.023742951 0.95326491 2
26: 1998 S4 0.014978900 0.96919113 1
27: 1998 S7 0.029185360 0.85824055 3
28: 1999 S1 0.013035810 0.76017244 3
29: 1999 S6 0.010556210 0.77262997 2
30: 1999 S7 0.040931580 0.79878399 1
dates stock return score score_rank
Input
library(data.table)
returns = structure(list(dates = 1990:1999, S1 = c(0.2234825, 0.03936638,
0.23761437, 0.02152955, 0.20228639, 0.19152864, 0.23218175, 0.21180905,
0.05481769, 0.01303581), S2 = c(0.060841072, 0.038865494, 0.124962186,
0.026277704, 0.128382838, 0.130068221, 0.111162658, 0.04109612,
0.023742951, 0.008666384), S3 = c(0.045310892, 0.131434096, 0.270411728,
0.558839796, 0.02186827, 0.049160473, 0.108207594, 0.110944871,
0.043425701, 0.006056669), S4 = c(0.2513867, 0.03934674, 0.03052463,
0.25456037, 0.25458375, 0.2035584, 0.11787836, 0.12150402, 0.0149789,
0.01638333), S5 = c(0.114635775, 0.230446325, 0.084085012, 0.009149054,
0.05362944, 0.182559298, 0.011782798, 0.162546013, 0.042790468,
0.01475251), S6 = c(0.06274749, 0.03907835, 0.04361429, 0.0479205,
0.03955403, 0.03596715, 0.27190237, 0.07068482, 0.14766542, 0.01055621
), S7 = c(0.32943865, 0.06295245, 0.30395396, 0.04207311, 0.07121921,
0.03641926, 0.14576307, 0.15903466, 0.02918536, 0.04093158),
S8 = c(0.11360111, 0.16572889, 0.00547834, 0.01600126, 0.04028781,
0.0612993, 0.011616, 0.25115971, 0.30227679, 0.11050919),
S9 = c(0.007917013, 0.076442559, 0.24651148, 0.010868678,
0.096863998, 0.126346577, 0.01472402, 0.066643069, 0.033345436,
0.133858318), S10 = c(0.198302156, 0.294755835, 0.002167857,
0.023984114, 0.029832491, 0.054713422, 0.080137804, 0.077361949,
0.718680714, 0.16662338)), row.names = c(NA, -10L), class = "data.frame")
scores = structure(list(dates = c(1990, 1991, 1992, 1993, 1994, 1995,
1996, 1997, 1998, 1999), S3 = c(0.1717736, 0.0826668301597238,
0.150193300796673, 0.548948981100693, 0.728190132649615, 0.460348989348859,
0.99714671052061, 0.570633312221617, 0.152109869290143, 0.165023590438068
), S5 = c(0.1334458, 0.203297924017534, 0.335005605360493, 0.512913924409077,
0.641863208264112, 0.474032786209136, 0.105274769943208, 0.261479944223538,
0.231785625452176, 0.432228550780565), S9 = c(0.06098671, 0.141129324445501,
0.633016546489671, 0.852223185822368, 0.886473377235234, 0.852038505952805,
0.95571007928811, 0.789629340171814, 0.632387412479147, 0.634164091199636
), S1 = c(0.05461788, 0.674368392676115, 0.821303177857772, 0.225072045112029,
0.319124778266996, 0.523423680569977, 0.0977244963869452, 0.230580852134153,
0.164935799082741, 0.760172437177971), S4 = c(0.05420852, 0.395722589921206,
0.915377061348408, 0.884737493470311, 0.0933721493929625, 0.349144774954766,
0.883127712178975, 0.617036611540243, 0.969191126758233, 0.536874044453725
), S6 = c(0.04970982, 0.450498258927837, 0.952100092312321, 0.273910752963275,
0.35602370928973, 0.327704650117084, 0.521468475693837, 0.802039931761101,
0.557341758161783, 0.772629965795204), S7 = c(0.04710081, 0.643109925789759,
0.419676352757961, 0.200271958485246, 0.0809331068303436, 0.344844142906368,
0.683793851174414, 0.187082261545584, 0.858240547589958, 0.798783992184326
), S2 = c(0.04012442, 0.238682345021516, 0.94091605511494, 0.0884579077828676,
0.0896228931378573, 0.204645883990452, 0.561662087915465, 0.746485061943531,
0.953264913987368, 0.728270860854536), S8 = c(0.04006076, 0.118304285453632,
0.784008525079116, 0.0789051840547472, 0.642171601299196, 0.421911529498175,
0.327139612054452, 0.517780556110665, 0.449964761035517, 0.367198353400454
), S10 = c(0.02960202, 0.725933377398178, 0.0461170475464314,
0.509178411681205, 0.892244888702407, 0.680354839190841, 0.933936856221408,
0.763335221679881, 0.473462641937658, 0.111220514168963)), row.names = c(NA,
-10L), class = "data.frame")