Home > Software engineering >  Finding the best possible combinations based on multiple conditions with R dplyr
Finding the best possible combinations based on multiple conditions with R dplyr

Time:10-17

My goal


My goal is to find, based on the index, the best combinations of 10 players that have a score sum between 95.5-100.4.

The detail


There is an important detail: based on the role column, the 10 players must consist of 2 C (two Centers), 4 F (four Forwards) and 4 G (four Guards).

At the moment I am struggling with for loops, but I am sure there is something bright in the dplyr package that I am missing. Any help or guidance is highly appreciated.

# Reproducible example: 20 players, each with a random score and index and a
# role that cycles C, F, F, G, G (4 centers, 8 forwards, 8 guards in total).
set.seed(123)
players <- sprintf("player %d", 1:20)
score <- runif(n = 20, min = 4, max = 16.7)   # drawn first, then index
index <- runif(n = 20, min = -1, max = 9)
role <- rep(c("C", "F", "F", "G", "G"), times = 4)

df <- data.frame(
  players = players,
  score = score,
  index = index,
  role = role
)
df
#>      players     score      index role
#> 1   player 1  7.652235  7.8953932    C
#> 2   player 2 14.011475  5.9280341    F
#> 3   player 3  9.194007  5.4050681    F
#> 4   player 4 15.214321  8.9426978    G
#> 5   player 5 15.943935  5.5570580    G
#> 6   player 6  4.578568  6.0853047    C
#> 7   player 7 10.706940  4.4406602    F
#> 8   player 8 15.333722  4.9414202    F
#> 9   player 9 11.003225  1.8915974    G
#> 10 player 10  9.799007  0.4711365    G
#> 11 player 11 16.151783  8.6302423    C
#> 12 player 12  9.757344  8.0229905    F
#> 13 player 13 12.605147  5.9070528    F
#> 14 player 14 11.272444  6.9546742    G
#> 15 player 15  5.307143 -0.7538632    G
#> 16 player 16 15.427777  3.7779597    C
#> 17 player 17  7.125314  6.5845954    F
#> 18 player 18  4.534156  1.1640794    F
#> 19 player 19  8.164593  2.1818101    G
#> 20 player 20 16.122196  1.3162579    G

Created on 2021-10-16 by the reprex package (v2.0.1)

Thank you for your time

UPDATE:


My logic so far is:

  1. to transpose my df
# Transpose so that each player becomes a column instead of a row.
# NOTE(review): df mixes character and numeric columns, so t() goes through a
# character matrix and the transposed frame presumably holds every value as
# text - confirm before summing scores from it.
df <- as.data.frame(t(df))
  2. and create all possible combinations of 10 players
# Enumerate every selection of 10 elements of df, returned as a list because
# simplify = FALSE. The result is not assigned, and no role or score condition
# is applied at this point.
# NOTE(review): presumably the elements are the transposed frame's columns
# (one per player) - confirm.
combn(df, 10, simplify=FALSE)

Now I need to select the lists which have the right roles and a score sum between 95.5-100.4. Damn, there should be a more clever way.

CodePudding user response:

I used data.table. choose(4, 2) * choose(8, 4) * choose(8, 4) = 29400 combinations. Basically brute force. I'm sure there's a more elegant solution.

library(data.table)
library(magrittr)

# Rebuild the question's example data with the same seed and the same draw
# order (score first, then index), this time as a data.table.
set.seed(123)
players <- paste("player", 1:20)
score <- runif(n = 20, min = 4, max = 16.7)
index <- runif(n = 20, min = -1, max = 9)
role <- rep(c("C", "F", "F", "G", "G"), times = 4)

dt <- data.table(
  players = players,
  score = score,
  index = index,
  role = role
)

# One table per position; the combinations for each role are built separately.
centers <- dt[role == "C", ]
forwards <- dt[role == "F", ]
guards <- dt[role == "G", ]
# All 2-center selections: one row per combination, holding the two player
# names plus the pair's summed score and summed index.
# Each column of the transposed combn() matrix is one "slot" (1st pick, 2nd
# pick) and contains row numbers into `centers`.
c_combos <- combn(nrow(centers), 2) %>% t() %>% as.data.table()
# Turn every slot's row numbers into the matching rows of `centers`.
c_combos <- lapply(c_combos, function(x) centers[x])
# The `+` operators between the per-slot values had been lost; without them
# the expression does not parse.
c_combos <- data.table(c_combos[[1]]$players, c_combos[[2]]$players,
                       c_combos[[1]]$score + c_combos[[2]]$score,
                       c_combos[[1]]$index + c_combos[[2]]$index) %>%
  setnames(c("C1", "C2", "score_c", "index_c"))
# All 4-forward selections: one row per combination, holding the four player
# names plus the group's summed score and summed index.
# Each column of the transposed combn() matrix is one "slot" (1st..4th pick)
# and contains row numbers into `forwards`.
f_combos <- combn(nrow(forwards), 4) %>% t() %>% as.data.table()
# Turn every slot's row numbers into the matching rows of `forwards`.
f_combos <- lapply(f_combos, function(x) forwards[x])
# The `+` operators between the per-slot values had been lost; without them
# the expression does not parse.
f_combos <- data.table(f_combos[[1]]$players, f_combos[[2]]$players,
                       f_combos[[3]]$players, f_combos[[4]]$players,
                       f_combos[[1]]$score + f_combos[[2]]$score +
                         f_combos[[3]]$score + f_combos[[4]]$score,
                       f_combos[[1]]$index + f_combos[[2]]$index +
                         f_combos[[3]]$index + f_combos[[4]]$index) %>%
  setnames(c("F1", "F2", "F3", "F4", "score_f", "index_f"))
# All 4-guard selections: one row per combination, holding the four player
# names plus the group's summed score and summed index.
# Each column of the transposed combn() matrix is one "slot" (1st..4th pick)
# and contains row numbers into `guards`.
g_combos <- combn(nrow(guards), 4) %>% t() %>% as.data.table()
# Turn every slot's row numbers into the matching rows of `guards`.
g_combos <- lapply(g_combos, function(x) guards[x])
# The `+` operators between the per-slot values had been lost; without them
# the expression does not parse.
g_combos <- data.table(g_combos[[1]]$players, g_combos[[2]]$players,
                       g_combos[[3]]$players, g_combos[[4]]$players,
                       g_combos[[1]]$score + g_combos[[2]]$score +
                         g_combos[[3]]$score + g_combos[[4]]$score,
                       g_combos[[1]]$index + g_combos[[2]]$index +
                         g_combos[[3]]$index + g_combos[[4]]$index) %>%
  setnames(c("G1", "G2", "G3", "G4", "score_g", "index_g"))

# Every pairing of one center combo, one forward combo and one guard combo,
# expressed as row numbers (Var1, Var2, Var3) into the three combo tables.
# seq_len() is used instead of 1:nrow() so an empty table yields no pairings
# rather than the bogus sequence c(1, 0).
combined <- expand.grid(seq_len(nrow(c_combos)),
                        seq_len(nrow(f_combos)),
                        seq_len(nrow(g_combos)))
# Index each combo table once with the full vector of row numbers and bind the
# three results column-wise. This produces the same rows in the same order as
# building one single-row table per pairing and row-binding them, without
# constructing tens of thousands of tiny tables.
ans <- data.table(c_combos[combined$Var1],
                  f_combos[combined$Var2],
                  g_combos[combined$Var3])
# Total score and total index of each 10-player line-up are the sums of the
# three per-role subtotals (the `+` operators had been lost from these lines).
ans[, score := score_c + score_f + score_g]
ans[, index := index_c + index_f + index_g]
# Drop the per-role subtotals now that the totals exist.
ans[, c("score_c", "score_f", "score_g", "index_c", "index_f", "index_g") := NULL]
# Keep the line-ups whose total score lies in the target range and list those
# with the highest total index first.
# NOTE(review): the original line was cut off after `ans[score`; the bounds
# are taken from the question's stated range (95.5-100.4) and the descending
# sort on index is assumed from "best ... based on the index" - confirm.
ans[score >= 95.5 & score <= 100.4][order(-index)]
  • Related