How do i find all possible permutations of the first 1-2 letters of each word?-CodePudding

I have the following data:

inputs <- c("Master", "Bachelor", "School")

I would like to have all possible permutations of the first 1-2 letters of each word.

first_letter <- sapply(inputs, substr, start = 1, stop = 1)
"M" "B"  "S"

second_letter <- sapply(inputs, substr, start = 1, stop = 2)
"Ma"     "Ba"     "Sc"

Desired output:

All permutations of the first letters in every order, see the columns of variable "all_order" (see section "What i tried"). Also in both variations, so either take the first value of "first_letter" or first value "second_letter" but not both at the same time.

MBaS, MBS, MBSc, MBaSc MaBaS, MaBS, MaBSc, MaBaSc,

SBM, SBaM, SBaMa, SBaM ScBM, ScBaM, ScBaMa, ScBaM

BSM, BSMa, BScM, BScMa BaSM, BaSMa, BaScM, BaScMa,

.....

(Let me know if it is explained well enough.)

What i tried:

combs <- combn(rep(seq(inputs), 2), 3)
keep <- !colSums(apply(combs, 2, duplicated))
all_order <- combs[, keep]

all_order
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,]    1    1    1    1    2    2    3    1
[2,]    2    2    3    2    3    1    1    2
[3,]    3    3    2    3    1    3    2    3

CodePudding user response：

Maybe we can try the code below

d <- do.call(
  rbind,
  combn(
    c(first_letter, second_letter),
    3,
    pracma::perms,
    simplify = FALSE
  )
)

res <- do.call(
  paste0,
  data.frame(d)
)[apply(
  `dim<-`(match(substr(d, 1, 1), first_letter), dim(d)),
  1, 
  function(x) all(!duplicated(x))
)]

which gives

> res
 [1] "SBM"    "SMB"    "BSM"    "BMS"    "MBS"    "MSB"    "ScBM"   "ScMB"  
 [9] "BScM"   "BMSc"   "MBSc"   "MScB"   "BaSM"   "BaMS"   "SBaM"   "SMBa"
[17] "MSBa"   "MBaS"   "ScBaM"  "ScMBa"  "BaScM"  "BaMSc"  "MBaSc"  "MScBa"
[25] "MaSB"   "MaBS"   "SMaB"   "SBMa"   "BSMa"   "BMaS"   "ScMaB"  "ScBMa"
[33] "MaScB"  "MaBSc"  "BMaSc"  "BScMa"  "BaMaS"  "BaSMa"  "MaBaS"  "MaSBa"
[41] "SMaBa"  "SBaMa"  "ScBaMa" "ScMaBa" "BaScMa" "BaMaSc" "MaBaSc" "MaScBa"

CodePudding user response：

We'll use gtools::permutations to calculate the ... permutations of inputs, and then use expand.grid to show all combinations within them.

First, we can do it easily on one order of the inputs with:

expand.grid(c("M","Ma"), c("B","Ba"), c("S","Sc"))
#   Var1 Var2 Var3
# 1    M    B    S
# 2   Ma    B    S
# 3    M   Ba    S
# 4   Ma   Ba    S
# 5    M    B   Sc
# 6   Ma    B   Sc
# 7    M   Ba   Sc
# 8   Ma   Ba   Sc
do.call(paste, c(expand.grid(c("M","Ma"), c("B","Ba"), c("S","Sc")), sep = ""))
# [1] "MBS"    "MaBS"   "MBaS"   "MaBaS"  "MBSc"   "MaBSc"  "MBaSc"  "MaBaSc"

Now that's one order (M < B < S), now we need to rearrange them. We might call all orderings manually, or we can use gtools::permutations to help.

inputlist <- lapply(inputs, substring, 1, 1:2)
str(inputlist)
# List of 3
#  $ : chr [1:2] "M" "Ma"
#  $ : chr [1:2] "B" "Ba"
#  $ : chr [1:2] "S" "Sc"

perms <- gtools::permutations(3, 3)
perms
#      [,1] [,2] [,3]
# [1,]    1    2    3
# [2,]    1    3    2
# [3,]    2    1    3
# [4,]    2    3    1
# [5,]    3    1    2
# [6,]    3    2    1
inputlist[perms[2,]]
# [[1]]
# [1] "M"  "Ma"
# [[2]]
# [1] "S"  "Sc"
# [[3]]
# [1] "B"  "Ba"
inputlist[perms[3,]]
# [[1]]
# [1] "B"  "Ba"
# [[2]]
# [1] "M"  "Ma"
# [[3]]
# [1] "S"  "Sc"

Resulting in

allperms <- do.call(rbind,
  apply(gtools::permutations(3, 3), 1, 
        function(ind) do.call(expand.grid, inputlist[ind]))
)
head(allperms); tail(allperms)
#   Var1 Var2 Var3
# 1    M    B    S
# 2   Ma    B    S
# 3    M   Ba    S
# 4   Ma   Ba    S
# 5    M    B   Sc
# 6   Ma    B   Sc
#    Var1 Var2 Var3
# 43    S   Ba    M
# 44   Sc   Ba    M
# 45    S    B   Ma
# 46   Sc    B   Ma
# 47    S   Ba   Ma
# 48   Sc   Ba   Ma
do.call(paste, c(allperms, list(sep = "")))
#  [1] "MBS"    "MaBS"   "MBaS"   "MaBaS"  "MBSc"   "MaBSc"  "MBaSc"  "MaBaSc" "MSB"    "MaSB"   "MScB"  
# [12] "MaScB"  "MSBa"   "MaSBa"  "MScBa"  "MaScBa" "BMS"    "BaMS"   "BMaS"   "BaMaS"  "BMSc"   "BaMSc" 
# [23] "BMaSc"  "BaMaSc" "BSM"    "BaSM"   "BScM"   "BaScM"  "BSMa"   "BaSMa"  "BScMa"  "BaScMa" "SMB"   
# [34] "ScMB"   "SMaB"   "ScMaB"  "SMBa"   "ScMBa"  "SMaBa"  "ScMaBa" "SBM"    "ScBM"   "SBaM"   "ScBaM" 
# [45] "SBMa"   "ScBMa"  "SBaMa"  "ScBaMa"

CodePudding user response：

You can use the function permutations from package e1071 (Misc Functions of the Department of Statistics, Probability Theory Group (Formerly: E1071)).

library(e1071)

res <- c(substr(inputs,1,1), substr(inputs,1,2))
res
[1] "M"  "B"  "S"  "Ma" "Ba" "Sc"

perm <- unique(matrix(e1071::permutations(6), ncol=3))

# to exclude repetitions find 1,4 2,5 and 3,6
apply(matrix(res[perm], ncol=3), 1, paste, collapse="")[
  rowSums(cbind(rowSums(perm == 1 | perm == 4)==2,
                rowSums(perm == 2 | perm == 5)==2,
                rowSums(perm == 3 | perm == 6)==2))==0]
 [1] "MSBa"   "SMBa"   "SMaBa"  "MaSBa"  "SMaB"   "BMaS"   "SMB"    "SBM"   
 [9] "MBS"    "BMS"    "BSM"    "MSB"    "MaBS"   "MaSB"   "BSMa"   "SBMa"  
[17] "SBaM"   "MBaS"   "MaBaS"  "SBaMa"  "BaSMa"  "BaSM"   "BaMS"   "BaMaS" 
[25] "BMaSc"  "MBSc"   "BMSc"   "MaBSc"  "MBaSc"  "MaBaSc" "BaMSc"  "BaMaSc"
[33] "MaScB"  "BScM"   "MScB"   "BScMa"  "BaScM"  "BaScMa" "MScBa"  "MaScBa"
[41] "ScBMa"  "ScMB"   "ScBM"   "ScMaB"  "ScMBa"  "ScMaBa" "ScBaM"  "ScBaMa"

Alternatively you can also construct the permutations with base R

res <- c(substr(inputs,1,1), substr(inputs,1,2))
res
[1] "M"  "B"  "S"  "Ma" "Ba" "Sc"

perm <- as.matrix(expand.grid(1:6,1:6,1:6))

perm <- perm[colSums(apply(perm, 1, duplicated))==0,]

# to exclude repetitions find 1,4 2,5 and 3,6
apply(matrix(res[perm], ncol=3), 1, paste, collapse="")[
  rowSums(cbind(rowSums(perm == 1 | perm == 4)==2,
                rowSums(perm == 2 | perm == 5)==2,
                rowSums(perm == 3 | perm == 6)==2))==0]
 [1] "SBM"    "ScBM"   "BSM"    "BaSM"   "SBaM"   "ScBaM"  "BScM"   "BaScM" 
 [9] "SMB"    "ScMB"   "MSB"    "MaSB"   "SMaB"   "ScMaB"  "MScB"   "MaScB" 
[17] "BMS"    "BaMS"   "MBS"    "MaBS"   "BMaS"   "BaMaS"  "MBaS"   "MaBaS" 
[25] "SBMa"   "ScBMa"  "BSMa"   "BaSMa"  "SBaMa"  "ScBaMa" "BScMa"  "BaScMa"
[33] "SMBa"   "ScMBa"  "MSBa"   "MaSBa"  "SMaBa"  "ScMaBa" "MScBa"  "MaScBa"
[41] "BMSc"   "BaMSc"  "MBSc"   "MaBSc"  "BMaSc"  "BaMaSc" "MBaSc"  "MaBaSc"