I have the following data:
inputs <- c("Master", "Bachelor", "School")
I would like to have all possible permutations of the first 1-2 letters of each word.
first_letter <- sapply(inputs, substr, start = 1, stop = 1)
"M" "B" "S"
second_letter <- sapply(inputs, substr, start = 1, stop = 2)
"Ma" "Ba" "Sc"
Desired output:
All permutations of the first letters in every order, see the columns of variable "all_order" (see section "What i tried"). Also in both variations, so either take the first value of "first_letter" or first value "second_letter" but not both at the same time.
MBaS, MBS, MBSc, MBaSc MaBaS, MaBS, MaBSc, MaBaSc,
SBM, SBaM, SBaMa, SBaM ScBM, ScBaM, ScBaMa, ScBaM
BSM, BSMa, BScM, BScMa BaSM, BaSMa, BaScM, BaScMa,
.....
(Let me know if it is explained well enough.)
What i tried:
combs <- combn(rep(seq(inputs), 2), 3)
keep <- !colSums(apply(combs, 2, duplicated))
all_order <- combs[, keep]
all_order
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1 1 1 1 2 2 3 1
[2,] 2 2 3 2 3 1 1 2
[3,] 3 3 2 3 1 3 2 3
CodePudding user response:
Maybe we can try the code below
d <- do.call(
rbind,
combn(
c(first_letter, second_letter),
3,
pracma::perms,
simplify = FALSE
)
)
res <- do.call(
paste0,
data.frame(d)
)[apply(
`dim<-`(match(substr(d, 1, 1), first_letter), dim(d)),
1,
function(x) all(!duplicated(x))
)]
which gives
> res
[1] "SBM" "SMB" "BSM" "BMS" "MBS" "MSB" "ScBM" "ScMB"
[9] "BScM" "BMSc" "MBSc" "MScB" "BaSM" "BaMS" "SBaM" "SMBa"
[17] "MSBa" "MBaS" "ScBaM" "ScMBa" "BaScM" "BaMSc" "MBaSc" "MScBa"
[25] "MaSB" "MaBS" "SMaB" "SBMa" "BSMa" "BMaS" "ScMaB" "ScBMa"
[33] "MaScB" "MaBSc" "BMaSc" "BScMa" "BaMaS" "BaSMa" "MaBaS" "MaSBa"
[41] "SMaBa" "SBaMa" "ScBaMa" "ScMaBa" "BaScMa" "BaMaSc" "MaBaSc" "MaScBa"
CodePudding user response:
We'll use gtools::permutations
to calculate the ... permutations of inputs
, and then use expand.grid
to show all combinations within them.
First, we can do it easily on one order of the inputs with:
expand.grid(c("M","Ma"), c("B","Ba"), c("S","Sc"))
# Var1 Var2 Var3
# 1 M B S
# 2 Ma B S
# 3 M Ba S
# 4 Ma Ba S
# 5 M B Sc
# 6 Ma B Sc
# 7 M Ba Sc
# 8 Ma Ba Sc
do.call(paste, c(expand.grid(c("M","Ma"), c("B","Ba"), c("S","Sc")), sep = ""))
# [1] "MBS" "MaBS" "MBaS" "MaBaS" "MBSc" "MaBSc" "MBaSc" "MaBaSc"
Now that's one order (M
< B
< S
), now we need to rearrange them. We might call all orderings manually, or we can use gtools::permutations
to help.
inputlist <- lapply(inputs, substring, 1, 1:2)
str(inputlist)
# List of 3
# $ : chr [1:2] "M" "Ma"
# $ : chr [1:2] "B" "Ba"
# $ : chr [1:2] "S" "Sc"
perms <- gtools::permutations(3, 3)
perms
# [,1] [,2] [,3]
# [1,] 1 2 3
# [2,] 1 3 2
# [3,] 2 1 3
# [4,] 2 3 1
# [5,] 3 1 2
# [6,] 3 2 1
inputlist[perms[2,]]
# [[1]]
# [1] "M" "Ma"
# [[2]]
# [1] "S" "Sc"
# [[3]]
# [1] "B" "Ba"
inputlist[perms[3,]]
# [[1]]
# [1] "B" "Ba"
# [[2]]
# [1] "M" "Ma"
# [[3]]
# [1] "S" "Sc"
Resulting in
allperms <- do.call(rbind,
apply(gtools::permutations(3, 3), 1,
function(ind) do.call(expand.grid, inputlist[ind]))
)
head(allperms); tail(allperms)
# Var1 Var2 Var3
# 1 M B S
# 2 Ma B S
# 3 M Ba S
# 4 Ma Ba S
# 5 M B Sc
# 6 Ma B Sc
# Var1 Var2 Var3
# 43 S Ba M
# 44 Sc Ba M
# 45 S B Ma
# 46 Sc B Ma
# 47 S Ba Ma
# 48 Sc Ba Ma
do.call(paste, c(allperms, list(sep = "")))
# [1] "MBS" "MaBS" "MBaS" "MaBaS" "MBSc" "MaBSc" "MBaSc" "MaBaSc" "MSB" "MaSB" "MScB"
# [12] "MaScB" "MSBa" "MaSBa" "MScBa" "MaScBa" "BMS" "BaMS" "BMaS" "BaMaS" "BMSc" "BaMSc"
# [23] "BMaSc" "BaMaSc" "BSM" "BaSM" "BScM" "BaScM" "BSMa" "BaSMa" "BScMa" "BaScMa" "SMB"
# [34] "ScMB" "SMaB" "ScMaB" "SMBa" "ScMBa" "SMaBa" "ScMaBa" "SBM" "ScBM" "SBaM" "ScBaM"
# [45] "SBMa" "ScBMa" "SBaMa" "ScBaMa"
CodePudding user response:
You can use the function permutations
from package e1071
(Misc Functions of the Department of Statistics, Probability
Theory Group (Formerly: E1071)).
library(e1071)
res <- c(substr(inputs,1,1), substr(inputs,1,2))
res
[1] "M" "B" "S" "Ma" "Ba" "Sc"
perm <- unique(matrix(e1071::permutations(6), ncol=3))
# to exclude repetitions find 1,4 2,5 and 3,6
apply(matrix(res[perm], ncol=3), 1, paste, collapse="")[
rowSums(cbind(rowSums(perm == 1 | perm == 4)==2,
rowSums(perm == 2 | perm == 5)==2,
rowSums(perm == 3 | perm == 6)==2))==0]
[1] "MSBa" "SMBa" "SMaBa" "MaSBa" "SMaB" "BMaS" "SMB" "SBM"
[9] "MBS" "BMS" "BSM" "MSB" "MaBS" "MaSB" "BSMa" "SBMa"
[17] "SBaM" "MBaS" "MaBaS" "SBaMa" "BaSMa" "BaSM" "BaMS" "BaMaS"
[25] "BMaSc" "MBSc" "BMSc" "MaBSc" "MBaSc" "MaBaSc" "BaMSc" "BaMaSc"
[33] "MaScB" "BScM" "MScB" "BScMa" "BaScM" "BaScMa" "MScBa" "MaScBa"
[41] "ScBMa" "ScMB" "ScBM" "ScMaB" "ScMBa" "ScMaBa" "ScBaM" "ScBaMa"
Alternatively you can also construct the permutations with base R
res <- c(substr(inputs,1,1), substr(inputs,1,2))
res
[1] "M" "B" "S" "Ma" "Ba" "Sc"
perm <- as.matrix(expand.grid(1:6,1:6,1:6))
perm <- perm[colSums(apply(perm, 1, duplicated))==0,]
# to exclude repetitions find 1,4 2,5 and 3,6
apply(matrix(res[perm], ncol=3), 1, paste, collapse="")[
rowSums(cbind(rowSums(perm == 1 | perm == 4)==2,
rowSums(perm == 2 | perm == 5)==2,
rowSums(perm == 3 | perm == 6)==2))==0]
[1] "SBM" "ScBM" "BSM" "BaSM" "SBaM" "ScBaM" "BScM" "BaScM"
[9] "SMB" "ScMB" "MSB" "MaSB" "SMaB" "ScMaB" "MScB" "MaScB"
[17] "BMS" "BaMS" "MBS" "MaBS" "BMaS" "BaMaS" "MBaS" "MaBaS"
[25] "SBMa" "ScBMa" "BSMa" "BaSMa" "SBaMa" "ScBaMa" "BScMa" "BaScMa"
[33] "SMBa" "ScMBa" "MSBa" "MaSBa" "SMaBa" "ScMaBa" "MScBa" "MaScBa"
[41] "BMSc" "BaMSc" "MBSc" "MaBSc" "BMaSc" "BaMaSc" "MBaSc" "MaBaSc"