Earlier I needed to write an algorithm for my work, and I wrote it,but it was pretty hard for me, and algorithm is obviously not in the best realization, so I would to ask professionals how it can be upgraded, where I had issues, some tricks.
Given
Pattern, that describes strings variations: abc[de[f,g],hk]
, which gives
abcdef
abcdeg
abchk
Pattern consists of "arrays", that followed by strings: abc[...]
, and strings adj,kg,q
Another possible more complex example: utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]
.
Conditions
- Strings itself can contain only letters and numbers. There couldn't be
abc[h\,k,b]
orabc[h\[k,b]
that givesabch,k
orabch[k
. - "Arrays" always not empty, and has at least 2 elements.
- There can be any order of "array", or "only string" value, i.e.:
abc[a,b[c,d]]
orabc[a[b,c],d]
. The order is strict from left to right, there can not be from patternabc[d,e]
combinationseabc
ordabc
. abc[d,e]
doesn't givesabcde
norabced
string, onlyabcd
andabce
.- Pattern always starts with string with array:
something[...]
. - There can be string without array:
abc[a,bc[d,f]]
, but array without string is not allowed:abc[a,[d,f]]
. - There can be an empty string, i.e.:
a[,b]
, that givesa
andab
My solution
function getStrings(pat) {
if(pat.indexOf('[') == -1)
return pat;
String.prototype.insert = function(index, string) {
if (index > 0) {
return this.substring(0, index) string this.substr(index);
}
return string this;
};
function getArray(str, start, isSource = false) {
if (start < 0) return null;
var n = 0;
var ret = "";
var i = start;
for (; i < str.length; i ) {
if (str[i] == "[") n ;
else if (str[i] == "]") n--;
if (n == 0) break;
}
var ret = {
str: "",
arr: "",
end: 0,
};
ret.arr = str.slice(start, i) "]";
ret.end = i;
start--;
var end = start;
for (
;
start > 0 &&
str[start] != "," &&
str[start] != "]" &&
str[start] != "[";
start--
) {}
if(!isSource)
start ;
end ;
ret.str = str.slice(start, end);
return ret;
}
function getElement(source, start) {
var ret = [];
start ;
for (
;
start < source.length && source[start] != "," && source[start] != "]";
start
)
ret[ret.length] = source[start];
return ret;
}
var source = getArray(pat, pat.indexOf("["), true); // parsing
var ar = source.arr;
source.arrs = getArrays(source); // parsing
source.source = true;
var fi = "";
var temp_out = [];
var tol = 0;
return getVariations(source); // getting variations of parsed
function getVariations(source) {
if (source.arrs == undefined) {
} else
for (var i = 0; i < source.arrs.length; i ) {
if (source.source) fi = source.str;
if (!source.arrs[i].arrs) {
temp_out[tol] = fi source.arrs[i].str;
tol ;
} else {
var lafi = fi;
fi = source.arrs[i].str;
getVariations(source.arrs[i]);
if(i != source.arrs.length - 1)
fi = lafi;
}
if (source.source && i == source.arrs.length - 1) {
var temp = temp_out;
temp_out = [];
tol = 0;
return temp;
}
}
}
function getArrays(source) {
var la = 1;
var start = 0;
var arrs = [];
if (!source.arr) return;
while (start != -1) {
start = source.arr.indexOf("[", la);
var qstart = source.arr.indexOf(",", la);
if(source.arr[la] == ',')
qstart = source.arr.indexOf(",", la 1);
var pu = false;
if(qstart != la && qstart != -1 && qstart < start && start != -1)
{
pu = true;
var str = source.arr;
var buf = [];
qstart--;
var i = -1;
for(i = qstart; i > 0 && str[i] != '[' && str[i] != ','; i--)
{}
i ;
for(; i < str.length && str[i]!= ','; i )
{
buf[buf.length] = str[i];
}
if(buf.length == 0)
{
la = start;
alert("1!")
}
else
{
buf = buf.join('');
arrs[arrs.length] = {str:buf};
la = buf.length 1;
}
}
else
if (start != -1) {
arrs[arrs.length] = getArray(source.arr, start);
la = arrs[arrs.length - 1].end 1;
} else {
start = source.arr.indexOf(",", la);
if (start != -1) {
var ret = getElement(source.arr, start);
arrs[arrs.length] = ret;
la = ret.length;
}
}
}
for (var i = 0; i < arrs.length; i )
if (typeof arrs[i] != "string" && arrs[i].arr) {
arrs[i].arrs = getArrays(arrs[i]);
var st = arrs[i].arr;
if (occ(arrs[i].arr, "[") == 1 && occ(arrs[i].arr, "]") == 1) {
st = st.replaceAll("[", '["');
st = st.replaceAll("]", '"]');
st = st.replaceAll(",", '","');
st = JSON.parse(st);
for (var j = 0; j < st.length; j ) st[j] = { str: st[j] };
arrs[i].arrs = st;
}
} else if (typeof arrs[i] == "string") {
arrs[i] = { str: arrs[i] };
}
RecursArrs(arrs);
return arrs;
}
function RecursArrs(arrs) {
for (var i = 0; i < arrs.length; i ) {
if (!arrs[i].source)
if (arrs[i].arr) {
delete arrs[i].arr;
delete arrs[i].end;
}
if (!arrs[i].str) {
try{
arrs[i] = { str: arrs[i].join("") };
}catch(er)
{
arrs[i] = {str:''};
}
if (i && arrs[i - 1].str == arrs[i].str) {
arrs.splice(i, 1);
i--;
}
} else if (arrs[i].arrs) RecursArrs(arrs[i].arrs);
}
}
function occ(string, word) {
return string.split(word).length - 1;
}
}
// getStrings('IE5E[COR[R[,G[A,E,I]],S,T,U,V,W,X,Y,Z],EORRG[I,M]]')
CodePudding user response:
I would use a regular expression to break up the input into tokens. In this case I chose to take pairs of (letters, delimiter), where the delimiter is one of "["
, "]"
, ","
. The letters part could be empty.
Then I would use a recursive function like you did, but I went for a recursive generator function.
Here is the suggested implementation:
function* getStrings(pattern) {
const tokens = pattern.matchAll(/([^[\],]*)([[\],])?/g);
function* dfs(recur=false) {
let expectToken = true;
while (true) {
const [, token, delim] = tokens.next().value;
if (delim === "[") {
for (const deep of dfs(true)) yield token deep;
} else {
if (token || expectToken) yield token;
if (delim === "]" && !recur) throw "Invalid pattern: too many `]`";
if (!delim && recur) throw "Invalid pattern: missing `]`";
if (delim !== ",") return;
}
expectToken = delim !== "["; // After [...] we don't expect a letter
}
}
yield* dfs();
}
const input = 'IE5E[COR[R[,G[A,E,I]],S,T,U,V,W,X,Y,Z],EORRG[I,M]]';
for (const s of getStrings(input))
console.log(s);
This implementation should match the patterns according to the given restrictions, but it will also allow the following:
- An "array" can start without a prefix of letters. So
[a,b]
is allowed and will produce the same output asa,b
. - An "array" may be followed immediately by letters or a new "array", but this will be interpreted as if they were separated by a comma. So
x[a,b]c
will be interpreted asx[a,b],c
- An "array" can be empty. In that case the array is ignored. So
x[]
is the same asx
.
There is some basic error checking: an error will be generated when the brackets are not balanced.
CodePudding user response:
We can do this in an inside-out fashion. If we replace the innermost group (e.g. 'de[fg]'
with its expansion, 'def,deg'
, and recur until there are no more groups remaining, we will have created a comma-separated list of final strings, which we can simply split apart and return.
const _expand = (
s,
match = s .match (/(.*?)(\w*)\[([^\[\]] )\](.*)/),
[_, a, b, c, d] = match || []
) => match ? _expand (a c .split (',') .map (x => b x) .join (',') d) : s
const expand = (s) => _expand (s) .split (',')
console .log (expand ('abc[de[f,g],hk]'))
console .log (expand ('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'))
.as-console-wrapper {max-height: 100% !important; top: 0}
Our main recursive function -- _expand
-- uses a regular expression that extracts the first group, and breaks it into constituent parts, and puts it back together by mapping over the parts of the array. Then our public function, expand
simply calls the recursive one and splits the result into an array.
For example, this is how the recursive calls would be handled for the string, 'utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'
:
'utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]' //-->
// ^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[t[ij,lo[z,x]],bm]]' //-->
// ^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[t[ij,loz,lox],bm]]' //-->
// ^^^^^^^^^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[tij,tloz,tlox,bm]]' //-->
// ^^^^^^^^^^^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,qtij,qtloz,qtlox,qbm]' //-->
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
'utvkfvu,utvkgnu,utvkgnk,utvkgnr,utvknl,utvkqtij,utvkqtloz,utvkqtlox,utvkqbm'
Update: Regex explanation:
The regex used here can be broken down into six sections:
(.*?)
: captures (non-greedy) an initial set of characters, stored asa
(\w*)
: captures our letters before an opening brace, stored asb
\[
: captures an opening brace ([
)([^\[\]] )
: captures everything but braces ([
or]
), stored asc
\]
: captures a closing brace (]
)(.*)
: captures everything after the closing brace, stored asd
The point is for the group inside the braces to include no other braces. An example might look like this:
utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]
`---- ---'\/|`- -'|`---------- -----------'
\ | \ \ \__ \
| \ \_ \__ \____ \
a: (.*?) \_ \_ \ \ \
~~~~~ | \ \__ \ \
b: (\w*) | \ \ \
~~~~~ | \ \ \
[: \[ | \ \
~~ | \ \
c: ([^\[\]] ) \ \
~~~~~~~~~~ | |
]: \] |
~~ |
d: (.*)
~~~~