Improving combinations from abc[d[e,f],gh] pattern algorithm-CodePudding

Earlier I needed to write an algorithm for my work, and I wrote it,but it was pretty hard for me, and algorithm is obviously not in the best realization, so I would to ask professionals how it can be upgraded, where I had issues, some tricks.

Given

Pattern, that describes strings variations: abc[de[f,g],hk], which gives

abcdef
abcdeg
abchk

Pattern consists of "arrays", that followed by strings: abc[...], and strings adj,kg,q

Another possible more complex example: utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]].

Conditions

Strings itself can contain only letters and numbers. There couldn't be abc[h\,k,b] or abc[h\[k,b] that gives abch,k or abch[k.
"Arrays" always not empty, and has at least 2 elements.
There can be any order of "array", or "only string" value, i.e.: abc[a,b[c,d]] or abc[a[b,c],d]. The order is strict from left to right, there can not be from pattern abc[d,e] combinations eabc or dabc.
abc[d,e] doesn't gives abcde nor abced string, only abcd and abce.
Pattern always starts with string with array: something[...].
There can be string without array: abc[a,bc[d,f]], but array without string is not allowed: abc[a,[d,f]].
There can be an empty string, i.e.: a[,b], that gives a and ab

My solution

function getStrings(pat) {
    if(pat.indexOf('[') == -1)
    return pat;

        String.prototype.insert = function(index, string) {
        if (index > 0) {
            return this.substring(0, index)   string   this.substr(index);
        }
    
        return string   this;
        };
    
        function getArray(str, start, isSource = false) {
        if (start < 0) return null;
    
        var n = 0;
        var ret = "";
        var i = start;
    
        for (; i < str.length; i  ) {
            if (str[i] == "[") n  ;
            else if (str[i] == "]") n--;
    
            if (n == 0) break;
        }
    
        var ret = {
            str: "",
            arr: "",
            end: 0,
        };
        ret.arr = str.slice(start, i)   "]";
        ret.end = i;
    
        start--;
        var end = start;
        for (
            ;
            start > 0 &&
            str[start] != "," &&
            str[start] != "]" &&
            str[start] != "[";
            start--
        ) {}
    
        if(!isSource)
        start  ;
        end  ;
    
        ret.str = str.slice(start, end);
    
        return ret;
        }
    
        function getElement(source, start) {
        var ret = [];
        start  ;
    
        for (
            ;
            start < source.length && source[start] != "," && source[start] != "]";
            start  
        )
            ret[ret.length] = source[start];
    
        return ret;
        }
    
        var source = getArray(pat, pat.indexOf("["), true); // parsing
    
        var ar = source.arr;
    
        source.arrs = getArrays(source); // parsing
        source.source = true;
        
    
        var fi = "";
        var temp_out = [];
        var tol = 0;
    
        return getVariations(source); // getting variations of parsed
    
    
        function getVariations(source) {
        if (source.arrs == undefined) {
        } else
            for (var i = 0; i < source.arrs.length; i  ) {
            if (source.source) fi = source.str;
    
            if (!source.arrs[i].arrs) {
                temp_out[tol] = fi   source.arrs[i].str;
                tol  ;
            } else {
                var lafi = fi;
                fi  = source.arrs[i].str;
    
                getVariations(source.arrs[i]);
                
                if(i != source.arrs.length - 1)
                fi = lafi;
            }
    
            if (source.source && i == source.arrs.length - 1) {
                var temp = temp_out;
                temp_out = [];
                tol = 0;
                return temp;
            }
            }
        }
    
        function getArrays(source) {
        var la = 1;
        var start = 0;
        var arrs = [];
    
        if (!source.arr) return;
    
        while (start != -1) {
            start = source.arr.indexOf("[", la);
            var qstart = source.arr.indexOf(",", la);
            if(source.arr[la] == ',')
            qstart = source.arr.indexOf(",", la 1);
    
            var pu = false;
    
    
            if(qstart != la && qstart != -1 && qstart < start && start != -1)
            {
            pu = true;
            var str = source.arr;
            var buf = [];
            qstart--;
            var i = -1;
    
            for(i = qstart; i > 0 && str[i] != '[' && str[i] != ','; i--)
            {}
            i  ;
    
            for(; i < str.length && str[i]!= ','; i  )
            {
                buf[buf.length] = str[i];
            }
    
            if(buf.length == 0)
            {
                la = start;
                alert("1!")
            }
            else
            {
                
                buf = buf.join('');
                arrs[arrs.length] = {str:buf};
                la  = buf.length 1;
            }
            }
            else
            if (start != -1) {
            arrs[arrs.length] = getArray(source.arr, start);
            la = arrs[arrs.length - 1].end   1;
            } else {
            
            start = source.arr.indexOf(",", la);
            if (start != -1) {
                var ret = getElement(source.arr, start);
                arrs[arrs.length] = ret;
                la  = ret.length;
            }
            }
        }
    
    
        for (var i = 0; i < arrs.length; i  )
            if (typeof arrs[i] != "string" && arrs[i].arr) {
            arrs[i].arrs = getArrays(arrs[i]);
            var st = arrs[i].arr;
    
            if (occ(arrs[i].arr, "[") == 1 && occ(arrs[i].arr, "]") == 1) {
                st = st.replaceAll("[", '["');
                st = st.replaceAll("]", '"]');
                st = st.replaceAll(",", '","');
                st = JSON.parse(st);
    
                for (var j = 0; j < st.length; j  ) st[j] = { str: st[j] };
                arrs[i].arrs = st;
            }
            } else if (typeof arrs[i] == "string") {
            arrs[i] = { str: arrs[i] };
            }
    
        RecursArrs(arrs);
    
        return arrs;
        }
    
        function RecursArrs(arrs) {
        for (var i = 0; i < arrs.length; i  ) {
            if (!arrs[i].source)
            if (arrs[i].arr) {
                delete arrs[i].arr;
                delete arrs[i].end;
            }
            if (!arrs[i].str) {
                try{
            arrs[i] = { str: arrs[i].join("") };
                }catch(er)
                {
                    arrs[i] = {str:''};
                }
            if (i && arrs[i - 1].str == arrs[i].str) {
                arrs.splice(i, 1);
                i--;
            }
            } else if (arrs[i].arrs) RecursArrs(arrs[i].arrs);
        }
        }
    
        function occ(string, word) {
        return string.split(word).length - 1;
        }
    
}

// getStrings('IE5E[COR[R[,G[A,E,I]],S,T,U,V,W,X,Y,Z],EORRG[I,M]]')

CodePudding user response：

I would use a regular expression to break up the input into tokens. In this case I chose to take pairs of (letters, delimiter), where the delimiter is one of "[", "]", ",". The letters part could be empty.

Then I would use a recursive function like you did, but I went for a recursive generator function.

Here is the suggested implementation:

function* getStrings(pattern) {
    const tokens = pattern.matchAll(/([^[\],]*)([[\],])?/g);
    
    function* dfs(recur=false) {
        let expectToken = true;
        while (true) {
            const [, token, delim] = tokens.next().value;
            if (delim === "[") {
                for (const deep of dfs(true)) yield token   deep;
            } else {
                if (token || expectToken) yield token;
                if (delim === "]" && !recur) throw "Invalid pattern: too many `]`";
                if (!delim && recur) throw "Invalid pattern: missing `]`";
                if (delim !== ",") return;
            }
            expectToken = delim !== "["; // After [...] we don't expect a letter
        }
    }
    yield* dfs();
}


const input = 'IE5E[COR[R[,G[A,E,I]],S,T,U,V,W,X,Y,Z],EORRG[I,M]]';
for (const s of getStrings(input))
    console.log(s);

This implementation should match the patterns according to the given restrictions, but it will also allow the following:

An "array" can start without a prefix of letters. So [a,b] is allowed and will produce the same output as a,b.
An "array" may be followed immediately by letters or a new "array", but this will be interpreted as if they were separated by a comma. So x[a,b]c will be interpreted as x[a,b],c
An "array" can be empty. In that case the array is ignored. So x[] is the same as x.

There is some basic error checking: an error will be generated when the brackets are not balanced.

CodePudding user response：

We can do this in an inside-out fashion. If we replace the innermost group (e.g. 'de[fg]' with its expansion, 'def,deg', and recur until there are no more groups remaining, we will have created a comma-separated list of final strings, which we can simply split apart and return.

const _expand = (
  s, 
  match = s .match (/(.*?)(\w*)\[([^\[\]] )\](.*)/), 
  [_, a, b, c, d] = match || []
) => match ? _expand (a   c .split (',') .map (x => b   x) .join (',')   d) : s

const expand = (s) => _expand (s) .split (',')

console .log (expand ('abc[de[f,g],hk]'))
console .log (expand ('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'))

.as-console-wrapper {max-height: 100% !important; top: 0}

Our main recursive function -- _expand -- uses a regular expression that extracts the first group, and breaks it into constituent parts, and puts it back together by mapping over the parts of the array. Then our public function, expand simply calls the recursive one and splits the result into an array.

For example, this is how the recursive calls would be handled for the string, 'utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]':

'utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'    //-->
//        ^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[t[ij,lo[z,x]],bm]]'  //-->
//                              ^^^^^^^  
'utvk[fvu,gnu,gnk,gnr,nl,q[t[ij,loz,lox],bm]]'  //-->
//                         ^^^^^^^^^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[tij,tloz,tlox,bm]]'  //-->
//                       ^^^^^^^^^^^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,qtij,qtloz,qtlox,qbm]' //-->
//   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
'utvkfvu,utvkgnu,utvkgnk,utvkgnr,utvknl,utvkqtij,utvkqtloz,utvkqtlox,utvkqbm'

Update: Regex explanation:

The regex used here can be broken down into six sections:

(.*?): captures (non-greedy) an initial set of characters, stored as a
(\w*): captures our letters before an opening brace, stored as b
\[: captures an opening brace ([)
([^\[\]] ): captures everything but braces ([ or ]), stored as c
\]: captures a closing brace (])
(.*): captures everything after the closing brace, stored as d

The point is for the group inside the braces to include no other braces. An example might look like this:

    utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]
   `---- ---'\/|`- -'|`---------- -----------'
        \     | \  \  \__         \
         |    \  \_ \__  \____     \
a:     (.*?)   \_  \_  \       \    \
       ~~~~~     |   \  \__     \    \
b:             (\w*)  |    \     \    \
               ~~~~~  |     \     \    \
[:                    \[     |     \    \
                      ~~     |      \    \
c:                      ([^\[\]] )   \    \
                        ~~~~~~~~~~   |     |
]:                                   \]    |
                                     ~~    |
d:                                        (.*)
                                          ~~~~