I have a string template that looks like this A[B[C,D[E,F,G[A,B[C,D[E,F[G,H[M,N[O,P]]]]]]]],MMMMM]
. Each set of [X,Y,Z]
means either X, or Y, or Z, so A[B[C,D],E]
would expand to ABC
, ABD
, AE
. I'm trying to generate a list of all possible strings that match such a template.
I've tried the resursive approach like this:
const x = 'A[B[C,D[E,F,G[A,B[C,D[E,F[G,H[M,N[O,P]]]]]]]],MMMMM]';
function expand(template) {
const matches = template.match(/^(.*)\[([^\[\]] )\](.*)$/);
if (! matches) return template;
const expanded = matches[2].split(',').map(x => `${matches[1]}${x}${matches[3]}`);
return expanded.flatMap(option => expand(option));
}
console.log(expand(x));
But my resulting array is 768 elements long, where only 11 elements are unique. For a small template string this is fine - I can remove duplicates. But on a template of 500 characters it doesn't ever stop running.
How can I accomplish this?
CodePudding user response:
Most of this code is a straight-forward top-down recursive descent parser for templates. It suffers from a bit of code repetition, which I might fix at some point, having to do with the similarity between the top-level string and segments within bracketed expressions (what's called a segment is the part between commas).
It doesn't build an AST, which is possibly another weakness, but it seemed a bit overkill to build a tree; it just constructs lists of alternatives as it goes. It puts together the alternatives from a segment by computing the Cartesian product of the concatenated segments, and then it accumulates the segments in a bracketed list in order to return the complete list of alternatives to the caller.
function cartesianProduct(alts1, alts2) {
rv = []
for (const first of alts1)
for (const second of alts2)
rv.push(first second)
return rv
}
// This is really just a special case of cartesianProduct,
// so it's totally unnecessary.
function appendSuffix(alts, suffix) {
for (let idx = 0; idx < alts.length; idx)
alts[idx] = suffix
}
function expandTemplate(tmpl) {
// The 'y' flag effectively makes a JS regular expression
// into a tokenizer; the regexp itself keeps track of the
// scan cursor, but it's up to the caller to ensure that the
// same scanned string is supplied for every call. It's a bit
// fragile, but it's very effective for simple cases.
// Each token alternative has its own capture; in this case,
// all of the metacharacters (comma and brackets) are in the
// second capture as single characters, while the uninterpreted
// strings are returned in the first capture. That makes it
// fairly easy to tell what the token type is.
const rgx = /([^\[\],] )|(.)/y
// Internal function parses a single bracketed expression, and
// returns all the alternatives defined (working recursively).
// It's invoked when a [ is encountered, and it continues until
// it finds the matching ], which it discards.
function helper() {
let rv = []
let segment = ['']
let match = null;
while (match = rgx.exec(tmpl)) {
if (match[1])
appendSuffix(segment, match[1])
else {
switch (match[2]) {
case '[':
segment = cartesianProduct(segment, helper())
break
case ',':
rv.push(segment)
segment = ['']
break
case ']':
rv.push(segment)
return [].concat(...rv)
}
}
}
throw 'Unexpected end of template; unmatched "["'
}
// The main function starts here. It's essentially the
// same as the helper, except that it only parses a single
// segment and expects to return when it reaches the end of
// the input.
let segment = ['']
let match = null;
while (match = rgx.exec(tmpl)) {
if (match[1])
appendSuffix(segment, match[1])
else {
switch (match[2]) {
case '[':
segment = cartesianProduct(segment, helper())
break
case ',':
throw '"," used in top-level template'
case ']':
throw 'Unexpected "]"'
}
}
}
return segment
}
const x = 'A[B[C,D[E,F,G[A,B[C,D[E,F[G,H[M,N[O,P]]]]]]]],MMMMM]';
console.log(expandTemplate(x));
.as-console-wrapper { max-height: 100% !important; top: 0; }
CodePudding user response:
This one is really hacky but it just might and doesn't work. The idea is doing simple search and replace to make the input look like a json. Then parse it and print the paths of the leaves of that tree.
const x = 'A[B[C,D[E,F,G[A,B[C,D[E,F[G,H[M,N[O,P]]]]]]]],MMMMM]';
var str = x;
str = str.replace(/\[/g, ": {")
str = str.replace(/\]/g, "}")
str = str.replace(/,/g, ": null, ")
str = str.replace(/\}: null,/g, "}, ")
str = str.replace(/([a-zA-Z])\}/g, "$1: null}")
str = str.replace(/([a-zA-Z])$/g, "$1: null")
str = str.replace(/null/g, "{}")
// console.log(str)
var tree
eval("tree = {" str "}")
//console.log(tree)
console.log(getPath(tree))
// from https://stackoverflow.com/a/44759609/3807365
function getPath(object) {
function iter(o, p) {
var keys = Object.keys(o);
if (keys.length) {
return keys.forEach(function(k) {
iter(o[k], p k);
});
}
result.push(p);
}
var result = [];
iter(object, []);
return result;
}
.as-console-wrapper {
max-height: 100% !important;
top: 0;
}