Home > Back-end >  Javascript - how to generate all possible strings from a template?
Javascript - how to generate all possible strings from a template?

Time:10-04

I have a string template that looks like this A[B[C,D[E,F,G[A,B[C,D[E,F[G,H[M,N[O,P]]]]]]]],MMMMM]. Each set of [X,Y,Z] means either X, or Y, or Z, so A[B[C,D],E] would expand to ABC, ABD, AE. I'm trying to generate a list of all possible strings that match such a template.

I've tried the resursive approach like this:


const x = 'A[B[C,D[E,F,G[A,B[C,D[E,F[G,H[M,N[O,P]]]]]]]],MMMMM]';

function expand(template) {
    const matches = template.match(/^(.*)\[([^\[\]] )\](.*)$/);
    if (! matches) return template;
    const expanded = matches[2].split(',').map(x => `${matches[1]}${x}${matches[3]}`);
    return expanded.flatMap(option => expand(option));
}

console.log(expand(x));

But my resulting array is 768 elements long, where only 11 elements are unique. For a small template string this is fine - I can remove duplicates. But on a template of 500 characters it doesn't ever stop running.

How can I accomplish this?

CodePudding user response:

Most of this code is a straight-forward top-down recursive descent parser for templates. It suffers from a bit of code repetition, which I might fix at some point, having to do with the similarity between the top-level string and segments within bracketed expressions (what's called a segment is the part between commas).

It doesn't build an AST, which is possibly another weakness, but it seemed a bit overkill to build a tree; it just constructs lists of alternatives as it goes. It puts together the alternatives from a segment by computing the Cartesian product of the concatenated segments, and then it accumulates the segments in a bracketed list in order to return the complete list of alternatives to the caller.

function cartesianProduct(alts1, alts2) {
  rv = []
  for (const first of alts1)
    for (const second of alts2)
      rv.push(first   second)
  return rv
}

// This is really just a special case of cartesianProduct,
// so it's totally unnecessary.
function appendSuffix(alts, suffix) {
  for (let idx = 0; idx < alts.length;   idx)
    alts[idx]  = suffix
}

function expandTemplate(tmpl) {
  // The 'y' flag effectively makes a JS regular expression
  // into a tokenizer; the regexp itself keeps track of the
  // scan cursor, but it's up to the caller to ensure that the
  // same scanned string is supplied for every call. It's a bit
  // fragile, but it's very effective for simple cases. 
  //   Each token alternative has its own capture; in this case,
  // all of the metacharacters (comma and brackets) are in the
  // second capture as single characters, while the uninterpreted
  // strings are returned in the first capture. That makes it
  // fairly easy to tell what the token type is.
  const rgx = /([^\[\],] )|(.)/y

  // Internal function parses a single bracketed expression, and
  // returns all the alternatives defined (working recursively).
  // It's invoked when a [ is encountered, and it continues until
  // it finds the matching ], which it discards.
  function helper() {
    let rv = []
    let segment = ['']
    let match = null;
    while (match = rgx.exec(tmpl)) {
      if (match[1])
        appendSuffix(segment, match[1])
      else { 
        switch (match[2]) {
          case '[':
            segment = cartesianProduct(segment, helper())
            break
          case ',':
            rv.push(segment)
            segment = ['']
            break
          case ']':
            rv.push(segment)
            return [].concat(...rv)
        }
      }
    }
    throw 'Unexpected end of template; unmatched "["'
  }
  
  // The main function starts here. It's essentially the
  // same as the helper, except that it only parses a single
  // segment and expects to return when it reaches the end of
  // the input.
  let segment = ['']
  let match = null;
  while (match = rgx.exec(tmpl)) {
    if (match[1])
      appendSuffix(segment, match[1])
    else {
      switch (match[2]) {
        case '[':
          segment = cartesianProduct(segment, helper())
          break
        case ',':
          throw '"," used in top-level template'
        case ']':
          throw 'Unexpected "]"'
      }
    }
  }
  return segment
}


const x = 'A[B[C,D[E,F,G[A,B[C,D[E,F[G,H[M,N[O,P]]]]]]]],MMMMM]';
console.log(expandTemplate(x));
.as-console-wrapper { max-height: 100% !important; top: 0; }

CodePudding user response:

This one is really hacky but it just might and doesn't work. The idea is doing simple search and replace to make the input look like a json. Then parse it and print the paths of the leaves of that tree.

const x = 'A[B[C,D[E,F,G[A,B[C,D[E,F[G,H[M,N[O,P]]]]]]]],MMMMM]';

var str = x;
str = str.replace(/\[/g, ": {")
str = str.replace(/\]/g, "}")
str = str.replace(/,/g, ": null, ")
str = str.replace(/\}: null,/g, "}, ")
str = str.replace(/([a-zA-Z])\}/g, "$1: null}")
str = str.replace(/([a-zA-Z])$/g, "$1: null")
str = str.replace(/null/g, "{}")
// console.log(str)
var tree
eval("tree  = {"   str   "}")
//console.log(tree)
console.log(getPath(tree))

// from https://stackoverflow.com/a/44759609/3807365
function getPath(object) {
  function iter(o, p) {
    var keys = Object.keys(o);
    if (keys.length) {
      return keys.forEach(function(k) {
        iter(o[k], p   k);
      });
    }
    result.push(p);
  }

  var result = [];
  iter(object, []);
  return result;
}
.as-console-wrapper {
  max-height: 100% !important;
  top: 0;
}

  • Related