Home > other >  How to write a parser to construct the JavaScript AST MemberExpression for the expression a.b[c.d][e
How to write a parser to construct the JavaScript AST MemberExpression for the expression a.b[c.d][e

Time:11-24

Following along the lines of How to clearly represent a.b[c.d][e].f[g[h[i.j]]] as an object tree?, how would you write an algorithm to generate that JS AST from the expression a.b[c.d][e].f[g[h[i.j]]]? I am trying to write a parser to generate some sort of object structure from this expression (ideally more intuitive than the JS AST MemberExpression one, hence that other question). I would like to see how the algorithm works to construct the JavaScript MemberExpression tree.

Currently I have this sort of algorithm to generate some sort of tree (but it seems to be incorrect currently):

const patterns = [
  [/^[a-z][a-z0-9]*(?:-[a-z0-9] )*/, 'name'],
  [/^\[/, 'open'],
  [/^\]/, 'close'],
  [/^\./, 'stem']
]

console.log(parsePath('a.b[c.d][e].f[g[h[i.j]]]'))

function parsePath(str) {
  let node
  let nest = []
  let result = nest
  let stack = [nest]
  while (str.length) {
    nest = stack[stack.length - 1]
    p:
    for (let pattern of patterns) {
      let match = str.match(pattern[0])
      if (match) {
        if (pattern[1] === 'name') {
          node = {
            form: `term`,
            name: match[0],
            link: []
          }
          nest.push(node)
        } else if (pattern[1] === 'stem') {
          stack.push(node.link)
        } else if (pattern[1] === 'open') {
          node = {
            form: 'read',
            link: []
          }
          nest.push(node)
          stack.push(node.link)
        } else if (pattern[1] === 'close') {
          stack.pop()
        }

        str = str.substr(match[0].length)
        break p
      }
    }
  }
  return result[0]
}
<iframe name="sif1" sandbox="allow-forms allow-modals allow-scripts" frameborder="0"></iframe>

The desired result is this (or a better, more intuitive data structure if you are so inclined to create one):

{
  "type": "MemberExpression",
  "object": {
    "type": "MemberExpression",
    "object": {
      "type": "MemberExpression",
      "object": {
        "type": "MemberExpression",
        "object": {
          "type": "MemberExpression",
          "object": {
            "type": "Identifier",
            "name": "a"
          },
          "property": {
            "type": "Identifier",
            "name": "b"
          },
          "computed": false
        },
        "property": {
          "type": "MemberExpression",
          "object": {
            "type": "Identifier",
            "name": "c"
          },
          "property": {
            "type": "Identifier",
            "name": "d"
          },
          "computed": false
        },
        "computed": true
      },
      "property": {
        "type": "Identifier",
        "name": "e"
      },
      "computed": true
    },
    "property": {
      "type": "Identifier",
      "name": "f"
    },
    "computed": false
  },
  "property": {
    "type": "MemberExpression",
    "object": {
      "type": "Identifier",
      "name": "g"
    },
    "property": {
      "type": "MemberExpression",
      "object": {
        "type": "Identifier",
        "name": "h"
      },
      "property": {
        "type": "MemberExpression",
        "object": {
          "type": "Identifier",
          "name": "i"
        },
        "property": {
          "type": "Identifier",
          "name": "j"
        },
        "computed": false
      },
      "computed": true
    },
    "computed": true
  },
  "computed": true
}

The reason I'm struggling (partially) is I don't like this MemberExpression tree structure, it's backward feeling and not very intuitive. So if you could construct a simpler more straightforward data structure that would be ideal (that was the other question), but if not then just an algorithm to construct this would get me going.

Personally, I would rather try to generate this structure, as I find it more intuitive:

{
  type: 'site',
  site: [
    {
      type: 'term',
      term: 'a'
    },
    {
      type: 'term',
      term: 'b'
    },
    {
      type: 'sink',
      sink: [
        {
          type: 'term',
          term: 'c'
        },
        {
          type: 'term',
          term: 'd'
        }
      ]
    },
    {
      type: 'sink',
      sink: [
        {
          type: 'term',
          term: 'e'
        }
      ]
    },
    {
      type: 'term',
      term: 'f'
    },
    {
      type: 'sink',
      sink: [
        {
          type: 'term',
          term: 'g'
        },
        {
          type: 'sink',
          sink: [
            {
              type: 'term',
              term: 'h'
            },
            {
              type: 'sink',
              sink: [
                {
                  type: 'term',
                  term: 'i'
                },
                {
                  type: 'term',
                  term: 'j'
                }
              ]
            }
          ]
        }
      ]
    }
  ]
}

But either one works for me (or both).

If we go with the second one, my next problem will be how to convert that data structure into the MemberExpression tree/data structure :) But I'll try and do that myself first. So it's probably better to construct the MemberExpression in this question, then I can work off that.

CodePudding user response:

  1. Separate the string into groups of object and properties of the first level, like

    [
        "a",
        "b",
        "[c.d]",
        "[e]",
        "f",
        "[g[h[i.j]]]"
    ]
    
  2. Get the object

    1. Take the last item as property.
    2. Check if the property start with bracket then set computed to true and strip the property from the surrounding brackets.
    3. Return an object with
      • type: "MemberExpression"
      • object with the object (2.)
      • property with the result of calling the main function getAST (1.)

function getAST(string) {

    function getObject(parts) {
        if (parts.length === 1) return { type: "Identifier", name: parts[0] };

        let property = parts.pop(),
            computed = false;

        if (property.startsWith('[')) {
            computed = true;
            property = property.slice(1, -1);
        }

        return {
            type: "MemberExpression",
            object: getObject(parts),
            property: getAST(property),
            computed
        };
    }

    let i = 0,
        dot,
        bracket,
        parts = [];

    while (i < string.length) {
        dot = string.indexOf('.', i);
        bracket = string.indexOf('[', i);

        if (dot !== -1 && (bracket === -1 || dot < bracket)) {
            const temp = string.slice(i, dot);
            if (temp) parts.push(temp);
            i = dot   1;
            continue;
        }

        if (bracket !== -1 && (dot === -1 || bracket < dot)) {
            const temp = string.slice(i, bracket);
            if (temp) parts.push(temp);
            i = bracket;

            let open = 1,
                j = i;

            while (  j < string.length) {
                if (string[j] === '[') open  ;
                if (string[j] === ']') open--;
                if (!open) break;
            }

            j  ;
            parts.push(string.slice(i, j));

            i = j;
            continue;
        }
        parts.push(string.slice(i));
        break;
    }

    return getObject(parts);
}

console.log(getAST('a.b[c.d][e].f[g[h[i.j]]]'));
.as-console-wrapper { max-height: 100% !important; top: 0; }
<iframe name="sif2" sandbox="allow-forms allow-modals allow-scripts" frameborder="0"></iframe>

  • Related