Home > Back-end >  Convert a string with a math formula to an object tree?
Convert a string with a math formula to an object tree?

Time:01-01

I am looking for a function that converts a math string passed as an argument (with the operations , -, /, *), that returns an object that contains pieces of the math string, in an ordered/better way, which is easier to traverse.

Characteristics of the input:

  • is a string containing a formula
  • the formula doesn't have =, so it isn't an equation
  • there isn't any floating number, just integers
  • the integers can be positive or negative
  • no variables like x, y, z
  • can include parentheses

Test cases

Example 1: Basic Math (same operation)

N Input (string) Output (object)
1 1 { values: [1], operation: null }
2 1 1 { values: [1,1], operation: " " }
3 1 2 3 { values: [1,2,3], operation: " " }
4 3-2-1 { values: [3,2,1], operation: "-" }
5 10*80 { values: [10,80], operation: "*" }
6 100/10 { values: [100,10], operation: "/" }

Example 2: Formula with 2 operations

➡️ and - samples

N1

input: 1 1-1

output:

{
    values: [
      {
        values: [1, 1],
        operation: " ",
      },
      1,
    ],
    operation: "-",
};

N2

input: 3 2-1 5

output:

{
    values: [
      {
        values: [
          {
            values: [3, 2],
            operation: " ",
          },
          1,
        ],
        operation: "-",
      },
      5,
    ],
    operation: " ",
};

N3

input: 3 2-1 5 10 7

output:

{
    values: [
      {
        values: [
          {
            values: [3, 2],
            operation: " ",
          },
          1,
        ],
        operation: "-",
      },
      5,
      10,
      7
    ],
    operation: " ",
};

➡️ and / samples

N4

input: 1 2/3

output:

{
    values: [
      1,
      {
        values: [2, 3],
        operation: "/",
      },
    ],
    operation: " ",
};

N5

input: 2/3 1

output:

{
    values: [
      {
        values: [2, 3],
        operation: "/",
      },
      1,
    ],
    operation: " ",
};

N6

input: 1/2 3/4 5/6

output:

{
    values: [
      {
        values: [1, 2],
        operation: "/",
      },
      {
        values: [3, 4],
        operation: "/",
      },
      {
        values: [5, 6],
        operation: "/",
      },
    ],
    operation: " ",
};

N7

input: 1/2/3/4/5 6 7 8/9 10/11

output:

{
    values: [
      {
        values: [1, 2, 3, 4, 5],
        operation: "/",
      },
      6,
      7,
      {
        values: [8, 9],
        operation: "/",
      },
      {
        values: [10, 11],
        operation: "/",
      },
    ],
    operation: " ",
};

➡️ / and - samples

N8

input: 1-2/3

output:

{
    values: [
      1,
      {
        values: [2, 3],
        operation: "/",
      },
    ],
    operation: "-",
};

➡️ / and * samples

N9

input: 10/2*5

output:

{
    values: [
      {
        values: [10, 2],
        operation: "/",
      },
      5,
    ],
    operation: "*",
};

Example 3: Formula with 4 operations

N1

input: 10/2*5 1-1*5/3 2*4

output:

{
    values: [
      {
        values: [
          {
            values: [
              {
                values: [
                  {
                    values: [10, 2],
                    operation: "/",
                  },
                  5,
                ],
                operation: "*",
              },
              1,
            ],
            operation: " ",
          },
          {
            values: [
              {
                values: [1, 5],
                operation: "*",
              },
              3,
            ],
            operation: "/",
          },
        ],
        operation: "-",
      },
      {
        values: [2, 4],
        operation: "*",
      },
    ],
    operation: " ",
};

Example 4: Formula with () parenthesis

N1

input: 1 2*(3 2)

output:

{
    values: [
      1,
      {
        values: [
          2,
          {
            values: [3, 2],
            operation: " ",
          },
        ],
        operation: "*",
      },
    ],
    operation: " ",
};

N2

input: (1 2*3)*2

output:

{
    values: [
      {
        values: [
          1,
          {
            values: [2, 3],
            operation: "*",
          },
        ],
        operation: " ",
      },
      2,
    ],
    operation: "*",
};

N3

input: (1/1/10) 1/30 1/50

output:

{
    values: [
      {
        values: [1, 1, 10],
        operation: "/",
      },
      {
        values: [1, 30],
        operation: "/",
      },
      {
        values: [1, 50],
        operation: "/",
      },
    ],
    operation: " ",
  };

Other Cases

N1

input: -(1 2)

output:

{
    values: [
      {
        values: [1, 2],
        operation: " ",
      },
    ],
    operation: "-",
};

...

CodePudding user response:

Here's a function that seems to pass all of your test cases.

Somehow I've written a lot of expression parsers, and I eventually settled on doing it this way in pretty much all cases. This is a recursive descent parser that is just about as simple as a fully-featured expression parser can be.

The secret is the parseTokens function's minPrec parameter, which tells it to parse until it encounters an operator with lower precedence. That makes it easy for the function to call itself recursively at each precedence level.

/**
 * Parse an expression into the required tree
 * @param str {string}
 */
function parseExpression(str) {
    // break string into tokens, in reverse order because pop() is faster than shift()
    const tokens = str.match(/[.0-9Ee] |[^\s]/g).reverse();
    const tree = parseTokens(tokens, 0);
    if (tokens.length) {
        throw new Error(`Unexpected ${tokens.pop()} after expression`);
    }
    return tree;
}

const BINARY_PRECEDENCE = {
    ' ': 0,
    '-': 0,
    '*': 1,
    '/': 1,
}

const UNARY_PRECEDENCE = {
    ' ': 10,
    '-': 10,
}

/**
 * Given an array of tokens in reverse order, return binary expression tree
 * 
 * @param tokens {string[]} tokens
 * @param minPrec {number} stop at operators with precedence smaller than this
 */
function parseTokens(tokens, minPrec) {
    if (!tokens.length) {
        throw new Error('Unexpected end of expression');
    }

    // get the left operand
    let leftToken = tokens.pop();
    let leftVal;
    if (leftToken === '(') {
        leftVal = parseTokens(tokens, 0);
        if (tokens.pop() !== ')') {
            throw new Error('Unmatched (');
        }
    } else if (UNARY_PRECEDENCE[leftToken] != undefined) {
        const operand = parseTokens(tokens, UNARY_PRECEDENCE[leftToken]);
        if (typeof operand === 'number' && leftToken === '-') {
            leftVal = -operand;
        } else if (typeof operand === 'number' && leftToken === ' ') {
            leftVal = operand;
        } else {
            leftVal = {
                operation: leftToken,
                values: [operand]
            }
        }
    } else {
        leftVal = Number(leftToken);
        if (isNaN(leftVal)) {
            throw new Error(`invalid number ${leftToken}`);
        }
    }

    // Parse binary operators until we hit the end or a stop
    while (tokens.length) {
        // end of expression
        const opToken = tokens.pop();
        const opPrec = BINARY_PRECEDENCE[opToken];
        if (opToken === ')' || (opPrec != undefined && opPrec < minPrec)) {
            // We have to stop here.  put the token back and return
            tokens.push(opToken);
            return leftVal;
        }
        if (opPrec == undefined) {
            throw new Error(`invalid operator ${opToken}`)
        }

        // we have a binary operator.  Get the right operand
        const operand = parseTokens(tokens, opPrec   1);
        if (typeof leftVal === 'object' && leftVal.operation === opToken) {
            // Extend the existing operation
            leftVal.values.push(operand);
        } else {
            // Need a new operation
            leftVal = {
                values: [leftVal, operand],
                operation: opToken
            }
        }
    }
    return leftVal;
}

CodePudding user response:

I will post my solution, which is based on this answer to a similar question.

That solution is based on a regular expression which tokenizes the input with named capture groups. Consequently the rest of the code does not have to deal with character comparisons and can focus on the grammar rules. The implementation shows no explicit notion of precedence rules, as they are implied by the grammar.

I repeat here the grammar rules, but see also the explanation in the above referenced answer:

<literal>    ::= [ '-' ] <digit> { <digit> } [ '.' { <digit> } ] ; no white space allowed
<operator2>  ::= '*' | '/'
<operator1>  ::= ' ' | '-'
<factor>     ::= '-' <factor> | '(' <expression> ')' | <literal>
<term>       ::= [ <term> <operator2> ] <factor>
<expression> ::= [ <expression> <operator1> ] <term>

Here I extend that solution with the following specifics:

  • It produces the tree node format instead of the nested array
  • It keeps nodes flat when multiple operands can be combined with the same binary operator
  • It resolves double negations: For example, -(-(1 2)) will produce the same tree as 1 2.

You can try different inputs in the snippet below.

function parse(s) {
    // Create a closure for the two variables needed to iterate the input:
    const get = ((tokens, match=tokens.next().value) =>
            // get: return current token when it is of the required group, and move forward, 
            //      else if it was mandatory, throw an error, otherwise return undefined
            (group, mandatory) => {
                if (match?.groups[group] !== undefined) 
                    return [match?.groups[group], match = tokens.next().value][0];
                if (mandatory) 
                    throw `${s}\n${' '.repeat(match?.index ?? s.length)}^ Expected ${group}`;
            }
        )(  // Get iterator that matches tokens with named capture groups.
            s.matchAll(/(?<number>\d (?:\.\d*)?)|(?<open>\()|(?<close>\))|(?<add>\ |(?<unary>-))|(?<mul>[*\/])|(?<end>$)|\S/g)
        ),
        // negationRule: Removes double negation; toggles sign when number is negated
        negationRule = (a, b=a.values?.[0]) =>
            a.values?.length   (b?.values?.length ?? 1) == 2 ? (b.values?.[0] ?? -b) : a,
        // associativityRule: Flattens operation when first operand has same binary operation
        associativityRule = (a, b=a.values?.[0]) =>
            a.operation === b?.operation && a.values?.length > 1 && b.values.length > 1
                ? {...a, values: [...b.values, ...a.values.slice(1)] } : a,
        // node: Creates a tree node from given operation, applying simplification rules
        node = (operation, ...values) => 
            negationRule(associativityRule({ values, operation })),
        // Grammar rules implementation, using names of regex capture groups, returning nodes
        factor = (op=get("unary")) => 
            op ? node(op, factor()) : get("open") ? expr("close") :  get("number", 1),
        term = (arg=factor(), op=get("mul")) => 
            op ? term(node(op, arg, factor())) : arg,
        expr = (end, arg=term(), op=get("add")) => 
            op ? expr(end, node(op, arg, term())) : (get(end, 1), arg);
    return expr("end");
}


// I/O Management

const [input, select, output] = document.querySelectorAll("input, select, pre");
(input.oninput = () => {
    if (input.value != select.value) select.value = "";
    try {
        output.textContent =  JSON.stringify(parse(input.value), null, 2)
            // put integer-arrays on one line:
            .replace(/\[[^{\]] \]/g, m => m.replace(/\s /g, "").replace(/,/g, ", "));
    } catch(err) {
        output.textContent = err;
    }
})();
select.onchange = () => (input.value = select.value, input.oninput());
input { width: 30em; margin-bottom: 10px; }
Type a math expression or choose one from drop down:<br>
<input value="1 / (1/10   1/30   1/50)">
<select>
    <option>
    <option>1
    <option>1 1
    <option>1 2 3
    <option>3-2-1
    <option>10*80
    <option>100/10
    <option>1 1-1
    <option>3 2-1 5
    <option>3 2-1 5 10 7
    <option>1 2/3
    <option>2/3 1
    <option>1/2 3/4 5/6
    <option>1/2/3/4/5 6 7 8/9 10/11
    <option>1-2/3
    <option>10/2*5
    <option>10/2*5 1-1*5/3 2*4
    <option>1 2*(3 2)
    <option>(1 2*3)*2
    <option>(1/1/10) 1/30 1/50
    <option>-(1 2)
    <option>1/-(-(1 2))
</select>
<pre></pre>

  • Related