How to extract the content of also nested parentheses before and after a specific character?-CodePudding

In the following string:

(10+10)*2*((1+1)*1)√(16)+(12*12)+2

I am trying to replace ((1+1)*1)√(16) with nthroot(16,(1+1)*1).
Specifically, I want to extract everything in the first sets of brackets on each side of the √.
The brackets themselves could contain many layers of brackets and many different symbols.
Language is JavaScript.

I tried a couple of things like <str>.replace(/$(.+)$√$(.+)$/g, 'nthroot($1,$2)')
but every one of my attempts at learning RegEx fails and I can't figure this out.

CodePudding user response：

I don't think you can currently solve this in a general way with a regular expression in Javascript, since you can't match balanced parentheses recursively.

Personally, I'd approach this by splitting the text into its constituent characters, building groups of parentheses, and joining all back together with some logic. For example:

// Rewrites every "(index)√(radicand)" occurrence in `text` into the
// "nthroot(radicand, (index))" syntax and stores the result in `changedText`.
let text = '(10+10)*2*((1+1)*1)√(16)+(12*12)+2';
let changedText = '';
let parts = text.split('');
// Nesting depth of the parentheses seen so far. It has to start at 0 (not
// null) so that characters in front of the first parenthesis are emitted as
// single-character groups as well.
let parCount = 0;
let group = '';
let groups = [];

// Group the original text into balanced parenthesized runs and single
// characters.
for (let i = 0; i < parts.length; i++) {
    // Keep track of the nesting; while parCount is larger than 0 there are
    // unclosed parentheses left in the current group.
    if (parts[i] === '(') parCount++;
    if (parts[i] === ')') parCount--;

    group += parts[i];

    // Emit every group of balanced parentheses or every single character.
    if (parCount === 0 && group !== '') {
        groups.push(group);
        group = '';
    }
}

// Unbalanced input leaves a pending group behind; keep it so that no text
// is silently dropped from the result.
if (group !== '') {
    groups.push(group);
}

// Join the groups again, replacing a root character and its two surrounding
// parenthesized groups with the nthroot() syntax.
for (let i = 0; i < groups.length; i++) {
    let isRoot = i < groups.length - 2 && groups[i + 1] === '√';
    // Only look at groups[i + 2] once isRoot has established that it exists;
    // otherwise an input ending in a parenthesized group would throw.
    let hasParGroups = isRoot && groups[i][0] === '(' && groups[i + 2][0] === '(';

    if (hasParGroups) {
        // Drop the outer parentheses of the radicand only.
        let stripped = groups[i + 2].replace(/^\(|\)$/g, '');
        changedText += `nthroot(${stripped}, ${groups[i]})`;
        // Skip the root symbol and the radicand group just consumed.
        i += 2;
    } else {
        // Append groups that are not part of a root expression unchanged.
        changedText += groups[i];
    }
}

console.log('Before:', text, '\n', 'After:', changedText);

Not saying it's pretty, though. ;)

CodePudding user response：

Since JavaScript does not support the PCRE recursion construct (?R), the best you can do is

let s = "(10+10)*2*((1+1)*1)√(16)+(12*12)+2";

// Matches "(A)√(B)" where A and B are each either a plain integer or one
// parenthesized group followed by an operator and an integer. Only this one
// level of nesting is supported, and only the first occurrence is replaced
// (the pattern has no `g` flag).
const result = s.replace(/\(((?:\([^()]+\)[+\-*/])?\d+)\)√\(((?:\([^()]+\)[+\-*/])?\d+)\)/, 'nthroot($2,$1)');
// RETURNS: (10+10)*2*nthroot(16,(1+1)*1)+(12*12)+2

In Python it would look much better with (?R)

import regex  # third-party "regex" package (not the stdlib "re"); needed for (?R)

# Sample input from the question. An empty string would simply be returned
# unchanged, so the sample is spelled out here.
s = "(10+10)*2*((1+1)*1)√(16)+(12*12)+2"
# The lookbehind requires an operator right before the opening parenthesis of
# the left-hand group; group 1 is that group's content, group 2 the digits
# inside the right-hand parentheses.
regex.sub(r"(?<=[\+\-\*\/])\((.*|(?R))\)√\((\d+)\)", r"nthroot(\2,\1)", s)
#  RETURNS (per the original answer): (10+10)*2*nthroot(16,(1+1)*1)+(12*12)+2

CodePudding user response：

Parsing tasks, like what the OP is asking for, can not be covered by a regular expression alone.

In particular, correctly parsing a token with nested parentheses needs a simple, regex-free custom parsing process. Moreover, for the OP's use case one needs to parse a valid parenthesized expression from each of the left-hand and right-hand tokens (the ones that were separated by √).

A possible approach could be based on a single split/reduce task with the collaboration of some specialized helper functions ...

/**
 * Retrieves the balanced parenthesized expression a token starts with,
 * found by counting parentheses from the token's left side.
 *
 * @param {string} token - text to the right of a '√' character.
 * @returns {string} the leading expression including its outer parentheses,
 *   or '' when the token does not start with '(' or the opening parenthesis
 *   is never closed.
 */
function createFirstValidParenthesizedExpression(token) {
  if (token[0] !== '(') {
    return '';
  }
  let balance = 0;

  for (let idx = 0; idx < token.length; idx++) {
    const char = token[idx];

    if (char === '(') {
      balance += 1;
    } else if (char === ')') {
      balance -= 1;
    }
    // The parenthesis opened at index 0 has just been closed.
    if (balance === 0) {
      return token.slice(0, idx + 1);
    }
  }
  // Ran out of characters while parentheses were still open.
  return '';
}
/**
 * Retrieves the balanced parenthesized expression a token ends with,
 * found by counting parentheses from the token's right side.
 *
 * @param {string} token - text to the left of a '√' character.
 * @returns {string} the trailing expression including its outer parentheses,
 *   or '' when the token does not end with ')' or the closing parenthesis
 *   has no matching opening one.
 */
function createFirstValidParenthesizedExpressionFromRight(token) {
  if (token.slice(-1) !== ')') {
    return '';
  }
  let balance = 0;

  for (let idx = token.length - 1; idx >= 0; idx--) {
    const char = token[idx];

    // Scanning backwards, so ')' opens a level and '(' closes it.
    if (char === ')') {
      balance += 1;
    } else if (char === '(') {
      balance -= 1;
    }
    // The parenthesis closed at the last index has just found its opener.
    if (balance === 0) {
      return token.slice(idx);
    }
  }
  // Reached the start of the token with unmatched closing parentheses.
  return '';
}

/**
 * Escapes every character of a math expression that is also a regex control
 * character, so the expression can be embedded literally in a RegExp source.
 *
 * Besides the arithmetic operators and parentheses this also covers '.'
 * (decimals), '^' (powers) and the remaining regex metacharacters.
 *
 * @param {string} expression - raw expression text.
 * @returns {string} the expression with a backslash before each such character.
 */
function escapeExpressionChars(expression) {
  return expression.replace(/[-+*()/.^$?|{}[\]\\]/g, '\\$&');
}

/**
 * Joins the two tokens that were separated by a '√' character into one
 * string in which the operands next to the '√' are replaced by
 * `nthroot(<right operand>,<left operand>)`.
 *
 * Written as a reducer for `expression.split('√').reduce(...)`: the first
 * argument is the text processed so far, the second the next token.
 *
 * @param {string} leftHandToken - text to the left of the '√'.
 * @param {string} rightHandToken - text to the right of the '√'.
 * @returns {string} both tokens joined, with the root in `nthroot` syntax.
 */
function createNthRootExpression(leftHandToken, rightHandToken) {
  const leftToken = leftHandToken.trim();
  const rightToken = rightHandToken.trim();

  // Patterns for the simple operand shapes: a trailing / leading run of
  // digits, '*' and '/', or (right side only) a parenthesized group whose
  // content contains neither '+' nor '-'.
  const regXSimpleLeftHandExpression = /[\d*/]+$/;
  const regXSimpleRightHandExpression = /^[\d*/]+|^\([^+-]*\)/;

  // Operand that ends the left token; falls back to counting parentheses
  // from the right when the simple pattern does not match.
  const leftHandExpression =
    leftToken.match(regXSimpleLeftHandExpression)?.[0] ||
    createFirstValidParenthesizedExpressionFromRight(leftToken);

  // Operand that starts the right token; falls back to counting
  // parentheses from the left when the simple pattern does not match.
  const rightHandExpression =
    rightToken.match(regXSimpleRightHandExpression)?.[0] ||
    createFirstValidParenthesizedExpression(rightToken);

  // Each operand is by construction a suffix / prefix of its token, so it
  // is cut off by length. This avoids building a RegExp from expression
  // text, which would need every regex metacharacter to be escaped.
  const leftRemainder = leftToken.slice(
    0,
    leftToken.length - leftHandExpression.length,
  );
  const rightRemainder = rightToken.slice(rightHandExpression.length);

  return [

    leftRemainder,
    `nthroot(${ rightHandExpression },${ leftHandExpression })`,
    rightRemainder,

  ].join('');
}

// Demo driver: logs each sample expression, its tokens after splitting at
// '√', and the result of folding those tokens with createNthRootExpression.

// The expression from the question: one root with parenthesized operands.
const sampleExpressionOriginal =
  '(10+10)*2*((1+1)*1)√(16)+(12*12)+2';
// Two roots, one with a bare number on the right and one with a bare
// number on the left.
const sampleExpressionEdgeCase =
  '(10+10)*2*((1+1)*1)√16+(12*12)+2√(4*(1+2))+3';

console.log("    processing the OP's expression    ")
console.log(
  'original value ...\n',
  sampleExpressionOriginal
);
console.log(
  'original value, after split ...',
  sampleExpressionOriginal
    .split('√')
);
console.log(
  'value, after "nthroot" creation ...\n',
  sampleExpressionOriginal
    .split('√')
    // no initial value: the first token seeds the accumulator.
    .reduce(createNthRootExpression)
);
console.log('\n');

console.log("    processing a more edge case like expression    ")
console.log(
  'original value ...\n',
  sampleExpressionEdgeCase
);
console.log(
  'original value, after split ...',
  sampleExpressionEdgeCase
    .split('√')
);
console.log(
  'value, after "nthroot" creation ...\n',
  sampleExpressionEdgeCase
    .split('√')
    // no initial value: the first token seeds the accumulator.
    .reduce(createNthRootExpression)
);

.as-console-wrapper { min-height: 100%!important; top: 0; }