Home > Back-end >  Count array's words (12k elements) occurrences in another multidimensional array (20k elements)
Count array's words (12k elements) occurrences in another multidimensional array (20k elements)

Time:09-12

I've been trying to get this one to work, but the way I found it to work, using regex makes it run out of memory and/or it gives me the error: Uncaught SyntaxError: Invalid regular expression: \b \b : Nothing to repeat.

This is the function:

function countSearchTerms() {
  const filteredTerms = nGramsSht.getRange(6, 2, nGramsSht.getLastRow() - 5, 1).getValues().filter(e => e != '');
  const searchTermData = nGramFinalDataSht.getRange(1, 1, nGramFinalDataSht.getLastRow(), 1).getValues().filter(e => e != '');

  let occurrences = [];
  for (let r = 0; r < filteredTerms.length; r  ) {
    let count = 0;
    for (let a = 0; a < searchTermData.length; a  ) {
      if ((new RegExp("\\b"   filteredTerms[r].toString()   "\\b").test(searchTermData[a]))) {
        count  ;
      }
    }
    occurrences.push([count])
  }

  if (occurrences.length > 0) {
    nGramsSht.getRange(6, 3, nGramsSht.getLastRow() - 5, 1).clearContent();
    nGramsSht.getRange(6, 3, occurrences.length, 1).setValues(occurrences);
  }
}

I'd use this answer's approach, but how to count the words in a occurring in b?

function wordcount() {
  const ss = SpreadsheetApp.getActive();
  const sh = ss.getSheetByName("Sheet0");
  sh.clearContents();
  const a = [["A"], ["earth"], ["20"], ["tunnel"], ["house"], ["earth A"], ["$100"], ["house $100"]];
  const b = [["A"], ["A Plane is expensive"], ["peaceful earth"], ["20 years"], ["tunnel"], ["tiny house"], ["earth B612"], ["$100"], ["house $100"]]
  sh.getRange(1, 1, a.length, a[0].length).setValues(a);
  let o = [... new Set(a.slice().flat().join(' ').split(' '))].map(w => [w, sh.createTextFinder(w).matchCase(true).findAll().length]);
  o.unshift(["Words","Count"]);
  sh.getRange(sh.getLastRow()   2,1,o.length,o[0].length).setValues(o);
}

Thanks a lot!

CodePudding user response:

Try it this way:

function wordcount() {
  const ss = SpreadsheetApp.getActive();
  const sh = ss.getSheetByName("Sheet0");
  sh.clearContents();
  const a = [["A"], ["earth"], ["20"], ["tunnel"], ["house"], ["earth A"], ["$100"], ["house $100"]];
  const b = [["A"], ["A Plane is expensive"], ["peaceful earth"], ["20 years"], ["tunnel"], ["tiny house"], ["earth B612"], ["$100"], ["house $100"]]
  sh.getRange(1, 1, b.length, b[0].length).setValues(b);
  let o = [... new Set(a.slice().flat().join(' ').split(' '))].map(w => [w, sh.createTextFinder(w).matchCase(true).findAll().length]);
  o.unshift(["Words", "Count"]);
  sh.getRange(sh.getLastRow()   2, 1, o.length, o[0].length).setValues(o);
}
Search
A
A Plane is expensive
peaceful earth
20 years
tunnel
tiny house
earth B612
$100
house $100
Words Count
A 2
earth 2
20 1
tunnel 1
house 2
$100 2

CodePudding user response:

Maybe your data makes the regexp fail. You should escape it for regexp and check if not empty. I hope it helps

function escapeRegExp(string) {
  return string.replace(/([.* ?^=!:${}()|\[\]\/\\])/g, "\\$1");
}

var term = escapeRegExp(filteredTerms[r].toString().trim())
if (term && (new RegExp("\\b"   term   "\\b").test(searchTermData[a]))) {
  count  ;
}
  • Related