Suppose I have the following array of sentences:
// Sample input: each entry is an object whose `text` property holds one sentence.
// The identifier is spelled `senetences`; the later snippets use the same spelling.
const senetences = [
{ "text": "And a moment I Yes." },
{ "text": "Wait a moment I Yes." },
{ "text": "And a moment I Hello, Guenta, trenteuno." },
{ "text": "Okay a moment. Hello. Perfect." },
{ "text": "And a moment." },
{ "text": "And a moment I Hello, Guenta, trenteuno." },
{ "text": "Just a moment in Quinta trenteuno." },
{ "text": "And a moment in Quinta trenteuno." },
{ "text": "Wait a moment I Hello, Guenta, trenteuno." },
{ "text": "Just a moment in Quinta trenteuno." }
]
Now suppose I search for the word `moment`.
I need to get a few words before and after the exact word `moment`,
together with their matching score across the whole array.
Example output:
[
{ "text": "And a moment", "score": 5, "percent": 50 },
{ "text": "moment I Hello", "score": 3, "percent": 30 },
{ "text": "moment in Quinta", "score": 3, "percent": 30 },
{ "text": "Wait a moment", "score": 2, "percent": 20 },
{ "text": "moment I Yes", "score": 2, "percent": 20 },
{ "text": "Just a moment", "score": 2, "percent": 20 },
{ "text": "Okay a moment", "score": 1, "percent": 10 }
]
`score` is the number of times the phrase occurs, and `percent` is that number
divided by the total number of sentences, expressed as a percentage.
I am able to collect the words by looping over `senetences`,
but I get stuck after that.
const string = "moment";
const words = [];
// For every sentence, collect the three-word phrase that ends at the search
// word and the three-word phrase that starts at it.
senetences.forEach((a) => {
  const arrayString = a.text.toLowerCase().split(' ');
  const index = arrayString.indexOf(string.toLowerCase());
  if (index === -1) {
    // Search word absent: skip instead of pushing "undefined undefined undefined".
    return;
  }
  if (index >= 2) {
    // Two words are available before the match.
    words.push(`${arrayString[index - 2]} ${arrayString[index - 1]} ${arrayString[index]}`);
  }
  if (index + 2 < arrayString.length) {
    // Two words are available after the match.
    words.push(`${arrayString[index]} ${arrayString[index + 1]} ${arrayString[index + 2]}`);
  }
});
// NOTE: tokens keep their punctuation, so a sentence containing "moment." is
// not matched by this exact-token comparison.
After that I got stuck on how to search the `senetences` array for those phrases.
// Collect the original text of every sentence that contains one of the phrases;
// a sentence is pushed once per phrase it contains.
// NOTE(review): `phrases` is not defined in the visible snippets — presumably it
// is the `words` array built earlier; confirm.
const output = [];
senetences.forEach((sentence) => {
  phrases.forEach((phrase) => {
    const position = sentence.text.toLowerCase().indexOf(phrase);
    if (position === -1) {
      return;
    }
    output.push(sentence.text);
  });
});
CodePudding user response:
You can store the strings and counts in an object:
// Sample input: each entry is an object whose `text` property holds one sentence.
const sentences = [
  { "text": "And a moment I Yes." },
  { "text": "Wait a moment I Yes." },
  { "text": "And a moment I Hello, Guenta, trenteuno." },
  { "text": "Okay a moment. Hello. Perfect." },
  { "text": "And a moment." },
  { "text": "And a moment I Hello, Guenta, trenteuno." },
  { "text": "Just a moment in Quinta trenteuno." },
  { "text": "And a moment in Quinta trenteuno." },
  { "text": "Wait a moment I Hello, Guenta, trenteuno." },
  { "text": "Just a moment in Quinta trenteuno." },
];

// Word to look for; compared case-insensitively against whole tokens.
const string = "moment";

// Tally every three-word phrase that ends at, or starts at, the search word.
// Keys are the phrases (original casing); values are how many sentences produced them.
const words = sentences.reduce((acc, { text }) => {
  // Drop periods and commas so that "moment." still matches "moment".
  const arrayString = text.replaceAll(/\.|,/g, '').split(' ');
  const needle = string.toLowerCase();
  const index = arrayString.findIndex((el) => el.toLowerCase() === needle);
  if (index === -1) {
    // Without this guard a miss (-1) would still pass the "after" check below
    // and record a bogus phrase built from arrayString[-1].
    return acc;
  }
  if (index >= 2) {
    // Two words are available before the match.
    const key = `${arrayString[index - 2]} ${arrayString[index - 1]} ${arrayString[index]}`;
    acc[key] = (acc[key] ?? 0) + 1;
  }
  if (index < arrayString.length - 2) {
    // Two words are available after the match.
    const key = `${arrayString[index]} ${arrayString[index + 1]} ${arrayString[index + 2]}`;
    acc[key] = (acc[key] ?? 0) + 1;
  }
  return acc;
}, {});

// Convert the tally to { text, score, percent } records, highest score first.
// `percent` is the score relative to the total number of sentences.
const result = Object.entries(words)
  .map(([text, score]) => ({ text, score, percent: (100 * score) / sentences.length }))
  .sort((l, r) => r.score - l.score);

console.log(result);
<iframe name="sif1" sandbox="allow-forms allow-modals allow-scripts" frameborder="0"></iframe>
CodePudding user response:
This function should work. I added a `precision` parameter to set how many words you want to use in your search, and added some code to strip punctuation characters from the sentences.
/**
 * Finds the phrases of `precision` words that end at, or start at, the search
 * word in each sentence, and counts how often each phrase occurs.
 *
 * Matching is case-insensitive. The characters . , ' ! ? are removed before
 * the text is split on single spaces, and only the first occurrence of the
 * search word in each sentence is considered.
 *
 * @param {Array<{text: string}>} sentences - Sentences to scan.
 * @param {string} searchPhrase - Single word to look for.
 * @param {number} [precision=3] - Number of words per phrase, including the
 *   search word itself.
 * @returns {Array<{text: string, score: number, percent: number}>} One record
 *   per distinct (lower-cased) phrase, in order of first appearance. `score`
 *   is the occurrence count and `percent` is the score relative to
 *   `sentences.length`.
 */
function searchAndScoreWords(sentences, searchPhrase, precision = 3) {
  const needle = searchPhrase.toLowerCase();
  // Map preserves insertion order, so results come out in first-seen order.
  const counts = new Map();
  const tally = (tokens) => {
    const phrase = tokens.join(' ');
    counts.set(phrase, (counts.get(phrase) ?? 0) + 1);
  };

  for (const sentence of sentences) {
    const tokens = sentence.text.toLowerCase().replace(/[.,'!?]/g, '').split(' ');
    const index = tokens.indexOf(needle);
    if (index === -1) {
      continue;
    }

    // Enough words before the match: record the phrase ending at the match.
    if (index >= precision - 1) {
      tally(tokens.slice(index - precision + 1, index + 1));
    }

    // Enough words after the match: record the phrase starting at the match.
    // With precision 1 this window is the same single word as the one above,
    // so it is skipped to avoid counting every sentence twice.
    if (precision > 1 && index <= tokens.length - precision) {
      tally(tokens.slice(index, index + precision));
    }
  }

  const total = sentences.length;
  return Array.from(counts, ([text, score]) => ({
    text,
    score,
    // Multiply before dividing to limit floating-point rounding noise.
    percent: (score * 100) / total,
  }));
}
// Sample input: the first six sentences put different punctuation right after
// "moment" (comma, apostrophe, question mark, period, exclamation mark).
const sentences = [
{ "text": "And a moment, I Yes." },
{ "text": "Wait a moment' I Yes." },
{ "text": "And a moment? I Hello, Guenta, trenteuno." },
{ "text": "Okay a moment. Hello. Perfect." },
{ "text": "And a moment." },
{ "text": "And a moment! I Hello, Guenta, trenteuno." },
{ "text": "Just a moment in Quinta trenteuno." },
{ "text": "And a moment in Quinta trenteuno." },
{ "text": "Wait a moment I Hello, Guenta, trenteuno." },
{ "text": "Just a moment in Quinta trenteuno." }
]
// Search for "moment" with a precision of 3 and print the scored phrases.
console.log(searchAndScoreWords(sentences,"moment",3));
<iframe name="sif2" sandbox="allow-forms allow-modals allow-scripts" frameborder="0"></iframe>