I have a string containing a sentence and an array of words. I need to sort this array in descending order of how often each word appears in the string.
ex:
str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"
array = ["a", "em", "i", "el"]
X = 3
arrayFrequency = ["i", "a", "em", "el"] // They repeat 11, 7, 2, 1 respectively
XarrayFrequency = ["i", "a", "em"] // The first X words most repeated
I tried this way but my brain stopped and I can't think of a way to continue:
// Sample sentence and the candidate substrings whose frequency is wanted.
const str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
const array = ["a", "em", "i", "el"];
// Number of most frequent words wanted; not used by this attempt yet.
const X = 3;
// Splitting on a word yields one more piece than there are occurrences,
// so `length - 1` is the (non-overlapping) occurrence count of that word.
for (const word of array) {
  console.log(str.split(word).length - 1);
} // output: 7 2 11 1
I was expecting the output to be ["i", "a", "em", "el"], so that I could then easily take the first X words from the array
ex: X = 3 XarrayFrequency = ["i", "a", "em"]
CodePudding user response:
const s = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
const m = ["a", "em", "i", "el"];
const x = 3;

/**
 * Counts the occurrences of the substring `m` in the string `s`.
 * Overlapping occurrences are counted ("aa" occurs twice in "aaa").
 * An empty substring is reported as 0 occurrences.
 *
 * @param {string} s - text to search in
 * @param {string} m - substring to look for
 * @returns {number} number of positions in `s` at which `m` starts
 */
const c = (s, m) => {
  if (m === "") return 0;
  let total = 0;
  // Resume the search one code unit after each hit so overlaps are found.
  for (let at = s.indexOf(m); at !== -1; at = s.indexOf(m, at + 1)) {
    total += 1;
  }
  return total;
};

/**
 * Returns the `x` substrings from `m` that occur most often in `s`.
 * Each substring is paired with its occurrence count as
 * [substring, occurrences]; the pairs are sorted by occurrences descending,
 * cut down to the first `x` entries, and reduced back to the substrings.
 *
 * @param {string} s - text to search in
 * @param {string[]} m - candidate substrings
 * @param {number} x - how many of the most frequent substrings to return
 * @returns {string[]} up to `x` substrings, most frequent first
 */
const f = (s, m, x) =>
  m
    .map((sub) => [sub, c(s, sub)])
    .sort(([, countA], [, countB]) => countB - countA)
    .slice(0, x)
    .map(([sub]) => sub);

console.log(f(s, m, x));
CodePudding user response:
Use a regular expression to find the occurrences.
const str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
const words = ["a", "em", "i", "el"];
const x = 3;

/**
 * Escapes regex metacharacters so that `word` is matched literally.
 * @param {string} word
 * @returns {string} pattern source that matches `word` verbatim
 */
function escapeRegExp(word) {
  return word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Counts the non-overlapping occurrences of `word` in `text`.
 * @param {string} text
 * @param {string} word
 * @returns {number}
 */
function countOccurrences(text, word) {
  const re = new RegExp(escapeRegExp(word), "g");
  // match() returns null when nothing matches, hence the fallback array.
  return (text.match(re) || []).length;
}

// Dictionary to store counts; created without a prototype so that a word
// such as "__proto__" is stored as an ordinary key.
const resDict = Object.create(null);
for (const word of words) {
  resDict[word] = countOccurrences(str, word); // store the count for each word
}

// Create the array of [word, count] pairs
const resArray = Object.keys(resDict).map((key) => [key, resDict[key]]);

// Sort the array in descending order of count
resArray.sort((first, second) => second[1] - first[1]);

// Get the list of words in sorted order of their counts
const keys = resArray.map(([word]) => word);

// Print the array based on the number needed
console.log(keys.slice(0, x));
CodePudding user response:
You can divide what you want to do into 3 steps:
Step 1. Compute the frequency of each word in the sentence.
/**
 * Counts how often each word occurs in the sentence.
 * Words are matched literally (regex metacharacters are escaped) and
 * occurrences are counted without overlap.
 *
 * @param {string[]} words - substrings to count
 * @param {string} sentence - text to search in
 * @returns {number[]} counts, where counts[i] belongs to words[i]
 */
function countWords(words, sentence) {
  return words.map((word) => {
    // Escape metacharacters so e.g. "a.b" does not also match "axb"
    // and "(" does not make the RegExp constructor throw.
    const pattern = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // match() returns null when nothing matches, hence the fallback array.
    return (sentence.match(new RegExp(pattern, "g")) || []).length;
  });
}
Step 2. Sort the words based on frequencies from the previous step.
/**
 * Returns a copy of `words` ordered from the highest count to the lowest.
 * The input arrays are not modified.
 *
 * @param {string[]} words - words to order
 * @param {number[]} counts - counts[i] is the count belonging to words[i]
 * @returns {string[]} new array with the words, highest count first
 */
function sortWords(words, counts) {
  return words
    // Pair every word with its own count up front, so the lookup does not
    // depend on the word's position (or on the word being unique).
    .map((word, index) => ({ word, count: counts[index] }))
    // Ascending sort followed by reverse: words with equal counts come out
    // in reverse input order.
    .sort((a, b) => a.count - b.count)
    .reverse()
    .map(({ word }) => word);
}
Step 3. The last step is to just call these 2 functions and use slice
to get the x most frequent words.
const sentence = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
const words = ["a", "em", "i", "el"];
// Count each word in the sentence, then order the words by those counts.
const counts = countWords(words, sentence);
const sortedWords = sortWords(words, counts);
const x = 3;
// should print: ["i", "a", "em"]
console.log(sortedWords.slice(0, x));
It's good practice to split your code into multiple functions so you don't get overwhelmed by whatever it is you want to do. This also leads to more testable and more readable code.