I'm trying to detect unique sentences (a string) in an array of words in TypeScript, but I'm not sure if I'm going about it the right way.
My first approach is to convert the array of words into one big string and then use indexOf to return the indices of the matches, e.g.:
// Sample data: the words of the text in order, each tagged with a 1-based id.
const words: Word[] = [
  "date",
  "of",
  "my",
  "birthday",
  "date",
  "of",
  "his",
  "birthday",
].map((content, position) => ({ id: position + 1, content }));
// The phrase to locate; in the sample data it occurs only once (ids 5 to 7).
const uniqueSentence = "date of his";
// Every word's content joined with single spaces, ready for substring search.
const wordsString = words.map(({ content }) => content).join(" ");
/**
 * Finds every non-overlapping, case-insensitive occurrence of `searchStr` in `str`.
 *
 * @param searchStr - Text to look for; an empty string yields no matches.
 * @param str - Text to search in.
 * @returns Character offsets at which each match starts, in ascending order.
 *   Offsets refer to the lower-cased copy of `str` that is actually searched.
 */
function findText(searchStr: string, str: string): number[] {
  if (searchStr.length === 0) {
    return [];
  }
  const haystack = str.toLowerCase();
  // Measure the needle after lower-casing: that is the text actually matched.
  const needle = searchStr.toLowerCase();
  const indices: number[] = [];
  let index = haystack.indexOf(needle);
  while (index > -1) {
    indices.push(index);
    // Resume right after this match so that matches never overlap.
    index = haystack.indexOf(needle, index + needle.length);
  }
  return indices;
}
// Look for the unique sentence inside the space-joined words.
const indices = findText(uniqueSentence, wordsString);
// Here the indices will be equal to [20]
In the case of the example above, I would like the function findText to return the matching words, but I have no clue how I could achieve it.
Test cases :
findText("date") => Error ("Non unique anchor")
findText("date of") => Error ("Non unique anchor")
findText("date of his") => [
{ id: 5, content: "date" },
{ id: 6, content: "of" },
{ id: 7, content: "his" },
]
CodePudding user response:
/**
 * Finds every position at which the words of `searchStr` appear consecutively in `str`.
 *
 * Both arguments are split on single spaces and compared word by word, so a
 * match always starts and ends on a word boundary.
 *
 * @param searchStr - Space-separated words to look for.
 * @param str - Space-separated words to search in.
 * @returns Zero-based word indices (not character offsets) at which a match begins.
 */
function findText(searchStr: string, str: string): number[] {
  // get both in array form
  const splitString = str.split(" ");
  const splitSearch = searchStr.split(" ");
  const idxs: number[] = [];
  splitString.forEach((_, idx) => {
    // Test each start position exactly once, so an index is never reported
    // twice even when the search phrase repeats its first word.
    const isMatch = splitSearch.every(
      (search, offset) => splitString[idx + offset] === search,
    );
    if (isMatch) {
      // push idx of first word
      idxs.push(idx);
    }
  });
  return idxs;
}
CodePudding user response:
Note: This answer is in response to the original question, which has since been edited in a substantial way that invalidates the answer.
If I understand the question correctly, you can do it by keeping track of the current index of each word as you iterate through them. There are some assumptions in your question (like inserting spaces between each word), and this behavior needs to be accounted for during the iteration (e.g. prepending each word with the space). Here's how you could write a function to find the index of the first word which matches the search text exactly:
/** One word of the text together with its identifier. */
interface Word {
  id: number;
  content: string;
}

// Sample data: ids are assigned 1..8 in the order the words appear.
const words: Word[] = ['date', 'of', 'my', 'birthday', 'date', 'of', 'his', 'birthday'].map(
  (content, index): Word => ({ id: index + 1, content }),
);
/**
 * Finds the index of the first word at which `searchText` begins.
 *
 * The word contents are joined with `joinString`, and the character offset at
 * which every word starts is recorded. Occurrences of `searchText` in the
 * joined text are then scanned in order until one starts exactly at the
 * beginning of a word.
 *
 * @param words - Words to search, in order.
 * @param searchText - Text to look for; an empty string is never found.
 * @param joinString - Separator placed between consecutive words (default: one space).
 * @returns Zero-based index into `words` of the word where the match starts,
 *   or -1 when no occurrence starts at the beginning of a word.
 */
function findIndex (
  words: Word[],
  searchText: string,
  joinString = ' ',
): number {
  if (!searchText) return -1;

  // wordStarts[i] is the character offset of words[i] inside wordString.
  const wordStarts: number[] = [];
  let wordString = '';
  words.forEach((word, i) => {
    // The separator goes between words only, never before the first one.
    if (i > 0) wordString += joinString;
    wordStarts.push(wordString.length);
    wordString += word.content;
  });

  // An occurrence may begin in the middle of a word (e.g. "date" inside
  // "update"); keep scanning until one lines up with the start of a word.
  let from = 0;
  for (;;) {
    const matchStart = wordString.indexOf(searchText, from);
    if (matchStart === -1) return -1;
    const wordIndex = wordStarts.indexOf(matchStart);
    if (wordIndex !== -1) return wordIndex;
    from = matchStart + 1;
  }
}
// Demo: log the word index found for each search text.
// Expected output, in order: -1 (not found), -1, 4, 1, 3
for (const searchText of ['', 'my date', 'date of his', 'of my', 'birthday']) {
  console.log(findIndex(words, searchText));
}
Compiled JS from the TS Playground:
"use strict";
// Sample data: ids are assigned 1..8 in the order the words appear.
const words = ['date', 'of', 'my', 'birthday', 'date', 'of', 'his', 'birthday']
    .map((content, position) => ({ id: position + 1, content }));
/**
 * Finds the index of the first word at which `searchText` begins.
 *
 * The word contents are joined with `joinString`, and the character offset at
 * which every word starts is recorded. Occurrences of `searchText` in the
 * joined text are then scanned in order until one starts exactly at the
 * beginning of a word.
 *
 * @param words - Objects with a string `content` property, in order.
 * @param searchText - Text to look for; an empty string is never found.
 * @param joinString - Separator placed between consecutive words (default: one space).
 * @returns Zero-based index into `words` of the word where the match starts,
 *   or -1 when no occurrence starts at the beginning of a word.
 */
function findIndex(words, searchText, joinString = ' ') {
    if (!searchText)
        return -1;
    // wordStarts[i] is the character offset of words[i] inside wordString.
    const wordStarts = [];
    let wordString = '';
    words.forEach((word, i) => {
        // The separator goes between words only, never before the first one.
        if (i > 0)
            wordString += joinString;
        wordStarts.push(wordString.length);
        wordString += word.content;
    });
    // An occurrence may begin in the middle of a word (e.g. "date" inside
    // "update"); keep scanning until one lines up with the start of a word.
    let from = 0;
    for (;;) {
        const matchStart = wordString.indexOf(searchText, from);
        if (matchStart === -1)
            return -1;
        const wordIndex = wordStarts.indexOf(matchStart);
        if (wordIndex !== -1)
            return wordIndex;
        from = matchStart + 1;
    }
}
// Demo: log the word index found for each search text.
// Expected output, in order: -1 (not found), -1, 4, 1, 3
['', 'my date', 'date of his', 'of my', 'birthday'].forEach((searchText) => {
    console.log(findIndex(words, searchText));
});
CodePudding user response:
I think you should loop over the array instead of joining it; that way you already have the index/object and can simply return it:
// Sample data: the words of the text in order, each tagged with an id.
const words = [
  { id: 1, content: "date" },
  { id: 2, content: "of" },
  { id: 3, content: "my" },
  { id: 4, content: "birthday" },
  { id: 5, content: "date" },
  { id: 6, content: "of" },
  { id: 7, content: "his" },
  { id: 8, content: "birthday" },
];
const uniqueSentence = "date of";
// The sentence as individual words, so its length in words is known.
const uniq = uniqueSentence.split(" ");
// Collect matches by pushing into an array, so no `false` placeholders
// have to be filtered out afterwards.
const senteces: (typeof words)[] = [];
words.forEach((word, i) => {
  // Cheap pre-check: the first word of the sentence must match the current position.
  if (word.content !== uniq[0]) {
    return;
  }
  // Take as many words as the sentence has and check that they match it exactly.
  const candidate = words.slice(i, i + uniq.length);
  if (candidate.map((w) => w.content).join(" ") === uniqueSentence) {
    // Keep all the words of this occurrence of the sentence.
    senteces.push(candidate);
  }
});
// The result is an array of arrays, where each inner array is one found instance of the sentence with all its words
console.log(senteces);
Edit: changed the code slightly to reflect the changed question.