I have a long continuous string that looks something like this:
let myString = "onetwothreefourfivesixseveneightnineteneleventwelvethirteenfourteen";
It does not have any separators to easily target.
So how can I iterate over it and split the words so it ends up like:
splitString = ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen"];
Preferably with JavaScript.
CodePudding user response:
The problem here is the lack of separators as you have mentioned - this makes it impossible for the software to know where the words begin and end.
Given that you know the words that will show up, my technique would be so:
NOTE: This does not take into account the possibility of overlapping words and assumes none of the words are possible subsets of other words...
- Iterate the known words
- Search (indexOf) the string for each known word and note down its positions in the string
- Sort the values by the index values
- Generate an array with the values contained in the order found
/**
* This assumes that:
* - Input words are not subsets of other input words
*/
/**
 * Find every non-overlapping occurrence of a word inside a string.
 *
 * @param {string} inputString - The text to search.
 * @param {string} inputWord - The word to look for.
 * @returns {{index: number, word: string}[]} One entry per occurrence, in
 *   ascending index order. Empty when the word does not occur or is empty.
 */
function findAll(inputString, inputWord) {
  const indices = [];
  // An empty word matches at every position and would never advance the cursor.
  if (inputWord.length === 0) {
    return indices;
  }
  let index = inputString.indexOf(inputWord);
  while (index !== -1) {
    indices.push({ index, word: inputWord });
    // Resume just past this match; restarting from a fixed offset would
    // rediscover the same occurrence forever.
    index = inputString.indexOf(inputWord, index + inputWord.length);
  }
  return indices;
}
/**
 * Collect the positions of all known words inside the input string.
 *
 * @param {string} inputString - The text to search.
 * @param {Iterable<string>} inputWords - The words to look for.
 * @returns {Array} The findAll() results for each word, joined into one flat
 *   list in the order the words were supplied (not yet sorted by position).
 */
function splitWords(inputString, inputWords) {
  // Each word yields its own list of matches; flatten them into one list.
  return [...inputWords].flatMap((word) => findAll(inputString, word));
}
/**
 * Order match records by their position and reduce them to plain words.
 *
 * @param {{index: number, word: string}[]} inputArr - Match records to order.
 * @returns {string[]} The words sorted by ascending index.
 */
const orderWords = (inputArr) =>
  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's array as a side effect.
  [...inputArr].sort((a, b) => a.index - b.index).map(({ word }) => word);
/**
 * Example: recover the words from a separator-less string.
 */
const myString = "onetwothreefourfivesixseveneightnineteneleventwelvethirteenfourteen";

// Every word expected to appear in myString
const inputWords = [
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine",
  "ten",
  "eleven",
  "twelve",
  "thirteen",
  "fourteen",
];

// Locate each word first, then put the matches in reading order
const result = splitWords(myString, inputWords);
const ordered = orderWords(result);

console.dir(ordered);
/**
* Result:
[
'one', 'two',
'three', 'four',
'five', 'six',
'seven', 'eight',
'nine', 'ten',
'eleven', 'twelve',
'thirteen', 'four',
'fourteen'
]
*/
CodePudding user response:
If, as you said in the comments, you know the expected words, then create an array of these words and loop through your string to find them.
Note that the code below takes the length of the matched words into account so that you can find words such as "one hundred eighty five";
otherwise the loop would stop as soon as it finds "one".
You can read the comments in the code to better understand it.
// The merged text that has to be split back into words
var myString = "onetwothreefourfivesixseveneightnineteneleventwelvethirteenfourteentwentyfiveonehundredeightyfiveeightyfive";

// Vocabulary of words that may occur in the text; multi-word entries keep
// their spaces here even though the text itself contains none
var possibleWords = [
  "one", "two", "three", "four", "five", "six", "seven",
  "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
  "twenty five", "one hundred eighty five", "eighty five",
];
/**
 * Split a separator-less string into known words, always preferring the
 * longest word that matches at the current position.
 *
 * Entries in `possibleWords` may contain spaces (e.g. "twenty five"); they are
 * matched against the text with the spaces removed, but reported in their
 * original spaced form. Characters that do not start any known word are
 * skipped.
 *
 * @param {string} mergedString - The text without separators.
 * @param {string[]} possibleWords - The words expected to occur in the text.
 * @returns {string[]} The recognised words, in the order they occur.
 */
function separateString(mergedString, possibleWords) {
  // Strip the spaces once up front instead of on every comparison.
  const candidates = possibleWords.map((word) => ({
    word,
    compact: word.replace(/ /g, ''),
  }));

  const result = [];
  let position = 0;

  while (position < mergedString.length) {
    let best = null;
    let bestLength = 0;

    for (const candidate of candidates) {
      // Compare space-free lengths so "one hundred eighty five" beats "one".
      // Starting bestLength at 0 also rejects entries that are empty once
      // their spaces are removed.
      if (
        candidate.compact.length > bestLength &&
        mergedString.startsWith(candidate.compact, position)
      ) {
        best = candidate;
        bestLength = candidate.compact.length;
      }
    }

    if (best === null) {
      // Nothing starts here; move on by one character.
      position += 1;
    } else {
      result.push(best.word);
      // Continue right after the matched word.
      position += bestLength;
    }
  }

  return result;
}
// Print the words that separateString recovers from myString using possibleWords
console.log(separateString(myString, possibleWords));
<iframe name="sif1" sandbox="allow-forms allow-modals allow-scripts" frameborder="0"></iframe>