I'm using `components(separatedBy:)` to separate a string into an array of words (using a space `" "` as the separator).
// Split the sample sentence on single spaces with `components(separatedBy:)` and print the pieces.
let string = "This is a string."
let words = string.components(separatedBy: " ")
print(words) // Result: ["This", "is", "a", "string."]
This works fine, but I also want to get each word's range/index in the parent string. Here's my current brute-force attempt. It kind of works, but there's an extra space after some of the words and I don't know how it performs when applied to other strings.
// Same sample sentence, this time through the custom `words(separatedBy:)` extension,
// which returns `Word` values carrying both the text and an `Int` range.
let string = "This is a string."
let words = string.words(separatedBy: " ")
print(words)
// The listing below is the output reported in the question. Note that "This ", "is " and "a "
// keep their trailing separator, and that the last range (10..<16) spans only six of the
// seven characters of "string.".
/* Result:
[
Word(range: Range(0..<4), component: "This "),
Word(range: Range(5..<7), component: "is "),
Word(range: Range(8..<9), component: "a "),
Word(range: Range(10..<16), component: "string."),
]
*/
/// ...
/// A piece of a string together with its position in that string.
struct Word {
    /// The component's range in the parent string, expressed as `Character`
    /// offsets counted from the start of the string.
    var range: Range<Int>
    /// The component's text, without any separator.
    var component: String
}

extension String {
    /// Splits the string at every occurrence of `separator` and returns each
    /// non-empty piece together with its `Character`-offset range.
    ///
    /// Separators are never part of a returned component or range. Empty pieces
    /// (produced by leading, trailing or consecutive separators) are omitted, so
    /// an empty string yields an empty array.
    ///
    /// - Parameter separator: The text that delimits words. It may be longer than
    ///   one character. If it is empty, nothing can match and the whole string is
    ///   returned as a single word.
    /// - Returns: The words in order of appearance.
    func words(separatedBy separator: String) -> [Word] {
        // An empty separator never matches; the whole string is the only candidate.
        guard !separator.isEmpty else {
            return isEmpty ? [] : [Word(range: 0..<count, component: self)]
        }

        let separatorLength = separator.count
        var words: [Word] = []

        // Each `String.Index` is paired with a running `Int` offset so that no
        // `distance(from:to:)` call (a linear walk) is needed per character.
        var wordStart = startIndex
        var wordStartOffset = 0
        var cursor = startIndex
        var cursorOffset = 0

        /// Records the word that ends just before `cursor`, skipping empty pieces.
        func flushWord() {
            guard wordStart < cursor else { return }
            words.append(
                Word(range: wordStartOffset..<cursorOffset,
                     component: String(self[wordStart..<cursor]))
            )
        }

        while cursor < endIndex {
            // Character-wise comparison, so a match always spans exactly
            // `separatorLength` characters of the receiver.
            if self[cursor...].starts(with: separator) {
                flushWord()
                cursor = index(cursor, offsetBy: separatorLength)
                cursorOffset += separatorLength
                wordStart = cursor
                wordStartOffset = cursorOffset
            } else {
                formIndex(after: &cursor)
                cursorOffset += 1
            }
        }

        // The final word has no separator after it; close it explicitly.
        flushWord()
        return words
    }
}
Is there a built-in function that not only splits a string into an array, but also returns the ranges? If not, is there something I can use that is less brute-force?
CodePudding user response:
You can use `enumerateSubstrings(in:options:)` and pass the `.byWords` option:
extension StringProtocol {
    /// The words of the receiver, as enumerated with the `.byWords` option, each
    /// paired with the range it occupies in the receiver.
    var wordsAndRanges: [(word: String, range: Range<Index>)] {
        var pairs: [(word: String, range: Range<Index>)] = []
        enumerateSubstrings(in: startIndex..<endIndex, options: .byWords) { text, location, _, _ in
            // The substring is optional in the callback; entries without text are skipped.
            if let text = text {
                pairs.append((word: text, range: location))
            }
        }
        return pairs
    }
}
// Print each word, the slice its range selects from the original string, and the range itself.
let string = "This is a string."
string.wordsAndRanges.forEach { entry in
    print("word:", entry.word)
    print("substring:", string[entry.range])
    print("range:", entry.range)
}
Or using a Word struct as you tried:
/// A word found in a string, paired with where it was found.
struct Word {
    /// Location of the word inside the string it was taken from.
    let range: Range<String.Index>
    /// The word's text.
    let component: String
}

extension StringProtocol {
    /// The words of the receiver, as enumerated with the `.byWords` option,
    /// in enumeration order.
    var words: [Word] {
        var collected: [Word] = []
        enumerateSubstrings(in: startIndex..<endIndex, options: .byWords) { text, location, _, _ in
            // The substring is optional in the callback; entries without text are skipped.
            if let text = text {
                collected.append(Word(range: location, component: text))
            }
        }
        return collected
    }
}
// Print each word's text, the slice its range selects from the original string, and the range itself.
let string = "This is a string."
string.words.forEach { entry in
    print("word:", entry.component)
    print("subsequence", string[entry.range])
    print("range", entry.range)
}