I would like to use swift 5
to programmatically split a sentence into words as well as punctuations including spaces.
- input:
"Hello, I am Albert Einstein."
- output:
["Hello", ",", " ", "I", " ", "am", " ", "Albert", " ", "Einstein", "."]
I adopted the code provided by @Duyen-Hoa (Split text into array while maintaining the punctuation in Swift) and slightly modified it into the following code (I basically just deleted the parts deleting the space). But, I cannot get arraies for including just a space. Instead, I am somehow getting space included at the beginning of the array with each word.
func sentenceSplitter(text_input: String) -> [String] {
var list = [String]()
var currentSubString = "";
text.enumerateSubstrings(in: text.startIndex..<text.endIndex, options: String.EnumerationOptions.byComposedCharacterSequences) { (substring, substringRange, enclosingRange, value) in
if let _subString = substring {
if (!currentSubString.isEmpty &&
(_subString.compare(" ") == .orderedSame
|| _subString.compare(",") == .orderedSame
|| _subString.compare(".") == .orderedSame
|| _subString.compare(";") == .orderedSame
|| _subString.compare("!") == .orderedSame
|| _subString.compare("?") == .orderedSame
)
) {
//create word if see any of those character and currentSubString is not empty
list.append(currentSubString)
currentSubString = _subString
} else {
currentSubString = _subString
}
}
}
//last word
if (!currentSubString.isEmpty) {
list.append(currentSubString)
}
return list
}
Could you let me know what I am doing wrong?
Original code from: Duyen-Hoa
var str = "Hello, I am Albert Einstein."
var list = [String]()
var currentSubString = "";
//enumerate to get all characters including ".", ",", ";", " "
str.enumerateSubstrings(in: str.startIndex..<str.endIndex, options: String.EnumerationOptions.byComposedCharacterSequences) { (substring, substringRange, enclosingRange, value) in
if let _subString = substring {
if (!currentSubString.isEmpty &&
(_subString.compare(" ") == .orderedSame
|| _subString.compare(",") == .orderedSame
|| _subString.compare(".") == .orderedSame
|| _subString.compare(";") == .orderedSame
)
) {
//create word if see any of those character and currentSubString is not empty
list.append(currentSubString)
currentSubString = _subString.trimmingCharacters(in: CharacterSet.whitespaces )
} else {
//add to current sub string if current character is not space.
if (_subString.compare(" ") != .orderedSame) {
currentSubString = _subString
}
}
}
}
//last word
if (!currentSubString.isEmpty) {
list.append(currentSubString)
}
CodePudding user response:
Here you don't need to remove
.trimmingCharacters(in: CharacterSet.whitespaces)
Instead you need to detect where it's ignoring space
from the _subString
and there you need to add
if _subString == " " {
list.append(_subString)
}
And your code will look like below:
let str = "Hello, I am Albert Einstein."
var list = [String]()
var currentSubString = "";
str.enumerateSubstrings(in: str.startIndex..<str.endIndex, options: String.EnumerationOptions.byComposedCharacterSequences) { (substring, substringRange, enclosingRange, value) in
if let _subString = substring {
if (!currentSubString.isEmpty &&
(_subString.compare(" ") == .orderedSame
|| _subString.compare(",") == .orderedSame
|| _subString.compare(".") == .orderedSame
|| _subString.compare(";") == .orderedSame
)
) {
list.append(currentSubString)
//If _subString is a space
if _subString == " " {
list.append(_subString)
}
currentSubString = _subString.trimmingCharacters(in: CharacterSet.whitespaces)
} else {
if (_subString.compare(" ") != .orderedSame) {
currentSubString = _subString
} else {
//If _subString is a space at start
if _subString == " " {
list.append(_subString)
}
}
}
}
}
if (!currentSubString.isEmpty) {
list.append(currentSubString)
}
print(list)
Here input is "Hello, I am Albert Einstein."
And output will be:
["Hello", ",", " ", "I", " ", "am", " ", "Albert", " ", "Einstein", "."]
Hope it will help.