I follow others' guides to rewrite my string split function.
#include<string>
#include<list>
#include<optional>
#include<malloc.h>
#include<string_view>
#include<iostream>
#include<vector>
#include "include/common.h"
using std::list;
using std::string;
using std::optional;
using std::vector;
using std::string_view;
/* there not use optional is better cuz list.length == 0 repr that */
/*
@brief: split string by substring
*/
vector<string_view> split(string content, string_view delim_str) {
size_t tail_pos = 0;
size_t head_pos = std::string::npos;
vector<string_view> str_view_vec;
while((head_pos = content.find(delim_str, tail_pos))
!= std::string::npos)
{
DBG("tail_pos = %zu, head_pos = %zu", tail_pos, head_pos);
str_view_vec.emplace_back(&content[tail_pos], head_pos - tail_pos);
tail_pos = head_pos delim_str.length();
}
if(tail_pos != content.length() - 1) {
str_view_vec.emplace_back(&content[tail_pos], content.length() - tail_pos);
}
return str_view_vec;
}
int main() {
string s("123 12312 123213a as dasd as asd");
std::cout << string_view("asdasd 123") << std::endl;
vector<string_view> l = split(s, string_view(" "));
for(const auto &i : l) {
std::cout << i << std::endl;
}
}
then compile and print:
❯ clang -o String String.cpp -g -std=c 17 -I../ -lstdc ; ./String
asdasd 123
[x] split(), String.cpp:49 : tail_pos = 0, head_pos = 3
[x] split(), String.cpp:49 : tail_pos = 4, head_pos = 9
[x] split(), String.cpp:49 : tail_pos = 10, head_pos = 17
[x] split(), String.cpp:49 : tail_pos = 18, head_pos = 20
[x] split(), String.cpp:49 : tail_pos = 21, head_pos = 25
[x] split(), String.cpp:49 : tail_pos = 26, head_pos = 28
@S
a
as
dasd
as
asd
This result is so weird cuz it doesn't print any chars in 123 12312 123213a
but prints three lines of random chars;
I use lldb to verify the return value and found it seems ok;
* thread #1, name = 'String', stop reason = breakpoint 1.1
frame #0: 0x00000000004027f8 String`split(content=error: summary string parsing error, delim_str=(_M_len = 1, _M_str = " ")) at String.cpp:56:5
53 if(tail_pos != content.length() - 1) {
54 str_view_vec.emplace_back(&content[tail_pos], content.length() - tail_pos);
55 }
-> 56 return str_view_vec;
57 }
58 int main() {
59
(lldb) p str
Available completions:
str_view_vec -- vector<basic_string_view<char, char_traits<char> >, allocator<basic_string_view<char, char_traits<char> > > > &
struct
(lldb) p str_view_vec
(std::vector<std::basic_string_view<char, std::char_traits<char> >, std::allocator<std::basic_string_view<char, std::char_traits<char> > > >) $0 = size=7 {
[0] = (_M_len = 3, _M_str = "123 12312 123213a as dasd as asd")
[1] = (_M_len = 5, _M_str = "12312 123213a as dasd as asd")
[2] = (_M_len = 7, _M_str = "123213a as dasd as asd")
[3] = (_M_len = 2, _M_str = "as dasd as asd")
[4] = (_M_len = 4, _M_str = "dasd as asd")
[5] = (_M_len = 2, _M_str = "as asd")
[6] = (_M_len = 3, _M_str = "asd")
}
(lldb) q
So I want to figure out where these weird chars come from and why alphabetic lines are normal only digital lines are not print well.
CodePudding user response:
Change this
vector<string_view> split(string content, string_view delim_str) {
to this
vector<string_view> split(const string& content, string_view delim_str) {
Your string views are 'pointing' at content
which is destroyed when the split
function exits. Using a reference avoids this.
If you want to avoid this whole issue, then use strings instead of string views. String views don't work independently of the string they view.