This program gives a "Segmentation fault" or a "free(): invalid pointer" based on the input file used. The bug is reproducible.
This is very strange especially because free
is not called.
Here is the complete program.
// Open a file, read line by line,
// and for each (loooong) line, fraction it into
// small, justified lines.
// Justification done by adding spaces to existing spaces.
#include <iostream>
#include <iterator>
#include <sstream>
#include <fstream>
#include <list>
#include <vector>
#include <math.h>
#include <random>
#include <functional>
const int pageWidth = 50; // max width for a (justified) line
typedef std::vector<std::string> WordList;
typedef std::vector<int> SpaceList;
// ========
// HELPERS
// ========
// Helper to return "cccccc...ccc"
std::string repeat (const int n, char c) {
std::string ret;
for (int i{0}; i < n; i) {
ret = c;
}
return ret;
}
// "Random" int between min and max (pseudo-random: reproducible)
unsigned int random_pred(std::size_t salt, unsigned int min, unsigned int max) {
unsigned int output = min (salt % static_cast<int>(max - min 1));
return output;
}
// alpha is greater at center
float alpha_weight(float z) { return std::max(100*(sin(z))*(sin(z)), float(1)); }
// Weight of a space, ie probability to add here blank space
int weight(int x, unsigned int l)
{
float z = 3.141 * x / l;
return alpha_weight(z);
}
// line -> vector of words
WordList splitTextIntoWords( const std::string &text )
{
WordList words;
std::istringstream in(text);
std::copy(std::istream_iterator<std::string>(in),
std::istream_iterator<std::string>(),
std::back_inserter(words));
return words;
}
// ======
// CORE
// ======
// Give each space a weight, a 'probability' to be expanded
SpaceList charge_spaces ( int l, const WordList & words, SpaceList spp)
{
SpaceList sp_weights;
std::string p{ words[0] }, h;
int wg;
for (size_t i = 0; i < words.size()-1; i) {
wg = weight(spp[i], l);
sp_weights.push_back(wg);
}
return sp_weights;
}
// Given weighted list of spaces positions, 'randomly' pick one
int random_sp( const SpaceList& spw, std::size_t salt ) {
std::string m;
unsigned int i{48}, total{0}; // ASCII 48 = ' '
for (const int & n : spw) {
char ch = static_cast<char>(i); // '1'; '2'; '3' ...
std::string segment = repeat(n, ch); // "11"; "2222"; "3333333333333" ....
m = segment;
total = n;
i;
} // now, m like "11112222222222333333333333333333334444444455555", of length <total>
int mqrh = random_pred(salt, 0, total); // Get 0 <= random <= total
char iss = m[mqrh]; // Read the char at this position (for example, more likely '3' here)
int ret = static_cast<int>(iss) - 48; // Example: '3' -> 3
return ret; // Example: return 3
}
// Add spaces to a (small) line, to justify it.
// We need to expand it by <excess> spaces.
std::string justifyLine( std::string line, WordList ww, SpaceList space_positions, int excess )
{
SpaceList spwg = charge_spaces(line.size(), ww, space_positions);
SpaceList spadd{spwg.begin(), spwg.end()}; // number of spaces to add after word <i>
for (size_t k = 0; k < ww.size()-1; k) {
spadd[k] = 1; // By default, 1 space after each word
}
int winner; // Which space will win additional space ?
std::size_t str_hash = std::hash<std::string>{}(line) / 1000; // 'random' seed, reproducible
for (int i{0}; i < excess; i) { // Where to add these <excess> needed spaces ?
std::size_t salt = str_hash 37*i;
winner = random_sp(spwg, salt);
spadd[winner] = spadd[winner] 1; // space after <winner> word is incremented !
spwg[winner] = std::max( spwg[winner] / 10, 1); // Weight of winner is decreased
}
// Build up the justified line
std::string justified;
for (size_t j = 0; j < ww.size()-1; j) {
justified.append(ww[j]); // Add next word
justified.append(spadd[j], ' '); // Add few spaces
}
justified.append(ww.back()); // Add last word
std::cout << justified << std::endl;
return justified;
}
// Fraction a long line in several justified small lines
void justifyText( const std::string& text )
{
WordList words = splitTextIntoWords(text);
std::string line;
WordList ww;
SpaceList space_positions;
int position{0};
int nwords_in_line{0};
for (const std::string& word : words) {
size_t s = word.size();
if (line.size() s 1 <= pageWidth) { // next word fit into the line.
if (!line.empty()) {
line.append(" ");
space_positions.push_back(position );
}
line.append(word);
nwords_in_line ;
ww.push_back(word); // append this word to the list
position = s;
} else { // build a justified small line from the words added up
justifyLine(line, ww, space_positions, pageWidth - position);
line.clear(); // Cleaning for next chunk
ww.clear();
space_positions.clear();
line = word;
position = s;
nwords_in_line = 1;
ww.push_back(word); // don't forget the last word (that overflowed)
}
}
std::cout << line << std::endl; // Remaining of the long line
}
// =====
// main
// =====
int main () {
std::string line;
std::ifstream myfile ("candle.txt");
if (myfile.is_open())
{
while ( getline(myfile,line) )
{
justifyText(line);
}
myfile.close();
}
else std::cerr << "Unable to open file";
return 0;
}
File "candle.txt" is an ASCII text file, here is a copy.
The whole file gives free(): invalid pointer
, always at same position -- see below (1)
If cutting between the two markups in the PREFACE (deleting the chunk between the two CUT HERE
marks), program gives a Segmentation fault
.
Running with Valgrind gives this (very strange because repeat
function does not seem problematic)
Thread 1: status = VgTs_Runnable (lwpid 4487)
==4487== at 0x4838DEF: operator new(unsigned long) (vg_replace_malloc.c:342)
==4487== by 0x4990859: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_mutate(unsigned long, unsigned long, char const*, unsigned long) (in /usr/lib/x86_64-linux-gnu/libstdc .so.6.0.28)
==4487== by 0x4990F34: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::operator =(char) (in /usr/lib/x86_64-linux-gnu/libstdc .so.6.0.28)
==4487== by 0x10A406: repeat[abi:cxx11](int, char) (in /home/fig/Documents/cpp/cil)
==4487== by 0x10A8DD: random_sp(std::vector<int, std::allocator<int> > const&, unsigned long) (in /home/fig/Documents/cpp/cil)
==4487== by 0x10AB34: justifyLine(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::vector<int, std::allocator<int> >, int) (in /home/fig/Documents/cpp/cil)
==4487== by 0x10AF71: justifyText(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) (in /home/fig/Documents/cpp/cil)
==4487== by 0x10B195: main (in /home/xxx/Documents/cpp/cil)
Any idea welcome
(1) End of output:
We have several tests for oxygen besides the mere
burning of bodies. You have seen a candle burnt in
oxygen, or in the air; you have seen phosphorus
burnt in the air, or in oxygen; and you have seen
iron-filings burnt in oxygen. But we have other
tests besides these, and I am about to refer to
one or two of them for he purpose of carrying
CodePudding user response:
The crash occurs as a result of a consequence from an earlier bug. There's nothing (directly) wrong with any of the code in your backtrace. Although the bug effectively was traced to one of the function the actual crash was triggered from an earlier invocation of the same code, and the actual bug would be triggered later, after returning from the point of the crash.
One of my stock comments starts with: "Just because this is where the program crashes or reports an error doesn't mean this is where the problem is. C does not work this way." This is followed by explanation that a minimal reproduce example must be provided, which you mostly did. This allowed the problem to be reproduced trivially. The first instance of undefined behavior occured elsewhere, on line 109:
spadd[winner] = spadd[winner] 1;
valgrind has a very useful option: --vgdb-error=1
. This stops execution immediately when valgrind detects memory corruption, on this line, in this case. valgrind then gives instruction for attaching to the current process, with gdb
. Doing so immediately led to the observation that this value of winner
was -48. Modifying spadd[-48]
will only result in tears.
At this point it wasn't too difficult to backtrack to line 91, where -48
came from, which led to the actual bug, an off by 1:
int mqrh = random_pred(salt, 0, total);
total
here was always the same as m.size()
, and this duplicate logic resulted in the bug, since this parameter should be the last value in the range for random_pred
, and not one past it. The expected results here is to pick a random character from m
, so the valid range is 0
to m.size()-1
. If it wasn't for the duplicated logic, being mindful of how random_pred()
's parameters must be defined, the last parameter would've naturally be m.size()-1
. But the duplicated logic resulted in an indirect reference to the underlying value, total
, and this detail was forgot.
Another contributing factor to common kinds of bugs is going against the natural flow of how C defines ranges and sequences: not by the minimum and the maximum value, but the minimum and one past the maximum value. std::string::size()
, std::vector::size
, et. al., is one past the last valid index of the underlying container, and not the last valid index of the container. Similarly, end()
, the ending iterator is not the iterator for the last value in the sequence, but the iterator to the next, non-existent value in the sequence, "one past it".
If random_pred
was designed in harmony with the rest of the C library, its formula would simply involve min salt % (max-min)
instead of min salt % (max-min 1)
. Then it wouldn't matter if its third parameter was total
or m.size()
, it would've naturally worked either way.