#include<iostream>
#include<vector>
#include<cstdlib>
#include<thread>
#include<array>
#include<iterator>
#include<algorithm>
#include<functional>
#include<numeric>//accumulate
int sum_of_digits(int num, int sum = 0) {
//calculate sum of digits recursively till the sum is single digit
if (num == 0) {
if (sum / 10 == 0)
return sum;
return sum_of_digits(sum);
}
return sum_of_digits(num / 10, sum num % 10);
}
template<typename T, typename Iterator>
int sum_of_digits(Iterator begin, Iterator end) {
//not for array temp workout
T copy(std::distance(begin,end));
std::copy(begin, end, copy.begin());
std::for_each(copy.begin(), copy.end(), [](int& i) {i = sum_of_digits(i); });
return sum_of_digits(std::accumulate(begin, end, 0));
}
template<typename T, typename Iterator>
void sum_of_digits(Iterator begin, Iterator end, int& sum) {
sum = sum_of_digits<T, Iterator>(begin, end);
}
template<typename T>
int series_computation(const T& container) {
return sum_of_digits<T, typename T::const_iterator>(container.begin(), container.end());
}
#define MIN_THREAD 4
#define DEBUGG
template<typename T>
int parallel_computation(const T& container, const int min_el) {
if (container.size() < min_el) {
#ifdef DEBUGG
std::cout << "no multithreading" << std::endl;
#endif
return series_computation<T>(container);
}
const unsigned int total_el = container.size();
const unsigned int assumed_thread = total_el / min_el;
const unsigned hardwarethread = std::thread::hardware_concurrency();
const unsigned int thread_count = std::min<unsigned int>(hardwarethread == 0 ? MIN_THREAD : hardwarethread, assumed_thread) - 1;//one thread is main thread
const unsigned int el_per_thread = total_el / thread_count;
#ifdef DEBUGG
std::cout << "thread count: " << thread_count << " element per thread: " << el_per_thread << std::endl;
#endif
std::vector<std::thread> threads;
threads.reserve(thread_count);
using result_type = std::vector<int>;
result_type results(thread_count);
results.reserve(thread_count);
auto it_start = container.begin();
for (int i = 0; i < thread_count; i ) {
auto it_end = it_start;
std::advance(it_end, el_per_thread);
threads.push_back(std::thread([&]() {sum_of_digits<T, typename T::const_iterator>(it_start, it_end, std::ref(results[i])); }));
//threads.push_back( std::thread{ sum_of_digits<T,typename T::const_iterator>, it_start, it_end, std::ref(results[i]) });
it_start = it_end;
std::cout << "iterator " << i << std::endl;
}
results[thread_count - 1] = sum_of_digits<T, typename T::const_iterator>(it_start, container.end());
std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
return series_computation<T>(results);
}
#define SIZE 1000
int main() {
std::vector<int> array(SIZE);
for (auto& curr : array) {
curr = std::rand();
}
for (auto& curr : array) {
//std::cout <<curr<< std::endl;
}
int series_val = series_computation<std::vector<int>>(array);
std::cout << "series val: " << series_val << std::endl;
int parallel_val = parallel_computation<std::vector<int>>(array, 25);
std::cout << "parallel val: " << parallel_val << std::endl;
return 0;
}
I am trying to calculate the sum of digits(recursive) of a randomly generated vector using std::thread
but in the results
vector only the result of the last element(i.e main thread) is stored and the child thread doesnot update the referenced results[i]
.
What is causing this behaviour?
Also, inside the for loop of parallel_combination
, this code works
threads.push_back(std::thread([&]() {sum_of_digits<T, typename T::const_iterator>(it_start, it_end, std::ref(results[i])); }));
but this doesnot and the error is: Error: '<function-style-cast>': cannot convert from 'initializer list' to 'std::thread'
.
What's wrong with the below one?
threads.push_back( std::thread{ sum_of_digits<T,typename T::const_iterator>, it_start, it_end, std::ref(results[i]) });
CodePudding user response:
Your code has a data race here:
threads.push_back(std::thread([&]() {sum_of_digits<T, typename T::const_iterator>(it_start, it_end, std::ref(results[i])); }));
it_start = it_end;
The lambda uses it_start
and then without any synchronization you immediately modify it in the main thread. Capture it_start
and it_end
by-copy. Also std::ref
is pointless there. You are not passing a reference to the std::thread
constructor, but just to a normal direct function call. Also, as I mentioned in the comments, template arguments in a function call can be deduced:
threads.push_back(std::thread([it_start,it_end,&results](){
sum_of_digits(it_start, it_end, results[i]); }));
it_start = it_end;
threads.push_back( std::thread{ sum_of_digits<T,typename T::const_iterator>, it_start, it_end, std::ref(results[i]) });
fails, because there are two function templates of which sum_of_digits<T,typename T::const_iterator>
could be a specialization. It is impossible to pass overload sets to functions and since there is no way to deduce which one you mean, it will fail.
There are some smaller issues in the code, e.g. pointless reserve
after resizing/construction of vectors or
std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
which is not guaranteed to work, because taking the address of a member function of a standard library function has unspecified behavior. Instead just use a loop:
for(auto& t : threads) {
t.join();
}
and possibly some others.