I am implementing a parallel accumulator class in C . The implementation of the class is as follows:
#include <iostream>
#include <thread>
#include <cstring>
#include "cblas.h"
class ParallelAccumulator {
public:
int num_contributions;
double** contributions;
int* is_contributing;
int num_elements;
ParallelAccumulator(int num_contributions, int num_elements) {
this->num_contributions = num_contributions;
this->num_elements = num_elements;
contributions = new double*[num_contributions];
is_contributing = new int[num_contributions];
for (int i = 0; i < num_contributions; i ) {
contributions[i] = new double[num_elements];
is_contributing[i] = 0;
}
}
void reset() {
for (int i = 0; i < num_contributions; i ) {
is_contributing[i] = 0;
}
}
void zero_contributions() {
for (int i = 0; i < num_contributions; i ) {
memset(contributions[i], 0, num_elements * sizeof(double));
}
}
int check_out_contribution() {
for (int i = 0; i < num_contributions; i ) {
if (is_contributing[i] == 0) {
is_contributing[i] = 1;
return i;
}
}
return -1;
}
void check_in_contribution(int contrib_index) {
is_contributing[contrib_index] = 0;
}
void reduce(double* output) {
for (int i = 0; i < num_contributions; i ) {
if (is_contributing[i] == 1) {
cblas_daxpy(num_elements, 1.0, contributions[i], 1, output, 1);
}
}
}
~ParallelAccumulator() {
for (int i = 0; i < num_contributions; i ) {
delete[] contributions[i];
}
delete[] contributions;
delete[] is_contributing;
}
};
However, I am having compilation issues when I create the threads to test the class as follows:
void test_function(ParallelAccumulator& accumulator, double* output, int id) {
int contrib_index = accumulator.check_out_contribution();
if (contrib_index == -1) {
std::cout << "Error: no available contrib arrays" << std::endl;
return;
}
double* contrib = accumulator.contributions[contrib_index];
for (int i = 0; i < accumulator.num_elements; i ) {
contrib[i] = id;
}
accumulator.check_in_contribution(contrib_index);
}
int main() {
int num_contributions = 4;
int num_elements = 10;
double output[num_elements];
ParallelAccumulator accumulator(num_contributions, num_elements);
/* problematic code start here */
std::thread t1(test_function, std::ref(accumulator), output, 1);
std::thread t2(test_function, std::ref(accumulator), output, 2);
std::thread t3(test_function, std::ref(accumulator), output, 3);
std::thread t4(test_function, std::ref(accumulator), output, 4);
/* problematic code end here */
t1.join();
t2.join();
t3.join();
t4.join();
accumulator.reduce(output);
for (int i = 0; i < num_elements; i ) {
std::cout << output[i] << " ";
}
std::cout << std::endl;
return 0;
}
The compilation errors are:
parallel_accumulator.cpp:87:67: error: no matching function for call to 'std::thread::thread(void (&)(ParallelAccumulator&, double*, int), std::reference_wrapper<ParallelAccumulator>, double [num_elements], int)' 87 | std::thread t1(test_function, std::ref(accumulator), output, 1);
| ^ In file included from /usr/local/Cellar/gcc/11.3.0_2/include/c /11/thread:43,
from parallel_accumulator.cpp:2: /usr/local/Cellar/gcc/11.3.0_2/include/c /11/bits/std_thread.h:127:7: note: candidate: 'template<class _Callable, class ... _Args, class> std::thread::thread(_Callable&&, _Args&& ...)' 127 | thread(_Callable&& __f, _Args&&... __args)
| ^~~~~~ /usr/local/Cellar/gcc/11.3.0_2/include/c /11/bits/std_thread.h:127:7: note: template argument deduction/substitution failed: parallel_accumulator.cpp:87:67: note: variable-sized array type 'double (&)[num_elements]' is not a valid template argument 87 | std::thread t1(test_function, std::ref(accumulator), output, 1);
| ^ In file included from /usr/local/Cellar/gcc/11.3.0_2/include/c /11/thread:43,
from parallel_accumulator.cpp:2: /usr/local/Cellar/gcc/11.3.0_2/include/c /11/bits/std_thread.h:157:5: note: candidate: 'std::thread::thread(std::thread&&)' 157 | thread(thread&& __t) noexcept
| ^~~~~~ /usr/local/Cellar/gcc/11.3.0_2/include/c /11/bits/std_thread.h:157:5: note: candidate expects 1 argument, 4 provided /usr/local/Cellar/gcc/11.3.0_2/include/c /11/bits/std_thread.h:121:5: note: candidate: 'std::thread::thread()' 121 | thread() noexcept
= default;
| ^~~~~~
What is the proper syntax to fix the error? What modifications can I make to this implementation to make it work properly and have as much flexibility as possible?
CodePudding user response:
Thanks a lot to all the participants in the discussion above, in particular to @doron. The code with the correct syntax is as follows:
constexpr int num_elements = 10;
std::thread t1(test_function, std::ref(accumulator), &output[0], 1);
std::thread t2(test_function, std::ref(accumulator), &output[0], 2);
std::thread t3(test_function, std::ref(accumulator), &output[0], 3);
std::thread t4(test_function, std::ref(accumulator), &output[0], 4);
CodePudding user response:
There are several ways to call std::thread::thread(), when in doubt, use the simplest one using a lambda..
This sure isn't the most pedantic answer, but it does not penalise performance and always works.
// quick and efficient
std::thread t1([&]() { test_function(accumulator), &output[0], 1); });
// etc...
// If you do not like vague lambda captures
std::thread t1([&accumulator, &output]() { test_function(accumulator), &output[0], 1); });
// etc...