Home > Software engineering >  proper syntax for using std::threads
proper syntax for using std::threads

Time:01-19

I am implementing a parallel accumulator class in C . The implementation of the class is as follows:

#include <iostream>
#include <thread>
#include <cstring>
#include "cblas.h"

class ParallelAccumulator {
public:
    int num_contributions;
    double** contributions;
    int* is_contributing;
    int num_elements;
    
    ParallelAccumulator(int num_contributions, int num_elements) {
        this->num_contributions = num_contributions;
        this->num_elements = num_elements;
        contributions = new double*[num_contributions];
        is_contributing = new int[num_contributions];
        for (int i = 0; i < num_contributions; i  ) {
            contributions[i] = new double[num_elements];
            is_contributing[i] = 0;
        }
    }
    
    void reset() {
        for (int i = 0; i < num_contributions; i  ) {
            is_contributing[i] = 0;
        }
    }
    
    void zero_contributions() {
        for (int i = 0; i < num_contributions; i  ) {
            memset(contributions[i], 0, num_elements * sizeof(double));
        }
    }
    
    int check_out_contribution() {
        for (int i = 0; i < num_contributions; i  ) {
            if (is_contributing[i] == 0) {
                is_contributing[i] = 1;
                return i;
            }
        }
        return -1;
    }
    
    void check_in_contribution(int contrib_index) {
        is_contributing[contrib_index] = 0;
    }
    
    void reduce(double* output) {
        for (int i = 0; i < num_contributions; i  ) {
            if (is_contributing[i] == 1) {
                cblas_daxpy(num_elements, 1.0, contributions[i], 1, output, 1);
            }
        }
    }
    
    ~ParallelAccumulator() {
        for (int i = 0; i < num_contributions; i  ) {
            delete[] contributions[i];
        }
        delete[] contributions;
        delete[] is_contributing;
    }
};

However, I am having compilation issues when I create the threads to test the class as follows:

void test_function(ParallelAccumulator& accumulator, double* output, int id) {
    int contrib_index = accumulator.check_out_contribution();
    if (contrib_index == -1) {
        std::cout << "Error: no available contrib arrays" << std::endl;
        return;
    }
    double* contrib = accumulator.contributions[contrib_index];
    for (int i = 0; i < accumulator.num_elements; i  ) {
        contrib[i] = id;
    }
    accumulator.check_in_contribution(contrib_index);
}

int main() {
    int num_contributions = 4;
    int num_elements = 10;
    double output[num_elements];
    ParallelAccumulator accumulator(num_contributions, num_elements);
    /* problematic code start here  */
    std::thread t1(test_function, std::ref(accumulator), output, 1);
    std::thread t2(test_function, std::ref(accumulator), output, 2);
    std::thread t3(test_function, std::ref(accumulator), output, 3);
    std::thread t4(test_function, std::ref(accumulator), output, 4);
    /* problematic code end here  */
    t1.join();
    t2.join();
    t3.join();
    t4.join();
    accumulator.reduce(output);
    for (int i = 0; i < num_elements; i  ) {
        std::cout << output[i] << " ";
    }
    std::cout << std::endl;
    return 0;
}

The compilation errors are:

parallel_accumulator.cpp:87:67: error: no matching function for call to 'std::thread::thread(void (&)(ParallelAccumulator&, double*, int), std::reference_wrapper<ParallelAccumulator>, double [num_elements], int)'    87 |     std::thread t1(test_function, std::ref(accumulator), output, 1);
      |                                                                   ^ In file included from /usr/local/Cellar/gcc/11.3.0_2/include/c  /11/thread:43,
                 from parallel_accumulator.cpp:2: /usr/local/Cellar/gcc/11.3.0_2/include/c  /11/bits/std_thread.h:127:7: note: candidate: 'template<class _Callable, class ... _Args, class> std::thread::thread(_Callable&&, _Args&& ...)'   127 |       thread(_Callable&& __f, _Args&&... __args)
      |       ^~~~~~ /usr/local/Cellar/gcc/11.3.0_2/include/c  /11/bits/std_thread.h:127:7: note:   template argument deduction/substitution failed: parallel_accumulator.cpp:87:67: note:   variable-sized array type 'double (&)[num_elements]' is not a valid template argument    87 |    std::thread t1(test_function, std::ref(accumulator), output, 1);
      |                                                                   ^ In file included from /usr/local/Cellar/gcc/11.3.0_2/include/c  /11/thread:43,
                 from parallel_accumulator.cpp:2: /usr/local/Cellar/gcc/11.3.0_2/include/c  /11/bits/std_thread.h:157:5: note: candidate: 'std::thread::thread(std::thread&&)'   157 |     thread(thread&& __t) noexcept
      |     ^~~~~~ /usr/local/Cellar/gcc/11.3.0_2/include/c  /11/bits/std_thread.h:157:5: note:   candidate expects 1 argument, 4 provided /usr/local/Cellar/gcc/11.3.0_2/include/c  /11/bits/std_thread.h:121:5: note: candidate: 'std::thread::thread()'   121 |     thread() noexcept
= default;
      |     ^~~~~~

What is the proper syntax to fix the error? What modifications can I make to this implementation to make it work properly and have as much flexibility as possible?

CodePudding user response:

Thanks a lot to all the participants in the discussion above, in particular to @doron. The code with the correct syntax is as follows:

    constexpr int num_elements = 10;
    std::thread t1(test_function, std::ref(accumulator), &output[0], 1);
    std::thread t2(test_function, std::ref(accumulator), &output[0], 2);
    std::thread t3(test_function, std::ref(accumulator), &output[0], 3);
    std::thread t4(test_function, std::ref(accumulator), &output[0], 4);
    

CodePudding user response:

There are several ways to call std::thread::thread(), when in doubt, use the simplest one using a lambda..

This sure isn't the most pedantic answer, but it does not penalise performance and always works.

// quick and efficient
std::thread t1([&]() { test_function(accumulator), &output[0], 1); });
// etc...

// If you do not like vague lambda captures
std::thread t1([&accumulator, &output]() { test_function(accumulator), &output[0], 1); });
// etc...
  • Related