I have written the following code to compute the value of pi and it works:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
double step;
#define NUM_THREADS 16
int main()
{
int i, nthreads;
double tdata, pi, sum[NUM_THREADS];
omp_set_num_threads(NUM_THREADS);
step = 1.0 / (double)num_steps;
tdata = omp_get_wtime();
#pragma omp parallel
{
int i, id, nthrds;
double x;
id = omp_get_thread_num();
nthrds = omp_get_num_threads();
if (id == 0)
nthreads = nthrds;
for (i = id, sum[id] = 0.0; i < num_steps; i = i nthrds)
{
x = (i 0.5) * step;
sum[id] = sum[id] 4.0 / (1.0 x * x);
}
}
tdata = omp_get_wtime() - tdata;
for (i = 0, pi = 0.0; i < nthreads; i )
{
pi = pi sum[i] * step;
}
printf("pi=%f and it took %f seconds", pi, tdata);
}
Then I learned that I can use #pragma omp parallel for
and then I don't have to manually break the computation to different threads. So I wrote this:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
double step;
#define NUM_THREADS 16
int main()
{
int i;
double tdata, pi, x, sum = 0.0;
omp_set_num_threads(NUM_THREADS);
step = 1.0 / (double)num_steps;
tdata = omp_get_wtime();
#pragma omp parallel for
{
for (i = 0; i < num_steps; i )
{
x = (i 0.5) * step;
sum = sum 4.0 / (1.0 x * x);
}
}
tdata = omp_get_wtime() - tdata;
pi = sum * step;
printf("pi = %f and compute time = %f seconds", pi, tdata);
}
This, however, doesn't work and outputs wrong values of pi. What am I doing wrong? Thank you.
CodePudding user response:
There are two race conditions in your code that are leading to incorrect results:
- One was already pointed out in the comments, i.e., the updates of the variable
sum
(shared among threads), which can be solved with the reduction clause; - The other is the updates of the variable
x
, which is also shared among the threads. This race condition can be solved by simply making that variable private to threads.
Two possible solutions:
Using the OpenMP's private constructor
#pragma omp parallel for reduction( :sum) private(x) for (i = 0; i < num_steps; i ) { x = (i 0.5) * step; sum = sum 4.0 / (1.0 x * x); }
Declaring the variable 'x' within the scope of the parallel for
#pragma omp parallel for reduction( :sum) private(x) for (i = 0; i < num_steps; i ) { double x = (i 0.5) * step; sum = sum 4.0 / (1.0 x * x); }
The final code could look like the following:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
#define NUM_THREADS 16
int main()
{
double sum = 0.0;
omp_set_num_threads(NUM_THREADS);
double step = 1.0 / (double)num_steps;
double tdata = omp_get_wtime();
#pragma omp parallel for reduction( :sum)
for (int i = 0; i < num_steps; i )
{
double x = (i 0.5) * step;
sum = sum 4.0 / (1.0 x * x);
}
tdata = omp_get_wtime() - tdata;
double pi = sum * step;
printf("pi = %f and compute time = %f seconds", pi, tdata);
}