I want to convert backwards substitution sequential C code to parallel and I have an error while rank 1-size receiving data MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);
. The logic is a pipeline between processes.
MY code:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>
#include <math.h>
int main(int argc, char* argv[]){
int i,j,N;
float **a, *b;
float *local_x, *prev_x, *total_proc_x;
int tag = 100;
//MPI variables
int rank, size;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(argc != 2){
if(rank == 0) printf("Using : %s <matrix_size>\n", argv[0]);
return 0;
}
N = strtol(argv[1], NULL, 10);
/* Allocate space for matrices */
a = (float **) malloc ( N * sizeof ( float *) );
for ( i = 0; i < N; i )
a[i] = ( float * ) malloc ( (i 1) * sizeof ( float ) );
b = ( float * ) malloc ( N * sizeof ( float ) );
if(rank == 0){
srand ( time ( NULL));
for (i = 0; i < N; i ) {
b[i] = (float)rand()/(RAND_MAX*2.0-1.0);
a[i][i] = 2.0 (float)rand()/(RAND_MAX*2.0-1.0);
for (j = 0; j < i; j )
a[i][j] = (float)rand()/(RAND_MAX*2.0-1.0);
for (j=i; j<N; j )
a[i][j] = 0.0;
}
}
//broadcast data (a,b)
MPI_Bcast(a, N*N, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(b, N, MPI_FLOAT, 0, MPI_COMM_WORLD);
int block_size = N/size;
int *counts = (int *) malloc(size*sizeof(int));
int *displacements = (int *) malloc(size*sizeof(int));
int start, end;
for(i=0; i<size; i ){
start = 0;
for(j=0; j<i; j ){
start = block_size;
if(size-(j 1) < N%size) start ;
}
end = start block_size;
if(size-(i 1) < N%size) end ;
counts[i] = end - start;
displacements[i] = start;
}
local_x = (float *) malloc(counts[rank]*sizeof(float));
for(i=0; i<counts[rank]; i ){
local_x[i] = 0.0;
}
prev_x = (float *) malloc(displacements[rank]*sizeof(float));
if(rank == 0) printf("Size: %d\n", size);
printf("Rank %d, Displacement: %d, Count: %d\n", rank, displacements[rank], counts[rank]);
//calculation
float sum;
if(rank == 0){
printf("Rank %d, OK\n", rank);
for(i=0; i<counts[0]; i ){
sum = 0.0;
for(j=0; j<i; j ){
sum = sum (local_x[j] * a[i][j]);
}
local_x[i] = (b[i] - sum) / a[i][i];
}
MPI_Send(local_x, displacements[rank 1], MPI_FLOAT, rank 1, tag, MPI_COMM_WORLD);
printf("Process %d sent data to process %d\n", rank, rank 1);
}
if(rank != 0 && rank != (size-1)){
printf("Rank %d, OK\n", rank);
MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);
printf("Process %d received data from process %d", rank, rank-1);
for(i=displacements[rank]; i<(displacements[rank] counts[rank]); i ){
sum = 0.0;
//unowned rows
for(j=0; j<displacements[rank]; j ){
sum = sum (prev_x[j] * a[i][j]);
}
//owned rows
for(j=displacements[rank]; j<i; j ){
sum = sum (local_x[j-displacements[rank]] * a[i][j]);
}
local_x[i] = (b[i] - sum) / a[i][i];
}
//concatenate prev and local x
total_proc_x = (float *) malloc((displacements[rank] counts[rank])*sizeof(float));
for(i=0; i<displacements[rank]; i ){
total_proc_x[i] = prev_x[i];
}
for(i=0; i<counts[rank]; i ){
total_proc_x[i displacements[rank]] = local_x[i];
}
//send to next process
MPI_Send(total_proc_x, displacements[rank 1], MPI_FLOAT, rank 1, tag, MPI_COMM_WORLD);
}
if(rank == (size-1)){
printf("Rank %d, OK\n", rank);
MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);
printf("Process %d received data from process %d", rank, rank-1);
for(i=displacements[rank]; i<(displacements[rank] counts[rank]); i ){
sum = 0.0;
//unowned rows
for(j=0; j<displacements[rank]; j ){
sum = sum (prev_x[j] * a[i][j]);
}
//owned rows
for(j=displacements[rank]; j<i; j ){
sum = sum (local_x[j-displacements[rank]] * a[i][j]);
}
local_x[i] = (b[i] - sum) / a[i][i];
}
//concatenate prev and local x
float *total_proc_x = (float *) malloc((displacements[rank] counts[rank])*sizeof(float));
for(i=0; i<displacements[rank]; i ){
total_proc_x[i] = prev_x[i];
}
for(i=0; i<counts[rank]; i ){
total_proc_x[i displacements[rank]] = local_x[i];
}
/* Print result */
for (i = 0; i < N; i ) {
for (j = 0; j <= i; j )
printf ("%f \t", a[i][j]);
printf ("%f \t%f\n", total_proc_x[i], b[i]);
}
/* Check result */
for (i = 0; i < N; i ) {
sum = 0.0;
for (j = 0; j <= i; j )
sum = sum (total_proc_x[j]*a[i][j]);
if (fabsf(sum - b[i]) > 0.00001) {
printf("%f != %f\n", sum, b[i]);
printf("Validation Failed...\n");
}
}
}
MPI_Finalize();
return 0;
}
Output:
$ mpicc -o backsub_mpi backsub_mpi.c
$ mpiexec -n 4 ./backsub_mpi 20
Rank 1, Displacement: 5, Count: 5
Rank 1, OK
Rank 3, Displacement: 15, Count: 5
Rank 3, OK
Size: 4
Rank 0, Displacement: 0, Count: 5
Rank 0, OK
Process 0 sent data to process 1
Rank 2, Displacement: 10, Count: 5
Rank 2, OK
-----------------------------------------------------------------------------
One of the processes started by mpirun has exited with a nonzero exit
code. This typically indicates that the process finished in error.
If your process did not finish in error, be sure to include a "return
0" or "exit(0)" in your C code before exiting the application.
PID 4105 failed on node n0 (127.0.0.1) due to signal 11.
-----------------------------------------------------------------------------
mpirun failed with exit status 11
CodePudding user response:
Your problem is in how you allocate the matrix
a = (float **) malloc ( N * sizeof ( float *) );
for ( i = 0; i < N; i )
a[i] = ( float * ) malloc ( (i 1) * sizeof ( float ) );
You're trying to allocate a triangular array, but you send it
MPI_Bcast(a, N*N, MPI_FLOAT, 0, MPI_COMM_WORLD);
as a square array.
- Since
a
is an array or pointers, the actual matrix elements are probably not in contiguous memory.
You need to allocate double *a
as a long enough single array, and then do some index translation to figure out where element i,j
goes into this array. Something like i*(i 1)/2 j
.