Home > Back-end >  MPI Backwards Substitution error while slaves receives x from previous processes
MPI Backwards Substitution error while slaves receives x from previous processes

Time:05-09

I want to convert backwards substitution sequential C code to parallel and I have an error while rank 1-size receiving data MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);. The logic is a pipeline between processes.

MY code:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>
#include <math.h>



int main(int argc, char* argv[]){
    int i,j,N;
    float **a, *b;
    float *local_x, *prev_x, *total_proc_x;
    int tag = 100;
    
    //MPI variables
    int rank, size;
    MPI_Status status;
    
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
    
    
    if(argc != 2){
        if(rank == 0) printf("Using : %s <matrix_size>\n", argv[0]);
        return 0;
    }
    
    N = strtol(argv[1], NULL, 10);
    
    /* Allocate space for matrices */
    a = (float **) malloc ( N * sizeof ( float *) );
    for ( i = 0; i < N; i  ) 
        a[i] = ( float * ) malloc ( (i 1) * sizeof ( float ) );
    b = ( float * ) malloc ( N * sizeof ( float ) );
    
    
    if(rank == 0){      
        srand ( time ( NULL));
        for (i = 0; i < N; i  ) {
            b[i] = (float)rand()/(RAND_MAX*2.0-1.0);
            a[i][i] = 2.0 (float)rand()/(RAND_MAX*2.0-1.0);
            for (j = 0; j < i; j  ) 
                a[i][j] = (float)rand()/(RAND_MAX*2.0-1.0);
            for (j=i; j<N; j  )
                a[i][j] = 0.0;
        } 
        
    }
    
    //broadcast data (a,b)
    MPI_Bcast(a, N*N, MPI_FLOAT, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N, MPI_FLOAT, 0, MPI_COMM_WORLD);
    
    
        
    int block_size = N/size;
    int *counts = (int *) malloc(size*sizeof(int));
    int *displacements = (int *) malloc(size*sizeof(int));
    int start, end;
    for(i=0; i<size; i  ){
        start = 0;
        for(j=0; j<i; j  ){
            start  = block_size;
            if(size-(j 1) < N%size) start  ;
        }
        end = start   block_size;
        if(size-(i 1) < N%size) end  ;
        counts[i] = end - start;
        displacements[i] = start;
    }
        
    local_x = (float *) malloc(counts[rank]*sizeof(float));
    for(i=0; i<counts[rank]; i  ){
        local_x[i] = 0.0;
    }
    prev_x = (float *) malloc(displacements[rank]*sizeof(float));
    
    if(rank == 0) printf("Size: %d\n", size);
    
    printf("Rank %d, Displacement: %d, Count: %d\n", rank, displacements[rank], counts[rank]);
    //calculation
    float sum;
    if(rank == 0){
        printf("Rank %d, OK\n", rank);
        for(i=0; i<counts[0]; i  ){
            sum = 0.0;          
            for(j=0; j<i; j  ){
                sum = sum   (local_x[j] * a[i][j]);
            }
            local_x[i] = (b[i] - sum) / a[i][i];
        }
        MPI_Send(local_x, displacements[rank 1], MPI_FLOAT, rank 1, tag, MPI_COMM_WORLD);
        printf("Process %d sent data to process %d\n", rank, rank 1);
    }
    
    
    if(rank != 0 && rank != (size-1)){
        printf("Rank %d, OK\n", rank);
        MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);
        printf("Process %d received data from process %d", rank, rank-1);
        for(i=displacements[rank]; i<(displacements[rank]   counts[rank]); i  ){
            sum = 0.0;
            //unowned rows
            for(j=0; j<displacements[rank]; j  ){
                sum = sum   (prev_x[j] * a[i][j]);
            }
            
            //owned rows
            for(j=displacements[rank]; j<i; j  ){
                sum = sum   (local_x[j-displacements[rank]] * a[i][j]);
            }
            
            local_x[i] = (b[i] - sum) / a[i][i];
        }
        
        //concatenate prev and local x
        total_proc_x = (float *) malloc((displacements[rank]   counts[rank])*sizeof(float));
        for(i=0; i<displacements[rank]; i  ){
            total_proc_x[i] = prev_x[i];
        }
        
        for(i=0; i<counts[rank]; i  ){
            total_proc_x[i displacements[rank]] = local_x[i];
        }
        
        //send to next process
        MPI_Send(total_proc_x, displacements[rank 1], MPI_FLOAT, rank 1, tag, MPI_COMM_WORLD);
    }
    
    
    
    if(rank == (size-1)){
        printf("Rank %d, OK\n", rank);
        MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);
        printf("Process %d received data from process %d", rank, rank-1);
        
        for(i=displacements[rank]; i<(displacements[rank]   counts[rank]); i  ){
            sum = 0.0;
            //unowned rows
            for(j=0; j<displacements[rank]; j  ){
                sum = sum   (prev_x[j] * a[i][j]);
            }
            
            //owned rows
            for(j=displacements[rank]; j<i; j  ){
                sum = sum   (local_x[j-displacements[rank]] * a[i][j]);
            }
            
            local_x[i] = (b[i] - sum) / a[i][i];
        }
        
        //concatenate prev and local x
        float *total_proc_x = (float *) malloc((displacements[rank]   counts[rank])*sizeof(float));
        for(i=0; i<displacements[rank]; i  ){
            total_proc_x[i] = prev_x[i];
        }
        
        for(i=0; i<counts[rank]; i  ){
            total_proc_x[i displacements[rank]] = local_x[i];
        }
        

        /* Print result */
        for (i = 0; i < N; i  ) {
            for (j = 0; j <= i; j  )
                printf ("%f \t", a[i][j]);  
            printf ("%f \t%f\n", total_proc_x[i], b[i]);
        }
        
        /* Check result */
        for (i = 0; i < N; i  ) {
            sum = 0.0;
            for (j = 0; j <= i; j  ) 
                sum = sum   (total_proc_x[j]*a[i][j]);  
            if (fabsf(sum - b[i]) > 0.00001) {
                printf("%f != %f\n", sum, b[i]);
                printf("Validation Failed...\n");
            }
        }
    }
    
    MPI_Finalize();
    
    return 0;
    
}


Output:

$ mpicc -o backsub_mpi backsub_mpi.c
$ mpiexec -n 4 ./backsub_mpi 20

Rank 1, Displacement: 5, Count: 5
Rank 1, OK
Rank 3, Displacement: 15, Count: 5
Rank 3, OK
Size: 4
Rank 0, Displacement: 0, Count: 5
Rank 0, OK
Process 0 sent data to process 1
Rank 2, Displacement: 10, Count: 5
Rank 2, OK
-----------------------------------------------------------------------------
One of the processes started by mpirun has exited with a nonzero exit
code.  This typically indicates that the process finished in error.
If your process did not finish in error, be sure to include a "return
0" or "exit(0)" in your C code before exiting the application.

PID 4105 failed on node n0 (127.0.0.1) due to signal 11.
-----------------------------------------------------------------------------
mpirun failed with exit status 11

CodePudding user response:

Your problem is in how you allocate the matrix

a = (float **) malloc ( N * sizeof ( float *) );
for ( i = 0; i < N; i  ) 
    a[i] = ( float * ) malloc ( (i 1) * sizeof ( float ) );
  1. You're trying to allocate a triangular array, but you send it

    MPI_Bcast(a, N*N, MPI_FLOAT, 0, MPI_COMM_WORLD);

as a square array.

  1. Since a is an array or pointers, the actual matrix elements are probably not in contiguous memory.

You need to allocate double *a as a long enough single array, and then do some index translation to figure out where element i,j goes into this array. Something like i*(i 1)/2 j.

  • Related