Problems with MPI_Bcast and MPI

I have a problem with implementing this program in MPI. I would like to send the data created in process 0 (process Master) to the other processes (process Slave).

In particular I want to send nCentroids, nPointsSlave and flag data via MPI_Bcast.

While I want to split the vector of points created in process 0 to other processes via MPI_Scatter. Obviously, the divided point vector does not have to be sent to process 0, so process 0 does not have to receive any point vector data, only the Slaves can process the vector data.

Can anyone help me to make everything work properly? I'm really in trouble with this piece of code and don't know how to get out of it.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>

typedef struct
{
    double x;
    double y;
} Point;


int main(int argc, char* argv[])
{

MPI_Init(&argc, &argv);
int rank, size;

MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);

MPI_Datatype MPI_PUNTO;
MPI_Datatype type=MPI_DOUBLE;
int blocklen=2;
MPI_Aint disp=0;
MPI_Type_create_struct(1,&blocklen,&disp,&type,&MPI_PUNTO);
MPI_Type_commit(&MPI_PUNTO);


int nCentroids=0;
int nPoints=0;
int nPointsSlave=0;
int flag=0;

Point* points;
Point* buffreceive;

if(rank==0)
{
    int i;
    int nSlave=size-1;
    char *pathInput;

    nCentroids=2;
    nPoints=12;
    flag=1;
    // divide the points between the slaves
    nPointsSlave=nPoints/nSlave;

    pathInput = "/home/rob/eclipse-workspace/Test/in.txt";

    // space allocation for the vector of points
    points=(Point*)malloc(sizeof(Point)*nPoints);

    FILE *input;
    input=fopen(pathInput,"r");

    // load the data present on the file into the points vector
    for(i=0; i<nPoints;i  )
    fscanf(input,"%lf,%lf",&points[i].x,&points[i].y);
    fclose(input);


    buffreceive=(Point*)malloc(sizeof(Point)*nPointsSlave);

    MPI_Bcast(&flag,1,MPI_INT,0,MPI_COMM_WORLD);
    MPI_Bcast(&nCentroids,1,MPI_INT,0,MPI_COMM_WORLD);
    MPI_Bcast(&nPointsSlave,1,MPI_INT,0,MPI_COMM_WORLD);
    MPI_Scatter(points, nPointsSlave, MPI_PUNTO, buffreceive, nPointsSlave, MPI_PUNTO, 0, MPI_COMM_WORLD);

}else{
    printf("Process %d receive flag: %d\n", rank, flag);
    printf("Process %d receive nCentroids: %d\n", rank, nCentroids);
    printf("Process %d receive nPointsSlave: %d\n",rank, nPointsSlave);

    for(int i=0; i<nPointsSlave; i  )
    {
        printf("Process %d value X %f\n", rank, buffreceive[i].x);
        printf("Process %d value Y %f\n", rank, buffreceive[i].y);
    }
}

MPI_Finalize();
return 0;
}

But the result I get is that all variables are always equal to 0. The for loop to be able to print the points of the vector sent via MPI_Scatter is not read. This is the output my program produces.

Process 3 receive flag: 0
Process 3 receive nCentroids: 0
Process 3 receive nPointsSlave: 0
Process 1 receive flag: 0
Process 1 receive nCentroids: 0
Process 1 receive nPointsSlave: 0
Process 2 receive flag: 0
Process 2 receive nCentroids: 0
Process 2 receive nPointsSlave: 0

CodePudding user response：

The reason other processes do not receive any data is that the above code did not call MPI_Bcast, and MPI_Scatter for other processes relatively.

By convention, you do not need to differentiate these one-to-all mpi commands MPI_Bcast and MPI_Scatter for ROOT and other processes.

The structure of the code should looks like:

int main(int argc, char* argv[]) {    
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    ... // 

    int nCentroids=0;
    int nPoints=0;
    int nPointsSlave=0;
    int flag=0;

    Point* points = nullptr;
    Point* buffreceive = nullptr;
    
    if(rank==0) {
        ... // calulate size related code here
    }

    MPI_Bcast(&flag,1,MPI_INT,0,MPI_COMM_WORLD);
    MPI_Bcast(&nCentroids,1,MPI_INT,0,MPI_COMM_WORLD);
    MPI_Bcast(&nPointsSlave,1,MPI_INT,0,MPI_COMM_WORLD);

    if(rank==0) {
       points=(Point*)malloc(sizeof(Point)*nPoints);  // root allocate space for whole data.  
       ... // root then reads date from the file
     }
     else {
       buffreceive=(Point*)malloc(sizeof(Point)*nPointsSlave);  // reserver space for other processes
    }
    
    MPI_Scatter(points, nPointsSlave, MPI_PUNTO, buffreceive, nPointsSlave, MPI_PUNTO, 0, MPI_COMM_WORLD);

    ... // print the value and etc

    if(rank==0) {
        free(points);
    }
    else { 
        free(buffreceive);
    }
    MPI_Finalize();
    return 0;
}

P.S.
You don't need to worry "ROOT process does not need to scatter, or broadcast date to itself." As MPI APIs are smart enough to handle this case. They will finish the job without unnecessary cost automatically.

CodePudding user response：

The problems you have are:

buffreceive must be allocated in all processes
MPI_Bcast and MPI_Scatter must be allocated in all processes

Note that buffreceive size must be know before allocation, so allocation must occur after MPI_Bcast

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>

typedef struct
{
    double x;
    double y;
} Point;


int main(int argc, char* argv[])
{

    MPI_Init(&argc, &argv);
    int rank, size;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    MPI_Datatype MPI_PUNTO;
    // type simplification 
    MPI_Type_contiguous(2, MPI_DOUBLE, &MPI_PUNTO);
    MPI_Type_commit(&MPI_PUNTO);

    int nCentroids=0;
    int nPoints=0;
    int nPointsWorker=0;
    int flag=0;

    Point* points;
    Point* buffreceive;

    if(rank==0)
    {
        int i;
        int nWorkers = size;
        char *pathInput;

        nCentroids=2;
        nPoints=12;
        flag=1;
        // divide the points between the workers
        nPointsWorker=nPoints/nWorkers;

        pathInput = "/home/rob/eclipse-workspace/Test/in.txt";

        // space allocation for the vector of points
        points = malloc(sizeof(Point)*nPoints);

        FILE *input = fopen(pathInput,"r");

        // TODO: test if input is not null

        // load the data present on the file into the points vector
        for(i=0; i<nPoints;i  )
            fscanf(input,"%lf,%lf",&points[i].x,&points[i].y);

        fclose(input);
    }

    // Here, process 0 will behave like all other worker

    MPI_Bcast(&flag,1,MPI_INT,0,MPI_COMM_WORLD);
    MPI_Bcast(&nCentroids,1,MPI_INT,0,MPI_COMM_WORLD);
    MPI_Bcast(&nPointsWorker,1,MPI_INT,0,MPI_COMM_WORLD);

    // you must wait to know size to allocate before allocating
    buffreceive = malloc(sizeof(Point)*nPointsWorker);

    MPI_Scatter(points, nPointsSlave, MPI_PUNTO, buffreceive, nPointsSlave, MPI_PUNTO, 0, MPI_COMM_WORLD);


    printf("Process %d receive flag: %d\n", rank, flag);
    printf("Process %d receive nCentroids: %d\n", rank, nCentroids);
    printf("Process %d receive nPointsSlave: %d\n",rank, nPointsWorker);

    for(int i=0; i<nPointsWorker; i  )
    {
        printf("Process %d value X %f\n", rank, buffreceive[i].x);
        printf("Process %d value Y %f\n", rank, buffreceive[i].y);
    }

    free(buffreceive);
    if (0 == rank) {
        free(points);
    }
    MPI_Finalize();
    return 0;
}