The code I wrote in C for matrix multiplication with MPI reports a global time of approximately 5 seconds, but when I run the same thing in Python with mpi4py it takes very little time, only a few milliseconds. What is the problem with MPI in C? It doesn't feel like 5 seconds when I run it in the Linux shell: the output appears really fast, yet the global time is still reported as 5 seconds. The code below is in C:
#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"
void print_results(char *prompt, int a[N][N]);
/*
 * Multiplies two N x N integer matrices with MPI and prints timing figures.
 *
 * Rank 0 reads a row and a column count from stdin and fills a and b with
 * random digits. a is then scattered in chunks of N*N/size ints, b is
 * broadcast whole, every rank computes one row of the product into cc, and
 * the rows are gathered into c on rank 0.
 *
 * Timing as written: time1 is taken immediately after MPI_Init, i.e. before
 * rank 0 prompts for input, so every rank's duration includes the time spent
 * waiting for the user. The "Global runtime" is the MPI_SUM of all per-rank
 * durations, not the elapsed wall-clock time of the run.
 *
 * Limitations as written: row and col are not checked against N, and the
 * aa/cc buffers hold exactly N ints, so the chunk size N*N/size only fits
 * when the program is started with N processes.
 */
int main(int argc, char *argv[])
{
    int i, j, rank, size, sum = 0;
    int a[N][N];
    int b[N][N];
    int c[N][N];
    int aa[N], cc[N];
    int row, col;
    double time1, time2, duration, global;

    MPI_Init(&argc, &argv);
    /* Timer starts here, before the interactive input below. */
    time1 = MPI_Wtime();
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        printf("enter the number of row =");
        scanf("%d", &row);
        printf("enter the number of column =");
        scanf("%d", &col);

        srand(time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                a[i][j] = rand() % 10;
            }
        }

        /* Reseeding restarts the generator before b is filled. */
        srand(time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                b[i][j] = rand() % 10;
            }
        }
    }

    /* The other ranks block in the scatter until rank 0 has finished reading. */
    MPI_Scatter(a, N * N / size, MPI_INT, aa, N * N / size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N * N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    /* cc[i] = dot product of the local row aa with column i of b. */
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            sum = sum + aa[j] * b[j][i];
        }
        cc[i] = sum;
        sum = 0;
    }

    MPI_Gather(cc, N * N / size, MPI_INT, c, N * N / size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    time2 = MPI_Wtime();
    duration = time2 - time1;

    /* Adds the durations of all ranks together on rank 0. */
    MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        printf("Global runtime is %f\n", global);
    }
    printf("Runtime at %d is %f \n", rank, duration);

    MPI_Finalize();
    if (rank == 0)
        print_results("C = ", c);
}
/*
 * Prints a label followed by an N x N integer matrix to stdout.
 *
 * prompt: text printed on its own line before the matrix.
 * a:      matrix to print; one output line per row, each value preceded
 *         by a single space.
 *
 * Two blank lines are written before the label and after the last row.
 */
void print_results(char *prompt, int a[N][N])
{
    int i, j;

    printf("\n\n%s\n", prompt);
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            printf(" %d", a[i][j]);
        }
        printf("\n");
    }
    printf("\n\n");
}
The output it gives is
4
4
enter the number of row =enter the number of column =Global runtime is 5.975327
Runtime at 0 is 1.493793
Runtime at 1 is 1.493793
Runtime at 2 is 1.493877
Runtime at 3 is 1.493865
C =
78 83 142 116
128 138 236 194
39 49 112 71
96 109 204 156
Please let me know if there is some problem with the code!!
CodePudding user response:
As discussed in the comments, I have moved time1 = MPI_Wtime(); so that it comes after the user input, and added a barrier just before it.
Take a look at the modified code:
#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"
void print_results(char *prompt, int a[N][N]);
/*
 * Row-distributed N x N integer matrix product C = A * B using MPI.
 *
 * Rank 0 reads a row and a column count from stdin and fills that many
 * entries of A and B with random digits (the rest stay zero). A is scattered
 * one row per rank, B is broadcast whole, each rank computes its row of C,
 * and the rows are gathered on rank 0, which prints the result.
 *
 * The timer is started after the interactive input and after a barrier, so
 * the time spent waiting for the user is not measured and all ranks start
 * timing at nearly the same moment. "Global runtime" is the sum of the
 * per-rank durations (MPI_SUM).
 *
 * Must be started with exactly N processes: the per-rank buffers hold one
 * row of N ints. Returns EXIT_FAILURE on a wrong process count; aborts the
 * whole job on invalid input.
 */
int main(int argc, char *argv[])
{
    int i, j, rank, size, sum;
    /* Zero-initialised so that entries not filled below are still defined. */
    int a[N][N] = {{0}};
    int b[N][N] = {{0}};
    int c[N][N] = {{0}};
    int aa[N], cc[N];
    int row = 0, col = 0;
    double time1, time2, duration, global = 0.0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* aa and cc hold exactly one row; any other process count would make the
     * scatter/gather chunk size differ from N and overrun or underfill them. */
    if (size != N) {
        if (rank == 0) {
            fprintf(stderr, "this program must be run with exactly %d processes\n", N);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0) {
        printf("enter the number of row =");
        fflush(stdout); /* make the prompt visible before blocking on input */
        if (scanf("%d", &row) != 1 || row < 0 || row > N) {
            fprintf(stderr, "number of rows must be an integer between 0 and %d\n", N);
            /* The other ranks are already waiting in the barrier below. */
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            return EXIT_FAILURE;
        }
        printf("enter the number of column =");
        fflush(stdout);
        if (scanf("%d", &col) != 1 || col < 0 || col > N) {
            fprintf(stderr, "number of columns must be an integer between 0 and %d\n", N);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            return EXIT_FAILURE;
        }

        /* Seed once: reseeding with the same time(NULL) value before filling
         * b would restart the sequence and make b a copy of a. */
        srand((unsigned)time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                a[i][j] = rand() % 10;
            }
        }
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                b[i][j] = rand() % 10;
            }
        }
    }

    /* Synchronise first so every rank starts its clock after the input. */
    MPI_Barrier(MPI_COMM_WORLD);
    time1 = MPI_Wtime();

    MPI_Scatter(a, N, MPI_INT, aa, N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N * N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    /* cc[i] = dot product of the local row aa with column i of b. */
    for (i = 0; i < N; i++) {
        sum = 0;
        for (j = 0; j < N; j++) {
            sum += aa[j] * b[j][i];
        }
        cc[i] = sum;
    }

    MPI_Gather(cc, N, MPI_INT, c, N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    time2 = MPI_Wtime();
    duration = time2 - time1;

    /* Adds the durations of all ranks together on rank 0. */
    MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        printf("Global runtime is %f\n", global);
    }
    printf("Runtime at %d is %f \n", rank, duration);

    MPI_Finalize();
    if (rank == 0) {
        print_results("C = ", c);
    }
    return 0;
}
/*
 * Prints a label followed by an N x N integer matrix to stdout.
 *
 * prompt: text printed on its own line before the matrix.
 * a:      matrix to print; one output line per row, each value preceded
 *         by a single space.
 *
 * Two blank lines are written before the label and after the last row.
 */
void print_results(char *prompt, int a[N][N])
{
    int i, j;

    printf("\n\n%s\n", prompt);
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            printf(" %d", a[i][j]);
        }
        printf("\n");
    }
    printf("\n\n");
}
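By doing so, you no longer include the user input time in the measurement, and placing the barrier before the first timing call ensures that all processes start at nearly the same moment. Note also that the "global" value is the sum of the per-rank durations (MPI_SUM), so with 4 processes it is roughly four times the runtime of a single rank.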
Also beware that your code only works with a 4x4 matrix!
Apart from that, you should get something like:
mpirun -n 4 a.out
enter the number of row =4
enter the number of column =4
Global runtime is 0.005867
Runtime at 0 is 0.001474
Runtime at 1 is 0.001464
Runtime at 2 is 0.001464
Runtime at 3 is 0.001466