Matrix multipication is very slow C-CodePudding

I dont know why but my matrix multipication is very slow and I need to optimize it. and also the print of the matrix (1000X1000) taking long time. The aim of the function is to calculate the matrix exponential, but my main problem is that this 2 actions are very slow for large matrices like 1000X1000.

These 2 actions implemented at poweMat() function and printeResult() function. Here is the code:

    #define M 1000
    #define e 2.71828182845904523536;
    
    //declaration of the functions
    void sumMatrices(vector<vector<double> >& mat1, vector<vector<double> >& mat2);
    void printResult(vector<vector<double> >&matRes);
    void mulMatWithFactorial(long factorialValue);
    long factorialCalculate(int n);
    void initializeMatrix();
    void initializeIdenticalMatrix();
    void checkIfTheMatrixIsDiagonal();
    void calculateExpoMatrixWithDiagonalMatrix();
    void readMatrixFromFile();
    void powerMat(vector<vector<double> >& mat, int powNum);
    
    //declaration of the variables
    vector<vector<double>> inputMatrix(M, vector<double>(M));
    vector<vector<double>> sumMatrixResult(M, vector<double>(M));
    vector<vector<double>> powerMatrixResult(M, vector<double>(M));
    vector<vector<double>> mulFactorialMatrixResult(M, vector<double>(M));
    vector<vector<double>> finalMatrixResult(M, vector<double>(M));
    vector<vector<double>> identicalMatrix(M, vector<double>(M));
    vector<vector<vector<double>>> listOfMatrices;
    bool matrixIsNilpotent = false;
    int diagonaMatrixlFlag = 1;
    
    int main() {
        //variables
        long factorialValue;
        
    initializeIdenticalMatrix();
    readMatrixFromFile();

    //check if the matrix is diagonal - so we will have easier and faster compute
    checkIfTheMatrixIsDiagonal();
    if (diagonaMatrixlFlag == 1) {
        calculateExpoMatrixWithDiagonalMatrix();
        goto endOfLoop;
    }
    
    //loop for taylor series
    for (int i = 0; i < 5; i  ) {
        if (i == 0) { // first we add identical matrix when the power is 0
            sumMatrices(finalMatrixResult, identicalMatrix); // summarize between this 2 matrices
            finalMatrixResult = sumMatrixResult; //copy matrices
        }
        
        if (i == 1) { // we add the matrix itself because the power is 1
            sumMatrices(finalMatrixResult, inputMatrix);
            finalMatrixResult = sumMatrixResult; //copy matrices
        }
        if (i > 1 ) {
            powerMat(inputMatrix, i);
            if (matrixIsNilpotent) { // it means that A^i is 0 for some integer, so the series terminates after a finite number
                goto endOfLoop;
            }
            factorialValue = factorialCalculate(i); // calculate the factorial of i
            mulMatWithFactorial(factorialValue); // multiply (1/i) * matrix^i - like in the algorithm
            sumMatrices(finalMatrixResult, mulFactorialMatrixResult); // summarize it with the previous result
            finalMatrixResult = sumMatrixResult; //copy matrices
        }
    }
    
    endOfLoop:
    printResult(finalMatrixResult); // print the final result - e^M
    return 0;
}
    
//Summarize matrices
void sumMatrices(vector<vector<double> >& mat1, vector<vector<double> >& mat2) {
    for (int i = 0; i < M; i  )
        for (int j = 0; j < M; j  )
            sumMatrixResult[i][j] = mat1[i][j]   mat2[i][j];
}

//Print matrix
void printResult(vector<vector<double> >& matRes) {
    for (int i = 0; i < M; i  ) {
        for (int j = 0; j < M; j  ) {
            printf("%f ", matRes[i][j]);
            if (j == M - 1) {
                printf("\n");
            }
        }
    }
}


//Calculate the factorial of n
long factorialCalculate(int n) {
    long factorial = 1.0;
    for (int i = 1; i <= n;   i) {
        factorial *= i;
    }
    return factorial;
}

// mutiply the matrix with scalar
void mulMatWithFactorial(long factorialValue) {
    for (int i = 0; i < M; i  ) {
        for (int j = 0; j < M; j  ) {
            mulFactorialMatrixResult[i][j] = powerMatrixResult[i][j] * 1/factorialValue;
        }
    }
}

//initialize matrix
void initializeMatrix() {
    for (int i = 0; i < M; i  ) {
        for (int j = 0; j < M; j  ) {
            powerMatrixResult[i][j] = 0;
        }
    }
}

void checkIfTheMatrixIsDiagonal() {
    for (int i = 0; i < M; i  ) {
        for (int j = 0; j < M; j  ) {
            if (i == j)
            {
                if (inputMatrix[i][j] == 0) {
                    diagonaMatrixlFlag = 0;
                    goto endOfLoop;
                }

            }
            else
            {
                if (inputMatrix[i][j] != 0) {
                    diagonaMatrixlFlag = 0;
                    goto endOfLoop;
                }

            }
        }
    }
    endOfLoop:
    return;
}

void calculateExpoMatrixWithDiagonalMatrix() {
    for (int i = 0; i < M; i  ) {
        for (int j = 0; j < M; j  ) {
            if (i == j)
            {
                for (int k = 0; k < inputMatrix[i][j];   k)// loop to calculate the pow of e^alpha
                {
                    finalMatrixResult[i][j] *= e;
                }
            }
            else
            {
                finalMatrixResult[i][j] = 0;
            }
        }
    }
}

void readMatrixFromFile() {
    ifstream f("inv_matrix(1000x1000).txt");
    for (int i = 0; i < M; i  )
        for (int j = 0; j < M; j  ) {
            f >> inputMatrix[i][j];
            if (f.peek() == ',')
                f.ignore();
        }
    listOfMatrices.push_back(inputMatrix);
}

void initializeIdenticalMatrix() {
    for (int i = 0; i < M; i  ) {
        for (int k = 0; k < M; k  ) {
            if (i == k) {
                identicalMatrix[i][k] = 1;
            }
            else {
                identicalMatrix[i][k] = 0;
            }
        }
    }
}

void powerMat(vector<vector<double> >& mat, int powNum) {
    int counterForNilpotent = 0;

    initializeMatrix();
    auto start = high_resolution_clock::now();

    for (int i = 0; i < M; i  ) {
        for (int k = 0; k < M; k  ) {
            for (int j = 0; j < M; j  ) {
                powerMatrixResult[i][j]  = mat[i][k] * listOfMatrices[powNum-2][k][j];
            }
        }
    }
    auto stop = high_resolution_clock::now();
    auto duration = duration_cast<seconds>(stop - start);
    cout << duration.count() << " seconds" << endl; // checking run time

    listOfMatrices.push_back(powerMatrixResult);

    // check if after we we did A^i , the matrix is equal to 0
    for (int i = 0; i < M; i  ) {
        for (int j = 0; j < M; j  ) {
            if (powerMatrixResult[i][j] == 0) {
                counterForNilpotent  ;
            }
        }
    }

    if (counterForNilpotent == M * M) {
        matrixIsNilpotent = true;
    }
}

CodePudding user response：

Going through each element of an array of size "n" will have some computational efficiency of O(n^2), meaning for large arrays it will take a while but won't be "life-time-of-the-universe" lengths of time.

Usually to do operations on massive arrays like this, they're reduced in some form first so that the computation can be closer to O(n) or better using some truths about reduced forms of matrices.

So, a faster implementation for matrix multiplication would start with some rref() function upon both matrices and then only evaluating parts of those matrices that would have objects in the columns and rows.

Here are some great places to review/learn (for free) Linear Algebra:

"3b1b (2016): Essence of Linear Algebra" = https://www.youtube.com/watch?v=kjBOesZCoqc&list=PL0-GT3co4r2y2YErbmuJw2L5tW4Ew2O5B

"MIT OpenCourseWare (2009): Linear Algebra" = https://www.youtube.com/watch?v=ZK3O402wf1c&list=PL49CF3715CB9EF31D&index=1

CodePudding user response：

Use SSE2. It’s not a library. It’s a method to use cpu vector hardware.

You set up operations to run in parallel.

https://en.wikipedia.org/wiki/SSE2