Problems with finding the Longest Common Sequence of numbers of two arrays-CodePudding

This my Dynamic Programming code in C, but I do not understand why I am getting an output of 1 in place of 3.

Also can someone suggest how I should move forward with printing the longest sequence after I have found the longest seq length?

#include <stdio.h>

int max(int nmu1, int num2);
int findMaxLength(int a[], int b[], int n, int m);

int main() {
    int a[] = { 1, 2, 8, 2, 1 };
    int b[] = { 8, 2, 1, 4, 7 };

    int n = sizeof(a) / sizeof(a[0]);
    int m = sizeof(b) / sizeof(b[0]);

    printf("%d", findMaxLength(a, b, n, m));
}

int findMaxLength(int a[], int b[], int n, int m) {
    int dp[n   1][m   1];

    for (int i = 0; i <= n; i  )
        for (int j = 0; j <= m; j  )
            dp[i][j] = 0;

    for (int i = n - 1; i >= 0; i--) {
        for (int j = 0; j >= 0; j--) {
            if (a[i] == b[j])
                dp[i][j] = dp[i   1][j   1]   1;
        }
    }

    int maxm = 0;

    for (int i = 0; i < n; i  ) {
        for (int j = 0; j < m; j  ) {
            maxm = max(maxm, dp[i][j]);
        }
    }

    return maxm;
}

int max(int num1, int num2) {
    return (num1 > num2) ? num1 : num2;
}

CodePudding user response：

The inner loop for (int j = 0; j >= 0; j--) does nothing. You should instead write:

for (int j = m - 1; j >= 0; j--)

To display the actual substring, the function findMaxLength must keep track of the offsets of the best match as it finds a new one. You cannot use maxm = max(maxm, dp[i][j]); for this, you must use an explicit test.

Here is a modified version:

#include <stdio.h>
#include <stdlib.h>

int findMaxLength(const int a[], const int b[], int n, int m) {
    int dp[n   1][m   1];

    for (int i = 0; i <= n; i  ) {
        for (int j = 0; j <= m; j  )
            dp[i][j] = 0;
    }
    for (int i = n - 1; i >= 0; i--) {
        for (int j = m - 1; j >= 0; j--) {
            if (a[i] == b[j])
                dp[i][j] = dp[i   1][j   1]   1;
        }
    }

    int maxi = 0, maxj = 0, maxm = 0;

    for (int i = 0; i < n; i  ) {
        for (int j = 0; j < m; j  ) {
            if (maxm < dp[i][j]) {
                maxm = dp[i][j];
                maxi = i;
                maxj = j;
            }
        }
    }

    if (maxm > 0) {
        printf("a[%d..%d] == b[%d..%d] == [ %d",
               maxi, maxi   maxm - 1, maxj, maxj   maxm - 1, a[maxi]);
        for (int i = 1; i < maxm; i  ) {
            printf(", %d", a[maxi   i]);
        }
        printf(" ]\n");
    }
    return maxm;
}

int main() {
    int a[] = { 1, 2, 8, 2, 1 };
    int b[] = { 8, 2, 1, 4, 7 };
    int na = sizeof(a) / sizeof(a[0]);
    int nb = sizeof(b) / sizeof(b[0]);
    int len = findMaxLength(a, b, na, nb);
    printf("%d\n", len);
    return 0;
}

Output:

a[2..4] == b[0..2] == [ 8, 2, 1 ]
3

Note that findMaxLength uses at most 2 arrays at a time, so if you combine the 3 loops into one, you can reduce the size requirements from O(n.m) to O(m):

int findMaxLength(const int a[], const int b[], int n, int m) {
    int dp[2][m   1];
    int maxi = 0, maxj = 0, maxm = 0;

    for (int j = 0; j <= m; j  ) {
        dp[0][j] = 0;
    }
    dp[1][m] = 0;

    for (int i = n - 1; i >= 0; i--) {
        for (int j = m - 1; j >= 0; j--) {
            dp[1][j] = (a[i] == b[j]) * (dp[0][j   1]   1);
            if (maxm <= dp[1][j]) {
                maxm = dp[1][j];
                maxi = i;
                maxj = j;
            }
        }
        if (i-- == 0)
            break;
        for (int j = m - 1; j >= 0; j--) {
            dp[0][j] = (a[i] == b[j]) * (dp[1][j   1]   1);
            if (maxm <= dp[0][j]) {
                maxm = dp[0][j];
                maxi = i;
                maxj = j;
            }
        }
    }
    if (maxm > 0) {
        printf("a[%d..%d] == b[%d..%d] == [", maxi, maxi   maxm - 1, maxj, maxj   maxm - 1);
        for (int i = 0; i < maxm; i  ) {
            printf(" %d", a[maxi   i]);
        }
        printf(" ]\n");
    }
    return maxm;
}

(a[i] == b[j]) * (dp[0][j 1] 1) computes the value of dp[1][j] with branchless code, which is more efficient than your if statement.

With this alternative, I get a 10x performance improvement for larger array sizes, such as 16K numbers, but still suffer O(n.m) time complexity. More advanced algorithms are required to reduce the complexity of this classic Longest common substring problem.