Longest Common Sequence of numbers of two arrays. How do I print the Longest Seq-CodePudding

Can someone suggest how I should move forward with printing the longest sequence after I have found the longest seq length?

for example here I have got the output as 3 but now I also want to print the corresponding sequence. I am really confused as to how to do that using dynamic programming method which i have implemented.

#include <stdio.h>

int max(int nmu1, int num2);
int findMaxLength(int a[], int b[], int n, int m);

int main() {
    int a[] = { 1, 2, 8, 2, 1 };
    int b[] = { 8, 2, 1, 4, 7 };

    int n = sizeof(a) / sizeof(a[0]);
    int m = sizeof(b) / sizeof(b[0]);

    printf("%d", findMaxLength(a, b, n, m));
}

int findMaxLength(int a[], int b[], int n, int m) {
    int dp[n   1][m   1];

    for (int i = 0; i <= n; i  )
        for (int j = 0; j <= m; j  )
            dp[i][j] = 0;

    for (int i = n - 1; i >= 0; i--) {
        for (int j = m - 1; j >= 0; j--) {
            if (a[i] == b[j])
                dp[i][j] = dp[i   1][j   1]   1;
        }
    }

    int maxm = 0;

    for (int i = 0; i < n; i  ) {
        for (int j = 0; j < m; j  ) {
            maxm = max(maxm, dp[i][j]);
        }
    }

    return maxm;
}

int max(int num1, int num2) {
    return (num1 > num2) ? num1 : num2;
}

CodePudding user response：

The inner loop for (int j = 0; j >= 0; j--) does nothing. You should instead write:

for (int j = m - 1; j >= 0; j--)

To display the actual substring, the function findMaxLength must return the offsets of the best match. You can pass the addresses of local variables to store these values.

Here is a modified version:

#include <stdio.h>
#include <stdlib.h>

int max(int num1, int num2) {
    return (num1 > num2) ? num1 : num2;
}

int findMaxLength(const int a[], const int b[], int n, int m, int *pmaxi, int *pmaxj) {
    //int dp[n   1][m   1];
    int (*dp)[m   1] = malloc(sizeof(*dp) * (n   1));

    for (int i = 0; i <= n; i  ) {
        for (int j = 0; j <= m; j  )
            dp[i][j] = 0;
    }
    for (int i = n - 1; i >= 0; i--) {
        for (int j = m - 1; j >= 0; j--) {
            if (a[i] == b[j])
                dp[i][j] = dp[i   1][j   1]   1;
        }
    }

    int maxi = 0, maxj = 0, maxm = 0;

    for (int i = 0; i < n; i  ) {
        for (int j = 0; j < m; j  ) {
            if (maxm < dp[i][j]) {
                maxm = dp[i][j];
                maxi = i;
                maxj = j;
            }
        }
    }

    free(dp);
    *pmaxi = maxi;
    *pmaxj = maxj;
    return maxm;
}

int main() {
    int a[] = { 1, 2, 8, 2, 1 };
    int b[] = { 8, 2, 1, 4, 7 };

    int na = sizeof(a) / sizeof(a[0]);
    int nb = sizeof(b) / sizeof(b[0]);
    int ma, mb, len;
    len = findMaxLength(a, b, na, nb, &ma, &mb);
    printf("longest substring length: %d\n", len);
    if (len > 0) {
        printf("a[%d..%d] == b[%d..%d] == [", ma, ma   len - 1, mb, mb   len - 1);
        for (int i = 0; i < len; i  ) {
            printf(" %d", a[ma   i]);
        }
        printf(" ]\n");
    }
    return 0;
}

Output:

longest substring length: 3
a[2..4] == b[0..2] == [ 8 2 1 ]

Note that findMaxLength uses at most 2 arrays at a time, so if you combine the 3 loops into one, you can reduce the size requirements from O(n.m) to O(m):

int findMaxLength(const int a[], const int b[], int n, int m, int *pmaxi, int *pmaxj) {
    int dp[2][m   1];
    int maxi = 0, maxj = 0, maxm = 0;

    for (int j = 0; j <= m; j  ) {
        dp[0][j] = 0;
    }
    dp[1][m] = 0;

    for (int i = n - 1; i >= 0; i--) {
        for (int j = m - 1; j >= 0; j--) {
            dp[1][j] = (a[i] == b[j]) * (dp[0][j   1]   1);
            if (maxm <= dp[1][j]) {
                maxm = dp[1][j];
                maxi = i;
                maxj = j;
            }
        }
        if (i-- == 0)
            break;
        for (int j = m - 1; j >= 0; j--) {
            dp[0][j] = (a[i] == b[j]) * (dp[1][j   1]   1);
            if (maxm <= dp[0][j]) {
                maxm = dp[0][j];
                maxi = i;
                maxj = j;
            }
        }
    }
    *pmaxi = maxi;
    *pmaxj = maxj;
    return maxm;
}

(a[i] == b[j]) * (dp[0][j 1] 1) computes the value of dp[1][j] with branchless code, which is more efficient than your if statement.

With this alternative, I get a 10x performance improvement for larger array sizes, such as 16K numbers, but still suffer O(n.m) time complexity. More advanced algorithms are required to reduce the complexity of this classic Longest common substring problem.