Home > Blockchain >  Unable to find the cause of segmentation fault of huffman coding to do entropy encoding and decoding
Unable to find the cause of segmentation fault of huffman coding to do entropy encoding and decoding

Time:01-27

I have been looking at the code to do entropy encoding and decoding with of huffman coding in c, and I cannot seem to find the reason why I keep getting segmentation fault.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const int MAX_TREE_HT = 100;

struct MinHeapNode {
    char data;
    unsigned freq;
    struct MinHeapNode* left, * right;
};

struct MinHeap {
    unsigned size;
    unsigned capacity;
    struct MinHeapNode** array;
};

struct MinHeapNode* newNode(char data, unsigned freq) {
    struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode));
    temp->left = temp->right = NULL;
    temp->data = data;
    temp->freq = freq;
    return temp;
}

struct MinHeap* createMinHeap(unsigned capacity) {
    struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
    minHeap->size = 0;
    minHeap->capacity = capacity;
    minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
    return minHeap;
}

void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode* t = *a;
    *a = *b;
    *b = t;
}

void minHeapify(struct MinHeap* minHeap, int idx) {
    int smallest = idx;
    int left = 2 * idx   1;
    int right = 2 * idx   2;

    if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
        smallest = left;

    if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
        smallest = right;

    if (smallest != idx) {
        swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
        minHeapify(minHeap, smallest);
    }
}

int isSizeOne(struct MinHeap* minHeap) {
    return (minHeap->size == 1);
}

struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
    struct MinHeapNode* temp = minHeap->array[0];
    minHeap->array[0] = minHeap->array[minHeap->size - 1];
    --minHeap->size;
    minHeapify(minHeap, 0);
    return temp;
}

void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
      minHeap->size;
    int i = minHeap->size - 1;
    minHeap->array[i] = minHeapNode;
    while (i && minHeap->array[i]->freq < minHeap->array[(i - 1) / 2]->freq) {
        swapMinHeapNode(&minHeap->array[i], &minHeap->array[(i - 1) / 2]);
        i = (i - 1) / 2;
    }
}

void buildMinHeap(struct MinHeap* minHeap) {
    int n = minHeap->size - 1;
    int i;
    for (i = (n - 1) / 2; i >= 0; --i)
        minHeapify(minHeap, i);
}

void printArr(int arr[], int n) {
    int i;
    for (i = 0; i < n;   i)
        printf("%d", arr[i]);
    printf("\n");
}

int isLeaf(struct MinHeapNode* root) {
    return !(root->left) && !(root->right);
}

struct MinHeap* createAndBuildMinHeap(char data[], int freq[], int size) {
    struct MinHeap* minHeap = createMinHeap(size);
    int i;
    for (i = 0; i < size;   i)
        minHeap->array[i] = newNode(data[i], freq[i]);
    minHeap->size = size;
    buildMinHeap(minHeap);
    return minHeap;
}

struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
    struct MinHeapNode *left, *right, *top;
    struct MinHeap* minHeap = createAndBuildMinHeap(data, freq, size);
    while (!isSizeOne(minHeap)) {
        left = extractMin(minHeap);
        right = extractMin(minHeap);
        top = newNode('$', left->freq   right->freq);
        top->left = left;
        top->right = right;
        insertMinHeap(minHeap, top);
    }
    return extractMin(minHeap);
}

void printCodes(struct MinHeapNode* root, int arr[], int top) {
    if (root->left) {
        arr[top] = 0;
        printCodes(root->left, arr, top   1);
    }
    if (root->right) {
        arr[top] = 1;
        printCodes(root->right, arr, top   1);
    }
    if (isLeaf(root)) {
        printf("%c: ", root->data);
        printArr(arr, top);
    }
}

void HuffmanCodes(char data[], int freq[], int size) {
    struct MinHeapNode* root = buildHuffmanTree(data, freq, size);
    int arr[MAX_TREE_HT], top = 0;
    printCodes(root, arr, top);
}

void encode(char* str, char* encoded_str, struct MinHeapNode* root) {
    if (root == NULL)
        return;
    if (isLeaf(root)) {
        while (*str != root->data)
            str  ;
        while (*str == root->data) {
            *encoded_str = '1';
            encoded_str  ;
            str  ;
        }
        return encode(str, encoded_str, root);
    }
    if (*str == root->data) {
        *encoded_str = '0';
        encoded_str  ;

        encode(str   1, encoded_str, root->left);
    } else {
        encode(str, encoded_str, root->right);
    }
}

void decode(char* encoded_str, struct MinHeapNode* root, char* decoded_str) {
    struct MinHeapNode* curr = root;
    while (*encoded_str) {
        if (*encoded_str == '0')
            curr = curr->left;
        else
            curr = curr->right;
        if (isLeaf(curr)) {
            *decoded_str = curr->data;
            decoded_str  ;
            curr = root;
        }
        encoded_str  ;
    }
}

int main() {
    char str[] = "this is a test";
    int freq[256] = { 0 };
    int size = 0, i;
    for (i = 0; str[i]; i  ) {
        freq[str[i]]  ;
        size  ;
    }
    int unique = 0;
    char data[size];
    for (i = 0; i < 256; i  ) {
        if (freq[i]) {
            data[unique] = (char)i;
            unique  ;
        }
    }
    HuffmanCodes(data, freq, unique);
    char encoded_str[100], decoded_str[100];
    struct MinHeapNode* root = buildHuffmanTree(data, freq, unique);
    encode(str, encoded_str, root);
    printf("Encoded string: %s\n", encoded_str);
    decode(encoded_str, root, decoded_str);
    printf("Decoded string: %s\n", decoded_str);
    return 0;
}

With the several try and error with commenting out section by section, it was found that it's caused somewhere inside the encoding function. This is not the code I wrote myself and is written with some references online. And the output should be the following.

 : 00
a: 010
e: 011
h: 100
i: 1010
s: 1011
t: 110
Encoded string: 0101110101011101110111111001101
Decoded string: this is a test

Instead I get the following.

a: 0
e: 10
h: 110
i: 1110
s: 11110
 : 111110
t: 111111
zsh: segmentation fault

Thanks in advance for your help.

CodePudding user response:

void encode(char* str, char* encoded_str, struct MinHeapNode* root) {
if (root == NULL)
    return;
if (isLeaf(root)) {
    while (*str != root->data)
    {
        printf("str[]=%c", *str); // Here you can see that you have an overflow *******
        str  ;
    }
    while (*str == root->data) {
        *encoded_str = '1';
        encoded_str  ;
        str  ;
    }
    return encode(str, encoded_str, root);
}
if (*str == root->data) {
    *encoded_str = '0';
    encoded_str  ;

    encode(str   1, encoded_str, root->left);
} else {
    encode(str, encoded_str, root->right);
}

}

Just add a condition in your while loop to make sure you don't overflow your string:

while (*str  && *str != root->data) // HERE and in all other loops
{
    str  ;
}

BUT ! first you need to fix the fact that : char encoded_str[100], decoded_str[100]; are empty and not null terminated so you can go through them in a loop :

encoded_str[end_index] = '\0';
while (*str ....) // it will stop at the '/0' in case you dont find *str == root->data.

the problem in your encode function is that you never go out of the recursion. hope this will help you :).

CodePudding user response:

Also your freq[str[i]] ; needs to be freq[str[i] & 0xff] ;. char is signed, so you will be trying to access negative index elements of freq for bytes greater than 127.

  • Related