My goal here is to perform MergeSort on a dynamic array-like data structure I called a dictionary used to store strings and their relative weights. Sorry if the implementation is dumb, I'm a student and still learning.
Anyway, based on the segfaults I'm getting, I'm incorrectly allocating memory for my structs of type item to be copied over into the temporary lists I'm making. Not sure how to fix this. Code for mergesort and data structure setup is below, any help is appreciated.
/////// DICTIONARY METHODS ////////
typedef struct {
char *item;
int weight;
} item;
typedef struct {
item **wordlist;
//track size of dictionary
int size;
} dict;
//dict constructor
dict* Dict(int count){
//allocate space for dictionary
dict* D = malloc(sizeof(dict));
//allocate space for words
D->wordlist = malloc(sizeof(item*) * count);
//initial size
D->size = 0;
return D;
}
//word constructor
item* Item(char str[]){
//allocate memory for struct
item* W = malloc(sizeof(item));
//allocate memory for string
W->item = malloc(sizeof(char) * strlen(str));
W->weight = 0;
return W;
}
void merge(dict* D, int start, int middle, int stop){
//create ints to track lengths of left and right of array
int leftlen = middle - start 1;
int rightlen = stop - middle;
//create new temporary dicts to store the two sides of the array
dict* L = Dict(leftlen);
dict* R = Dict(rightlen);
int i, j, k;
//copy elements start through middle into left dict- this gives a segfault
for (int i = 0; i < leftlen; i ){
L->wordlist[i] = malloc(sizeof(item*));
L->wordlist[i] = D->wordlist[start i];
}
//copy elements middle through end into right dict- this gives a segfault
for (int j = 0; j < rightlen; j ){
R->wordlist[j] = malloc(sizeof(item*));
R->wordlist[j]= D->wordlist[middle 1 k];
}
i = 0;
j = 0;
k = leftlen;
while ((i < leftlen) && (j < rightlen)){
if (strcmp(L->wordlist[i]->item, R->wordlist[j]->item) <= 0) {
D->wordlist[k] = L->wordlist[i];
i ;
k ;
}
else{
D->wordlist[k] = R->wordlist[j];
j ;
k ;
}
}
while (i < leftlen){
D->wordlist[k] = L->wordlist[i];
i ;
k ;
}
while (j < rightlen){
D->wordlist[k] = L->wordlist[j];
j ;
k ;
}
}
void mergeSort(dict* D, int start, int stop){
if (start < stop) {
int middle = start (stop - start) / 2;
mergeSort(D, start, middle);
mergeSort(D, middle 1, stop);
merge(D, start, middle, stop);
}
I put print statements everywhere and narrowed it down to the mallocs in the section where I copy the dictionary to be sorted into 2 separate dictionaries. Also tried writing that malloc as malloc(sizeof(D->wordlist[start i])). Is there something else I need to do to be able to copy the item struct into the wordlist of the new struct?
Again, I'm new to this, so cut me some slack :)
CodePudding user response:
There are numerous errors in the code:
In
merge()
when copying elements to theR
list, the wrong (and uninitialized) index variablek
is being used instead ofj
.R->wordlist[j]= D->wordlist[middle 1 k];
should beR->wordlist[j]= D->wordlist[middle 1 j];
.In
merge()
before merging theL
andR
lists back toD
, the index variablek
for theD
list is being initialized to the wrong value.k = leftLen;
should bek = start;
.In
merge()
in the loop that should copy the remaining elements of the "right" list toD
, the elements are being copied from the "left" list instead of the "right" list.D->wordlist[k] = L->wordlist[j];
should beD->wordlist[k] = R->wordlist[j];
.In
Item()
, themalloc()
call is not reserving space for the null terminator at the end of the string.W->item = malloc(sizeof(char) * strlen(str));
should beW->item = malloc(sizeof(char) * (strlen(str) 1));
(and sincesizeof(char)
is 1 by definition it can be simplified toW->item = malloc(strlen(str) 1);
).Item()
is not copying the string to the allocated memory. Addstrcpy(W->item, str);
.There are memory leaks in
merge()
:L->wordlist[i] = malloc(sizeof(item*));
is not required and can be removed sinceL->wordlist[i]
is changed on the very next line:L->wordlist[i] = D->wordlist[start i];
.Similarly,
R->wordlist[j] = malloc(sizeof(item*));
is not required and can be removed sinceR->wordlist[j]
is changed on the very next line.L
andR
memory is created but never destroyed. Add these lines to the end ofmerge()
to free them:free(L->wordlist); free(L); free(R->wordlist); free(R);
None of the
malloc()
calls are checked for success.
CodePudding user response:
Allocate it all at once, before the merge sort even starts.
#include <stdlib.h>
#include <string.h>
// Weighted Word --------------------------------------------------------------
//
typedef struct {
char *word;
int weight;
} weighted_word;
// Create a weighted word
//
weighted_word* CreateWeightedWord(const char *str, int weight){
weighted_word* W = malloc(sizeof(weighted_word));
if (W){
W->word = malloc(strlen(str) 1); // string length nul terminator
if (W->word)
strcpy( W->word, str);
W->weight = weight;
}
return W;
}
// Free a weighted word
//
weighted_word *FreeWeightedWord(weighted_word *W){
if (W){
if (W->word)
free(W->word);
free(W);
}
return NULL;
}
// Dictionary (of Weighted Words) ---------------------------------------------
//
typedef struct {
weighted_word **wordlist; // this is a pointer to an array of (weighted_word *)s
int size; // current number of elements in use
int capacity; // maximum number of elements available to use
} dict;
// Create a dictionary with a fixed capacity
//
dict* CreateDict(int capacity){
dict* D = malloc(sizeof(dict));
if (D){
D->wordlist = malloc(sizeof(weighted_word*) * capacity);
D->size = 0;
D->capacity = capacity;
}
return D;
}
// Free a dictionary (and all weighted words)
//
dict *FreeDict(dict *D){
if (D){
for (int n = 0; n < D->size; n )
FreeWeightedWord(D->wordlist[n]);
free(D->wordlist);
free(D);
}
return NULL;
}
// Add a new weighted word to the end of our dictionary
//
void DictAddWord(dict *D, const char *str, int weight){
if (!D) return;
if (D->size == D->capacity) return;
D->wordlist[D->size] = CreateWeightedWord(str, weight);
if (D->wordlist[D->size])
D->size = 1;
}
// Merge Sort the Dictionary --------------------------------------------------
// Merge two partitions of sorted words
// words • the partitioned weighted word list
// start • beginning of left partition
// middle • end of left partition, beginning of right partition
// stop • end of right partition
// buffer • temporary work buffer, at least as big as (middle-start)
//
void MergeWeightedWords(weighted_word **words, int start, int middle, int stop, weighted_word **buffer){
int Lstart = start; int Rstart = middle; // Left partition
int Lstop = middle; int Rstop = stop; // Right partition
int Bindex = 0; // temporary work buffer output index
// while (left partition has elements) AND (right partition has elements)
while ((Lstart < Lstop) && (Rstart < Rstop)){
if (strcmp( words[Rstart]->word, words[Lstart]->word ) < 0)
buffer[Bindex ] = words[Rstart ];
else
buffer[Bindex ] = words[Lstart ];
}
// if (left partition has any remaining elements)
while (Lstart < Lstop)
buffer[Bindex ] = words[Lstart ];
// We don't actually need this. Think about it. Why not?
// // if (right partition has any remaining elements)
// while (Rstart < Rstop)
// buffer[Bindex ] = words[Rstart ];
// Copy merged data from temporary buffer back into source word list
for (int n = 0; n < Bindex; n )
words[start ] = buffer[n];
}
// Merge Sort an array of weighted words
// words • the array of (weighted_word*)s to sort
// start • index of first element to sort
// stop • index ONE PAST the last element to sort
// buffer • the temporary merge buffer, at least as big as (stop-start 1)/2
//
void MergeSortWeightedWords(weighted_word **words, int start, int stop, weighted_word **buffer){
if (start < stop-1){ // -1 because a singleton array is by definition sorted
int middle = start (stop - start) / 2;
MergeSortWeightedWords(words, start, middle, buffer);
MergeSortWeightedWords(words, middle, stop, buffer);
MergeWeightedWords(words, start, middle, stop, buffer);
}
}
// Merge Sort a Dictionary
//
void MergeSortDict(dict *D){
if (D){
// We only need to allocate a single temporary work buffer, just once, right here.
dict * Temp = CreateDict(D->size);
if (Temp){
MergeSortWeightedWords(D->wordlist, 0, D->size, Temp->wordlist);
}
FreeDict(Temp);
}
}
// Main program ---------------------------------------------------------------
#include <stdio.h>
int main(int argc, char **argv){
// Command-line arguments --> dictionary
dict *a_dict = CreateDict(argc-1);
for (int n = 1; n < argc; n )
DictAddWord(a_dict, argv[n], 0);
// Sort the dictionary
MergeSortDict(a_dict);
// Print the weighted words
for (int n = 0; n < a_dict->size; n )
printf( "%d %s\n", a_dict->wordlist[n]->weight, a_dict->wordlist[n]->word );
// Clean up
FreeDict(a_dict);
}
Notes for you:
Be consistent. You were inconsistent with capitalization and
*
placement and, oddly, vertical spacing. (You are waaay better than most beginners, though.) I personally hate the Egyptian brace style, but to each his own.I personally think there are far too many levels of
malloc()
s in this code too, but I will leave it at this one comment. It works as is.Strings must be nul-terminated — that is, each string takes
strlen()
characters plus one for a'\0'
character. There is a convenient library function that can copy a string for you too, calledstrdup()
, which AFAIK exists on every system.Always check that
malloc()
and friends succeed.Don’t forget to free everything you allocate. Functions help.
“Item” was a terribly non-descript name, and it overlapped with the meaning of two different things in your code. I renamed them to separate things.
Your dictionary object should be expected to keep track of how many elements it can support. The above code simply refuses to add words after the capacity is filled, but you could easily make it
realloc()
a larger capacity if the need arises. The point is to prevent invalid array accesses by adding too many elements to a fixed-size array.Printing the array could probably go in a function.
Notice how I set
start
as inclusive andstop
as exclusive. This is a very C (and C ) way of looking at things, and it is a good one. It will help you with all kinds of algorithms.Notice also how I split the Merge Sort up into two functions: one that takes a dictionary as argument, and a lower-level one that takes an array of the weighted words as argument that does all the work.
- The higher-level merge sort a dictionary allocates all the temporary buffer the merge algorithm needs, just once.
- The lower-level merge sort an array of (
weighted_word*
)s expects that temporary buffer to exist and doesn’t care (or know anything) about the dictionary object. - The merge algorithm likewise doesn't know much. It is simply given all the information it needs.
Right now the merge condition simply compares the weighted-word’s string value. But it doesn’t have to be so simple. For example, you could sort equal elements by weight. Create a function:
int CompareWeightedWords(const weighted_word *a, const weighted_word *b){
int rel = strcmp( a->word, b->word );
if (rel < 0) return -1;
if (rel > 0) return 1;
return a->weight < b->weight ? -1 : a->weight > b->weight;
}
And put it to use in the merge function:
if (CompareWeightedWords( words[Rstart], words[Lstart] ) < 0)
buffer[Bindex ] = words[Rstart ];
else
buffer[Bindex ] = words[Lstart ];
I don’t think I forgot anything.