Home > OS >  C program to count repeated words in a cstring
C++ program to count repeated words in a C-string

Time:07-14

I've been working on a C++ program. I've worked out the logic, but I'm unable to get it to run. The task is:

Task: Write a program, using functions only, with the following features.

  1. Program reads paragraph(s) from the file and stores in a string.
  2. Then program counts the occurrence of each word in the paragraph(s) and stores all words with their number of occurrences.
  3. If that word has appeared more than one time in whole string, it should store the word only once along its total occurrences.
  4. The output described above (in part 3) must be stored in a new file.

Sample input: is the is and the is and the and is and only that is

Sample output:

is 5

the 3

and 4

only 1

that 1

I'll cut straight to the occurrence-counting function that I've written. My logic is to store each token in a character array, then compare that array with the main character array and increment a counter:

 // Asker's attempt, kept as posted because it is the subject of the question.
 // It tokenizes the text returned by getInputFromFile() and tries to count how
 // many times each token occurs; the inline notes mark why it does not work yet.
 void occurances() {
    char* string = getInputFromFile();
    char separators[] = ",.\n\t ";
    char* token;
    char* nextToken;
    char* temp[100];  // fixed capacity; the index below is never bounds-checked
    token = strtok_s(string, separators, &nextToken);
    cout << temp;  // streams the address of the array, not a word
    int counter = 0;  // shared by every token and never reset
    int i = 0;
    while ((token != NULL)) {
        temp[i] = token;
        i++;
        for (int i = 0; i < strlen(string); i++) {  // shadows the outer i
            for (int j = 0; j < 100; j++) {
                // *temp is always temp[0], so j does not select an element.
                if ((strcmp(token, *temp)) == 0) {
                    counter++;
                }
            }
            cout << temp << " : " << counter << endl;
        }
        if (token != NULL) {
            token = strtok_s(NULL, separators, &nextToken);
        }
    }
}

I know this code is preposterous, but could anyone please be kind enough to give me a clue? I'm new to C++. Thank you.

CodePudding user response:

If you store the tokens in an array, that array should grow dynamically, because the number of tokens is not known in advance. And since, according to the task description, you cannot use the C++ standard containers, it is necessary to implement a dynamic array manually, for example:

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>

// Computes the next capacity for a growing array: 1 for an empty array,
// otherwise double the current value, saturating at SIZE_MAX once doubling
// is no longer possible.
std::size_t increase_capacity_value(std::size_t capacity) {
    if (capacity == 0) {
        return 1;
    }

    const std::size_t doubling_limit = SIZE_MAX / 2;
    return (capacity < doubling_limit) ? capacity * 2 : SIZE_MAX;
}

// Grows the two parallel arrays (token pointers and their counters) to the
// next capacity step returned by increase_capacity_value().
//
// Returns true and updates `capacity` only when both arrays were grown.
// On failure `capacity` is left unchanged. If only the first reallocation
// succeeded, `tokens_array` already points to the larger buffer, which is
// still valid for the old capacity and is released by the caller as usual.
bool increase_array_capacity(char**& tokens_array, std::size_t*& tokens_count, std::size_t& capacity) {
    const std::size_t new_capacity = increase_capacity_value(capacity);
    if (new_capacity <= capacity) {
        return false;
    }

    // Refuse capacities whose size in bytes would wrap around std::size_t;
    // otherwise realloc could succeed with a tiny buffer while `capacity`
    // claims a huge one.
    constexpr std::size_t largest_element_size =
        sizeof(char*) > sizeof(std::size_t) ? sizeof(char*) : sizeof(std::size_t);
    if (new_capacity > SIZE_MAX / largest_element_size) {
        return false;
    }

    const std::size_t tokens_array_byte_size = new_capacity * sizeof(char*);
    char** const new_tokens_array = static_cast<char**>(std::realloc(tokens_array, tokens_array_byte_size));
    if (new_tokens_array == nullptr) {
        return false;
    }

    // realloc may have moved the block, so publish the new pointer right away.
    tokens_array = new_tokens_array;

    const std::size_t tokens_count_byte_size = new_capacity * sizeof(std::size_t);
    std::size_t* const new_tokens_count = static_cast<std::size_t*>(std::realloc(tokens_count, tokens_count_byte_size));
    if (new_tokens_count == nullptr) {
        return false;
    }

    tokens_count = new_tokens_count;
    capacity = new_capacity;
    return true;
}

// Appends `token` to the parallel arrays with an initial count of 1, growing
// the arrays first when they are full.
//
// Only the pointer is stored; the token text itself is not copied.
// Returns false (leaving `array_size` unchanged) if the arrays could not grow.
bool add_token(char* token, char**& tokens_array, std::size_t*& tokens_count, std::size_t& array_size, std::size_t& array_capacity) {
    if (array_size == array_capacity) {
        if (!increase_array_capacity(tokens_array, tokens_count, array_capacity)) {
            return false;
        }
    }

    tokens_array[array_size] = token;
    tokens_count[array_size] = 1;
    ++array_size;

    return true;
}

// Looks `token` up among the first `array_size` entries of `tokens_array`.
//
// Returns a pointer to the matching entry's counter inside `tokens_count`,
// or nullptr when the token has not been stored yet.
std::size_t* get_token_count_storage(char* token, char** tokens_array, std::size_t* tokens_count, std::size_t array_size) {
    for (std::size_t i = 0; i < array_size; ++i) {
        if (std::strcmp(token, tokens_array[i]) == 0) {
            return tokens_count + i;
        }
    }
    return nullptr;
}

// Records one occurrence of `token`: increments its counter if the token is
// already stored, otherwise appends it with a count of 1.
//
// Returns false only when a new token could not be added.
bool process_token(char* token, char**& tokens_array, std::size_t*& tokens_count, std::size_t& array_size, std::size_t& array_capacity) {
    std::size_t* token_count_ptr = get_token_count_storage(token, tokens_array, tokens_count, array_size);
    if (token_count_ptr == nullptr) {
        if (!add_token(token, tokens_array, tokens_count, array_size, array_capacity)) {
            return false;
        }
    }
    else {
        ++(*token_count_ptr);
    }
    return true;
}

// Demo driver: splits the sample sentence into tokens, counts each distinct
// token in first-seen order and prints "<token> : <count>" lines.
int main() {
    char string[] = "is the is and the is and the and is and only that is";
    char separators[] = ",.\n\t ";

    std::size_t token_array_capacity = 0;
    std::size_t token_array_size = 0;
    char** tokens_array = nullptr;
    std::size_t* tokens_count = nullptr;

    // strtok cuts `string` in place, so the stored token pointers refer to
    // this buffer and stay valid for the rest of main.
    char* current_token = std::strtok(string, separators);
    while (current_token != nullptr) {
        if (!process_token(current_token, tokens_array, tokens_count, token_array_size, token_array_capacity)) {
            break;
        }
        current_token = std::strtok(nullptr, separators);
    }

    // print the report only if all tokens were processed
    if (current_token == nullptr) {
        for (std::size_t i = 0; i < token_array_size; ++i) {
            std::cout << tokens_array[i] << " : " << tokens_count[i] << '\n';
        }
    }

    std::free(tokens_array);
    std::free(tokens_count);
}

godbolt.org

CodePudding user response:

Okay, what if I want to store each token once in an array, and then replace it with the next word while deleting its duplicates from the character array?

That is also a possible solution. But in the general case, it is also necessary to allocate the memory for the current token dynamically, because the lengths of the tokens are not known in advance either:

// Overwrites, in place, every character of `str` that appears in
// `chars_to_replace` with '\0'. With a delimiter set this turns the string
// into a sequence of NUL-terminated tokens.
//
// Scanning stops at the original terminator of `str`; a null `str` is a no-op.
void replace_chars(char* str, const char* chars_to_replace) {
    while (str && *str != '\0') {
        // Jump to the next character that has to be replaced, if any.
        str = std::strpbrk(str, chars_to_replace);
        if (str == nullptr) {
            break;
        }

        // Blank out the whole run of consecutive matching characters at once.
        const std::size_t number_of_delimiters = std::strspn(str, chars_to_replace);
        for (std::size_t i = 0; i < number_of_delimiters; ++i) {
            str[i] = '\0';
        }

        str += number_of_delimiters;
    }
}

// Copies `new_token` into the heap buffer `token_storage`, growing the buffer
// when the new token is longer than the string currently stored in it.
//
// `token_storage` must be nullptr or a buffer obtained from malloc/realloc
// that holds a NUL-terminated string; the caller releases it with std::free.
// Returns false when `new_token` is nullptr or memory cannot be obtained; in
// both cases `token_storage` keeps its previous value and contents.
bool keep_token(char*& token_storage, const char* new_token) {
    if (new_token == nullptr) {
        return false;
    }

    const std::size_t current_token_len = token_storage ? std::strlen(token_storage) : 0;
    const std::size_t required_token_len = std::strlen(new_token);

    // The buffer holds at least current_token_len + 1 bytes, so it only needs
    // to grow when the new token is longer than the stored one.
    if (token_storage == nullptr || current_token_len < required_token_len) {
        // Assign through a temporary: on failure realloc leaves the old block
        // allocated, and overwriting the only pointer to it would leak it.
        char* const grown_storage =
            static_cast<char*>(std::realloc(token_storage, required_token_len + 1));
        if (grown_storage == nullptr) {
            return false;
        }
        token_storage = grown_storage;
    }

    std::strcpy(token_storage, new_token);
    return true;
}

// Counts the words in `str` that are equal to `token` and erases each match
// by overwriting it with '\0', so it is not counted again later.
//
// `str` is a buffer of `str_len` characters in which the words are separated
// by one or more '\0' characters. Assumes str[str_len] is '\0' (the original
// string terminator), so that the last word is NUL-terminated too.
// `token` must not point into `str`.
std::size_t count_tokens_and_replace(char* str, std::size_t str_len, const char* token) {
    std::size_t number_of_tokens = 0;
    std::size_t i = 0;

    while (i < str_len) {
        // Skip separators and already erased words without leaving the buffer.
        if (str[i] == '\0') {
            ++i;
            continue;
        }

        const std::size_t word_len = std::strlen(str + i);
        if (std::strcmp(str + i, token) == 0) {
            std::memset(str + i, '\0', word_len);
            ++number_of_tokens;
        }

        i += word_len;
    }

    return number_of_tokens;
}

// Demo driver for the in-place variant: turns the separators of the sample
// sentence into '\0', then repeatedly takes the first remaining word, counts
// and erases all of its occurrences, and prints "<token> : <count>".
int main() {
    char string[] = "is the is and the is and the and is and only that is";
    char separators[] = ",.\n\t ";

    // Remember the length first: replace_chars puts '\0' inside the text.
    const std::size_t string_len = std::strlen(string);
    replace_chars(string, separators);

    std::size_t i = 0;
    char* token = nullptr;

    while (true) {
        // Skip separators and words that were already counted and erased.
        while (i < string_len && string[i] == '\0') ++i;

        if (i == string_len || !keep_token(token, string + i)) break;

        std::cout << token << " : " << count_tokens_and_replace(string + i, string_len - i, token) << '\n';
    }

    std::free(token);
}

godbolt.org

But if it is known that the token length cannot be greater than N, it is possible to use a fixed-size array of N + 1 chars to hold the current token. That removes the dynamic memory allocation from the code.

  • Related