Home > Back-end >  memcpy() copies some garbage characters in the destination string
memcpy() copies some garbage characters in the destination string

Time:02-20

I am trying to create a random string generator with the following code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Random string generator.
//
// Writes `length` characters drawn from the alphanumeric charset into `dest`,
// followed by a terminating '\0'. `dest` must therefore have room for
// `length + 1` bytes.
void rand_str(char *dest, size_t length) {
    char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    int charset_length = 62;

    while (length-- > 0) {
        size_t index = rand() % charset_length;
        // Store the character and advance to the next output position.
        *dest++ = charset[index];
    }
    *dest = '\0';
}

// Asker's program as posted: fills `data` with `num_data` distinct random
// strings and prints each generated string next to its stored copy.
// The defects that cause the garbage output (noted inline) are left in place
// on purpose, since they are what the question is about.
int main ()
{
    int num_data = 5;
    int string_length;
    int max_string_length = 10;

    // NOTE: string_length is uninitialized here, so the row size of this
    // variable-length array is indeterminate (undefined behavior).
    char data[num_data][string_length];
    int i = 0;
    while (i < num_data)
    {
        // Length in the range 3 .. max_string_length + 2.
        string_length = 3 + (rand() % max_string_length);
        // NOTE: no room for the '\0' that rand_str() writes after the
        // string_length characters.
        char str[string_length];
        rand_str(str, string_length);
        short increment_avoider_flag = 0;
        for (int j = 0; j < i; j++)
        {
            if (!strcmp(data[j], str))
            {
                // Duplicate found: this inner `str` shadows the outer one and
                // is discarded at the end of the block; the flag makes the
                // outer loop retry without advancing i.
                string_length = 3 + (rand() % max_string_length);
                char str[string_length];
                rand_str(str, string_length);
                increment_avoider_flag = -1;
                break;
            }
        }
        if (!increment_avoider_flag)
        {
            // NOTE: sizeof(str) is string_length, so the terminating '\0' is
            // not copied into data[i].
            memcpy(data[i], str, sizeof(str));
            printf("%s\n", str);
            printf("%s\n\n\n", data[i]);
            i++;
        }
    }

}

The output of the code above is:

pn2QMwQbLq
pn2QMwQbLq~??


WqJ99NSq
WqJ99NSqLq~??


LDvi5z
LDvi5zSqLq~??


gxBewrk5rHr
gxBewrk5rHr??


DcDg
DcDgwrk5rHr??


There are two problems with the output here.

  1. If 1st string created has length x which is greater than later string lengths, memcpy copies residue of previous string too. e.g. First string is pn2QMwQbLq and second string is WqJ99NSq but the copied string is WqJ99NSqLq~?? which has additional Lq from first string.
  2. The copied strings have some garbage characters in them. e.g. The first original string is pn2QMwQbLq but the copied string pn2QMwQbLq~?? has an additional ~??.

I am not sure what is happening here but seems like I am declaring or copying the character arrays incorrectly. Please help me figure out this problem.

CodePudding user response:

There are multiple problems in your code:

  • defining char data[num_data][string_length]; has undefined behavior because string_length is uninitialized. You should use char data[num_data][max_string_length + 1]; to allow for the null terminator.

  • 3 + (rand() % max_string_length) produces a pseudo-random integer in the range 3 to max_string_length + 2, which seems incorrect. You should use 3 + (rand() % (max_string_length - 2)) to get the range 3 to max_string_length.

  • char str[string_length]; defines the array str one byte too short for a string of string_length characters.

  • if there is a collision, there is no need to generate a new random string, especially inside a new local array. Just set the indicator and break from the loop.

  • you can remove the need for the indicator by defining j outside the for loop and using i == j to check that there are no duplicates.

  • you should seed the random number generator to avoid generating the same strings at every run. Use srand(time(NULL));

There were multiple occurrences of undefined behavior that explain the unexpected results.

Here is a modified version:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Random string generator.
//
// Writes `length` random alphanumeric characters into `dest`, followed by a
// terminating '\0'; `dest` must have room for `length + 1` bytes.
void rand_str(char *dest, size_t length) {
    static const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // sizeof counts the terminator, which must not be selectable.
    const size_t charset_length = sizeof(charset) - 1;

    while (length-- > 0) {
        size_t index = (size_t)rand() % charset_length;
        // Store the character and advance to the next output position.
        *dest++ = charset[index];
    }
    *dest = '\0';
}

// Generates num_data distinct random strings, each 3..max_string_length
// characters long, printing every string as it is accepted and then the
// whole table again at the end.
int main(void) {
    int num_data = 5;
    int max_string_length = 10;
    // One extra byte per row for the null terminator.
    char data[num_data][max_string_length + 1];
    int i = 0, j;
    srand((unsigned)time(NULL));
    while (i < num_data) {
        int string_length = 3 + (rand() % (max_string_length - 2));
        char str[string_length + 1];
        rand_str(str, string_length);
        // Look for an earlier identical string; j == i afterwards means none.
        for (j = 0; j < i; j++) {
            if (!strcmp(data[j], str))
                break;
        }
        if (j == i) {
            strcpy(data[i], str);
            printf("%s\n", str);
            i++;
        }
    }
    printf("\n");
    for (i = 0; i < num_data; i++) {
        printf("%s\n", data[i]);
    }
    return 0;
}

CodePudding user response:

Since the string length changes every time, you need to allocate the storage dynamically. Below are the 2 changes to the code.

int main ()
{
int num_data = 5;
int string_length;
int max_string_length = 10;
//char data[num_data][string_length];
// One pointer per string; each string's storage is allocated once its length is known.
char **data = malloc(num_data * sizeof *data);   //change1
if (data == NULL)
    return EXIT_FAILURE;
int i = 0;
while (i < num_data)
{
    string_length = 3 + (rand() % max_string_length);
    // +1 leaves room for the '\0' that rand_str() appends.
    data[i] = malloc(string_length + 1);  //change2
    if (data[i] == NULL)
        return EXIT_FAILURE;
    char str[string_length + 1];
    rand_str(str, string_length);
    short increment_avoider_flag = 0;

CodePudding user response:

Here is a way to generate random strings without any risk of memory leaks.

The following code implements something of a map structure with time complexity of O(n), where n is the length of the string. This is not a good implementation of map, however creating a map in C would be a heavy task.

Pros:

  • no heap memory allocation
  • no repeated strings in the final set of random strings

Cons:

  • not a good implementation of map, as time complexity should be O(log(n))

Here is the code (you can try it online):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Characters a generated string may contain.
const char *charset = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
void random_str(char *str, size_t len);

// Fills str[0..len-1] with random characters from charset and writes a
// terminating 0 at str[len], so str must have room for len + 1 bytes.
// A zero len is treated as a fatal error: a message is printed to stderr
// and the process exits with status 1.
void random_str(char *str, size_t len)
{
    if (len == 0)
    {
        fprintf(stderr, "could not generate 0 length string\n");
        exit(1);
    }
    // Derive the charset size instead of hard-coding 62.
    const size_t charset_len = strlen(charset);
    for (size_t i = 0; i < len; i++)
    {
        str[i] = charset[(size_t)rand() % charset_len];
    }
    str[len] = 0;
}

#define MAX_LEN 10

// Generates MAX_LEN distinct random strings of MAX_LEN characters each,
// printing every string once it is known to be unique.
int main(void)
{
    // Each row holds MAX_LEN characters plus the terminator.
    char rdata[MAX_LEN][MAX_LEN + 1] = {0};
    srand((unsigned)time(NULL));
    // i only advances when the freshly generated string is unique; otherwise
    // the same slot is regenerated on the next pass.
    for (size_t i = 0; i < MAX_LEN;)
    {
        int flag = 1;
        random_str(rdata[i], MAX_LEN);
        for (size_t j = 0; j < i; j++)
        {
            if (strcmp(rdata[i], rdata[j]) == 0)
            {
                flag = 0;
                break;  // one duplicate is enough to reject this string
            }
        }
        if (flag == 1)
        {
            printf("%s\n", rdata[i]);
            i++;
        }
    }
    return 0;
}
  • Related