Home > OS >  How to properly write an array of structs to a pipe in C
How to properly write an array of structs to a pipe in C

Time:04-27

I have a hard time figuring out how to pass an array of structs with strings in them through a pipe to a child process.
I created two demos to show my problem.
demo_int.c

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

/*
 * demo_int: the parent fills an array of ints and sends its raw bytes
 * through a pipe; the child reads them back and prints them.
 *
 * This works because an int array contains no pointers: the bytes written
 * are the complete data.
 *
 * Exits with status 1 on any failure (pipe, fork, malloc, short transfer).
 */
int main(void)
{
    enum { COUNT = 10 };
    const size_t nbytes = COUNT * sizeof(int);

    int pfd[2];
    if (pipe(pfd) == -1)
    {
        exit(1);
    }

    pid_t child = fork();
    if (child < 0)
    {
        exit(1);
    }

    if (child == 0)
    {
        /* Child: reader side only. */
        close(pfd[1]);
        int *arr = malloc(nbytes);
        if (arr == NULL)
        {
            exit(1);
        }

        /* The payload is small, so one read() is expected to deliver it
         * whole; anything else is treated as an error instead of printing
         * uninitialized memory. */
        ssize_t got = read(pfd[0], arr, nbytes);
        close(pfd[0]);
        if (got != (ssize_t)nbytes)
        {
            free(arr);
            exit(1);
        }

        printf("child process read:\n");
        for (int i = 0; i < COUNT; ++i)
        {
            printf("%d\n", arr[i]);
        }
        free(arr);
        exit(0);
    }
    else
    {
        /* Parent: writer side only. */
        int *arr = malloc(nbytes);
        if (arr == NULL)
        {
            exit(1);
        }
        for (int i = 0; i < COUNT; ++i)
        {
            arr[i] = i;
        }

        printf("array to be written:\n");
        for (int i = 0; i < COUNT; ++i)
        {
            printf("%d\n", arr[i]);
        }

        close(pfd[0]);
        ssize_t sent = write(pfd[1], arr, nbytes);
        close(pfd[1]);
        free(arr);
        if (sent != (ssize_t)nbytes)
        {
            /* Reap the child (it sees EOF / short data and exits) before failing. */
            wait(NULL);
            exit(1);
        }
        printf("parent process done\n");
        wait(NULL);
    }
    return 0;
}

I created this, so I can be sure that the problem is not with the "dynamic array" part, but with the "structs" part, and maybe more specifically the "string in a struct" part.
This produces the expected result:

array to be written:
0
1
2
3
4
5
6
7
8
9
parent process done
child process read:
0
1
2
3
4
5
6
7
8
9

With valgrind reporting no errors or leaks.
However when I try the same with the problematic structs:

demo_person.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>

/*
 * One record of the demo array.
 * Note that `name` is only an address: the characters of the name live
 * outside the struct, so copying the struct's bytes does not copy the string.
 */
struct Person
{
    char *name; /* points to a NUL-terminated string stored elsewhere */
    int age;
};
typedef struct Person Person;

/*
 * demo_person: same shape as demo_int, but the array elements are Person
 * structs that contain a `char *name`.
 *
 * NOTE: this program intentionally demonstrates the problem being asked
 * about. Writing the Person array to the pipe transfers only the pointer
 * values stored in `name`, not the characters they point to. In the child
 * those addresses do not refer to the parent's strings, so printing and
 * freeing them is invalid (see the "free(): invalid pointer" output).
 */
int main()
{
    pid_t child;
    int pfd[2];
    if (pipe(pfd) == -1)
    {
        exit(1);
    }

    child = fork();
    if (child < 0)
    {
        exit(1);
    }

    if (child == 0)
    {
        /* Child: reader side only. */
        close(pfd[1]);
        Person *arr = malloc(10 * sizeof(Person));
        if (arr == NULL)
        {
            exit(1);
        }
        /* Receives the raw struct bytes, i.e. ages plus the parent's pointer values. */
        read(pfd[0], arr, 10 * sizeof(Person));
        close(pfd[0]);
        printf("child process read:\n");
        for (int i = 0; i < 10; ++i)
        {
            /* BUG (the subject of the question): arr[i].name was never
             * allocated in this process. */
            printf("%s %d\n", arr[i].name, arr[i].age);
        }

        for (int i = 0; i < 10; ++i)
        {
            /* BUG: frees a pointer this process did not obtain from malloc. */
            free(arr[i].name);
        }
        free(arr);
        exit(0);
    }
    else
    {
        /* Parent: writer side only. */
        Person *arr = malloc(10 * sizeof(Person));
        if (arr == NULL)
        {
            exit(1);
        }
        for (int i = 0; i < 10; ++i)
        {
            /* "Person" + one digit + NUL fits in 8 bytes. */
            char *name = malloc(8 * sizeof(char));
            if (name == NULL)
            {
                exit(1);
            }
            sprintf(name, "%s%d", "Person", i);
            arr[i].name = malloc(8 * sizeof(char));
            if (arr[i].name == NULL)
            {
                exit(1);
            }
            strcpy(arr[i].name, name);
            arr[i].age = i;
            free(name);
        }

        printf("array to be written:\n");
        for (int i = 0; i < 10; ++i)
        {
            printf("%s %d\n", arr[i].name, arr[i].age);
        }

        close(pfd[0]);
        /* Sends the struct bytes only; the strings behind `name` are not sent. */
        write(pfd[1], arr, 10 * sizeof(Person));
        close(pfd[1]);

        for (int i = 0; i < 10; ++i)
        {
            free(arr[i].name);
        }
        free(arr);
        printf("parent process done\n");
        wait(NULL);
    }
}

The output is:

array to be written:
Person0 0
Person1 1
Person2 2
Person3 3
Person4 4
Person5 5
Person6 6
Person7 7
Person8 8
Person9 9
parent process done
child process read:
 0
 1
 2
 3
 4
 5
 6
 7
 8
 9
free(): invalid pointer

With valgrind reporting loads of errors (as expected after this output).
I found similar-looking questions, but none of the answers seemed to help.

EDIT:

Thanks to the answer, I now understand that the problem is with the dynamically allocated string: only the process that called malloc can access it. However, the real program in which I encountered this problem is populated (more or less) like this, i.e. it already uses these dynamically allocated strings.
Is there a way to pass the strings like this, or do I have to solve it somehow with new char[N] arrays?

CodePudding user response:

The memory you allocate with malloc, and the pointer it returns, are only valid in the process that made the call to malloc.

When you write the structure through the pipe you only write the (current process unique) pointer, not the memory it points to.

The quick and simple solution is to use an actual array instead:

/*
 * Person with the name stored inline: the characters are part of the struct
 * itself, so writing the struct's bytes also writes the name.
 */
struct Person
{
    char name[10]; /* NUL-terminated, at most 9 characters */
    int age;
};
typedef struct Person Person;

CodePudding user response:

What you've stumbled upon is commonly solved using what's known as "serialization," which allows you to reliably send and receive data over a wire (pipe, network socket, file, etc). A popular serialization format is JSON, for its wide support and easy readability, but there's nothing stopping you from creating your own serialization format, and just using that!

A common way to pack binary data reliably is to use a header-payload format, where the header contains information about what kind of data is in the payload, and also how long the payload is. From there, it's as simple as reading in a fixed size header, parsing it, then reading the payload on the receiving end.

Something like this may work for you:

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Fixed-size header that precedes every payload on the wire. */
struct simple_header {
  /* Payload type tag. A char was chosen arbitrarily, e.g. 's' for string,
     'i' for int; commonly you will see an enum used here. */
  char kind;
  /* Payload length in bytes. A negative length could be used to indicate
     errors of some kind, or a plain size_t could be used instead. */
  int length;
};

/* Holder for the bytes of one received payload. */
struct simple_payload {
  unsigned char *data; /* raw payload bytes */
};

/* Write exactly `count` bytes from `buf` to `fd`, continuing after short
 * writes and retrying when interrupted by a signal.
 * Returns 0 on success, -1 on failure (errno is left as set by write). */
static int write_all(int fd, const void *buf, size_t count) {
  const unsigned char *cursor = buf;

  while (count > 0) {
    ssize_t written = write(fd, cursor, count);
    if (written < 0) {
      if (errno == EINTR) continue;
      return -1;
    }
    cursor += written;
    count -= (size_t) written;
  }
  return 0;
}

/*
 * Send a NUL-terminated string over `fd` as a header followed by the string
 * bytes (the terminating NUL is not sent).
 *
 * fd      - descriptor to write to
 * payload - string to send; must not be NULL
 *
 * Returns the number of payload bytes written on success, or -1 if
 * `payload` is NULL, is too long to describe in the header's int length,
 * or a write fails.
 */
int serialize_string(int fd, const char *payload) {
  if (payload == NULL) return -1;

  // Automatically find the size, for convenience
  size_t length = strlen(payload);
  if (length > INT_MAX) return -1; // would not fit in header.length

  // Set aside a header and populate it; zero it first so that padding
  // bytes inside the struct are not sent uninitialized
  struct simple_header header;
  memset(&header, 0, sizeof header);
  header.kind = 's';
  header.length = (int) length;

  // Send the header, then the payload; a partial write of either would
  // desynchronize the receiver, so both must be written completely
  if (write_all(fd, &header, sizeof header) < 0) return -1;
  if (write_all(fd, payload, length) < 0) return -1;

  return (int) length;
}

/* Read exactly `count` bytes from `fd` into `buf`, continuing after short
 * reads and retrying when interrupted by a signal.
 * Returns 0 on success, -1 on error or if end-of-file arrives first. */
static int read_all(int fd, void *buf, size_t count) {
  unsigned char *cursor = buf;

  while (count > 0) {
    ssize_t got = read(fd, cursor, count);
    if (got < 0) {
      if (errno == EINTR) continue;
      return -1;
    }
    if (got == 0) return -1; // EOF before the whole object arrived
    cursor += got;
    count -= (size_t) got;
  }
  return 0;
}

/*
 * Receive one header + payload from `fd`.
 *
 * fd          - descriptor to read from
 * destination - receives the payload; must not be NULL
 *
 * On success, destination->data points to a newly allocated buffer holding
 * the payload followed by a zero byte (the caller frees it), and the payload
 * length is returned. On any failure, -1 is returned and destination->data
 * is NULL.
 *
 * This solution totally ignores endianness, which you will need to consider
 * if sending and receiving on different computers.
 */
int deserialize(int fd, struct simple_payload *destination) {
  if (destination == NULL) return -1;
  destination->data = NULL;

  struct simple_header received_header;
  if (read_all(fd, &received_header, sizeof received_header) < 0) return -1;

  // A negative length cannot describe a payload; reject it before it is
  // turned into an allocation size
  if (received_header.length < 0) return -1;
  size_t length = (size_t) received_header.length;

  // Always work with zeroed buffers when you can, leave room for the NUL byte
  unsigned char *buffer = calloc(length + 1, 1);
  if (buffer == NULL) return -1;

  // The payload is consumed even for an unsupported kind, so the stream
  // stays positioned at the next header
  if (read_all(fd, buffer, length) < 0) {
    free(buffer);
    return -1;
  }

  switch (received_header.kind) {
  case 's':
    /* do something special for strings; the buffer is already
       NUL-terminated because calloc zeroed the extra byte */
    break;
  default:
    free(buffer);
    return -1; /* unsupported format */
  }

  destination->data = buffer;
  return received_header.length;
}

If this is anything more than a pet project, though, I'd recommend looking into serialization formats and their libraries (header-only will be easiest to integrate). With serialization, the devil really is in the details: unhandled errors and endianness considerations can lead to data corruption, so if you value the data you're sending, please use a library! My included example does not cover:

  • when the header lies about payload length
  • payloads that exceed the length of what's in the header
  • Failed reads/writes, leading you to think you're reading a header when actually you're reading a payload
  • Error detection/correction (CRC, Reed-Solomon etc)
  • Struct alignment issues (packed vs unpacked)
  • Related