Python equivalent of C struct for writing bytes to a file-CodePudding

What could be the simplest Python equivalent to the following C code?

#include <stdio.h>

/*
 * Write one `struct person` record to "data_from_c.txt" as raw bytes.
 *
 * The record is dumped with a single fwrite() of sizeof(p) bytes, so the
 * file contains the in-memory representation of the struct as laid out by
 * the compiler used to build this program.
 *
 * Returns 0 on success, 1 if the file cannot be opened or fully written.
 */
int main(void) {
    struct dog {
        char breed[16];
        char name[16];
    };
    struct person {
        char name[16];
        int age;
        struct dog pets[2];
    };
    /* Array elements not covered by a string literal are zero-initialized. */
    struct person p = {
        "John Doe", 20, {{"Lab", "Foo"}, {"Pug", "Bar"}}
    };
    int status = 0;

    /*
     * "wb" rather than "w": the payload is binary, and a text-mode stream
     * may translate newline bytes on platforms that distinguish the two.
     */
    FILE *fp = fopen("data_from_c.txt", "wb");
    if (fp == NULL) {
        perror("fopen");
        return 1;
    }

    /* fwrite returns the number of complete items written; expect 1. */
    if (fwrite(&p, sizeof(p), 1, fp) != 1) {
        perror("fwrite");
        status = 1;
    }

    /* fclose flushes buffered data, so a failure here is a write failure. */
    if (fclose(fp) != 0) {
        perror("fclose");
        status = 1;
    }
    return status;
}

My main goal here is to write the data to the file as contiguous bytes:

$ xxd data_from_c.txt
00000000: 4a6f 686e 2044 6f65 0000 0000 0000 0000  John Doe........
00000010: 1400 0000 4c61 6200 0000 0000 0000 0000  ....Lab.........
00000020: 0000 0000 466f 6f00 0000 0000 0000 0000  ....Foo.........
00000030: 0000 0000 5075 6700 0000 0000 0000 0000  ....Pug.........
00000040: 0000 0000 4261 7200 0000 0000 0000 0000  ....Bar.........
00000050: 0000 0000                                ....

So far, I have tried using namedtuples and the struct module for packing the Python values:

from collections import namedtuple
import struct

# Lightweight records mirroring the C structs `dog` and `person`.
dog = namedtuple('dog', 'breed name')
person = namedtuple('person', 'name age pets')
p = person(
    name=b'John Doe',
    age=22,  # NOTE(review): the C example above initializes age to 20 - confirm which is intended
    pets=(dog(breed=b'Lab', name=b'Foo'), dog(breed=b'Pug', name=b'Bar'))
)

with open('data_from_python.txt', 'wb') as f:
    # The format string expects six values, but `*p` expands only the three
    # top-level fields (name, age, pets). The nested dog tuples inside `pets`
    # are not flattened, so struct.pack raises struct.error here.
    b = struct.pack('<16s i 16s 16s 16s 16s', *p)
    f.write(b)

However, the *p unpacking does not unpack the iterable recursively. Is there a way to do this properly?

If there is an alternative to doing this that doesn't involve using struct or namedtuple, that would be welcome too.

CodePudding user response:

I would be tempted to make Person and Dog dataclasses and add methods to those dataclasses that take care of packing and unpacking the data to bytes.

As an example:

from dataclasses import dataclass, field
from pathlib import Path
import struct


@dataclass
class Dog:
    """A pet record that serializes to two fixed-width 16-byte text fields.

    The on-disk form is ``breed`` followed by ``name``, each encoded with the
    default string encoding and NUL-padded to 16 bytes by ``struct``.
    """

    breed: str = ''
    name: str = ''
    # struct format shared by to_bytes/from_bytes: two 16-byte strings.
    _fmt: str = field(init=False, repr=False, default='<16s16s')

    def to_bytes(self):
        """Return this dog packed as ``breed`` then ``name`` bytes."""
        encoded_fields = (text.encode() for text in (self.breed, self.name))
        return struct.pack(self._fmt, *encoded_fields)

    def from_bytes(self, bin_data):
        """Overwrite this dog's fields from packed bytes and return ``self``.

        ``bin_data`` must be exactly the size described by ``_fmt``;
        otherwise ``struct.unpack`` raises ``struct.error``.
        """
        raw_breed, raw_name = struct.unpack(self._fmt, bin_data)
        # Drop the trailing NUL padding before decoding each field.
        stripped_breed = raw_breed.rstrip(b'\x00')
        self.breed = stripped_breed.decode()
        stripped_name = raw_name.rstrip(b'\x00')
        self.name = stripped_name.decode()
        return self


@dataclass
class Person:
    """A person record: a fixed-size header followed by packed pet records.

    Binary layout (little-endian, standard sizes, no alignment padding):
    a 16-byte NUL-padded name, a 4-byte ``int`` age, then one ``Dog`` record
    per pet, in list order.
    """

    name: str = ''
    age: int = 0
    pets: list[Dog] = field(default_factory=list)
    # struct format of the header only; pet records follow it in the stream.
    _fmt: str = field(init=False, repr=False, default='<16si')

    def to_bytes(self):
        """Return the packed header followed by every pet's packed bytes."""
        header = struct.pack(self._fmt, self.name.encode(), self.age)
        return header + b''.join(pet.to_bytes() for pet in self.pets)

    @staticmethod
    def _split_pets(seq, size):
        """Return a generator of consecutive ``size``-long slices of ``seq``.

        The last slice is shorter when ``len(seq)`` is not a multiple of
        ``size``.
        """
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))

    def from_bytes(self, bin_data: bytes):
        """Overwrite this person's fields from packed bytes and return ``self``.

        The header is decoded first; every remaining ``Dog``-sized chunk is
        decoded into a new ``Dog``. A trailing chunk of the wrong size makes
        ``Dog.from_bytes`` raise ``struct.error``.
        """
        header_size = struct.calcsize(self._fmt)
        dog_size = struct.calcsize(Dog._fmt)
        name, self.age = struct.unpack(self._fmt, bin_data[:header_size])
        # Drop the trailing NUL padding before decoding the name.
        self.name = name.rstrip(b'\x00').decode()
        pets_bytes = bin_data[header_size:]

        # Replace the pet list rather than appending to it, so decoding into
        # an already-populated instance does not accumulate stale pets.
        self.pets = [
            Dog().from_bytes(pet_data)
            for pet_data in self._split_pets(pets_bytes, dog_size)
        ]
        return self


def main():
    """Write a sample person to disk, then read it back and print it."""
    target = Path('/tmp/data_from_python.txt')

    # Same sample data as the C program in the question.
    pets = [
        Dog(breed='Lab', name='Foo'),
        Dog(breed='Pug', name='Bar'),
    ]
    owner = Person(name='John Doe', age=20, pets=pets)
    packed = owner.to_bytes()
    target.write_bytes(packed)

    # Round trip: rebuild a Person from the bytes just written.
    raw = target.read_bytes()
    restored = Person().from_bytes(raw)
    print("Person from file\n", restored)


if __name__ == '__main__':
    main()

Running this gave the following output:

Person from file
 Person(name='John Doe', age=20, pets=[Dog(breed='Lab', name='Foo'), Dog(breed='Pug', name='Bar')])

And the file on disk appears to match the one in the question:

$ xxd /tmp/data_from_python.txt 
00000000: 4a6f 686e 2044 6f65 0000 0000 0000 0000  John Doe........
00000010: 1400 0000 4c61 6200 0000 0000 0000 0000  ....Lab.........
00000020: 0000 0000 466f 6f00 0000 0000 0000 0000  ....Foo.........
00000030: 0000 0000 5075 6700 0000 0000 0000 0000  ....Pug.........
00000040: 0000 0000 4261 7200 0000 0000 0000 0000  ....Bar.........
00000050: 0000 0000                                ....