I am using shutil to pack and unpack a TensorFlow model folder (I think this issue is more related to shutil than to TensorFlow).
import logging
import os
import shutil
import tempfile

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense

def load_model_as_bytes(model):
    def file_chunk_generate(file_path):
        CHUNK_SIZE = 4 * 1024 * 1024
        with open(file_path, 'rb') as f:
            while True:
                piece = f.read(CHUNK_SIZE)
                if len(piece) == 0:
                    return
                # ByteChunk is defined elsewhere (not shown in the question)
                yield ByteChunk(buffer=piece)

    tmpdir = tempfile.mkdtemp()
    tf.saved_model.save(model, tmpdir)
    zip_path = os.path.join(tmpdir, "tf_model")
    shutil.make_archive(zip_path, 'zip', tmpdir)
    size = os.path.getsize(f'{zip_path}.zip')
    logging.info(f"send model file zip, length: {size}")  # -------output 4621
    file_chunk_generator = file_chunk_generate(f'{zip_path}.zip')
    return file_chunk_generator
class NeuralNetworkPart(Model):
    def __init__(self):
        super().__init__()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        x = x[0]
        x = self.d1(x)
        return self.d2(x)
model = NeuralNetworkPart()
it = load_model_as_bytes(model)

tmpdir = tempfile.mkdtemp()
zip_path = os.path.join(tmpdir, "tf_model.zip")
with open(zip_path, 'wb') as f:
    for byte_chunk in it:
        f.write(byte_chunk.buffer)
logging.info(f"receive model file zip, length: {os.path.getsize(zip_path)}")  # -------output 4621
shutil.unpack_archive(zip_path, tmpdir)
Basically, this program takes a folder and zips it with make_archive. It then reads the zip file as bytes through a generator, uses the generator to write a second zip file, and tries to unzip that with unpack_archive.
Before the generator is consumed, and again after the second zip is written but before unpacking, the sizes match (checked in the logging). However, calling unpack_archive raises an EOFError:
shutil.unpack_archive(zip_path, tmpdir)
  File "/lib/python3.6/shutil.py", line 983, in unpack_archive
    func(filename, extract_dir, **kwargs)
  File "/lib/python3.6/shutil.py", line 901, in _unpack_zipfile
    data = zip.read(info.filename)
  File "/lib/python3.6/zipfile.py", line 1338, in read
    return fp.read()
  File "/lib/python3.6/zipfile.py", line 858, in read
    buf = self._read1(self.MAX_N)
  File "/lib/python3.6/zipfile.py", line 940, in _read1
    data = self._read2(n - len(data))
  File "/lib/python3.6/zipfile.py", line 975, in _read2
    raise EOFError
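One way to tell whether the copied zip is corrupt in itself, independent of unpack_archive, is zipfile's built-in integrity check. This is a diagnostic sketch (check_zip is a hypothetical helper, not part of the program above); testzip() reads every member and returns the name of the first bad one, and a truncated member can also surface as an exception:

import zipfile

def check_zip(zip_path):
    try:
        with zipfile.ZipFile(zip_path) as zf:
            bad = zf.testzip()  # returns None if every member's CRC checks out
            print(f"corrupt member: {bad}" if bad else "all members OK")
    except (EOFError, zipfile.BadZipFile) as exc:
        print(f"archive unreadable: {exc!r}")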
CodePudding user response:
This slightly simplified version seems to work just fine. The likely culprit in the original code: the archive is created inside the very directory being archived (zip_path lives in tmpdir, which is also the root passed to make_archive), so make_archive tries to include the half-written zip in itself; the version below stages the archive in a separate temporary directory. Note that none of the temporary files are cleaned up by this; you might want to fix that before you have your tmpdir full of TensorFlow models.
import os
import shutil
import tempfile

def file_chunk_generate(file_path):
    CHUNK_SIZE = 4 * 1024 * 1024
    with open(file_path, "rb") as f:
        while True:
            piece = f.read(CHUNK_SIZE)
            if not piece:
                return
            yield piece

def get_zip_chunk_generator(source_dir):
    arcname = shutil.make_archive(
        os.path.join(tempfile.mkdtemp("zip-"), "tf_model"), "zip", source_dir
    )
    return file_chunk_generate(arcname)

def make_source_dir():
    tmpdir = tempfile.mkdtemp("src-")
    for x in range(5):
        with open(os.path.join(tmpdir, f"test-{x}.txt"), "wb") as f:
            f.write(b"foo" * 1024)
    return tmpdir

source_dir = make_source_dir()
it = get_zip_chunk_generator(source_dir)

dest_dir = tempfile.mkdtemp(prefix="dest-")
print("1", os.listdir(dest_dir))

zip_path = os.path.join(dest_dir, "tf_model_dest.zip")
with open(zip_path, "wb") as f:
    for byte_chunk in it:
        f.write(byte_chunk)

print("2", os.listdir(dest_dir))
shutil.unpack_archive(zip_path, dest_dir)
print("3", os.listdir(dest_dir))
The output is
1 []
2 ['tf_model_dest.zip']
3 ['test-0.txt', 'test-1.txt', 'test-3.txt', 'test-2.txt', 'tf_model_dest.zip', 'test-4.txt']
as you might expect.
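On the cleanup caveat above, one option (a sketch, not part of the answer's code) is to stage the archive under tempfile.TemporaryDirectory, which removes the directory and everything in it when the context exits:

import os
import shutil
import tempfile

def archive_chunks(source_dir, chunk_size=4 * 1024 * 1024):
    # The staging directory is deleted automatically when the
    # with-block exits, i.e. once the generator has been exhausted.
    with tempfile.TemporaryDirectory(prefix="zip-") as staging:
        arcname = shutil.make_archive(
            os.path.join(staging, "tf_model"), "zip", source_dir
        )
        with open(arcname, "rb") as f:
            while True:
                piece = f.read(chunk_size)
                if not piece:
                    return
                yield piece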
I'd recommend using tarballs, though, if you're going to stream over the network, since you could do that without any file on disk at all: ZIPs require seeking support to unpack, but TARs don't.
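To illustrate that, here's a sketch of the streaming-tar idea, with io.BytesIO standing in for the network pipe (a real transport would be whatever stream your RPC framework hands you, and wouldn't need the seek):

import io
import tarfile
import tempfile

def pack_dir(source_dir, fileobj):
    # "w|gz" is tarfile's non-seekable streaming-write mode
    with tarfile.open(fileobj=fileobj, mode="w|gz") as tar:
        tar.add(source_dir, arcname=".")

def unpack_stream(fileobj, dest_dir):
    # "r|gz" reads the stream strictly front to back, no seeking
    with tarfile.open(fileobj=fileobj, mode="r|gz") as tar:
        tar.extractall(dest_dir)

pipe = io.BytesIO()          # stand-in for a socket/RPC stream
pack_dir(source_dir, pipe)   # source_dir from the example above
pipe.seek(0)                 # only needed because BytesIO is not a real pipe
unpack_stream(pipe, tempfile.mkdtemp(prefix="tar-dest-"))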