I have made a simple app that encrypts and decrypts files. but when i load a large file like a 2gb, my program uses 100% of the memory. I use multiprocessing and multi threading.
poolSize = min(cpu_count(), len(fileList))
process_pool = Pool(poolSize)
thread_pool = ThreadPool(len(fileList))
lock = Lock()
worker = partial(encfile, process_pool, lock)
thread_pool.map(worker, fileList)
def encfile(process_pool, lock, file):
with open(file, 'rb') as original_file:
original = original_file.read()
encrypted = process_pool.apply(encryptfn, args=(key, original,))
with open (file, 'wb') as encrypted_file:
encrypted_file.write(encrypted)
CodePudding user response:
This is my general idea:
Since memory is a problem, you have to read the files in smaller chunks, say 64K pieces and encrypt each 64K block and write those out. Of course, the encrypted block will have a length other than 64K so the problem becomes how to decrypt. So each encrypted block must be prefixed with a fixed-length header that is nothing more than the length of the following encrypted block encoded as a 4-byte unsigned integer (which should be way more than adequate). The decryption algorithm loop first reads the next 4-byte length and then know from that how many bytes long is the encrypted block that follows.
By the way, there is no need to pass to encfile
a lock
if you are not using it to, for example, count files processed.
from tempfile import mkstemp
from os import fdopen, replace
BLOCKSIZE = 64 * 1024
ENCRYPTED_HEADER_LENGTH = 4
def encfile(process_pool, lock, file):
"""
Encrypt file in place.
"""
fd, path = mkstemp() # make a temporary file
with open(file, 'rb') as original_file, \
fdopen (fd, 'wb') as encrypted_file:
while True:
original = original_file.read(BLOCKSIZE)
if not original:
break
encrypted = process_pool.apply(encryptfn, args=(key, original))
l = len(encrypted)
l_bytes = l.to_bytes(ENCRYPTED_HEADER_LENGTH, 'big')
encrypted_file.write(l_bytes)
encrypted_file.write(encrypted)
replace(path, file)
def decfile(file):
"""
Decrypt files in place.
"""
fd, path = mkstemp() # make a temporary file
with open(file, 'rb') as encrypted_file, \
fdopen (fd, 'wb') as original_file:
while True:
l_bytes = encrypted_file.read(ENCRYPTED_HEADER_LENGTH)
if not l_bytes:
break
l = int.from_bytes(l_bytes, 'big')
encrypted = encrypted_file.read(l)
decrypted = decryptfn(key, encrypted)
original_file.write(decrypted)
replace(path, file)