I created a for loop that loops through a directory of images, resizes every image, and then saves it to another directory. The code works, but I'm trying to parallelize the process to make it faster.
This is the resize function
import cv2
import os
def resize_image(img):
    """Resize the image at path ``img`` to 700x700 and save it in ``Resized/``.

    The output file keeps the base name of the input file.

    Args:
        img: Path of the image file to read.

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    # get the name of the file
    name = os.path.basename(img)
    # read the image; keep the path and the pixel data in separate names
    image = cv2.imread(img)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        raise ValueError(f"could not read image: {img}")
    # resize and save to new directory
    resized = cv2.resize(image, (700, 700))
    cv2.imwrite("Resized/" + name, resized)
And here is the for loop that would loop through the images in the directory (takes around 700 seconds to resize all the images in the directory).
SOURCE_DIRECTORY = "Source/"
directory_list = os.listdir(SOURCE_DIRECTORY)
# Lazily build the full path of every directory entry ...
candidate_paths = (
    os.path.join(SOURCE_DIRECTORY, entry) for entry in directory_list
)
# ... and resize only the entries that are regular files, one after another.
for source_path in filter(os.path.isfile, candidate_paths):
    resize_image(source_path)
In an effort to parallelize the process, I tried using concurrent.futures and mapping the resize function over the directory listing.
import concurrent.futures
SOURCE_DIRECTORY = "Source/"
directory_list = os.listdir(SOURCE_DIRECTORY)
# NOTE(review): os.listdir yields bare entry names, so the workers receive
# names that are not joined with SOURCE_DIRECTORY and are not filtered to
# regular files.
# NOTE(review): there is no `if __name__ == "__main__":` guard around the
# pool; presumably related to the worker processes dying - confirm against
# the platform / interpreter this is run from.
# NOTE(review): the iterator returned by map() is never consumed, so an
# exception raised inside a worker would not be surfaced here.
with concurrent.futures.ProcessPoolExecutor() as pool:
    pool.map(resize_image, directory_list)
But I instantly get this error.
BrokenProcessPool: A child process terminated abruptly, the process pool is not usable anymore
How can I parallelize the process of resizing the images? Any help would be appreciated.
CodePudding user response:
Here is a sample skeleton you can use to parallelize the task (using multiprocessing.Pool):
import os
from multiprocessing import Pool
import cv2
def resize_image(file_name):
    """Resize one image to 700x700 and save it under ``Resized/``.

    Expected failures are reported through the returned status string
    instead of an exception, so a worker pool can collect one result per
    input file.

    Args:
        file_name: Path of the image file to read.

    Returns:
        A message stating that the file was resized, does not exist, could
        not be read, or could not be saved.
    """
    # get the name of the file (the output keeps the same base name)
    name = os.path.basename(file_name)

    # just to be sure the file exists (skip if not necessary); check the
    # path that was passed in, not the base name, which would be resolved
    # against the current working directory.
    if not os.path.exists(file_name):
        return f"{name} does not exist!"

    # read the image; cv2.imread returns None instead of raising when the
    # file cannot be read or decoded
    img = cv2.imread(file_name)
    if img is None:
        return f"{name} could not be read!"

    # resize and save to new directory; cv2.imwrite reports failure through
    # its return value
    resized = cv2.resize(img, (700, 700))
    if not cv2.imwrite("Resized/" + name, resized):
        return f"{name} could not be saved!"

    return f"{name} resized."
if __name__ == "__main__":
    SOURCE_DIRECTORY = "Source/"

    # Full path of every entry in the source directory, built lazily.
    entry_paths = (
        os.path.join(SOURCE_DIRECTORY, entry)
        for entry in os.listdir(SOURCE_DIRECTORY)
    )
    # Keep only regular files; sub-directories are skipped.
    filelist = [path for path in entry_paths if os.path.isfile(path)]

    # 4 is number of processes we want to use
    with Pool(4) as workers:
        # Results are printed as they are yielded by imap_unordered.
        for status in workers.imap_unordered(resize_image, filelist):
            print(status)