How to create a flow generator in Python for my custom data - CodePudding

I am doing cat/dog binary classification. I created my training data as shown below, applying an averaging (mean) filter to each image. The problem is that the dataset is quite large, and right after running this code I get the message "Your notebook tried to allocate more memory than is available." I read that generators in Python use less memory and can solve this problem, but I don't know how to write a generator that fits the code I use to create the training data:

# Root directory that holds one sub-folder per class name.
train_dir = "../input/dog-cat/train"

# Class names; a class's position in this list is used as its integer label.
CATEGORIES = ["dog", "cat"]

# Filled by create_training_data() with [image, label] entries.
training_data = list()

def create_training_data():
    """Load every training image, blur it, and append it to ``training_data``.

    For each name in ``CATEGORIES`` the directory ``train_dir/<name>`` is
    listed, every file in it is read with ``cv2.imread`` and smoothed with a
    9x9 averaging filter (``cv2.blur``).  The entry
    ``[blurred_image, class_num]`` is appended to the module-level
    ``training_data`` list, where ``class_num`` is the position of the
    category in ``CATEGORIES``.

    Files that OpenCV cannot read or blur are skipped; any other error is
    allowed to propagate so that programming mistakes are not hidden.

    Returns:
        None.  Results are accumulated in ``training_data``.
    """
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(train_dir, category)

        for filename in tqdm(os.listdir(path)):
            img_train = cv2.imread(os.path.join(path, filename))
            if img_train is None:
                # cv2.imread reports an unreadable/non-image file by
                # returning None instead of raising.
                continue
            try:
                # The original passed an undefined name here, which raised
                # NameError on every file and left training_data empty.
                img_mean = cv2.blur(img_train, (9, 9))
            except cv2.error:
                # Best effort: skip images OpenCV cannot process.
                continue
            training_data.append([img_mean, class_num])
# Populate the module-level ``training_data`` list.
create_training_data()

import random

# Shuffle in place so the entries are no longer ordered by category.
random.shuffle(training_data)

# Split the [features, label] entries into two parallel lists.
x_train = [features for features, _ in training_data]
y_train = [label for _, label in training_data]

CodePudding user response：

Given your requirements, you probably want to use ImageDataGenerator() together with a blur function; check out cv2 (OpenCV) for the blur.

CodePudding user response：

You have to use yield instead of return (or, as here, instead of appending to a list), so that the function becomes a generator:

def create_training_data():
    """Yield blurred training images together with their labels, one at a time.

    For each name in ``CATEGORIES`` the directory ``train_dir/<name>`` is
    listed, every file in it is read with ``cv2.imread`` and smoothed with a
    9x9 averaging filter (``cv2.blur``).  Because samples are yielded as they
    are produced, the images are not accumulated in a list by this function.

    Files that OpenCV cannot read or blur are skipped; any other error is
    allowed to propagate so that programming mistakes are not hidden.

    Yields:
        tuple: ``(blurred_image, class_num)`` where ``class_num`` is the
        position of the category in ``CATEGORIES``.
    """
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(train_dir, category)

        for filename in tqdm(os.listdir(path)):
            img_train = cv2.imread(os.path.join(path, filename))
            if img_train is None:
                # cv2.imread reports an unreadable/non-image file by
                # returning None instead of raising.
                continue
            try:
                # The original passed an undefined name here, which raised
                # NameError on every file so nothing was ever yielded.
                img_mean = cv2.blur(img_train, (9, 9))
            except cv2.error:
                # Best effort: skip images OpenCV cannot process.
                continue
            # Yield a tuple (not a list) so each element has the same
            # two-component tuple structure that is declared when this
            # generator is wrapped with tf.data.Dataset.from_generator.
            yield img_mean, class_num

# Wrap the generator function in a tf.data pipeline; each element is
# declared as a (float32, int32) pair.
dataset = tf.data.Dataset.from_generator(
    create_training_data,
    output_types=(tf.float32, tf.int32),
)