I am creating a model for image captioning that uses a CNN and an LSTM. I am training the model by generating batches, but I get an error when I call model.fit with the generator object. Could anyone help?
Below is the code for the generator function and the model. I don't know what is wrong here!
def data_generator(train_descriptions, encoding_train, word_to_idx, max_len, num_photos_per_batch):
    X1, X2, y = [], [], []
    n = 0
    while True:
        for key, desc_list in train_descriptions.items():
            n += 1
            photo = encoding_train[key + ".jpg"]
            for desc in desc_list:
                seq = [word_to_idx[word] for word in desc.split() if word in word_to_idx]
                for i in range(1, len(seq)):
                    in_seq = seq[0:i]
                    out_seq = seq[i]
                    in_seq = pad_sequences([in_seq], maxlen=max_len, value=0, padding='post')[0]
                    out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
                    X1.append(photo)
                    X2.append(in_seq)
                    y.append(out_seq)
            if n == num_photos_per_batch:
                yield [[np.array(X1), np.array(X2), np.array(y)]]
                X1, X2, y = [], [], []
                n = 0
# image feature extractor model, inputs the image feature vector
input_img_fea = Input(shape=(2048,))
inp_img1 = Dropout(0.3)(input_img_fea)
inp_img2 = Dense(256, activation='relu')(inp_img1)

# partial caption sequence model, inputs the captions
input_cap = Input(shape=(max_len,))
inp_cap1 = Embedding(input_dim=vocab_size, output_dim=50, mask_zero=True)(input_cap)
inp_cap2 = Dropout(0.3)(inp_cap1)
inp_cap3 = LSTM(256)(inp_cap2)

# merge the two networks
decoder1 = add([inp_img2, inp_cap3])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)

model = Model(inputs=[input_img_fea, input_cap], outputs=outputs)
model.summary()

model.layers[2].set_weights([embedding_output])
model.layers[2].trainable = False

model.compile(loss="categorical_crossentropy", optimizer="adam")
epochs = 20
number_pics_per_batch = 3
steps = len(train_descriptions) // number_pics_per_batch

for i in range(epochs):
    generator = data_generator(train_descriptions, encoding_train, word_to_idx, max_len, number_pics_per_batch)
    model.fit(generator, epochs=1, steps_per_epoch=steps)
    model.save('./model_' + str(i) + '.h5')
ValueError: in user code:

    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py", line 200, in assert_input_compatibility
        raise ValueError(f'Layer "{layer_name}" expects {len(input_spec)} input(s),'

    ValueError: Layer "model_2" expects 2 input(s), but it received 3 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, None) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(None, None) dtype=int32>, <tf.Tensor 'IteratorGetNext:2' shape=(None, None) dtype=float32>]
CodePudding user response:
The shape of your yield is wrong: the model expects 2 inputs, but you are yielding 3 tensors as a single input list. Yield the inputs and the target separately:

yield [[np.array(X1), np.array(X2)], [np.array(y)]]
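For context, Keras's Model.fit accepts a Python generator that yields (inputs, targets) pairs; for a multi-input model, inputs is a list of arrays in the same order as the model's Input layers. Below is a minimal sketch of the corrected generator, assuming the same variables as in your question (vocab_size is taken to be a global, as in your code):

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

def data_generator(train_descriptions, encoding_train, word_to_idx, max_len, num_photos_per_batch):
    X1, X2, y = [], [], []
    n = 0
    while True:
        for key, desc_list in train_descriptions.items():
            n += 1
            photo = encoding_train[key + ".jpg"]
            for desc in desc_list:
                seq = [word_to_idx[word] for word in desc.split() if word in word_to_idx]
                for i in range(1, len(seq)):
                    # input: the words seen so far; target: the next word
                    in_seq = pad_sequences([seq[:i]], maxlen=max_len, value=0, padding='post')[0]
                    out_seq = to_categorical([seq[i]], num_classes=vocab_size)[0]  # vocab_size assumed global
                    X1.append(photo)
                    X2.append(in_seq)
                    y.append(out_seq)
            if n == num_photos_per_batch:
                # (inputs, targets): inputs is a 2-element list matching the
                # model's two Input layers, targets is a single array
                yield ([np.array(X1), np.array(X2)], np.array(y))
                X1, X2, y = [], [], []
                n = 0

With this change the error disappears, because the generator now produces exactly two input arrays plus one target array instead of a single structure of three tensors.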