Input 0 of layer "conv2d" is incompatible with the layer expected axis -1 of input shape t-CodePudding

Preprocessing of image:

def PreprocessData(img, mask, target_shape_img, target_shape_mask, path1, path2):
    """
    Processes the images and mask present in the shared list and path
    Returns a NumPy dataset with images as 3-D arrays of desired size
   
    """
    # Pull the relevant dimensions for image and mask
    m = len(img)                     # number of images
    i_h,i_w,i_c = target_shape_img   # pull height, width, and channels of image
    m_h,m_w,m_c = target_shape_mask  # pull height, width, and channels of mask
    
    # Define X and Y as number of images along with shape of one image
    X = np.zeros((m,i_h,i_w,1), dtype=np.float32)
    y = np.zeros((m,m_h,m_w,1), dtype=np.int32)
    # RGBA image has 4 channels. 
    #255 will make the pixel completely opaque, 
    #value 0 fully transparent, 
    #values in between will make the pixels partly transparent
        
    # Resize images and masks
    for file in img:
        # convert image into an array of desired shape (3 channels)
        index = img.index(file)
        path = os.path.join(path1, file)
        single_img = np.asarray(Image.open(path).resize((i_h,i_w))) @ (0.21, 0.75, 0.04)

        #single_img = np.reshape(single_img,(i_h,i_w,i_c)) 
        single_img = single_img/255.
        X[index] = single_img[..., None] #X (dims: # images, img height, img width, img channels) 
        
        # convert mask into an array of desired shape 4 channel
        single_mask_ind = mask[index]
        path = os.path.join(path2, single_mask_ind)
        single_mask = np.asarray(Image.open(path).resize((i_h,i_w)))
        single_mask = single_mask > 0 #binarizing of targets 
        # single_mask = single_mask - 1 ### single_mask = single_mask/256???
        y[index] = single_mask[..., None] #y (dims: # masks, mask height, mask width, mask channels)
    return X, y

Encoder:

def EncoderMiniBlock(inputs, n_filters=32, dropout_prob=0.3, max_pooling=True):
    """
    This block uses multiple convolution layers, max pool, relu activation to create an architecture for learning. 
    Dropout can be added for regularization to prevent overfitting. 
    The block returns the activation values for next layer along with a skip connection which will be used in the decoder
    """
    # Add 2 Conv Layers with relu activation and HeNormal initialization using TensorFlow 
    # Proper initialization prevents from the problem of exploding and vanishing gradients 
    # 'Same' padding will pad the input to conv layer such that the output has the same height and width (hence, is not reduced in size) 
    conv = Conv2D(n_filters, 
                  3,   # Kernel size   
                  activation='relu',
                  padding='same',
                  kernel_initializer='HeNormal')(inputs)
    conv = Conv2D(n_filters, 
                  3,   # Kernel size
                  activation='relu',
                  padding='same',
                  kernel_initializer='HeNormal')(conv)
    
    # Batch Normalization will normalize the output of the last layer based on the batch's mean and standard deviation
    conv = BatchNormalization()(conv, training=False)

    # In case of overfitting, dropout will regularize the loss and gradient computation to shrink the influence of weights on output
    if dropout_prob > 0:     
        conv = tf.keras.layers.Dropout(dropout_prob)(conv)

    # Pooling reduces the size of the image while keeping the number of channels same
    # Pooling has been kept as optional as the last encoder layer does not use pooling (hence, makes the encoder block flexible to use)
    # Below, Max pooling considers the maximum of the input slice for output computation and uses stride of 2 to traverse across input image
    if max_pooling:
        next_layer = tf.keras.layers.MaxPooling2D(pool_size = (2,2))(conv)    
    else:
        next_layer = conv

    # skip connection (without max pooling) will be input to the decoder layer to prevent information loss during transpose convolutions      
    skip_connection = conv
    
    return next_layer, skip_connection

Decoder:

def DecoderMiniBlock(prev_layer_input, skip_layer_input, n_filters=32):
    """
    Decoder Block first uses transpose convolution to upscale the image to a bigger size and then,
    merges the result with skip layer results from encoder block
    Adding 2 convolutions with 'same' padding helps further increase the depth of the network for better predictions
    The function returns the decoded layer output
    """
    # Start with a transpose convolution layer to first increase the size of the image
    up = Conv2DTranspose(
                 n_filters,
                 (3,3),    # Kernel size
                 strides=(2,2),
                 padding='same')(prev_layer_input)

    # Merge the skip connection from previous block to prevent information loss
    merge = concatenate([up, skip_layer_input], axis=3)
    
    # Add 2 Conv Layers with relu activation and HeNormal initialization for further processing
    # The parameters for the function are similar to encoder
    conv = Conv2D(n_filters, 
                 3,     # Kernel size
                 activation='relu',
                 padding='same',
                 kernel_initializer='HeNormal')(merge)
    conv = Conv2D(n_filters,
                 3,   # Kernel size
                 activation='relu',
                 padding='same',
                 kernel_initializer='HeNormal')(conv)
    return conv

U-Net compilation

def UNetCompiled(input_size=(128, 128, 3), n_filters=32, n_classes=3):
    """
    Combine both encoder and decoder blocks according to the U-Net research paper
    Return the model as output 
    """
    # Input size represent the size of 1 image (the size used for pre-processing) 
    inputs = Input(input_size)
    
    # Encoder includes multiple convolutional mini blocks with different maxpooling, dropout and filter parameters
    # Observe that the filters are increasing as we go deeper into the network which will increasse the # channels of the image 
    cblock1 = EncoderMiniBlock(inputs, n_filters,dropout_prob=0, max_pooling=True)
    cblock2 = EncoderMiniBlock(cblock1[0],n_filters*2,dropout_prob=0, max_pooling=True)
    cblock3 = EncoderMiniBlock(cblock2[0], n_filters*4,dropout_prob=0, max_pooling=True)
    cblock4 = EncoderMiniBlock(cblock3[0], n_filters*8,dropout_prob=0.3, max_pooling=True)
    cblock5 = EncoderMiniBlock(cblock4[0], n_filters*16, dropout_prob=0.3, max_pooling=False) 
    
    # Decoder includes multiple mini blocks with decreasing number of filters
    # Observe the skip connections from the encoder are given as input to the decoder
    # Recall the 2nd output of encoder block was skip connection, hence cblockn[1] is used
    ublock6 = DecoderMiniBlock(cblock5[0], cblock4[1],  n_filters * 8)
    ublock7 = DecoderMiniBlock(ublock6, cblock3[1],  n_filters * 4)
    ublock8 = DecoderMiniBlock(ublock7, cblock2[1],  n_filters * 2)
    ublock9 = DecoderMiniBlock(ublock8, cblock1[1],  n_filters)

    # Complete the model with 1 3x3 convolution layer (Same as the prev Conv Layers)
    # Followed by a 1x1 Conv layer to get the image to the desired size. 
    # Observe the number of channels will be equal to number of output classes
    conv9 = Conv2D(n_filters,
                 3,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(ublock9)

    conv10 = Conv2D(n_classes, 1, padding='same')(conv9)
    
    # Define the model
    model = tf.keras.Model(inputs=inputs, outputs=conv10)

    return model

Define the desired shape

target_shape_img = [128, 128, 3]
target_shape_mask = [128, 128,1]

Process data using apt helper function

X, y = PreprocessData(img, mask, target_shape_img, target_shape_mask, path1, path2)

I am not able to understand what is wrong because I am getting this error:

ValueError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py",
line 1021, in train_function * return step_function(self, iterator) File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function ** outputs = model.distribute_strategy.run(run_step, args=(data,)) File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step ** outputs = model.train_step(data) File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 859, in train_step y_pred = self(x, training=True) File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler raise e.with_traceback(filtered_tb) from None File "/usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py", line 249, in assert_input_compatibility f'Input {input_index} of layer "{layer_name}" is '
ValueError: Exception encountered when calling layer "model" (type Functional).

Input 0 of layer "conv2d" is incompatible with the layer: expected axis -1 of input shape to have value 3, but received input with shape None, 128, 128, 1)

Call arguments received:
  • inputs=tf.Tensor(shape=(None, 128, 128, 1), dtype=float32)
  • training=True
  • mask=None

CodePudding user response：

You seem to have defined a model that takes inputs of shape (128,128,3) and are inputting shape (128,128,1). If you change the input shape when you define the UNetCompiled function, it should solve the issue.

def UNetCompiled(input_size=(128, 128, 1), n_filters=32, n_classes=3):

Or you could change the input shape in the PreprocessData function if the images are colour and not greyscale images

You have defined the images as having 1 channel

# Define X and Y as number of images along with shape of one image
    X = np.zeros((m,i_h,i_w,1), dtype=np.float32)
    y = np.zeros((m,m_h,m_w,1), dtype=np.int32)

but in the next line have written # RGBA image has 4 channels.

If your input image has 4 channels, both the images and the model input_shape needs to reflect this