Why does my model using a custom layer not work properly? - CodePudding

I am working on customizing a layer to use in my model. The core part is the "call" function, shown below:

class Custom_Layer(Layer):
// some code


def call(self, inputs, **kwargs):
  """Apply the layer: matmul with the derived kernel, then bias and activation.

  The kernel is rebuilt on every call as
  ``mul(self.base, self.diag_start - self.diag_end)``.
  """
  weights = mul(self.base, self.diag_start - self.diag_end)
  result = matmul(a=inputs, b=weights)

  # The bias is optional and only added when the layer was configured with one.
  if self.use_bias:
    result = tf.nn.bias_add(result, self.bias)

  # Without an activation the (possibly biased) product is returned directly.
  if self.activation is None:
    return result
  return self.activation(result)
// some code

It is used in a simple model:

# Three stacked custom layers classify flattened 784-feature inputs into
# 10 classes.
inputs = tf.keras.layers.Input(shape=(784,), dtype='float32')

# The class is declared as `Custom_Layer` (capital "L"); the lowercase
# spelling `Custom_layer` is not a defined name and would raise NameError.
layer1 = Custom_Layer(2000, **Custom_layer_config, activation='tanh')(inputs)
layer2 = Custom_Layer(200, **Custom_layer_config, activation='tanh')(layer1)
output_lay = Custom_Layer(10, **Custom_layer_config, activation='softmax')(layer2)

model = tf.keras.models.Model(inputs=inputs, outputs=output_lay)

opt = tf.keras.optimizers.Adamax(learning_rate=0.02)
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

It is supposed to print a summary like this:

Model: "functional_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_8 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
CustomLayer_18 (Custom_Layer)       (None, 2000)              1570784   
_________________________________________________________________
CustomLayer_19 (Custom_Layer)       (None, 200)               402200    
_________________________________________________________________
CustomLayer_20 (Custom_Layer)       (None, 10)                2210      
=================================================================
Total params: 1,975,194
Trainable params: 5,194
Non-trainable params: 1,970,000
_________________________________________________________________

But it prints this instead:

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_2 (InputLayer)        [(None, 784)]             0         
                                                                 
 tf.linalg.matmul_3 (TFOpLam  (None, 2000)             0         
 bda)                                                            
                                                                 
 tf.math.tanh_2 (TFOpLambda)  (None, 2000)             0         
                                                                 
 tf.linalg.matmul_4 (TFOpLam  (None, 200)              0         
 bda)                                                            
                                                                 
 tf.math.tanh_3 (TFOpLambda)  (None, 200)              0         
                                                                 
 tf.linalg.matmul_5 (TFOpLam  (None, 10)               0         
 bda)                                                            
                                                                 
 tf.compat.v1.nn.softmax_1 (  (None, 10)               0         
 TFOpLambda)                                                     
                                                                 
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0

The first summary is what I got from the author's repository, and the second summary is from my own run of the same code without changing anything.

The code is not complex, but it is strange that there are no parameters at all. My question is: what is wrong here?

CodePudding user response:

Try writing it as a subclass of an existing Keras layer, following this example.

Sample: Custom LSTM class

import tensorflow as tf

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Definition
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyLSTMLayer(tf.keras.layers.LSTM):
    """Layer that multiplies its input by a single ``kernel`` weight.

    Although it subclasses ``tf.keras.layers.LSTM``, both ``build`` and
    ``call`` are overridden here, so the forward pass is only
    ``tf.matmul(inputs, self.kernel)``.
    """

    def __init__(self, units, return_sequences, return_state):
        """Create the layer.

        Args:
            units: Size of the last output dimension (number of kernel
                columns).
            return_sequences: Accepted for signature compatibility but not
                forwarded; the parent is always built with ``True``.
            return_state: Accepted for signature compatibility but not
                forwarded; the parent is always built with ``False``.
        """
        # The flags stay fixed because `call` below always returns the full
        # matmul result and never returns any state.
        super().__init__(units, return_sequences=True, return_state=False)
        self.num_units = units

    def build(self, input_shape):
        """Create the ``(input_shape[-1], units)`` kernel weight."""
        # The name is passed by keyword: Keras 3 takes `shape` as the first
        # positional parameter of `add_weight`, so a positional "kernel"
        # would collide with the `shape=` keyword there.
        self.kernel = self.add_weight(
            name="kernel",
            shape=[int(input_shape[-1]), self.num_units],
        )

    def call(self, inputs):
        """Return ``tf.matmul(inputs, self.kernel)``."""
        return tf.matmul(inputs, self.kernel)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Demo input: the values 3, 6, 9 as a float32 tensor of shape (1, 1, 3).
start, limit, delta = 3, 12, 3
sample = tf.constant(
    tf.cast(tf.range(start, limit, delta), dtype=tf.float32),
    shape=(1, 1, 3),
)

# Wrap the custom layer in a Sequential model so that summary() lists its
# weights.
layer = MyLSTMLayer(3, True, False)
model = tf.keras.Sequential([tf.keras.Input(shape=(1, 3)), layer])
model.summary()

print(sample)
print(model.predict(sample))

Output:

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 my_lstm_layer (MyLSTMLayer)  (None, 1, 3)             9

=================================================================
Total params: 9
Trainable params: 9
Non-trainable params: 0
_________________________________________________________________

tf.Tensor([[[3. 6. 9.]]], shape=(1, 1, 3), dtype=float32)
1/1 [==============================] - 1s 575ms/step
[[[-2.8894916 -2.146874  13.688236 ]]]