Home > Software engineering >  How to train a model with a multiple binary digit output prediction
How to train a model with a multiple binary digit output prediction

Time:05-22

I am looking for the right activation function for my last layer and possibly the right loss function for training.
At the input, my model gets symbols from 0 to 7. I want to train my model to learn which number belongs to which encoding.

At the output I want to get three numbers (bits) that represent the binary encoding of the input symbol.

I don't think the predictions from my model are quite right.

My code for you:

import numpy as np
import tensorflow as tf 
from tensorflow import keras 

# Training inputs: 5000 symbols drawn at random from {0, ..., 7}.
trainSym = np.random.randint(0, 8, size=5000)

# Lookup table from symbol to its 3-bit code, most significant bit first:
# row s holds the bits of s as floats, e.g. row 5 -> [1., 0., 1.].
bit_positions = np.arange(2, -1, -1)
bit_labels = ((np.arange(8)[:, np.newaxis] >> bit_positions) & 1).astype(np.float64)
print(bit_labels)

# Look up the 3-bit code of every training symbol (row-wise gather
# from the bit_labels table).
trainLabels = tf.gather(params=bit_labels, indices=trainSym)
print(trainLabels)


# Sanity check: show the first eight symbols next to their codes.
for symbol, code in zip(trainSym[:8], trainLabels[:8]):
    print(symbol, '|', code)
    

# Creating the model:
# A scalar int32 symbol is one-hot encoded (depth 8), squeezed through
# dense ReLU layers of width 16 -> 2 -> 2 -> 16, and mapped to three
# sigmoid outputs, one per bit of the target code.
# NOTE(review): the two 2-unit ReLU layers are a very narrow bottleneck
# for 8 distinct symbols; consider widening them if training stalls.
input_signal = keras.Input(shape=(), dtype=tf.int32)
one_hot      = tf.one_hot(input_signal, depth=8)
encoded      = keras.layers.Dense(16, activation='relu')(one_hot)
encoded1     = keras.layers.Dense(2, activation='relu')(encoded)

decoded      = keras.layers.Dense(2, activation='relu')(encoded1)
decoded1     = keras.layers.Dense(16, activation='relu')(decoded)
decoded2     = keras.layers.Dense(3, activation='sigmoid')(decoded1)

model  = keras.Model(input_signal, decoded2)

# Structure of the model. summary() prints the table itself and returns
# None, so wrapping it in print() would emit a stray "None" line.
model.summary()

# Adam with a fixed learning rate; binary cross-entropy is used as the
# loss on the model's outputs.
lr = 1e-3
adam = keras.optimizers.Adam(learning_rate=lr)
model.compile(loss="binary_crossentropy", optimizer=adam)

# Stop once the training loss has failed to improve for 3 consecutive
# epochs, and roll the model back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='loss',
)

# Fit on the generated symbols and their bit codes.
history = model.fit(
    x=trainSym,
    y=trainLabels,
    batch_size=256,
    epochs=10,
    callbacks=[early_stopping],
)


# Evaluation on 100 fresh random symbols and their reference bit codes.
test_symbols = np.random.randint(low=0, high=8, size=100)
test_labels = tf.gather(bit_labels, test_symbols)


# Each row of pred_final_signal holds the three sigmoid outputs of the
# model for one symbol: values in (0, 1), one per bit of the code.
pred_final_signal = model.predict(test_symbols, batch_size=128)

# Decode to hard bits: an output above 0.5 means "bit is 1".
# (The comparison must be greater-than; using less-than flips every bit
# and turns the score below into an error rate.)
pred_output = tf.cast(tf.greater(pred_final_signal, 0.5), tf.int8)

# Fraction of bits predicted correctly (1.0 means no bit errors).
no_error = tf.equal(pred_output, tf.cast(test_labels, tf.int8))
no_error = tf.reduce_mean(tf.cast(no_error, tf.float32))
print(no_error)

Does anyone have an idea what the numbers in my pred_final_signal mean?

Many thanks :-)

CodePudding user response:

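The numbers in pred_final_signal are the sigmoid outputs of the last layer, i.e. one value between 0 and 1 for each of the three bits. Try using np.where with a threshold of 0.5 to turn them into bits and make sense of your predictions: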

import numpy as np
import tensorflow as tf 
from tensorflow import keras 

# Creating train data: 5000 random symbols, each in {0, ..., 7}.
trainSym = np.random.randint(0, 8, size=5000)

# One row per symbol: the characters of its 3-digit binary string
# (np.binary_repr, most significant bit first) converted to floats.
bit_labels = np.array(
    [[float(bit) for bit in np.binary_repr(symbol, width=3)] for symbol in range(8)]
)
print(bit_labels)

# Map every training symbol to its row of the bit_labels table.
trainLabels = tf.gather(params=bit_labels, indices=trainSym)
print(trainLabels)


# Check the pairing: print the first eight symbols with their codes.
for symbol, code in zip(trainSym[:8], trainLabels[:8]):
    print(symbol, '|', code)
    

# Creating the model:
# A scalar int32 symbol is one-hot encoded (depth 8), passed through
# dense ReLU layers of width 32 -> 16 -> 8 -> 8 -> 16 -> 32, and mapped
# to three sigmoid outputs, one per bit of the target code.
input_signal = tf.keras.Input(shape=(), dtype=tf.int32)
one_hot      = tf.one_hot(input_signal, depth=8)
encoded      = tf.keras.layers.Dense(32, activation='relu')(one_hot)
encoded1     = tf.keras.layers.Dense(16, activation='relu')(encoded)
encoded1     = tf.keras.layers.Dense(8, activation='relu')(encoded1)


decoded      = tf.keras.layers.Dense(8, activation='relu')(encoded1)
decoded1     = tf.keras.layers.Dense(16, activation='relu')(decoded)
decoded1     = tf.keras.layers.Dense(32, activation='relu')(decoded1)
decoded2     = tf.keras.layers.Dense(3, activation='sigmoid')(decoded1)

model  = tf.keras.Model(input_signal, decoded2)

# Structure of the model. summary() prints the table itself and returns
# None, so wrapping it in print() would emit a stray "None" line.
model.summary()

# Adam with a fixed learning rate; binary cross-entropy is used as the
# loss on the model's outputs.
lr = 1e-3
adam = keras.optimizers.Adam(learning_rate=lr)
model.compile(loss="binary_crossentropy", optimizer=adam)

# Halt when the training loss has not improved for 3 epochs in a row
# and restore the best weights observed.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='loss',
)

# Fit on the generated symbols and their bit codes.
history = model.fit(
    x=trainSym,
    y=trainLabels,
    batch_size=256,
    epochs=15,
    callbacks=[early_stopping],
)


# Evaluation on 50 fresh random symbols.
test_symbols = np.random.randint(0, 8, size=50)


# Raw model outputs: one row of three sigmoid values per symbol.
pred_final_signal = model.predict(test_symbols, batch_size=50)
print(test_symbols)

# Threshold at 0.5 to obtain hard 0/1 bits.
above_half = pred_final_signal > 0.5
pred_final_signal = above_half.astype(int)
print(pred_final_signal)
[6 2 6 5 7 7 0 2 6 3 6 5 3 4 1 0 7 0 0 4 0 4 7 4 0 6 2 4 7 7 0 3 1 5 7 2 6
 3 1 1 1 5 3 2 6 0 0 2 4 7]
[[1 1 0]
 [0 1 0]
 [1 1 0]
 [1 0 1]
 [1 1 1]
 [1 1 1]
 [0 0 0]
 [0 1 0]
 [1 1 0]
 [0 1 1]
 [1 1 0]
 [1 0 1]
 [0 1 1]
 [1 0 0]
 [0 0 1]
 [0 0 0]
 [1 1 1]
 [0 0 0]
 [0 0 0]
 [1 0 0]
 [0 0 0]
 [1 0 0]
 [1 1 1]
 [1 0 0]
 [0 0 0]
 [1 1 0]
 [0 1 0]
 [1 0 0]
 [1 1 1]
 [1 1 1]
 [0 0 0]
 [0 1 1]
 [0 0 1]
 [1 0 1]
 [1 1 1]
 [0 1 0]
 [1 1 0]
 [0 1 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [1 0 1]
 [0 1 1]
 [0 1 0]
 [1 1 0]
 [0 0 0]
 [0 0 0]
 [0 1 0]
 [1 0 0]
 [1 1 1]]
  • Related