I have a CNN model which I run on the dataset linked here for viewing: data. I have tried the sensitivity and specificity metrics provided by Keras, and I have also computed them with scikit-learn. I understand that model performance depends on many things, but I want to know whether something in my code explains why the sensitivity reported by Keras differs from the one computed with scikit-learn. There are also many misclassifications; how can I improve my model's results? My code looks like this:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
#import keras as k
from IPython.display import display
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import plot_model
#from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, Dropout, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn import metrics
def confusion_metrics(conf_matrix):
    # slice the confusion matrix into its four pieces
    TP = conf_matrix[1][1]
    TN = conf_matrix[0][0]
    FP = conf_matrix[0][1]
    FN = conf_matrix[1][0]
    print('True Positives:', TP)
    print('True Negatives:', TN)
    print('False Positives:', FP)
    print('False Negatives:', FN)
    # calculate accuracy
    conf_accuracy = (float(TP + TN) / float(TP + TN + FP + FN))
    # calculate mis-classification
    conf_misclassification = 1 - conf_accuracy
    # calculate the sensitivity
    conf_sensitivity = (TP / float(TP + FN))
    # calculate the specificity
    conf_specificity = (TN / float(TN + FP))
    # calculate precision (note: as written this computes TN / (TN + FP), the same value as specificity)
    conf_precision = (TN / float(TN + FP))
    # calculate f_1 score
    conf_f1 = 2 * ((conf_precision * conf_sensitivity) / (conf_precision + conf_sensitivity))
    print('-' * 50)
    print(f'Accuracy: {round(conf_accuracy, 2)}')
    print(f'Mis-Classification: {round(conf_misclassification, 2)}')
    print(f'Sensitivity: {round(conf_sensitivity, 2)}')
    print(f'Specificity: {round(conf_specificity, 2)}')
    print(f'Precision: {round(conf_precision, 2)}')
    print(f'f_1 Score: {round(conf_f1, 2)}')
def og_build_model_less_layer(n_rows, n_cols):
    ecg_input = Input(shape=(n_cols, n_rows), name='ecg_signal')
    print('model_input shape:', ecg_input.shape)
    c1 = Conv1D(80, 2, name='conv_1', kernel_initializer="glorot_uniform")(ecg_input)
    b1 = BatchNormalization(name='BN_1')(c1)  # a1 = Activation('relu')(b1)
    d1 = Dropout(0.4, name='drop_1')(b1)
    c2 = Conv1D(80, 2, name='conv_2', kernel_initializer="glorot_uniform")(d1)
    b2 = BatchNormalization(name='BN_2')(c2)
    d2 = Dropout(0.6, name='drop_2')(b2)
    c3 = Conv1D(80, 2, name='conv_3', kernel_initializer="glorot_uniform")(d2)
    b3 = BatchNormalization(name='BN_3')(c3)
    d3 = Dropout(0.4, name='drop_3')(b3)
    c4 = Conv1D(80, 2, name='conv_4', kernel_initializer="glorot_uniform")(d3)
    b4 = BatchNormalization(name='BN_4')(c4)
    d4 = Dropout(0.6, name='drop_4')(b4)
    c5 = Conv1D(80, 2, name='conv_5', kernel_initializer="glorot_uniform")(d4)
    b5 = BatchNormalization(name='BN_5')(c5)
    d5 = Dropout(0.5, name='drop_5')(b5)
    fl = Flatten(name='fl')(d5)
    den1 = Dense(256, name='den1')(fl)
    den = Dense(30, name='den2')(den1)
    drp = Dropout(0.5)(den)
    output = Dense(1, activation='sigmoid')(drp)
    opt = Adam(learning_rate=1e-4)
    # note: these report the best sensitivity (specificity) at an internally
    # chosen threshold, not sensitivity/specificity at the 0.5 threshold
    sens = tf.keras.metrics.SensitivityAtSpecificity(0.15)
    spec = tf.keras.metrics.SpecificityAtSensitivity(0.15)
    model = Model(inputs=ecg_input, outputs=output, name='model')
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy', sens, spec])
    model.summary()
    return model
train_df = pd.read_pickle('data/train_ecg_gl.pkl')
train_df = train_df.dropna()
train_df = train_df.sort_values(by='Time', ascending=True)#, na_position='first')
test_df = pd.read_pickle('data/test_ecg_gl.pkl')
test_df = test_df.dropna()
test_df = test_df.sort_values(by='Time', ascending=True)
df = pd.concat([train_df, test_df], ignore_index=True)
df = df.sort_values(by='Time')
data = df.iloc[:, 1:161].values
data = data[..., None]
labels = df['hypo_label'].values
train_data = train_df.iloc[:, 1:161].values
train_data = train_data[..., None]
train_labels = train_df['hypo_label'].values
test_data = test_df.iloc[:, 1:161].values
test_data = test_data[..., None]
test_labels = test_df['hypo_label'].values
xtrain, ytrain = train_data, train_labels
xtest, ytest = test_data, test_labels
n_cols = data.shape[2]
n_rows = data.shape[1]
batch_size, lr, verbose, epochs, val_split = 45, 0.01, 1, 40, 0.1  # note: lr is unused; Adam is built with learning_rate=1e-4
early_stopping_callback = EarlyStopping(monitor = 'loss', patience = 10, mode = 'min', restore_best_weights = True)
cb_lr_reducer = ReduceLROnPlateau(monitor='loss', factor= 0.1, patience=10, min_lr= 1e-5)
model = og_build_model_less_layer(n_cols,n_rows)
model.fit(x=xtrain, y=ytrain, epochs=epochs, verbose=verbose, batch_size=batch_size, validation_split=val_split, shuffle=False, callbacks=[cb_lr_reducer, early_stopping_callback])
_, taccuracy,tsensitivity,tspecificity = model.evaluate(xtest, ytest, batch_size=batch_size, verbose=verbose)
print('Model Test 0.7*0.3 Accuracy:', taccuracy)
print('Model Test 0.7*0.3 sensitivity:', tsensitivity)
print('Model Test 0.7*0.3 specificity:', tspecificity)
y_pred = model.predict(xtest)
y_pred = y_pred.flatten()
print(y_pred)
#print(p_pred.round(2))
# extract the predicted class labels
y_pred = np.where(y_pred < 0.5, 0, 1)
# Creating the confusion matrix
cm = metrics.confusion_matrix(ytest, y_pred)
# Assigning columns names
cm_df = pd.DataFrame(cm, columns = ['Predicted Negative', 'Predicted Positive'],index = ['Actual Negative', 'Actual Positive'])
# Showing the confusion matrix
print(cm_df)
confusion_metrics(cm)
After I run this code on the data linked above, it gives me the following output:
Model Test 0.7*0.3 Accuracy: 0.654349148273468
Model Test 0.7*0.3 sensitivity: 0.9166133999824524
Model Test 0.7*0.3 specificity: 0.9982390403747559
[0.00757153 0.00837034 0.02366774 ... 0.5926605 0.59990513 0.56060743]
                 Predicted Negative  Predicted Positive
Actual Negative               29073                2160
Actual Positive               14531                1107
True Positives: 1107
True Negatives: 29073
False Positives: 2160
False Negatives: 14531
--------------------------------------------------
Accuracy: 0.64
Mis-Classification: 0.36
Sensitivity: 0.07
Specificity: 0.93
Precision: 0.93
f_1 Score: 0.13
CodePudding user response:
The performance metrics improved when I used a custom validation set: a stratified 80-20 split of the training data.
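For reference, a minimal sketch of that split, assuming the same train_data/train_labels arrays as in the question (the variable names, the 80-20 ratio, and random_state are illustrative):

from sklearn.model_selection import train_test_split

# Stratified 80-20 split: keeps the hypo_label class ratio the same
# in the training and validation sets.
x_tr, x_val, y_tr, y_val = train_test_split(
    train_data, train_labels,
    test_size=0.2,
    stratify=train_labels,
    random_state=42,
)

# Pass the validation set explicitly instead of using validation_split;
# validation_split takes the last fraction of the arrays before shuffling,
# which on time-sorted, unshuffled data is not a representative sample.
model.fit(
    x_tr, y_tr,
    epochs=epochs,
    batch_size=batch_size,
    verbose=verbose,
    validation_data=(x_val, y_val),
    callbacks=[cb_lr_reducer, early_stopping_callback],
)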