I'm trying to make a predictor model for identifying therapeutic peptides based on their word2vec vectors. The dataset has 100 positive and 100 negative examples. I've already embedded the peptide sequences with Word2Vec and am trying to train my neural network. However, the accuracy remains constant at 51.88%.
What I have tried: Changing the loss function(to binary cross entropy), number of nodes in each layer
Here is my code:
import sklearn
a = sklearn.utils.shuffle(arrayvectors, random_state=1)
b = sklearn.utils.shuffle(labels, random_state=1)
dfa = pd.DataFrame(a, columns=None)
dfb = pd.DataFrame(b, columns=None)
X = dfa.iloc[:]
y = dfb.iloc[:]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=300)
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)
## train data
class trainData(Dataset):
def __init__(self, X_data, y_data):
self.X_data = X_data
self.y_data = y_data
def __getitem__(self, index):
return self.X_data[index], self.y_data[index]
def __len__ (self):
return len(self.X_data)
train_data = trainData(torch.FloatTensor(X_train),
torch.FloatTensor(y_train))
## test data
class testData(Dataset):
def __init__(self, X_data):
self.X_data = X_data
def __getitem__(self, index):
return self.X_data[index]
def __len__ (self):
return len(self.X_data)
test_data = testData(torch.FloatTensor(X_test))
EPOCHS = 100
BATCH_SIZE = 2
LEARNING_RATE = 0.0001
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=1)
# make mode
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(4,)))
model.add(Dropout(0.5))
model.add(Dense(16, input_dim=1, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(12,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1,activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=1000, batch_size=64)
Let me know if you have any thoughts!
CodePudding user response:
Try increasing the batch size to 16 from 2 and also reduce the drop out to .2 or less. That's too much dropout