I'm doing a study on how to improve the accuracy of some models, and I want to evaluate a bagging kNN classifier with cross-validation. The code is the following:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from sklearn.neighbors import KNeighborsClassifier
class MachineDataset:
    """Bundle a tabular dataset with a classifier to evaluate on it.

    The last column of ``data`` is used as the target ``y``; every column
    whose label differs from the last column's label forms the feature
    matrix ``X``.

    Args:
        data: pandas DataFrame holding features and, in its last column,
            the target.
        X_train, y_train, X_test, y_test: optional pre-made splits. They are
            only stored on the instance; no method in this class reads them.
        clf: estimator passed to ``cross_val_score`` by ``eval_x_fold``.
    """

    def __init__(self, data, X_train=None, y_train=None,
                 X_test=None, y_test=None, clf=None):
        self.data = data
        target_column = data.columns[-1]
        # Boolean mask keeps every column not labelled like the target.
        self.X = self.data.loc[:, self.data.columns != target_column]
        self.y = self.data[target_column]
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.clf = clf

    def set_clf(self, clf):
        """Replace the classifier used by ``eval_x_fold``."""
        self.clf = clf

    @staticmethod
    def _format_accuracy(score):
        """Return ``score`` (a fraction such as 0.95) as a percentage line."""
        # The parentheses matter: ``%`` and ``*`` have equal precedence and
        # associate left to right, so without them the formatted *string*
        # would be repeated 100 times instead of the number being scaled.
        return "%0.2f accuracy" % (score * 100)

    def eval_x_fold(self, x_fold=10):
        """Cross-validate ``self.clf`` on ``X``/``y`` and print the mean score.

        Args:
            x_fold: value forwarded as ``cv`` to ``cross_val_score``.
        """
        eval_score = cross_val_score(self.clf, self.X, self.y, cv=x_fold)
        print(self._format_accuracy(eval_score.mean()))
def main():
    """Cross-validate a bagged kNN classifier on the UJIIndoorLoc CSV."""
    base_clf = KNeighborsClassifier(metric='minkowski', n_neighbors=5)
    # The previous settings (bootstrap=False with max_samples=1.0 and
    # max_features=1.0) handed every ensemble member the complete dataset,
    # so all members were equivalent: no accuracy gain over a single kNN,
    # but n_estimators times the memory and run time. Each fitted kNN keeps
    # the rows it was trained on, so 500 of them is presumably what got the
    # process killed by the OS.
    # bootstrap=True makes the members differ; n_estimators is cut to a
    # size that is cheap to hold in memory -- raise it (or lower
    # max_samples) according to the RAM actually available.
    # NOTE(review): recent scikit-learn releases rename `base_estimator` to
    # `estimator`; kept as-is to match the version this script was written
    # for -- confirm against the installed version.
    clf = BaggingClassifier(base_estimator=base_clf,
                            n_estimators=10,
                            max_samples=1.0, max_features=1.0, bootstrap=True,
                            n_jobs=1, random_state=1)
    machine = MachineDataset(
        pd.read_csv('../Datasets/UJIIndoorLoc/UJIIndoorLoc_ID.csv'),
        clf=clf)
    machine.eval_x_fold()


if __name__ == '__main__':
    main()
However, when it runs, the only output is:
Killed
I don't know what exactly is happening here. Could it be a bad configuration?
Thanks in advance
CodePudding user response:
It means your script was killed by the OS. In most cases that happens because it was using too much memory. Try watching your memory usage during execution.
You can use `cat /proc/meminfo` on Linux and Task Manager on Windows.
How big is the dataset?
If it is a memory problem, you will have to get more RAM, use another computer with more RAM, or reduce the size of the dataset.