I built a model like this:
base = Classifier(classname="weka.classifiers.trees.ADTree",
options=["-B", "10", "-E", "-3", "-S", "1"])
CostS_cls = SingleClassifierEnhancer(classname="weka.classifiers.meta.CostSensitiveClassifier",
options =["-cost-matrix", "[0.0 1.0; 1.0 0.0]", "-S", "1"])
CostS_cls.classifier = base
smote = Filter(classname="weka.filters.supervised.instance.SMOTE",
options=["-C", "0", "-K", "3", "-P", "250.0", "-S", "1"])
fc = FilteredClassifier(options=["-S","1"])
fc.filter = smote
fc.classifier = CostS_cls
bagging_cls = SingleClassifierEnhancer(classname="weka.classifiers.meta.Bagging",
options=["-P", "100", "-S", "1", "-num-slots", "1", "-I", "100"])
bagging_cls.classifier = fc
multisearch_cls = MultiSearch(options = ["-S", "1"])
multisearch_cls.evaluation = "FM"
multisearch_cls.search = ["-sample-size", "100", "-initial-folds", "2", "-subsequent-folds", "10",
"-initial-test-set", ".", "-subsequent-test-set", ".", "-num-slots", "1"]
mparam = MathParameter()
mparam.prop = "numOfBoostingIterations"
mparam.minimum = 5.0
mparam.maximum = 50.0
mparam.step = 1.0
mparam.base = 10.0
mparam.expression = "I"
multisearch_cls.parameters = [mparam]
multisearch_cls.classifier = bagging_cls
AttS_cls = AttributeSelectedClassifier()
AttS_cls.search = from_commandline('weka.attributeSelection.GreedyStepwise -B -T -1.7976931348623157E308 -N -1 -num-slots 1', classname=get_classname(ASSearch))
AttS_cls.evaluation = from_commandline('weka.attributeSelection.CfsSubsetEval -P 1 -E 1', classname=get_classname(ASEvaluation))
AttS_cls.classifier = multisearch_cls
train, test = data_modelos_1_2.train_test_split(70.0, Random(1))
AttS_cls.build_classifier(train)
I'm trying to validate it with cross-validation, but when I do this:
train, test = data_modelos_1_2.train_test_split(70.0, Random(1))
AttS_cls.build_classifier(train)
evl = Evaluation(test)
evl.crossvalidate_model(AttS_cls, test, 10, Random(1))
I'm getting this error:
---------------------------------------------------------------------------
JavaException Traceback (most recent call last)
/tmp/ipykernel_50548/1197040560.py in <module>
47 print(AttS_cls.to_commandline())
48 evl = Evaluation(test)
---> 49 evl.crossvalidate_model(AttS_cls, test, 10, Random(1))
50 print(AttS_cls)
51 print("----------------------------------------------------------------------------")
/usr/local/lib/python3.8/dist-packages/weka/classifiers.py in crossvalidate_model(self, classifier, data, num_folds, rnd, output)
1289 else:
1290 generator = [output.jobject]
-> 1291 javabridge.call(
1292 self.jobject, "crossValidateModel",
1293 "(Lweka/classifiers/Classifier;Lweka/core/Instances;ILjava/util/Random;[Ljava/lang/Object;)V",
~/.local/lib/python3.8/site-packages/javabridge/jutil.py in call(o, method_name, sig, *args)
890         ret_sig = sig[sig.find(')')+1:]
891 nice_args = get_nice_args(args, args_sig)
--> 892 result = fn(*nice_args)
893 x = env.exception_occurred()
894 if x is not None:
~/.local/lib/python3.8/site-packages/javabridge/jutil.py in fn(*args)
857 x = env.exception_occurred()
858 if x is not None:
--> 859 raise JavaException(x)
860 return result
861 else:
JavaException: Thread-based execution of evaluation tasks failed!
So I don't know what I'm doing wrong. I know that in Weka you can cross-validate these kinds of models, but when I try it with python-weka-wrapper I run into this problem.
CodePudding user response:
I have turned your code snippet into one with imports and fixed the MultiSearch setup for Bagging (mparam.prop = "numIterations" instead of mparam.prop = "numOfBoostingIterations"), allowing it to be executed.
Since I do not have access to your data, I just used the UCI dataset vote.arff.
Your code was a bit odd, as it did a 70/30 train/test split, trained the classifier and then performed cross-validation on the test data. For cross-validation you do not train the classifier, as this happens within the internal cross-validation loop (each trained classifier inside that loop gets discarded, as cross-validation is only used for gathering statistics).
The code below has therefore three parts:
- your original evaluation code, but commented out
- performing proper cross-validation
- performing train/test evaluation
I do not use Jupyter notebooks and tested the code successfully in a regular virtual environment on my Linux Mint:
- Python: 3.8.10
- Output of pip freeze:
  numpy==1.22.3
  packaging==21.3
  pyparsing==3.0.7
  python-javabridge==4.0.3
  python-weka-wrapper3==0.2.7
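Note that ADTree, SMOTE and MultiSearch are not part of core Weka but come from Weka packages. If they are not installed yet, a minimal sketch along these lines should get them in place (the package names alternatingDecisionTrees, SMOTE and multisearch are taken from the Weka package manager; adjust them if they differ on your setup, and restart the JVM/kernel afterwards):
import weka.core.jvm as jvm
import weka.core.packages as packages

jvm.start(packages=True)
# install the packages providing ADTree, SMOTE and MultiSearch if they are missing
for pkg in ["alternatingDecisionTrees", "SMOTE", "multisearch"]:
    if not packages.is_installed(pkg):
        packages.install_package(pkg)
        print("installed:", pkg)
jvm.stop()  # restart the JVM (or the notebook kernel) before using the newly installed packages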
The modified code itself:
import weka.core.jvm as jvm
from weka.core.converters import load_any_file
from weka.classifiers import Classifier, SingleClassifierEnhancer, FilteredClassifier, MultiSearch, AttributeSelectedClassifier, Evaluation
from weka.core.classes import MathParameter, from_commandline, Random, get_classname
from weka.filters import Filter
from weka.attribute_selection import ASEvaluation, ASSearch
jvm.start(packages=True)
# the dataset/path needs adjusting
data_modelos_1_2 = load_any_file("/some/where/vote.arff")
data_modelos_1_2.class_is_last()
base = Classifier(classname="weka.classifiers.trees.ADTree",
options=["-B", "10", "-E", "-3", "-S", "1"])
CostS_cls = SingleClassifierEnhancer(classname="weka.classifiers.meta.CostSensitiveClassifier",
options=["-cost-matrix", "[0.0 1.0; 1.0 0.0]", "-S", "1"])
CostS_cls.classifier = base
smote = Filter(classname="weka.filters.supervised.instance.SMOTE",
options=["-C", "0", "-K", "3", "-P", "250.0", "-S", "1"])
fc = FilteredClassifier(options=["-S", "1"])
fc.filter = smote
fc.classifier = CostS_cls
bagging_cls = SingleClassifierEnhancer(classname="weka.classifiers.meta.Bagging",
options=["-P", "100", "-S", "1", "-num-slots", "1", "-I", "100"])
bagging_cls.classifier = fc
multisearch_cls = MultiSearch(options=["-S", "1"])
multisearch_cls.evaluation = "FM"
multisearch_cls.search = ["-sample-size", "100", "-initial-folds", "2", "-subsequent-folds", "10",
"-initial-test-set", ".", "-subsequent-test-set", ".", "-num-slots", "1"]
mparam = MathParameter()
mparam.prop = "numIterations"
mparam.minimum = 5.0
mparam.maximum = 50.0
mparam.step = 1.0
mparam.base = 10.0
mparam.expression = "I"
multisearch_cls.parameters = [mparam]
multisearch_cls.classifier = bagging_cls
AttS_cls = AttributeSelectedClassifier()
AttS_cls.search = from_commandline('weka.attributeSelection.GreedyStepwise -B -T -1.7976931348623157E308 -N -1 -num-slots 1', classname=get_classname(ASSearch))
AttS_cls.evaluation = from_commandline('weka.attributeSelection.CfsSubsetEval -P 1 -E 1', classname=get_classname(ASEvaluation))
AttS_cls.classifier = multisearch_cls
# original
# train, test = data_modelos_1_2.train_test_split(70.0, Random(1))
# AttS_cls.build_classifier(train)
# evl = Evaluation(test)
# evl.crossvalidate_model(AttS_cls, test, 10, Random(1))
# print(evl.summary())
# cross-validation
print("\ncross-validation\n")
evl = Evaluation(data_modelos_1_2)
evl.crossvalidate_model(AttS_cls, data_modelos_1_2, 10, Random(1))
print(evl.summary())
# train/test split
print("\ntrain/test split\n")
train, test = data_modelos_1_2.train_test_split(70.0, Random(1))
AttS_cls.build_classifier(train)
evl = Evaluation(test)
evl.test_model(AttS_cls, test)
print(evl.summary())
jvm.stop()
This generated the following output:
cross-validation
Correctly Classified Instances 416 95.6322 %
Incorrectly Classified Instances 19 4.3678 %
Kappa statistic 0.9094
Mean absolute error 0.0737
Root mean squared error 0.1778
Relative absolute error 15.5353 %
Root relative squared error 36.5084 %
Total Number of Instances 435
train/test split
Correctly Classified Instances 126 96.1832 %
Incorrectly Classified Instances 5 3.8168 %
Kappa statistic 0.9216
Mean absolute error 0.0735
Root mean squared error 0.1649
Relative absolute error 15.3354 %
Root relative squared error 33.6949 %
Total Number of Instances 131
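Since your setup uses SMOTE and a cost-sensitive classifier, the overall accuracy from summary() may not tell you much about the minority class. As an optional addition (assuming evl is the Evaluation object from the cross-validation part above), you can also print per-class statistics and the confusion matrix:
# per-class precision/recall/F-measure and the confusion matrix
print(evl.class_details())
print(evl.matrix())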