I cannot see what the error is here; to me these only look like warnings. What is the error message, and what is a probable cause?
Update: I ran linting in VS Code using Ctrl+Shift+P: >Python: Run Linting, then pushed the changes and ran the pipeline again.
test_ontology_tagger.py:
import pathlib
import pytest
# from dagster import execute_solid
# from ontology_tagger.ontology_tagger_worker import batch_predict
# from pwmf.pipeline.utils import local_mode
import yaml
# from pycm import ConfusionMatrix
import pandas as pd
# import datetime
from ontology_tagger.modules.s3_util import S3Util
import os
import logging
cwd = pathlib.Path(__file__).parent.absolute()
s3_util = S3Util()
@pytest.fixture
def models_with_unit_test_data():
    return models('data_local')


@pytest.fixture
def models_with_functional_test_data():
    return models('functional_test')


def models(test_file):
    try:
        with open(cwd.parent / 'models.yaml') as fh:
            models = yaml.safe_load(fh)
        model_test_data = {}
        for model, meta in models.items():
            if meta['test']['skip_unit_test']:
                continue
            if test_file in meta['test']:
                model_test_data[model] = meta['test'][test_file]
    except Exception as e:
        print(f'Cannot load models.yaml {e}')
    return model_test_data
@pytest.fixture
def s3_paths():
    try:
        with open(cwd.parent / 'models.yaml') as fh:
            models = yaml.safe_load(fh)
        model_validation_results = {}
        for model, meta in models.items():
            path = meta['local']['model_s3'].split('.')[0]
            model_validation_results[model] = path
    except Exception as e:
        print(f'Cannot load models.yaml {e}')
    return model_validation_results
def get_config(test_file='../data/test.csv', remove_ids=False):
    if remove_ids:
        df = pd.read_csv(test_file, header=None, sep='\t', index_col=0)
        test_data = (cwd / 'temp.csv').as_posix()
        print(f'Removing ids and generating test file in {test_data}')
        df.to_csv(test_data, header=False, index=False, sep='\t')
    else:
        test_data = (cwd / test_file).as_posix()
    return {
        'resources': {
            'fqdn_string': {'config': {'fqdn': None}},
            'mds_store': {
                'config': {
                    'mds_uri': 'mongodb://root:broot@localhost:27017/mds?authSource=admin'
                }
            },
            'file_cache': {'config': {'target_folder': './'}},
            'file_manager': {'config': {}},
            's3': {},
        },
        'solids': {
            'batch_predict': {
                'inputs': {
                    'batch_size': {'value': 20},
                    'data_uri': {'value': test_data},
                    's3_uri': {'value': 'dbpedia_comprehensive'},
                    'instance_type': {'value': 'local'},
                    'batch_id': {'value': ''},
                }
            }
        },
    }
'''
@pytest.mark.unit
def test_batch_predict_local(models_with_unit_test_data):
    for model, test_data_path in models_with_unit_test_data.items():
        config = get_config('../' + test_data_path)
        config['solids']['batch_predict']['inputs']['s3_uri']['value'] = model
        dataset = config['solids']['batch_predict']['inputs']['data_uri']['value']
        print(f'testing model {model} on data {dataset}')
        output = execute_solid(batch_predict, run_config=config, mode_def=local_mode)
        assert output.success is True
        assert 'predictions' in output.output_values
        assert 'id' in output.output_values['predictions']
        assert 'probabilities' in output.output_values['predictions']
        assert 'classes' in output.output_values['predictions']
        cl = output.output_values['predictions']['classes']
        lb = output.output_values['predictions']['id']
        n_cl = len(cl[0])
        n_lb = len(lb[0])
        assert n_cl == n_lb
        for i in range(n_cl):
            accuracy = sum(1 for x, y in zip(cl, lb) if x[i] == y[i]) / float(len(cl))
            assert accuracy > 0.70


@pytest.mark.unit
def test_generate_performance_metrics(models_with_functional_test_data, s3_paths):
    """
    save metric files to s3 under the same s3 path as the model names in models.yaml
    """
    timestamp_folder = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    for model, test_data_path in models_with_functional_test_data.items():
        config = get_config(test_data_path, remove_ids=True)
        config['solids']['batch_predict']['inputs']['s3_uri']['value'] = model
        dataset = config['solids']['batch_predict']['inputs']['data_uri']['value']
        print(f'testing model {model} on data {dataset}')
        output = execute_solid(batch_predict, run_config=config, mode_def=local_mode)
        cl = output.output_values['predictions']['classes']
        lb = output.output_values['predictions']['id']
        s3_path = s3_paths[model]
        n = len(cl[0])
        classes = []
        labels = []
        for i in range(n):
            for c, l in zip(cl, lb):
                try:
                    c_t = c[i]
                    l_t = l[i]
                    append = True
                except IndexError:
                    print(f'skipping {c} and {l} for class {i}')
                    append = False
                if append:
                    classes.append(c_t)
                    labels.append(l_t)
            cm = ConfusionMatrix(actual_vector=labels, predict_vector=classes)
            out_file = f'confusion_matrix_class{i + 1}'
            print(f'Uploading metrics for file {out_file} to {s3_path}')
            cm.save_html(out_file)
            s3_util.upload_file(
                f'{out_file}.html', f'{s3_path}/{timestamp_folder}/{out_file}.html'
            )
            cm.save_csv(out_file)
            s3_util.upload_file(
                f'{out_file}.csv', f'{s3_path}/{timestamp_folder}/{out_file}.csv'
            )
'''
@pytest.mark.unit
def test_validation_classifier_dataset(models_with_unit_test_data):  # might do both ClassifierDataset and LabelMapper
    for model, test_data_path in models_with_unit_test_data.items():
        config = get_config('../' + test_data_path)
        config['solids']['batch_predict']['inputs']['s3_uri']['value'] = model
        dataset = config['solids']['batch_predict']['inputs']['data_uri']['value']
        print(f'testing model {model} on data {dataset}')
        validation_log_init()
        # ...
        num_lines = validation_log_init()
        assert num_lines == 2  # no. lines in log file expected


"""
@pytest.mark.unit
def test_validation_label_mapper(models_with_unit_test_data):
    for model, test_data_path in models_with_unit_test_data.items():
        config = get_config('../' + test_data_path)
        config['solids']['batch_predict']['inputs']['s3_uri']['value'] = model
        dataset = config['solids']['batch_predict']['inputs']['data_uri']['value']
        print(f'testing model {model} on data {dataset}')
        validation_log_init()
        # ...
        num_lines = validation_log_init()
        assert num_lines == 1  # no. lines in log file expected
"""
@pytest.fixture
def validation_log_init():
    global log_file
    log_file = 'test_ontology_tagger.log'
    open(log_file, 'w').close()  # empties file
    logging.basicConfig(filename=log_file, level=logging.INFO)


@pytest.fixture
def validation_log_check_clear():
    size = os.path.getsize(log_file)
    os.remove(log_file)  # 'global log_file'
    return size
Build Traceback:
#17 5.630 print(f'testing model {model} on data {dataset}')
#17 5.630
#17 5.630 > validation_log_init()
#17 5.630
#17 5.630 tests/test_ontology_tagger.py:174:
#17 5.631 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
#17 5.631
#17 5.631 @pytest.fixture
#17 5.631 def validation_log_init():
#17 5.631 global log_file
#17 5.631 log_file = '../notebooks/test_ontology_tagger.log'
#17 5.631 > open(log_file, 'w').close() # empties file
#17 5.631 E FileNotFoundError: [Errno 2] No such file or directory: '../notebooks/test_ontology_tagger.log'
#17 5.631
#17 5.631 tests/test_ontology_tagger.py:204: FileNotFoundError
#17 5.632 =============================== warnings summary ===============================
#17 5.632 ontology_tagger/tests/test_ontology_tagger.py::test_validation_classifier_dataset
#17 5.632 /home/worker/python/ontology_tagger/.venv/lib/python3.7/site-packages/_pytest/python.py:166: RemovedInPytest4Warning: Fixture "validation_log_init" called directly. Fixtures are not meant to be called directly, are created automatically when test functions request them as parameters. See https://docs.pytest.org/en/latest/fixture.html for more information.
#17 5.632 testfunction(**testargs)
#17 5.632
#17 5.632 -- Docs: https://docs.pytest.org/en/latest/warnings.html
#17 5.633 ===================== 1 failed, 1 warnings in 2.36 seconds =====================
#17 ERROR: executor failed running [/bin/sh -c cd ontology_tagger && poetry run invoke deploy]: exit code: 1
------
> [test 5/5] RUN cd ontology_tagger && poetry run invoke deploy:
------
executor failed running [/bin/sh -c cd ontology_tagger && poetry run invoke deploy]: exit code: 1
##[error]Bash exited with code '1'.
Finishing: Test worker
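For reference, the RemovedInPytest4Warning above is pytest saying that validation_log_init is being called directly inside the test body instead of being requested as a parameter, and the FileNotFoundError comes from the relative '../notebooks/...' path not existing on the CI worker. A minimal sketch of the pattern the warning points at (reusing models_with_unit_test_data from the file above; tmp_path needs pytest 3.9+, and the elided body and line count are only placeholders):

import logging

import pytest


@pytest.fixture
def validation_log(tmp_path):
    # Anchor the log file to a pytest-managed temp directory instead of a
    # relative '../notebooks/...' path that does not exist on the CI worker.
    log_file = tmp_path / 'test_ontology_tagger.log'
    logging.basicConfig(filename=log_file, level=logging.INFO)
    return log_file


@pytest.mark.unit
def test_validation_classifier_dataset(validation_log, models_with_unit_test_data):
    # Requesting the fixture as a parameter lets pytest create it automatically,
    # so nothing in the test body calls validation_log_init() directly.
    for model, test_data_path in models_with_unit_test_data.items():
        ...  # run the validation step that writes to the log
        num_lines = len(validation_log.read_text().splitlines())
        assert num_lines == 2  # no. lines in log file expected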
Please let me know if there is anything else I can provide.
CodePudding user response:
The linter package I was using is flake8. The failures were a series of code-quality problems, the last one being E265: block comment should start with '# '. This means a space must appear immediately after the #, before any other text.
#This comment needs a space
def print_name(self):
    print(self.name)

# Comment is correct now
def print_name(self):
    print(self.name)
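If you want to catch these locally before pushing, the same checks can be run with plain flake8 on the command line, or from Python through flake8's documented legacy API. The snippet below is only a sketch, and the file path is an assumption based on the layout shown in the question:

from flake8.api import legacy as flake8

# Run the normal flake8 rule set over the test file and pull out the E265
# statistics; the list is empty once every block comment starts with '# '.
style_guide = flake8.get_style_guide()
report = style_guide.check_files(['ontology_tagger/tests/test_ontology_tagger.py'])
print(report.get_statistics('E265'))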