diff --git a/lapixdl/evaluation/model.py b/lapixdl/evaluation/model.py
index 262d7b5..803d4d9 100644
--- a/lapixdl/evaluation/model.py
+++ b/lapixdl/evaluation/model.py
@@ -3,6 +3,7 @@
 from enum import Enum
 from dataclasses import dataclass
 from functools import reduce
 
+import math
 import numpy as np
 
@@ -17,6 +18,14 @@
 TResult = TypeVar('TResult')
 
 
+def recall_string(recall: float) -> str:
+    return 'No positive cases in GT' if math.isnan(recall) else str(recall)
+
+
+def specificity_string(specificity: float) -> str:
+    return 'No negative cases in GT' if math.isnan(specificity) else str(specificity)
+
+
 @dataclass
 class Result(Generic[TResult]):
     """Result of a GT versus Predictions
@@ -183,6 +192,8 @@ def accuracy(self) -> float:
     @property
     def recall(self) -> float:
         """float: Recall metric - TP / (TP + FN)."""
+        if self.TP == 0 and self.FN == 0:
+            return math.nan
         return self.TP/(self.TP + self.FN)
 
     @property
@@ -193,17 +204,25 @@ def false_positive_rate(self) -> float:
     @property
     def specificity(self) -> float:
         """float: Specificity metric - TN / (FP + TN)."""
+        if self.FP == 0 and self.TN == 0:
+            return math.nan
         return self.TN/(self.FP + self.TN)
 
     @property
     def precision(self) -> float:
         """float: Precision metric - TP / (FP + TP)."""
+        if self.FP == 0 and self.FN == 0:  # No GT instances
+            return 1
+
         return self.TP/(self.FP + self.TP)
 
     @property
     def f_score(self) -> float:
         """float: F-Score/Dice metric - 2*TP / (FP + FN + 2*TP)."""
-        return 2*self.TP/(self.FP + self.FN + 2*self.TP)
+        if self.TP == 0 and self.FP == 0 and self.FN == 0:  # No GT instances
+            return 1
+        else:
+            return 2*self.TP/(self.FP + self.FN + 2*self.TP)
 
     @property
     def confusion_matrix(self) -> List[List[int]]:
@@ -228,9 +247,9 @@ def __str__(self):
             f'\tFN: {self.FN}\n'
             f'\tFPR: {self.false_positive_rate}\n'
             f'\tAccuracy: {self.accuracy}\n'
-            f'\tRecall: {self.recall}\n'
+            f'\tRecall: {recall_string(self.recall)}\n'
             f'\tPrecision: {self.precision}\n'
-            f'\tSpecificity: {self.specificity}\n'
+            f'\tSpecificity: {specificity_string(self.specificity)}\n'
             f'\tF-Score: {self.f_score}'
         )
 
@@ -287,17 +306,24 @@ def accuracy(self) -> float:
     @property
     def avg_recall(self) -> float:
         """float: Macro average recall metric."""
-        return reduce(lambda acc, curr: curr.recall + acc, self.by_class_w_instances, .0) / len(self.by_class_w_instances)
+        by_class_w_recall = [
+            c for c in self.by_class_w_instances if not math.isnan(c.recall)]
+
+        if(len(by_class_w_recall) == 0):
+            return 1
+        return reduce(lambda acc, curr: curr.recall + acc, by_class_w_recall, .0) / len(by_class_w_recall)
 
     @property
     def avg_precision(self) -> float:
         """float: Macro average precision metric."""
-        return reduce(lambda acc, curr: curr.precision + acc, self.by_class_w_instances, .0) / len(self.by_class_w_instances)
+        return reduce(lambda acc, curr: (0 if math.isnan(curr.precision) else curr.precision) + acc, self.by_class_w_instances, .0) / len(self.by_class_w_instances)
 
     @property
     def avg_specificity(self) -> float:
         """float: Macro average specificity metric."""
-        return reduce(lambda acc, curr: curr.specificity + acc, self.by_class_w_instances, .0) / len(self.by_class_w_instances)
+        by_class_w_specificity = [
+            c for c in self.by_class_w_instances if not math.isnan(c.specificity)]
+        return reduce(lambda acc, curr: curr.specificity + acc, by_class_w_specificity, .0) / len(by_class_w_specificity)
 
     @property
     def avg_f_score(self) -> float:
@@ -391,9 +417,9 @@ def __str__(self):
             f'\tFN: {self.FN}\n'
             f'\tIoU: {self.iou}\n'
             f'\tAccuracy: {self.accuracy}\n'
-            f'\tRecall: {self.recall}\n'
+            f'\tRecall: {recall_string(self.recall)}\n'
             f'\tPrecision: {self.precision}\n'
-            f'\tSpecificity: {self.specificity}\n'
+            f'\tSpecificity: {specificity_string(self.specificity)}\n'
             f'\tFPR: {self.false_positive_rate}\n'
             f'\tF-Score: {self.f_score}\n'
         )
@@ -577,7 +603,7 @@ def __str__(self):
             f'\tTN: [NA]\n'
             f'\tIoU: {self.iou}\n'
             f'\tAccuracy: {self.accuracy}\n'
-            f'\tRecall: {self.recall}\n'
+            f'\tRecall: {recall_string(self.recall)}\n'
             f'\tPrecision: {self.precision}\n'
             f'\tF-Score: {self.f_score}\n'
             f'\tAverage Precision: {self.average_precision()}\n'
diff --git a/setup.py b/setup.py
index 87be2bb..b06f182 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name='lapixdl',
     packages=find_packages(exclude=['tests']),
-    version='0.7.16',
+    version='0.7.17',
    description='Utils for Computer Vision Deep Learning research',
     long_description=long_description,
diff --git a/tests/evaluation/evaluate/test_evaluate_classification.py b/tests/evaluation/evaluate/test_evaluate_classification.py
index 7142346..a85ffa9 100644
--- a/tests/evaluation/evaluate/test_evaluate_classification.py
+++ b/tests/evaluation/evaluate/test_evaluate_classification.py
@@ -15,4 +15,52 @@ def test_evaluation():
     assert round(metrics.avg_recall, 3) == .511
     assert round(metrics.avg_precision, 3) == .547
     assert round(metrics.avg_f_score, 3) == .465
-    assert round(metrics.avg_specificity, 3) == .757
\ No newline at end of file
+    assert round(metrics.avg_specificity, 3) == .757
+
+def test_evaluation_w_zeros():
+    classes = ['a', 'b']
+
+    gt_class = [Classification(x) for x in [0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1]]
+    pred_class = [Classification(x) for x in [1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0]]
+
+
+    metrics = evaluate_classification(gt_class, pred_class, classes)
+
+    assert metrics.count == 16
+    assert metrics.accuracy == 0
+    assert metrics.avg_recall == 0
+    assert metrics.avg_precision == 0
+    assert metrics.avg_f_score == 0
+    assert metrics.avg_specificity == 0
+
+def test_evaluation_w_only_gt_negatives_falses():
+    classes = ['a', 'b']
+
+    gt_class = [Classification(x) for x in [0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0]]
+    pred_class = [Classification(x) for x in [1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1]]
+
+
+    metrics = evaluate_classification(gt_class, pred_class, classes)
+
+    assert metrics.count == 16
+    assert metrics.accuracy == 0
+    assert metrics.avg_recall == 0
+    assert metrics.avg_precision == 0
+    assert metrics.avg_f_score == 0
+    assert metrics.avg_specificity == 0
+
+def test_evaluation_w_only_gt_negatives_trues():
+    classes = ['a', 'b']
+
+    gt_class = [Classification(x) for x in [0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0]]
+    pred_class = [Classification(x) for x in [0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0]]
+
+
+    metrics = evaluate_classification(gt_class, pred_class, classes)
+
+    assert metrics.count == 16
+    assert metrics.accuracy == 1
+    assert metrics.avg_recall == 1
+    assert metrics.avg_precision == 1
+    assert metrics.avg_f_score == 1
+    assert metrics.avg_specificity == 1
\ No newline at end of file
diff --git a/tests/evaluation/model/test_BinaryClassificationMetrics.py b/tests/evaluation/model/test_BinaryClassificationMetrics.py
index 11ae6c2..634cb9e 100644
--- a/tests/evaluation/model/test_BinaryClassificationMetrics.py
+++ b/tests/evaluation/model/test_BinaryClassificationMetrics.py
@@ -1,3 +1,4 @@
+import math
 from lapixdl.evaluation.model import BinaryClassificationMetrics
 
 
@@ -26,6 +27,13 @@ def test_recall():
     assert bin_class.recall == 0.25
 
 
+def test_recall_zero():
+    bin_class = BinaryClassificationMetrics(cls=['a', 'b'])
+    bin_class.TP = 0
+    bin_class.FN = 0
+
+    assert math.isnan(bin_class.recall)
+
 def test_fpr():
     bin_class = BinaryClassificationMetrics(cls=['a', 'b'])
 
@@ -41,6 +49,13 @@ def test_specificity():
     assert bin_class.specificity == 0.75
 
 
+def test_specificity_zero():
+    bin_class = BinaryClassificationMetrics(cls=['a', 'b'])
+    bin_class.TN = 0
+    bin_class.FP = 0
+
+    assert math.isnan(bin_class.specificity)
+
 def test_precision():
     bin_class = BinaryClassificationMetrics(cls=['a', 'b'])
 
@@ -49,6 +64,12 @@ def test_precision():
     assert bin_class.precision == 0.8
 
 
+def test_precision_zero():
+    bin_class = BinaryClassificationMetrics(cls=['a', 'b'])
+    bin_class.TP = 0
+    bin_class.FP = 0
+
+    assert bin_class.precision == 1
 def test_f_score():
     bin_class = BinaryClassificationMetrics(cls=['a', 'b'])
 
@@ -58,3 +79,12 @@ def test_f_score():
     bin_class.FP = 6
 
     assert bin_class.f_score == 0.5
+
+def test_f_score_zero():
+    bin_class = BinaryClassificationMetrics(cls=['a', 'b'])
+    bin_class.FN = 0
+    bin_class.TN = 0
+    bin_class.TP = 0
+    bin_class.FP = 0
+
+    assert bin_class.f_score == 1
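
As a quick illustration of the NaN behavior introduced above (a minimal sketch that only uses the constructor, attributes and helpers shown in this patch; it is not part of the patch itself):

    import math

    from lapixdl.evaluation.model import (BinaryClassificationMetrics,
                                          recall_string, specificity_string)

    # No positive cases in the GT: TP and FN are both zero, so recall is
    # undefined and is reported as NaN instead of raising ZeroDivisionError.
    metrics = BinaryClassificationMetrics(cls=['a', 'b'])
    metrics.TP = 0
    metrics.FN = 0
    assert math.isnan(metrics.recall)
    assert recall_string(metrics.recall) == 'No positive cases in GT'

    # No negative cases in the GT: FP and TN are both zero, so specificity is NaN.
    metrics.FP = 0
    metrics.TN = 0
    assert math.isnan(metrics.specificity)
    assert specificity_string(metrics.specificity) == 'No negative cases in GT'

The macro-average properties handle these NaN entries explicitly: avg_recall and avg_specificity skip classes whose metric is NaN, while avg_precision counts a NaN precision as 0, which is the behavior the new evaluate_classification tests above rely on.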