From cdf61a84a0201c2e1e0e7eba0e69c48bd632a84b Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Fri, 17 Jan 2025 19:10:04 +0530 Subject: [PATCH 1/3] feat: add the test coverage baseline as a json file --- tests/baseline.json | 16 ++++ tests/baseline.py | 68 +++++++++++++++++ tests/cases/comparison/is_false.test | 1 + tests/cases/comparison/is_not_false.test | 1 + tests/cases/comparison/is_not_true | 1 + tests/cases/comparison/is_true.test | 1 + tests/test_extensions.py | 97 ++++++++++++++++++++---- 7 files changed, 170 insertions(+), 15 deletions(-) create mode 100644 tests/baseline.json create mode 100644 tests/baseline.py diff --git a/tests/baseline.json b/tests/baseline.json new file mode 100644 index 000000000..c30f17ac5 --- /dev/null +++ b/tests/baseline.json @@ -0,0 +1,16 @@ +{ + "registry": { + "dependency_count": 13, + "extension_count": 13, + "function_count": 165, + "num_aggregate_functions": 29, + "num_scalar_functions": 169, + "num_window_functions": 0, + "num_function_overloads": 517 + }, + "coverage": { + "total_test_count": 1086, + "num_function_variants": 517, + "num_covered_function_variants": 229 + } +} diff --git a/tests/baseline.py b/tests/baseline.py new file mode 100644 index 000000000..c666a3d2e --- /dev/null +++ b/tests/baseline.py @@ -0,0 +1,68 @@ +import json +from dataclasses import dataclass +from typing import List, Dict + +from tests.coverage.coverage import TestCoverage +from tests.coverage.extensions import FunctionRegistry + + +@dataclass +class Registry: + extension_count: int + dependency_count: int + function_count: int + num_aggregate_functions: int + num_scalar_functions: int + num_window_functions: int + num_function_overloads: int + + +@dataclass +class Coverage: + total_test_count: int + num_function_variants: int + num_covered_function_variants: int + + def num_function_variants_without_coverage(self): + return self.num_function_variants - self.num_covered_function_variants + + +@dataclass +class Baseline: + registry: Registry + coverage: Coverage + + @classmethod + def from_dict(cls, data: Dict): + registry_data = data["registry"] + test_coverage_data = data["coverage"] + registry = Registry(**registry_data) + coverage = Coverage(**test_coverage_data) + return cls(registry, coverage) + + def num_function_variants_without_coverage(self): + return self.coverage.num_function_variants_without_coverage() + + +def read_baseline_file(file_path: str) -> Baseline: + with open(file_path, "r") as file: + data = json.load(file) + return Baseline.from_dict(data) + + +def generate_baseline(registry: FunctionRegistry, coverage: TestCoverage): + registry_data = Registry( + extension_count=len(registry.extensions), + dependency_count=len(registry.dependencies), + function_count=len(registry.registry), + num_aggregate_functions=len(registry.aggregate_functions), + num_scalar_functions=len(registry.scalar_functions), + num_window_functions=len(registry.window_functions), + num_function_overloads=sum([len(f) for f in registry.registry.values()]), + ) + test_coverage_data = Coverage( + total_test_count=coverage.test_count, + num_function_variants=coverage.total_function_variants, + num_covered_function_variants=coverage.num_covered_function_variants, + ) + return Baseline(registry=registry_data, coverage=test_coverage_data) diff --git a/tests/cases/comparison/is_false.test b/tests/cases/comparison/is_false.test index c11295ac3..ea2d78b2d 100644 --- a/tests/cases/comparison/is_false.test +++ b/tests/cases/comparison/is_false.test @@ -1,6 +1,7 @@ ### SUBSTRAIT_SCALAR_TEST: v1.0 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' +# basic is_false is_false(true::bool) = false::bool is_false(false::bool) = true::bool is_false(null::bool) = false::bool diff --git a/tests/cases/comparison/is_not_false.test b/tests/cases/comparison/is_not_false.test index 4301e041f..a19bb53de 100644 --- a/tests/cases/comparison/is_not_false.test +++ b/tests/cases/comparison/is_not_false.test @@ -1,6 +1,7 @@ ### SUBSTRAIT_SCALAR_TEST: v1.0 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' +# basic is_not_false is_not_false(true::bool) = true::bool is_not_false(false::bool) = false::bool is_not_false(null::bool) = true::bool diff --git a/tests/cases/comparison/is_not_true b/tests/cases/comparison/is_not_true index 3dafdaee1..997735bb1 100644 --- a/tests/cases/comparison/is_not_true +++ b/tests/cases/comparison/is_not_true @@ -1,6 +1,7 @@ ### SUBSTRAIT_SCALAR_TEST: v1.0 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' +# basic is_not_true is_not_true(true::bool) = false::bool is_not_true(false::bool) = true::bool is_not_true(null::bool) = true::bool diff --git a/tests/cases/comparison/is_true.test b/tests/cases/comparison/is_true.test index f3bb7ff0e..cb1f30170 100644 --- a/tests/cases/comparison/is_true.test +++ b/tests/cases/comparison/is_true.test @@ -1,6 +1,7 @@ ### SUBSTRAIT_SCALAR_TEST: v1.0 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' +# basic is_true is_true(true::bool) = true::bool is_true(false::bool) = false::bool is_true(null::bool) = false::bool diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 44335f19e..95fff93e1 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -1,42 +1,109 @@ # SPDX-License-Identifier: Apache-2.0 +import json import os +from dataclasses import asdict +from tests.baseline import read_baseline_file, generate_baseline from tests.coverage.case_file_parser import load_all_testcases from tests.coverage.coverage import get_test_coverage from tests.coverage.extensions import build_type_to_short_type from tests.coverage.extensions import Extension +def compare_baselines(expected, actual): + errors = [] + + if actual.registry.extension_count < expected.registry.extension_count: + errors.append( + f"Extension count mismatch: expected {expected.registry.extension_count}, got {actual.registry.extension_count}" + ) + if actual.registry.dependency_count < expected.registry.dependency_count: + errors.append( + f"Dependency count mismatch: expected {expected.registry.dependency_count}, got {actual.registry.dependency_count}" + ) + if actual.registry.function_count < expected.registry.function_count: + errors.append( + f"Function count mismatch: expected {expected.registry.function_count}, got {actual.registry.function_count}" + ) + if ( + actual.registry.num_aggregate_functions + < expected.registry.num_aggregate_functions + ): + errors.append( + f"Aggregate function count mismatch: expected {expected.registry.num_aggregate_functions}, got {actual.registry.num_aggregate_functions}" + ) + if actual.registry.num_scalar_functions < expected.registry.num_scalar_functions: + errors.append( + f"Scalar function count mismatch: expected {expected.registry.num_scalar_functions}, got {actual.registry.num_scalar_functions}" + ) + if actual.registry.num_window_functions < expected.registry.num_window_functions: + errors.append( + f"Window function count mismatch: expected {expected.registry.num_window_functions}, got {actual.registry.num_window_functions}" + ) + if ( + actual.registry.num_function_overloads + < expected.registry.num_function_overloads + ): + errors.append( + f"Function overload count mismatch: expected {expected.registry.num_function_overloads}, got {actual.registry.num_function_overloads}" + ) + + if actual.coverage.total_test_count < expected.coverage.total_test_count: + errors.append( + f"Total test count mismatch: expected {expected.coverage.total_test_count}, got {actual.coverage.total_test_count}" + ) + if actual.coverage.num_function_variants < expected.coverage.num_function_variants: + errors.append( + f"Total function variants mismatch: expected {expected.coverage.num_function_variants}, got {actual.coverage.num_function_variants}" + ) + if ( + actual.coverage.num_covered_function_variants + < expected.coverage.num_covered_function_variants + ): + errors.append( + f"Covered function variants mismatch: expected {expected.coverage.num_covered_function_variants}, got {actual.coverage.num_covered_function_variants}" + ) + + expected_coverage_gap = expected.num_function_variants_without_coverage() + actual_coverage_gap = actual.num_function_variants_without_coverage() + if actual_coverage_gap > expected_coverage_gap: + errors.append( + f"Coverage gap too large: {actual_coverage_gap} function variants with no tests, " + f"out of {actual.coverage.num_function_variants} total function variants. " + f"New functions should be added along with test cases that illustrate their behavior." + ) + + return errors + + # NOTE: this test is run as part of pre-commit hook def test_substrait_extension_coverage(): script_dir = os.path.dirname(os.path.abspath(__file__)) + baseline = read_baseline_file(os.path.join(script_dir, "baseline.json")) extensions_path = os.path.join(script_dir, "../extensions") registry = Extension.read_substrait_extensions(extensions_path) - assert len(registry.registry) >= 161 - num_overloads = sum([len(f) for f in registry.registry.values()]) - assert num_overloads >= 510 - assert len(registry.dependencies) >= 13 - assert len(registry.scalar_functions) >= 162 - assert len(registry.aggregate_functions) >= 29 - assert len(registry.window_functions) >= 0 test_case_dir = os.path.join(script_dir, "./cases") all_test_files = load_all_testcases(test_case_dir) coverage = get_test_coverage(all_test_files, registry) - assert coverage.test_count >= 1077 assert ( coverage.num_tests_with_no_matching_function == 0 ), f"{coverage.num_tests_with_no_matching_function} tests with no matching function" - assert coverage.num_covered_function_variants >= 226 - assert coverage.total_function_variants >= 513 - assert ( - coverage.total_function_variants - coverage.num_covered_function_variants - ) <= 287, ( - f"Coverage gap too large: {coverage.total_function_variants - coverage.num_covered_function_variants} " - f"function variants with no tests, out of {coverage.total_function_variants} total function variants." + + actual_baseline = generate_baseline(registry, coverage) + errors = compare_baselines(baseline, actual_baseline) + assert not errors, ( + "\n".join(errors) + + f"The baseline file does not match the current test coverage. " + f"Please update the file at tests/baseline.json to align with the current baseline" + f"{json.dumps(asdict(actual_baseline), indent=2)}" ) + if baseline != actual_baseline: + print("\nBaseline has changed, updating tests/baseline.json") + print(json.dumps(asdict(actual_baseline), indent=2)) + def test_build_type_to_short_type(): long_to_short = build_type_to_short_type() From fa23172e24976a10df0c6a4a9941d54c3865be42 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Fri, 17 Jan 2025 19:14:44 +0530 Subject: [PATCH 2/3] add license header --- tests/baseline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/baseline.py b/tests/baseline.py index c666a3d2e..a60dcfc60 100644 --- a/tests/baseline.py +++ b/tests/baseline.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: Apache-2.0 import json from dataclasses import dataclass from typing import List, Dict From b694f60bab778204827bf812d4d379c030d6f39a Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Sat, 18 Jan 2025 04:05:29 +0530 Subject: [PATCH 3/3] address review comments --- tests/baseline.py | 68 +++++++++++++++++++ tests/cases/comparison/is_false.test | 2 +- tests/cases/comparison/is_not_false.test | 2 +- .../{is_not_true => is_not_true.test} | 2 +- tests/cases/comparison/is_true.test | 2 +- tests/test_extensions.py | 68 +------------------ 6 files changed, 73 insertions(+), 71 deletions(-) rename tests/cases/comparison/{is_not_true => is_not_true.test} (80%) diff --git a/tests/baseline.py b/tests/baseline.py index a60dcfc60..135069e7f 100644 --- a/tests/baseline.py +++ b/tests/baseline.py @@ -44,6 +44,74 @@ def from_dict(cls, data: Dict): def num_function_variants_without_coverage(self): return self.coverage.num_function_variants_without_coverage() + def validate_against(self, expected): + errors = [] + + if self.registry.extension_count < expected.registry.extension_count: + errors.append( + f"Extension count mismatch: expected {expected.registry.extension_count}, got {self.registry.extension_count}" + ) + if self.registry.dependency_count < expected.registry.dependency_count: + errors.append( + f"Dependency count mismatch: expected {expected.registry.dependency_count}, got {self.registry.dependency_count}" + ) + if self.registry.function_count < expected.registry.function_count: + errors.append( + f"Function count mismatch: expected {expected.registry.function_count}, got {self.registry.function_count}" + ) + if ( + self.registry.num_aggregate_functions + < expected.registry.num_aggregate_functions + ): + errors.append( + f"Aggregate function count mismatch: expected {expected.registry.num_aggregate_functions}, got {self.registry.num_aggregate_functions}" + ) + if self.registry.num_scalar_functions < expected.registry.num_scalar_functions: + errors.append( + f"Scalar function count mismatch: expected {expected.registry.num_scalar_functions}, got {self.registry.num_scalar_functions}" + ) + if self.registry.num_window_functions < expected.registry.num_window_functions: + errors.append( + f"Window function count mismatch: expected {expected.registry.num_window_functions}, got {self.registry.num_window_functions}" + ) + if ( + self.registry.num_function_overloads + < expected.registry.num_function_overloads + ): + errors.append( + f"Function overload count mismatch: expected {expected.registry.num_function_overloads}, got {self.registry.num_function_overloads}" + ) + + if self.coverage.total_test_count < expected.coverage.total_test_count: + errors.append( + f"Total test count mismatch: expected {expected.coverage.total_test_count}, got {self.coverage.total_test_count}" + ) + if ( + self.coverage.num_function_variants + < expected.coverage.num_function_variants + ): + errors.append( + f"Total function variants mismatch: expected {expected.coverage.num_function_variants}, got {self.coverage.num_function_variants}" + ) + if ( + self.coverage.num_covered_function_variants + < expected.coverage.num_covered_function_variants + ): + errors.append( + f"Covered function variants mismatch: expected {expected.coverage.num_covered_function_variants}, got {self.coverage.num_covered_function_variants}" + ) + + expected_coverage_gap = expected.num_function_variants_without_coverage() + actual_coverage_gap = self.num_function_variants_without_coverage() + if actual_coverage_gap > expected_coverage_gap: + errors.append( + f"Coverage gap too large: {actual_coverage_gap} function variants with no tests, " + f"out of {self.coverage.num_function_variants} total function variants. " + f"New functions should be added along with test cases that illustrate their behavior." + ) + + return errors + def read_baseline_file(file_path: str) -> Baseline: with open(file_path, "r") as file: diff --git a/tests/cases/comparison/is_false.test b/tests/cases/comparison/is_false.test index ea2d78b2d..6cfd337d6 100644 --- a/tests/cases/comparison/is_false.test +++ b/tests/cases/comparison/is_false.test @@ -1,7 +1,7 @@ ### SUBSTRAIT_SCALAR_TEST: v1.0 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' -# basic is_false +# basic: Basic examples without any special cases is_false(true::bool) = false::bool is_false(false::bool) = true::bool is_false(null::bool) = false::bool diff --git a/tests/cases/comparison/is_not_false.test b/tests/cases/comparison/is_not_false.test index a19bb53de..202b84318 100644 --- a/tests/cases/comparison/is_not_false.test +++ b/tests/cases/comparison/is_not_false.test @@ -1,7 +1,7 @@ ### SUBSTRAIT_SCALAR_TEST: v1.0 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' -# basic is_not_false +# basic: Basic examples without any special cases is_not_false(true::bool) = true::bool is_not_false(false::bool) = false::bool is_not_false(null::bool) = true::bool diff --git a/tests/cases/comparison/is_not_true b/tests/cases/comparison/is_not_true.test similarity index 80% rename from tests/cases/comparison/is_not_true rename to tests/cases/comparison/is_not_true.test index 997735bb1..58b42781a 100644 --- a/tests/cases/comparison/is_not_true +++ b/tests/cases/comparison/is_not_true.test @@ -1,7 +1,7 @@ ### SUBSTRAIT_SCALAR_TEST: v1.0 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' -# basic is_not_true +# basic: Basic examples without any special cases is_not_true(true::bool) = false::bool is_not_true(false::bool) = true::bool is_not_true(null::bool) = true::bool diff --git a/tests/cases/comparison/is_true.test b/tests/cases/comparison/is_true.test index cb1f30170..4bddaf2d8 100644 --- a/tests/cases/comparison/is_true.test +++ b/tests/cases/comparison/is_true.test @@ -1,7 +1,7 @@ ### SUBSTRAIT_SCALAR_TEST: v1.0 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml' -# basic is_true +# basic: Basic examples without any special cases is_true(true::bool) = true::bool is_true(false::bool) = false::bool is_true(null::bool) = false::bool diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 95fff93e1..064a67eaa 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -10,72 +10,6 @@ from tests.coverage.extensions import Extension -def compare_baselines(expected, actual): - errors = [] - - if actual.registry.extension_count < expected.registry.extension_count: - errors.append( - f"Extension count mismatch: expected {expected.registry.extension_count}, got {actual.registry.extension_count}" - ) - if actual.registry.dependency_count < expected.registry.dependency_count: - errors.append( - f"Dependency count mismatch: expected {expected.registry.dependency_count}, got {actual.registry.dependency_count}" - ) - if actual.registry.function_count < expected.registry.function_count: - errors.append( - f"Function count mismatch: expected {expected.registry.function_count}, got {actual.registry.function_count}" - ) - if ( - actual.registry.num_aggregate_functions - < expected.registry.num_aggregate_functions - ): - errors.append( - f"Aggregate function count mismatch: expected {expected.registry.num_aggregate_functions}, got {actual.registry.num_aggregate_functions}" - ) - if actual.registry.num_scalar_functions < expected.registry.num_scalar_functions: - errors.append( - f"Scalar function count mismatch: expected {expected.registry.num_scalar_functions}, got {actual.registry.num_scalar_functions}" - ) - if actual.registry.num_window_functions < expected.registry.num_window_functions: - errors.append( - f"Window function count mismatch: expected {expected.registry.num_window_functions}, got {actual.registry.num_window_functions}" - ) - if ( - actual.registry.num_function_overloads - < expected.registry.num_function_overloads - ): - errors.append( - f"Function overload count mismatch: expected {expected.registry.num_function_overloads}, got {actual.registry.num_function_overloads}" - ) - - if actual.coverage.total_test_count < expected.coverage.total_test_count: - errors.append( - f"Total test count mismatch: expected {expected.coverage.total_test_count}, got {actual.coverage.total_test_count}" - ) - if actual.coverage.num_function_variants < expected.coverage.num_function_variants: - errors.append( - f"Total function variants mismatch: expected {expected.coverage.num_function_variants}, got {actual.coverage.num_function_variants}" - ) - if ( - actual.coverage.num_covered_function_variants - < expected.coverage.num_covered_function_variants - ): - errors.append( - f"Covered function variants mismatch: expected {expected.coverage.num_covered_function_variants}, got {actual.coverage.num_covered_function_variants}" - ) - - expected_coverage_gap = expected.num_function_variants_without_coverage() - actual_coverage_gap = actual.num_function_variants_without_coverage() - if actual_coverage_gap > expected_coverage_gap: - errors.append( - f"Coverage gap too large: {actual_coverage_gap} function variants with no tests, " - f"out of {actual.coverage.num_function_variants} total function variants. " - f"New functions should be added along with test cases that illustrate their behavior." - ) - - return errors - - # NOTE: this test is run as part of pre-commit hook def test_substrait_extension_coverage(): script_dir = os.path.dirname(os.path.abspath(__file__)) @@ -92,7 +26,7 @@ def test_substrait_extension_coverage(): ), f"{coverage.num_tests_with_no_matching_function} tests with no matching function" actual_baseline = generate_baseline(registry, coverage) - errors = compare_baselines(baseline, actual_baseline) + errors = actual_baseline.validate_against(baseline) assert not errors, ( "\n".join(errors) + f"The baseline file does not match the current test coverage. "