substrait-io · EpsilonPrime · Jan 24, 2025 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025
@@ -0,0 +1,16 @@
+{
+   "registry": {
+     "dependency_count": 13,
+     "extension_count": 13,
+     "function_count": 165,
+     "num_aggregate_functions": 29,
+     "num_scalar_functions": 169,
+     "num_window_functions": 0,
+     "num_function_overloads": 517
+   },
+   "coverage": {
+     "total_test_count": 1086,
+     "num_function_variants": 517,
+     "num_covered_function_variants": 229
+   }
+}
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: Apache-2.0
+import json
+from dataclasses import dataclass
+from typing import List, Dict
+
+from tests.coverage.coverage import TestCoverage
+from tests.coverage.extensions import FunctionRegistry
+
+
+@dataclass
+class Registry:
+    """Size counters describing the function registry half of a baseline.
+
+    ``generate_baseline`` fills each field from a ``FunctionRegistry``; the
+    expression used for every field is noted next to it.
+    """
+    extension_count: int  # len(registry.extensions)
+    dependency_count: int  # len(registry.dependencies)
+    function_count: int  # len(registry.registry)
+    num_aggregate_functions: int  # len(registry.aggregate_functions)
+    num_scalar_functions: int  # len(registry.scalar_functions)
+    num_window_functions: int  # len(registry.window_functions)
+    num_function_overloads: int  # sum of len() over registry.registry.values()
+
+
+@dataclass
+class Coverage:
+    total_test_count: int
+    num_function_variants: int
+    num_covered_function_variants: int
+
+    def num_function_variants_without_coverage(self):
+        return self.num_function_variants - self.num_covered_function_variants
+
+
+@dataclass
+class Baseline:
+    registry: Registry
+    coverage: Coverage
+
+    @classmethod
+    def from_dict(cls, data: Dict):
+        registry_data = data["registry"]
+        test_coverage_data = data["coverage"]
+        registry = Registry(**registry_data)
+        coverage = Coverage(**test_coverage_data)
+        return cls(registry, coverage)
+
+    def num_function_variants_without_coverage(self):
+        return self.coverage.num_function_variants_without_coverage()
+
+    def validate_against(self, expected):
+        errors = []
+
+        if self.registry.extension_count < expected.registry.extension_count:
+            errors.append(
+                f"Extension count mismatch: expected {expected.registry.extension_count}, got {self.registry.extension_count}"
+            )
+        if self.registry.dependency_count < expected.registry.dependency_count:
+            errors.append(
+                f"Dependency count mismatch: expected {expected.registry.dependency_count}, got {self.registry.dependency_count}"
+            )
+        if self.registry.function_count < expected.registry.function_count:
+            errors.append(
+                f"Function count mismatch: expected {expected.registry.function_count}, got {self.registry.function_count}"
+            )
+        if (
+            self.registry.num_aggregate_functions
+            < expected.registry.num_aggregate_functions
+        ):
+            errors.append(
+                f"Aggregate function count mismatch: expected {expected.registry.num_aggregate_functions}, got {self.registry.num_aggregate_functions}"
+            )
+        if self.registry.num_scalar_functions < expected.registry.num_scalar_functions:
+            errors.append(
+                f"Scalar function count mismatch: expected {expected.registry.num_scalar_functions}, got {self.registry.num_scalar_functions}"
+            )
+        if self.registry.num_window_functions < expected.registry.num_window_functions:
+            errors.append(
+                f"Window function count mismatch: expected {expected.registry.num_window_functions}, got {self.registry.num_window_functions}"
+            )
+        if (
+            self.registry.num_function_overloads
+            < expected.registry.num_function_overloads
+        ):
+            errors.append(
+                f"Function overload count mismatch: expected {expected.registry.num_function_overloads}, got {self.registry.num_function_overloads}"
+            )
+
+        if self.coverage.total_test_count < expected.coverage.total_test_count:
+            errors.append(
+                f"Total test count mismatch: expected {expected.coverage.total_test_count}, got {self.coverage.total_test_count}"
+            )
+        if (
+            self.coverage.num_function_variants
+            < expected.coverage.num_function_variants
+        ):
+            errors.append(
+                f"Total function variants mismatch: expected {expected.coverage.num_function_variants}, got {self.coverage.num_function_variants}"
+            )
+        if (
+            self.coverage.num_covered_function_variants
+            < expected.coverage.num_covered_function_variants
+        ):
+            errors.append(
+                f"Covered function variants mismatch: expected {expected.coverage.num_covered_function_variants}, got {self.coverage.num_covered_function_variants}"
+            )
+
+        expected_coverage_gap = expected.num_function_variants_without_coverage()
+        actual_coverage_gap = self.num_function_variants_without_coverage()
+        if actual_coverage_gap > expected_coverage_gap:
+            errors.append(
+                f"Coverage gap too large: {actual_coverage_gap} function variants with no tests, "
+                f"out of {self.coverage.num_function_variants} total function variants. "
+                f"New functions should be added along with test cases that illustrate their behavior."
+            )
+
+        return errors
+
+
+def read_baseline_file(file_path: str) -> Baseline:
+    with open(file_path, "r") as file:
+        data = json.load(file)
+    return Baseline.from_dict(data)
+
+
+def generate_baseline(registry: FunctionRegistry, coverage: TestCoverage):
+    registry_data = Registry(
+        extension_count=len(registry.extensions),
+        dependency_count=len(registry.dependencies),
+        function_count=len(registry.registry),
+        num_aggregate_functions=len(registry.aggregate_functions),
+        num_scalar_functions=len(registry.scalar_functions),
+        num_window_functions=len(registry.window_functions),
+        num_function_overloads=sum([len(f) for f in registry.registry.values()]),
+    )
+    test_coverage_data = Coverage(
+        total_test_count=coverage.test_count,
+        num_function_variants=coverage.total_function_variants,
+        num_covered_function_variants=coverage.num_covered_function_variants,
+    )
+    return Baseline(registry=registry_data, coverage=test_coverage_data)
@@ -1,6 +1,7 @@
 ### SUBSTRAIT_SCALAR_TEST: v1.0
 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
 
+# basic: Basic examples without any special cases
 is_false(true::bool) = false::bool
 is_false(false::bool) = true::bool
 is_false(null::bool) = false::bool
@@ -1,6 +1,7 @@
 ### SUBSTRAIT_SCALAR_TEST: v1.0
 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
 
+# basic: Basic examples without any special cases
 is_not_false(true::bool) = true::bool
 is_not_false(false::bool) = false::bool
 is_not_false(null::bool) = true::bool
@@ -1,6 +1,7 @@
 ### SUBSTRAIT_SCALAR_TEST: v1.0
 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
 
+# basic: Basic examples without any special cases
 is_not_true(true::bool) = false::bool
 is_not_true(false::bool) = true::bool
 is_not_true(null::bool) = true::bool
@@ -1,6 +1,7 @@
 ### SUBSTRAIT_SCALAR_TEST: v1.0
 ### SUBSTRAIT_INCLUDE: '/extensions/functions_comparison.yaml'
 
+# basic: Basic examples without any special cases
 is_true(true::bool) = true::bool
 is_true(false::bool) = false::bool
 is_true(null::bool) = false::bool
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
+import json
 import os
+from dataclasses import asdict
 
+from tests.baseline import read_baseline_file, generate_baseline
 from tests.coverage.case_file_parser import load_all_testcases
 from tests.coverage.coverage import get_test_coverage
 from tests.coverage.extensions import build_type_to_short_type
@@ -10,33 +13,31 @@
 # NOTE: this test is run as part of pre-commit hook
 def test_substrait_extension_coverage():
     script_dir = os.path.dirname(os.path.abspath(__file__))
+    baseline = read_baseline_file(os.path.join(script_dir, "baseline.json"))
     extensions_path = os.path.join(script_dir, "../extensions")
     registry = Extension.read_substrait_extensions(extensions_path)
-    assert len(registry.registry) >= 161
-    num_overloads = sum([len(f) for f in registry.registry.values()])
-    assert num_overloads >= 510
-    assert len(registry.dependencies) >= 13
-    assert len(registry.scalar_functions) >= 162
-    assert len(registry.aggregate_functions) >= 29
-    assert len(registry.window_functions) >= 0
 
     test_case_dir = os.path.join(script_dir, "./cases")
     all_test_files = load_all_testcases(test_case_dir)
     coverage = get_test_coverage(all_test_files, registry)
 
-    assert coverage.test_count >= 1077
     assert (
         coverage.num_tests_with_no_matching_function == 0
     ), f"{coverage.num_tests_with_no_matching_function} tests with no matching function"
-    assert coverage.num_covered_function_variants >= 226
-    assert coverage.total_function_variants >= 513
-    assert (
-        coverage.total_function_variants - coverage.num_covered_function_variants
-    ) <= 287, (
-        f"Coverage gap too large: {coverage.total_function_variants - coverage.num_covered_function_variants} "
-        f"function variants with no tests, out of {coverage.total_function_variants} total function variants."
+
+    actual_baseline = generate_baseline(registry, coverage)
+    errors = actual_baseline.validate_against(baseline)
+    assert not errors, (
+        "\n".join(errors)
+        + f"The baseline file does not match the current test coverage. "
+        f"Please update the file at tests/baseline.json to align with the current baseline"
+        f"{json.dumps(asdict(actual_baseline), indent=2)}"
     )
 
+    if baseline != actual_baseline:
+        print("\nBaseline has changed, updating tests/baseline.json")
+        print(json.dumps(asdict(actual_baseline), indent=2))
+
 
 def test_build_type_to_short_type():
     long_to_short = build_type_to_short_type()