Merge pull request #891 from gchq/feature/coverage_fail

Fail coverage workflow if coverage reduces
gchq · Dec 13, 2024 · ef08009 · ef08009
2 parents 0d3e08c + be6aadf
commit ef08009
Show file tree

Hide file tree

Showing 5 changed files with 255 additions and 9 deletions.
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
@@ -49,18 +49,43 @@ jobs:
         run: uv run pytest tests/unit --cov
       - name: Extract total coverage percentage
         id: cov
-        run: echo "percentage=$( uv run coverage report --format=total )" >> $GITHUB_OUTPUT
+        run: |
+          echo "percentage_int=$( uv run coverage report --format=total )" >> $GITHUB_OUTPUT
+          echo "percentage_float=$( uv run coverage report --format=total --precision=8 )" >> $GITHUB_OUTPUT
+      - name: Check out metadata repo
+        if: github.event_name == 'pull_request'
+        uses: actions/checkout@v4
+        with:
+          repository: gchq/coreax-metadata
+          # GitHub Actions requires the checkout destination to be within coreax/coreax.
+          # To avoid checking out the main repo into coreax/coreax/coreax, check out the
+          # metadata repo to a nested location inside coreax/coreax. Pick a folder name
+          # that is very unlikely to clash with any current or future folder name
+          # committed to the main coreax repo.
+          path: tmp_coreax-metadata
+      - name: Check for reduction in coverage
+        if: github.event_name == 'pull_request'
+        env:
+          HISTORIC: tmp_coreax-metadata/coverage
+        run: |
+          # Create directory if it doesn't exist yet
+          mkdir -p $HISTORIC
+          uv run tests/coverage/compare.py \
+            ${{ steps.cov.outputs.percentage_float }} \
+            $HISTORIC
       - name: Minimize UV cache
         run: uv cache prune --ci
         if: always()
   coverage-badge:
     name: Update coverage badge
+    # Keep as a separate job to avoid clashes between meta and main repos
     if: github.event_name == 'push'
     # Push coverage badge config to coreax-metadata repo.
     needs:
       - coverage
     env:
-      percentage: ${{ needs.coverage.outputs.percentage }}
+      percentage_int: ${{ needs.coverage.outputs.percentage_int }}
+      percentage_float: ${{ needs.coverage.outputs.percentage_float }}
     runs-on: ubuntu-latest
     steps:
       - name: Generate a GitHub token
@@ -74,24 +99,39 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: gchq/coreax-metadata
+      - name: Generate high-precision coverage JSON
+        run: |
+          echo "{\"total\": ${{ env.percentage_float }}}" > $RUNNER_TEMP/coverage.json
+      - name: Save high-precision coverage data
+        env:
+          GH_TOKEN: ${{ steps.generate-token.outputs.token }}
+        run: |
+          export message="chore: update precise coverage data for $GITHUB_SHA"
+          export content=$( base64 -i $RUNNER_TEMP/coverage.json )
+          OUT_NAME="coverage/coverage-$(date --utc +%Y-%m-%d--%H-%M-%S)--$GITHUB_SHA--v1.json"
+          gh api --method PUT \
+            /repos/:owner/coreax-metadata/contents/$OUT_NAME \
+            -f message="$message" \
+            -f content="$content"
       - name: Choose badge colour
         id: design
         run: |
           echo "colour=${{
-            env.percentage >= 90 && 'brightgreen' ||
-            env.percentage >= 70 && 'yellow' ||
-            env.percentage >= 50 && 'orange' ||
+            env.percentage_int >= 90 && 'brightgreen' ||
+            env.percentage_int >= 70 && 'yellow' ||
+            env.percentage_int >= 50 && 'orange' ||
             'red'
           }}" >> $GITHUB_OUTPUT
       - name: Generate badge config JSON
+        # Display an integer percentage
         run: |
-          echo "coverage = ${{ env.percentage }}%"
+          echo "coverage = ${{ env.percentage_int }}%"
           echo "colour = ${{ steps.design.outputs.colour }}"
           {
             echo "{"
             echo "  \"schemaVersion\": 1,"
             echo "  \"label\": \"Coverage\","
-            echo "  \"message\": \"${{ env.percentage }}%\","
+            echo "  \"message\": \"${{ env.percentage_int }}%\","
             echo "  \"color\": \"${{ steps.design.outputs.colour }}\""
             echo "}"
           } > $RUNNER_TEMP/badge.json
@@ -104,7 +144,7 @@ jobs:
           BADGE_PATH: coverage/coreax_coverage.json
           GH_TOKEN: ${{ steps.generate-token.outputs.token }}
         run: |
-          export message="chore: update coverage for $GITHUB_SHA"
+          export message="chore: update coverage badge for $GITHUB_SHA"
           export content=$( base64 -i $RUNNER_TEMP/badge.json )
           # Create new file if it does not exist yet (or did not exist at checkout)
           if [ ! -f $BADGE_PATH ]; then

diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
@@ -93,7 +93,7 @@ jobs:
           app-id: ${{ vars.WRITE_CONTENTS_PR_APP }}
           private-key: ${{ secrets.WRITE_CONTENTS_PR_KEY }}
           repositories: coreax-metadata
-      - name: Save performance data to Gist
+      - name: Save performance data
         if: github.event_name == 'push'
         env:
           # this is the only step that should actually need write permissions

diff --git a/benchmark/mnist_benchmark.py b/benchmark/mnist_benchmark.py
@@ -632,6 +632,7 @@ def main() -> None:
     }
 
     # Run the experiment with 5 different random keys
+    # pylint: disable=duplicate-code
     for i in range(5):
         print(f"Run {i + 1} of 5:")
         key = jax.random.PRNGKey(i)
@@ -641,6 +642,7 @@ def main() -> None:
                 solver = getter(size)
                 solver_name = get_solver_name(solver)
                 start_time = time.perf_counter()
+                # pylint: enable=duplicate-code
                 coreset, _ = eqx.filter_jit(solver.reduce)(train_data_umap)
 
                 coreset_indices = coreset.nodes.data

diff --git a/tests/coverage/__init__.py b/tests/coverage/__init__.py
@@ -0,0 +1,15 @@
+# © Crown Copyright GCHQ
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Auxiliary package when assessing code coverage."""
diff --git a/tests/coverage/compare.py b/tests/coverage/compare.py
@@ -0,0 +1,189 @@
+# © Crown Copyright GCHQ
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script that compares coverage data to previously-recorded data."""
+
+import argparse
+import datetime
+import json
+import re
+import sys
+from pathlib import Path
+from typing import Optional
+
+COVERAGE_FILENAME_REGEX = re.compile(
+    r"^coverage"
+    r"-(\d{4})-(\d{2})-(\d{2})"
+    r"--(\d{2})-(\d{2})-(\d{2})"
+    r"--([0-9a-f]{40})"
+    r"--v(\d+)\.json$"
+)
+
+# Maximum tolerated coverage loss: absolute (percentage points) and relative (fraction)
+ABSOLUTE_TOLERANCE = 0
+RELATIVE_TOLERANCE = 0
+
+# Increment this if any changes are made to the storage format! Remember to also
+# increment the corresponding value in the `coverage.yml` workflow file.
+CURRENT_DATA_VERSION = 1
+
+
+def parse_args() -> tuple[float, Path]:
+    """
+    Parse command-line arguments.
+
+    :return: Tuple of (new total coverage as a float, path to the directory
+        containing historic coverage data)
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("coverage_total", help="New total coverage as a percentage.")
+    parser.add_argument(
+        "reference_directory", help="Directory containing historic coverage data."
+    )
+    args = parser.parse_args()
+    return float(args.coverage_total), Path(args.reference_directory)
+
+
+def date_from_filename(path: Path) -> Optional[tuple[datetime.datetime, str]]:
+    """
+    Extract the date-time and commit hash from a coverage data file name.
+
+    The current filename format is::
+
+        coverage-YYYY-MM-DD--HH-MM-SS--[40-char git commit hash]--vX.json
+
+    where `YYYY-MM-DD--HH-MM-SS` is the year, month, day, hour, minute, and second
+    that the file was created, the commit hash is for the commit the tests were run
+    against, and the vX at the end is a version number specifier, in case we need to
+    change the format at a later date.
+
+    :param path: The path to the coverage data file. Only the filename component
+        (`path.name`) is used.
+    :return: Tuple (date_time, commit_hash) if the filename matched the expected format,
+        or :data:`None` if it did not match.
+    """
+    filename = path.name
+    match = COVERAGE_FILENAME_REGEX.fullmatch(filename)
+    if not match:
+        return None
+
+    year, month, day, hour, minute, second, git_hash, spec_version = match.groups()
+    if int(spec_version) != CURRENT_DATA_VERSION:
+        # Unsupported data version. In future, we could try to extract some data?
+        return None
+
+    return datetime.datetime(
+        year=int(year),
+        month=int(month),
+        day=int(day),
+        hour=int(hour),
+        minute=int(minute),
+        second=int(second),
+        tzinfo=datetime.timezone.utc,
+    ), git_hash
+
+
+def get_most_recent_coverage_total(reference_directory: Path) -> float:
+    """
+    Get the most recent saved coverage total in the given directory.
+
+    Uses :py:func:`date_from_filename` to extract the date, time and commit hash from
+    each file name, skipping files it rejects. The date and time are stored with an
+    accuracy of one second, so two data files sharing a time are unlikely but not
+    impossible. If two data files have the exact same time recorded, the latest file is
+    selected based on the lexicographic ordering of the associated commit hashes.
+
+    :param reference_directory: Directory containing historic coverage data
+    :return: Total coverage extracted from the most recent coverage file, or 0 if no
+        file found
+    """
+    files: dict[Path, tuple[datetime.datetime, str]] = {}
+    for filename in reference_directory.iterdir():
+        date_tuple = date_from_filename(filename)
+        if date_tuple is not None:
+            files[filename] = date_tuple
+
+    if not files:
+        print("**WARNING: No historic coverage data found.**")
+        return 0
+
+    most_recent_file = max(files.keys(), key=files.get)
+
+    with open(most_recent_file, "r", encoding="utf8") as f:
+        coverage_dict = json.load(f)
+
+    return coverage_dict["total"]
+
+
+def check_significant_difference(
+    current_coverage: float, historic_coverage: float
+) -> bool:
+    """
+    Check if the coverage has reduced significantly.
+
+    Print console messages with coverage change. Display full precision for differences
+    but round absolute percentages to two decimal places.
+
+    :param current_coverage: Current coverage total
+    :param historic_coverage: Most recent historic coverage total
+    :return: :data:`True` if the reduction exceeds either tolerance, else :data:`False`
+    """
+    absolute_loss = historic_coverage - current_coverage
+    relative_loss = absolute_loss / historic_coverage if historic_coverage > 0 else 0
+
+    if absolute_loss == 0:
+        print(f"PASS: Coverage remained the same at {current_coverage:.2f}%.")
+        return False
+    if absolute_loss < 0:
+        print(
+            f"PASS: Coverage increased by {-absolute_loss}% from "
+            f"{historic_coverage:.2f}% to "
+            f"{current_coverage:.2f}%."
+        )
+        return False
+
+    exceed_absolute = absolute_loss > ABSOLUTE_TOLERANCE
+    exceed_relative = relative_loss > RELATIVE_TOLERANCE
+
+    if exceed_absolute or exceed_relative:
+        if exceed_absolute and exceed_relative:
+            tolerance_msg = "absolute and relative tolerances"
+        elif exceed_absolute:
+            tolerance_msg = "absolute tolerance"
+        else:
+            tolerance_msg = "relative tolerance"
+        print(
+            f"FAIL: Coverage reduced by {absolute_loss}% from {historic_coverage:.2f}% "
+            f"to {current_coverage:.2f}%, exceeding {tolerance_msg}."
+        )
+        return True
+
+    print(
+        f"PASS: Coverage reduced slightly by {absolute_loss}% from "
+        f"{historic_coverage:.2f}% to {current_coverage:.2f}%."
+    )
+    return False
+
+
+def main() -> None:  # noqa: C901
+    """Run the script, exiting with code 2 if coverage reduced significantly."""
+    current_coverage, reference_directory = parse_args()
+    historic_coverage = get_most_recent_coverage_total(reference_directory)
+    if check_significant_difference(current_coverage, historic_coverage):
+        # Return code 2 to match failure behaviour of coverage
+        sys.exit(2)
+
+
+if __name__ == "__main__":
+    main()