Skip to content

Commit

Permalink
feat(script): Verify checker labelling invariants
Browse files Browse the repository at this point in the history
There were multiple occasions in the past at which point it was realised
that the checker-labelling in the configuration files did not conform to
some "invariants" which we all, or mostly, agreed upon to upkeep, but
only did so ad-hoc and implicitly, without any tooling-based
verification.

N.B., violation of these invariants did not result in **serious** issues
or bugs, but often caused weird and quirky behaviour which was
non-trivial to understand for users.

This patch extends the `label-tool` with facilities to verify and, if
meaningful for the specific rule, automatically fix the label set of
individual checkers as one entity (as opposed to the `doc_url_generate`
or `severity_generate` tools, which only considered the value of a
unique single label), upholding invariants **between** labels, e.g.,
`profile:` labels.

This patch contains 4 implemented rules:

 - `profile:default` ⊆ `profile:sensitive` ⊆ `profile:extreme`
 - `guideline:sei-cert` ⇔ `sei-cert:<rule-number>`
 - `guideline:sei-cert` ⇒ `profile:security`
 - clangsa (alpha.* ∪ debug.*) ⇒ ¬( `profile:(default|sensitive|extreme|security|portability)` )
  • Loading branch information
whisperity committed Jul 9, 2024
1 parent 962f449 commit 414cee2
Show file tree
Hide file tree
Showing 21 changed files with 1,262 additions and 7 deletions.
2 changes: 2 additions & 0 deletions scripts/labels/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from . import \
checker_labels, \
exception, \
fixit, \
http_, \
output, \
projects, \
Expand All @@ -27,6 +28,7 @@
"checker_labels",
"codechecker",
"exception",
"fixit",
"http_",
"output",
"projects",
Expand Down
7 changes: 6 additions & 1 deletion scripts/labels/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
try:
from .doc_url.generate_tool import __main__ as doc_url_generate
from .doc_url.verify_tool import __main__ as doc_url_verify
from .invariant_check.tool import __main__ as invariant_check
from .severity.generate_tool import __main__ as severity_generate
except ModuleNotFoundError as e:
import traceback
Expand All @@ -39,7 +40,10 @@ def args() -> argparse.ArgumentParser:
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
subparsers = parser.add_subparsers(
title="subcommands",
description="Please select a subcommand to continue.",
description="""
'label-tool' is a collection of semi-individual sub-tools.
Please select one to continue.
""",
dest="subcommand",
required=True)

Expand All @@ -56,6 +60,7 @@ def add_subparser(package):

add_subparser(doc_url_generate)
add_subparser(doc_url_verify)
add_subparser(invariant_check)
add_subparser(severity_generate)

return parser
Expand Down
119 changes: 115 additions & 4 deletions scripts/labels/checker_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,28 @@
from codechecker_common.checker_labels import split_label_kv

from .output import Settings as OutputSettings, error, trace
from . import fixit


# The raw label structure, as present verbatim in the configuration file.
# (e.g., {"bugprone-foo": ["severity:HIGH", "profile:default"]})
_ConfigFileLabels = Dict[str, List[str]]


# Maps: checker -> single label value | None
# (e.g., {"bugprone-foo": "HIGH"})
SingleLabels = Dict[str, Optional[str]]
Labels = Dict[str, Dict[str, str]]

# Maps the label keys into a list of label values with that key.
# (e.g., {"profile": ["default"], "severity": ["HIGH"]})
KeySplitLabels = Dict[str, List[str]]

# Maps: checker -> [label key -> label values...]...
# (e.g., {"bugprone-foo": {"profile": ["default", "sensitive", "extreme"]}}
MultipleLabels = Dict[str, KeySplitLabels]


K_Labels = "labels"
K_LabelToolSkipDirective = "label-tool-skip"


Expand Down Expand Up @@ -123,7 +137,7 @@ def get_checkers_with_ignore_of_key(path: pathlib.Path,
checker's labels.
"""
try:
label_cfg = cast(_ConfigFileLabels, _load_json(path)["labels"])
label_cfg = cast(_ConfigFileLabels, _load_json(path)[K_Labels])
except KeyError:
error("'%s' is not a label config file", path)
raise
Expand Down Expand Up @@ -155,7 +169,7 @@ def get_checker_labels(
`skip_directive_handling`'s value.
"""
try:
label_cfg = cast(_ConfigFileLabels, _load_json(path)["labels"])
label_cfg = cast(_ConfigFileLabels, _load_json(path)[K_Labels])
except KeyError:
error("'%s' is not a label config file", path)
raise
Expand All @@ -182,6 +196,64 @@ def get_checker_labels(
for checker, labels in filtered_labels.items()}


def get_checker_labels_multiple(path: pathlib.Path) -> MultipleLabels:
"""
Loads the checker config label file available at `path` and transfors it
into a `MultipleLabels` structure, and returns it.
This method **DOES NOT** respect the ``label-tool-skip`` directives.
"""
try:
label_cfg = cast(_ConfigFileLabels, _load_json(path)[K_Labels])
except KeyError:
error("'%s' is not a label config file", path)
raise

return {
checker: {
key: [label_v2
for label_kv2 in labels
for label_k2, label_v2 in (split_label_kv(label_kv2),)
if label_k2 == key
]
for label_kv in labels
for key, _ in (split_label_kv(label_kv),)
}
for checker, labels in label_cfg.items()
}


def apply_label_fixes(labels: MultipleLabels,
fixes: fixit.FixMap) -> MultipleLabels:
"""
Applies the `FixAction`s in `fixes` to the `labels` structure, in place.
(Returns a reference to the input `labels` parameter.)
The `fixes` are applied in the order they appear in the input.
Consistency and order-independence of the resulting actions are **NOT**
verified by this function, please see `fixit.filter_conflicting_fixes`
for that.
"""
for checker, fix_actions in fixes.items():
checker_labels: KeySplitLabels = labels.get(checker, dict())

for fix in fix_actions:
if isinstance(fix, (fixit.ModifyLabelAction,
fixit.RemoveLabelAction)):
ok, ov = split_label_kv(cast(str, fix.old))
checker_labels[ok] = [
v_ for v_ in checker_labels.get(ok, list()) if v_ != ov]

if isinstance(fix, (fixit.AddLabelAction,
fixit.ModifyLabelAction)):
nk, nv = split_label_kv(cast(str, fix.new))
checker_labels[nk] = checker_labels.get(nk, list()) + [nv]

labels[checker] = checker_labels

return labels


def update_checker_labels(
analyser: str,
path: pathlib.Path,
Expand All @@ -204,7 +276,7 @@ def update_checker_labels(
"""
try:
config = _load_json(path)
label_cfg = cast(_ConfigFileLabels, config["labels"])
label_cfg = cast(_ConfigFileLabels, config[K_Labels])
except KeyError:
error("'%s's '%s' is not a label config file", analyser, path)
raise
Expand Down Expand Up @@ -245,3 +317,42 @@ def update_checker_labels(
label_cfg[checker] = sorted(checker_labels)

_save_json(path, config)


def update_checker_labels_multiple_overwrite(
analyser: str,
path: pathlib.Path,
labels: MultipleLabels
):
"""
Loads the checker config label file available at `path` and updates it to
reflect the **CONTENTS** of `labels`.
Entries in the file which do not have a corresponding key in `labels` are
left intact, but the labels for which **ANY** value exists in `labels` are
**OVERWRITTEN** as a single entity.
To mark a checker for **DELETION**, map it in `labels` to an explicit
`None`.
This method **DOES NOT** respect the ``label-tool-skip`` directives.
"""
try:
config = _load_json(path)
label_cfg = cast(_ConfigFileLabels, config[K_Labels])
except KeyError:
error("'%s's '%s' is not a label config file", analyser, path)
raise

for checker, kvs in labels.items():
if kvs is None:
try:
del label_cfg[checker]
except KeyError:
pass
continue

label_cfg[checker] = sorted({f"{k}:{v}"
for k, vs in kvs.items()
for v in vs
})

_save_json(path, config)
1 change: 1 addition & 0 deletions scripts/labels/doc_url/generate_tool/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
Expand Down
2 changes: 1 addition & 1 deletion scripts/labels/doc_url/verify_tool/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def args(parser: Optional[argparse.ArgumentParser]) -> argparse.ArgumentParser:
default=cpu_count(),
help="""
The number of parallel processes to use for querying the validity of the
"documentation URLs.
documentation URLs.
Defaults to all available logical cores.
""")

Expand Down
9 changes: 8 additions & 1 deletion scripts/labels/doc_url/verify_tool/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@


class Worker(_Singleton):
"""Implementation of methods executed in a parallel computation context."""
"""
Implementation of methods executed in a parallel computation context.
This object, living as a globalised `_Singleton`, may bind objects of
global state (with an **UNSYNCHRONISED** copy living in each interpreter
process as spawned by the `Pool`) that can not be transmitted through the
usual IPC means used by ``Pool.map()``.
"""

def __init__(self):
"""Returns the instance that was loaded as a `Singleton`."""
Expand Down
151 changes: 151 additions & 0 deletions scripts/labels/fixit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""
Represents simple string transformations that appear in the context of
automatic fixes, and supports conflict-checking the set of these actions
whether they can be applied cleanly.
"""
from itertools import groupby
from typing import Dict, List, Optional, Tuple

from .util import remove_falsy_mapped


class FixAction:
"""
Represents a singular fix action (the changing of a single label entry)
difference.
"""

def __init__(self, kind: str,
old_label: Optional[str], new_label: Optional[str]):
self._kind = kind
self.old = old_label
self.new = new_label

def __repr__(self) -> str:
return "FixAction" \
'(' \
f"'{self._kind}', " \
f"{'%s'.format(self.old) if self.old else 'None'}, " \
f"{'%s'.format(self.new) if self.old else 'None'}" \
')'

def __eq__(self, rhs) -> bool:
if not isinstance(rhs, FixAction):
return False
return (self._kind, self.old, self.new) \
== (rhs._kind, rhs.old, rhs.new)

def __hash__(self):
return hash((self._kind, self.old, self.new))

def __lt__(self, rhs):
if not isinstance(rhs, FixAction):
raise TypeError(
"'<' not supported between instances of '%s' and '%s'"
% (type(self), type(rhs)))
return (self._kind, self.old, self.new) < (rhs._kind, rhs.old, rhs.new)


class AddLabelAction(FixAction):
"""Represents inserting a new label into the label set of a checker."""

def __init__(self, label: str):
super().__init__('+', None, label)

def __repr__(self) -> str:
return f"+++ {self.new}"


class RemoveLabelAction(FixAction):
"""Represents removing a label from the label set of a checker."""

def __init__(self, label: str):
super().__init__('-', label, None)

def __repr__(self) -> str:
return f"--- {self.old}"


class ModifyLabelAction(FixAction):
"""
Represents changing a label from one value to another.
Only a complete change is modelled, sub-string changes would be
too granular.
"""

def __init__(self, old: str, new: str):
super().__init__('~', old, new)

def __repr__(self) -> str:
return f"~~~ {self.old} -> {self.new}"


def filter_conflicting_fixes(fixes: List[FixAction]) \
-> Tuple[bool, List[FixAction]]:
"""
Checks whether a list of `fixes` are non-conflicting, that is, can be
applied safely to any existing collection.
Returns whether there were any conflicts, and the non-conflicting subset
of the `FixAction`s, which might be empty.
The following conflict cases are detected:
* The same value is added (`AddLabelAction`) and removed
(`RemoveLabelAction`) at the same time.
* A `ModifyLabelAction` changes FROM a value that is also being added.
* A `ModifyLabelAction` changes TO a value that is also being removed.
* Multiple `ModifyLabelAction`s would change the same value TO
different labels.
Notably, the following cases are **NOT** errors, albeit being somewhat
redundant:
* Adding (`AddLabelAction`) the same label multiple times.
* Removing (`RemoveLabelAction`) the same label multiple times.
* Multiple `ModifyLabelAction`s (with different ``old`` value!)
changing the values to the same ``new`` value.
"""
conflict_free = True
safe_fixes: List[FixAction] = list(fixes)

all_labels = ({action.old for action in fixes} |
{action.new for action in fixes}) - {None}
add_set = remove_falsy_mapped(
{label: [action for action in fixes if action.new == label]
for label in all_labels})
remove_set = remove_falsy_mapped(
{label: [action for action in fixes if action.old == label]
for label in all_labels})

add_and_remove = add_set.keys() & remove_set.keys()
if add_and_remove:
conflict_free = False
safe_fixes = list(filter(lambda f: f.old not in add_and_remove
and f.new not in add_and_remove,
safe_fixes))

for multiple_remove_of_same in filter(lambda kv: len(kv[1]) > 1,
remove_set.items()):
label, changes = multiple_remove_of_same
grp = groupby(changes)
if next(grp, True) and not next(grp, False):
# All changes are operator ==-equal, just duplicates exist.
continue

conflict_free = False
safe_fixes = list(filter(lambda f: f.old != label,
safe_fixes))

return conflict_free, safe_fixes


FixMap = Dict[str, List[FixAction]]
Loading

0 comments on commit 414cee2

Please sign in to comment.