diff --git a/complassist/_flict.py b/complassist/_flict.py index a31c05a..ee36395 100644 --- a/complassist/_flict.py +++ b/complassist/_flict.py @@ -30,11 +30,24 @@ def _run_flict( def flict_simplify(expression: str, output_format: str) -> str: """Simplify a license expression using flict""" - return _run_flict("simplify", expression, options=["-of", output_format]) + simplified = _run_flict("simplify", expression, options=["-of", output_format]) + + logging.debug("Simplified '%s' to '%s' using flict", expression, simplified) + + return simplified + + +def flict_simplify_list(expressions: list[str]) -> list[str]: + """Simplify a list of license expressions""" + simplified = [] + for lic in expressions: + simplified.append(flict_simplify(lic, output_format="text")) + + return list(set(simplified)) def flict_outbound_candidate(expression: str, output_format: str) -> str: """Get possible outbound license candidates using flict""" # TODO: `-el` would make this command more helpful but it has an error: # https://github.com/vinland-technology/flict/issues/391 - return _run_flict("outbound-candidate", expression, options=["-of", output_format]) + return _run_flict("outbound-candidate", expression, options=["-nr", "-of", output_format]) diff --git a/complassist/_licensing.py b/complassist/_licensing.py index 230d588..3cd3643 100644 --- a/complassist/_licensing.py +++ b/complassist/_licensing.py @@ -8,7 +8,7 @@ from license_expression import ExpressionError, Licensing, get_spdx_licensing -from ._flict import flict_outbound_candidate, flict_simplify +from ._flict import flict_outbound_candidate, flict_simplify, flict_simplify_list from ._sbom_parse import extract_items_from_cdx_sbom @@ -33,7 +33,16 @@ def _extract_license_expression_and_names_from_sbom( if lic_name := lic_dict.get("name", ""): lic_names.append(lic_name) - return sorted(list(set(lic_expressions))), sorted(list(set(lic_names))) + # Make expressions and names unique, and sort them + expressions = sorted(list(set(lic_expressions))) + # If using flict, simplify these found licenses. Will reduce possible + # duplicates and fix problematic SPDX expressions (e.g. MPL-2.0+) + # That's far more performant than doing that for each license in the SBOM + if use_flict: + expressions = flict_simplify_list(expressions) + names = sorted(list(set(lic_names))) + + return expressions, names def list_all_licenses(sbom_path: str, use_flict: bool = False) -> list[str]: diff --git a/complassist/_sbom_parse.py b/complassist/_sbom_parse.py index e908f8e..6c1a6c2 100644 --- a/complassist/_sbom_parse.py +++ b/complassist/_sbom_parse.py @@ -10,8 +10,8 @@ from ._helpers import read_json_file -def _simplify_licenses_data(licenses_data: list[dict], use_flict: bool = True) -> list[dict]: - """Simplify a list of license ids/expressions/names to a single string, +def _unify_licenses_data(licenses_data: list[dict], use_flict: bool = True) -> list[dict]: + """Convert a list of license ids/expressions/names to a single string, either an expression or a name""" # Case 1: no data @@ -92,15 +92,16 @@ def _shorten_cdx_licenses_item(licenses: list, use_flict: bool = True) -> list: licdata, ) - simplified_license_data = _simplify_licenses_data(collection, use_flict=use_flict) + simplified_license_data = _unify_licenses_data(collection, use_flict=use_flict) return _license_short_to_valid_cdx_item(simplified_license_data) def extract_items_from_component(component: dict, items: list, use_flict: bool) -> dict: """Extract certain items from a single component of a CycloneDX SBOM""" - logging.debug( - "Handling component: purl = %s, name = %s", component.get("purl"), component.get("name") - ) + # Very noisy logging, disabled + # logging.debug( + # "Handling component: purl = %s, name = %s", component.get("purl"), component.get("name") + # ) extraction = {} # Loop requested data points for extraction for item in items: diff --git a/complassist/main.py b/complassist/main.py index d5d44ba..109f309 100644 --- a/complassist/main.py +++ b/complassist/main.py @@ -201,7 +201,7 @@ def configure_logger(args) -> logging.Logger: return log -def main(): # pylint: disable=too-many-branches +def main(): # pylint: disable=too-many-branches, too-many-statements """Main function""" args = parser.parse_args() @@ -262,7 +262,7 @@ def main(): # pylint: disable=too-many-branches pass # Suggest possible outbound licenses based on detected licenses in an SBOM - if args.licensing_command == "outbound": + elif args.licensing_command == "outbound": outbound_candidates = get_outbound_candidate( sbom_path=args.file, simplify=not args.no_simplify )