From 22dae3dffa1654c6ee67597a97e84bd18001cfe5 Mon Sep 17 00:00:00 2001
From: Fabrice Fontaine
Date: Sun, 17 Dec 2023 15:05:29 +0100
Subject: [PATCH] feat: disable metrics by default

Since commit 06b55f7e2aab435e7964fec90ebb30e15ed87585, a database query
is made for every single CVE to retrieve its metrics. This behavior
dramatically increases processing time. For example, scanning
apache2-bin_2.4.10-10+deb8u12_amd64.deb.tar.gz took 3 seconds before
that change; it now takes 36 seconds to extract the metrics of the 60
CVEs related to apache 2.4.10. Processing time grows quickly with the
number of CVEs, so the slowdown is even greater when scanning a
directory (e.g. an IoT firmware dump).

I assume that most users are not that interested in those metrics and
would prefer a quick result, so metrics are now retrieved only when the
user asks for them through the new --metrics option.

Signed-off-by: Fabrice Fontaine
---
 README.md                              |  2 +
 cve_bin_tool/cli.py                    |  8 +++
 cve_bin_tool/cve_scanner.py            | 53 ++++++++------
 cve_bin_tool/output_engine/__init__.py |  5 ++
 cve_bin_tool/output_engine/console.py  | 96 ++++++++++++++------
 test/test_output_engine.py             |  6 ++
 6 files changed, 101 insertions(+), 69 deletions(-)

diff --git a/README.md b/README.md
index 915613b6ce..94dc85c54a 100644
--- a/README.md
+++ b/README.md
@@ -455,6 +455,8 @@ Output:
                         specify multiple output formats by using comma (',') as a separator
                         note: don't use spaces between comma (',') and the output formats.
   -c CVSS, --cvss CVSS  minimum CVSS score (as integer in range 0 to 10) to report (default: 0)
+  --metrics
+                        retrieve CVE metrics (e.g., EPSS) (default: False)
   --epss-percentile
                         minimum EPSS percentile of CVE range between 0 to 100 to report (input value can also be floating point) (default: 0)
   --epss-probability
diff --git a/cve_bin_tool/cli.py b/cve_bin_tool/cli.py
index 43f532aef8..b1a619d28f 100644
--- a/cve_bin_tool/cli.py
+++ b/cve_bin_tool/cli.py
@@ -269,6 +269,12 @@ def main(argv=None):
         help="minimum CVE severity to report (default: low)",
         default="low",
     )
+    output_group.add_argument(
+        "--metrics",
+        action="store_true",
+        help="check for metrics (e.g., EPSS) from found cves",
+        default=False,
+    )
     output_group.add_argument(
         "--epss-percentile",
         action="store",
@@ -893,6 +899,7 @@ def main(argv=None):
     with CVEScanner(
         score=score,
+        check_metrics=args["metrics"],
         epss_percentile=epss_percentile,
         epss_probability=epss_probability,
         check_exploits=args["exploits"],
@@ -1017,6 +1024,7 @@ def main(argv=None):
         merge_report=merged_reports,
         affected_versions=args["affected_versions"],
         exploits=args["exploits"],
+        metrics=args["metrics"],
         detailed=args["detailed"],
         vex_filename=args["vex"],
         sbom_filename=args["sbom_output"],
diff --git a/cve_bin_tool/cve_scanner.py b/cve_bin_tool/cve_scanner.py
index 7b359ed53f..a941e8f253 100644
--- a/cve_bin_tool/cve_scanner.py
+++ b/cve_bin_tool/cve_scanner.py
@@ -38,6 +38,7 @@ class CVEScanner:
     def __init__(
         self,
         score: int = 0,
+        check_metrics: bool = False,
         epss_percentile: float = 0.0,
         epss_probability: float = 0.0,
         logger: Logger = None,
@@ -49,6 +50,7 @@ def __init__(
         self.logger = logger or LOGGER.getChild(self.__class__.__name__)
         self.error_mode = error_mode
         self.score = score
+        self.check_metrics = check_metrics
         self.epss_percentile = epss_percentile
         self.epss_probability = epss_probability
         self.products_with_cve = 0
@@ -243,29 +245,34 @@ def get_cves(self, product_info: ProductInfo, triage_data: TriageData):
                     row_dict["cvss_version"] or row["cvss_version"]
                 )
                 # executing query to get metric for CVE
-                metric_result = self.metric(
-                    (row["cve_number"],),
-                    self.epss_percentile,
-                    self.epss_probability,
-                )
-                # row_dict doesnt have metric as key. As it based on result from query on cve_severity table
-                # declaring row_dict[metric]
-                row_dict["metric"] = {}
-                # looping for result of query for metrics.
-                for key, value in metric_result.items():
-                    row_dict["metric"][key] = [
-                        value[0],
-                        value[1],
-                    ]
-                # checking if epss percentile filter is applied
-                if self.epss_percentile > 0.0 or self.epss_probability > 0.0:
-                    # if epss filter is applied and condition is failed to satisfy row_dict["metric"] will be empty
-                    if not row_dict["metric"]:
-                        # continue to not include that particular cve
-                        continue
-                self.logger.debug(
-                    f'metrics found in CVE {row_dict["cve_number"]} is {row_dict["metric"]}'
-                )
+                if self.check_metrics:
+                    metric_result = self.metric(
+                        (row["cve_number"],),
+                        self.epss_percentile,
+                        self.epss_probability,
+                    )
+                    # row_dict doesnt have metric as key. As it based on result from query on
+                    # cve_severity table declaring row_dict[metric]
+                    row_dict["metric"] = {}
+                    # looping for result of query for metrics.
+                    for key, value in metric_result.items():
+                        row_dict["metric"][key] = [
+                            value[0],
+                            value[1],
+                        ]
+                    # checking if epss percentile filter is applied
+                    if (
+                        self.epss_percentile > 0.0
+                        or self.epss_probability > 0.0
+                    ):
+                        # if epss filter is applied and condition is failed to satisfy
+                        # row_dict["metric"] will be empty
+                        if not row_dict["metric"]:
+                            # continue to not include that particular cve
+                            continue
+                    self.logger.debug(
+                        f'metrics found in CVE {row_dict["cve_number"]} is {row_dict["metric"]}'
+                    )
                 cve = CVE(**row_dict)
                 cves.append(cve)
diff --git a/cve_bin_tool/output_engine/__init__.py b/cve_bin_tool/output_engine/__init__.py
index 1913ea43d3..2c66fecf7e 100644
--- a/cve_bin_tool/output_engine/__init__.py
+++ b/cve_bin_tool/output_engine/__init__.py
@@ -622,6 +622,7 @@ class OutputEngine:
         detailed (bool)
         vex_filename (str)
         exploits (bool)
+        metrics (bool)
         all_product_data
         sbom_filename (str)
         sbom_type (str)
@@ -660,6 +661,7 @@ def __init__(
         detailed: bool = False,
         vex_filename: str = "",
         exploits: bool = False,
+        metrics: bool = False,
         all_product_data=None,
         sbom_filename: str = "",
         sbom_type: str = "spdx",
@@ -687,6 +689,7 @@ def __init__(
         self.detailed = detailed
         self.vex_filename = vex_filename
         self.exploits = exploits
+        self.metrics = metrics
         self.all_product_data = all_product_data
         self.sbom_filename = sbom_filename
         self.sbom_type = sbom_type
@@ -725,6 +728,7 @@ def output_cves(self, outfile, output_type="console"):
                 self.merge_report,
                 self.affected_versions,
                 self.exploits,
+                self.metrics,
             )
         elif output_type == "html":
             output_html(
@@ -748,6 +752,7 @@ def output_cves(self, outfile, output_type="console"):
                 self.time_of_last_update,
                 self.affected_versions,
                 self.exploits,
+                self.metrics,
                 self.all_product_data,
                 self.offline,
                 outfile,
diff --git a/cve_bin_tool/output_engine/console.py b/cve_bin_tool/output_engine/console.py
index fc31632ace..5bf364d7fd 100644
--- a/cve_bin_tool/output_engine/console.py
+++ b/cve_bin_tool/output_engine/console.py
@@ -50,6 +50,7 @@ def _output_console_nowrap(
     time_of_last_update: datetime,
    affected_versions: int,
     exploits: bool = False,
+    metrics: bool = False,
     all_product_data=None,
     offline: bool = False,
     width: int = None,
@@ -218,8 +219,9 @@ def _output_console_nowrap(
     table.add_column("Source")
     table.add_column("Severity")
     table.add_column("Score (CVSS Version)")
-    table.add_column("EPSS probability")
table.add_column("EPSS percentile") + if metrics: + table.add_column("EPSS probability") + table.add_column("EPSS percentile") if affected_versions != 0: table.add_column("Affected Versions") @@ -239,9 +241,10 @@ def _output_console_nowrap( Text.styled(cve_data["source"], color), Text.styled(cve_data["severity"], color), Text.styled(cvss_text, color), - Text.styled(cve_data["epss_probability"], color), - Text.styled(cve_data["epss_percentile"], color), ] + if metrics: + cells.append(Text.styled(cve_data["epss_probability"], color)) + cells.append(Text.styled(cve_data["epss_percentile"], color)) if affected_versions != 0: cells.append(Text.styled(cve_data["affected_versions"], color)) table.add_row(*cells) @@ -338,46 +341,47 @@ def validate_cell_length(cell_name, cell_type): # Print the table to the console console.print(table) - table = Table() - # Add Head Columns to the Table - table.add_column("CVE") - table.add_column("CVSS_version") - table.add_column("CVSS_score") - table.add_column("EPSS_probability") - table.add_column("EPSS_percentile") - color = "green" - - cve_by_metrics: defaultdict[Remarks, list[dict[str, str]]] = defaultdict(list) - # group cve_data by its remarks and separately by paths - for product_info, cve_data in all_cve_data.items(): - for cve in cve_data["cves"]: - probability = "-" - percentile = "-" - for metric, field in cve.metric.items(): - if metric == "EPSS": - probability = round(field[0], 5) - percentile = field[1] - cve_by_metrics[cve.remarks].append( - { - "cve_number": cve.cve_number, - "cvss_version": str(cve.cvss_version), - "cvss_score": str(cve.score), - "epss_probability": str(probability), - "epss_percentile": str(percentile), - "severity": cve.severity, - } - ) + if metrics: + table = Table() + # Add Head Columns to the Table + table.add_column("CVE") + table.add_column("CVSS_version") + table.add_column("CVSS_score") + table.add_column("EPSS_probability") + table.add_column("EPSS_percentile") + color = "green" - for remarks in sorted(cve_by_remarks): - color = remarks_colors[remarks] - for cve in cve_by_metrics[remarks]: - color = cve["severity"].split("-")[0].lower() - cells = [ - Text.styled(cve["cve_number"], color), - Text.styled(cve["cvss_version"], color), - Text.styled(str(cve["cvss_score"]), color), - Text.styled(cve["epss_probability"], color), - Text.styled(cve["epss_percentile"], color), - ] - table.add_row(*cells) - console.print(table) + cve_by_metrics: defaultdict[Remarks, list[dict[str, str]]] = defaultdict(list) + # group cve_data by its remarks and separately by paths + for product_info, cve_data in all_cve_data.items(): + for cve in cve_data["cves"]: + probability = "-" + percentile = "-" + for metric, field in cve.metric.items(): + if metric == "EPSS": + probability = round(field[0], 5) + percentile = field[1] + cve_by_metrics[cve.remarks].append( + { + "cve_number": cve.cve_number, + "cvss_version": str(cve.cvss_version), + "cvss_score": str(cve.score), + "epss_probability": str(probability), + "epss_percentile": str(percentile), + "severity": cve.severity, + } + ) + + for remarks in sorted(cve_by_remarks): + color = remarks_colors[remarks] + for cve in cve_by_metrics[remarks]: + color = cve["severity"].split("-")[0].lower() + cells = [ + Text.styled(cve["cve_number"], color), + Text.styled(cve["cvss_version"], color), + Text.styled(str(cve["cvss_score"]), color), + Text.styled(cve["epss_probability"], color), + Text.styled(cve["epss_percentile"], color), + ] + table.add_row(*cells) + console.print(table) diff --git 
diff --git a/test/test_output_engine.py b/test/test_output_engine.py
index dde479a4bc..b2ae760d4a 100644
--- a/test/test_output_engine.py
+++ b/test/test_output_engine.py
@@ -930,6 +930,7 @@ def test_output_console(self):
         time_of_last_update = datetime.today()
         affected_versions = 0
         exploits = False
+        metrics = True
         console = Console(file=self.mock_file)
         outfile = None
         all_product_data = None
@@ -940,6 +941,7 @@ def test_output_console(self):
             time_of_last_update,
             affected_versions,
             exploits,
+            metrics,
             all_product_data,
             True,
             120,
@@ -973,6 +975,7 @@ def test_output_console_affected_versions(self):
         time_of_last_update = datetime.today()
         affected_versions = 1
         exploits = False
+        metrics = True
         console = Console(file=self.mock_file)
         outfile = None
         all_product_data = None
@@ -983,6 +986,7 @@ def test_output_console_affected_versions(self):
             time_of_last_update,
             affected_versions,
             exploits,
+            metrics,
             all_product_data,
             True,
             120,
@@ -1022,6 +1026,7 @@ def test_output_console_outfile(self):
         time_of_last_update = datetime.today()
         affected_versions = 0
         exploits = False
+        metrics = True
         outfile = tmpf.name
         all_product_data = None
@@ -1031,6 +1036,7 @@ def test_output_console_outfile(self):
             time_of_last_update,
             affected_versions,
             exploits,
+            metrics,
             all_product_data,
             True,
             120,
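
With this change applied, a plain scan skips the per-CVE metric queries and the
EPSS output, and passing --metrics restores the previous behaviour. A rough
usage sketch (assuming the standard cve-bin-tool entry point; the target is the
archive from the timing example above):

    # fast scan, metric lookups disabled (new default)
    cve-bin-tool apache2-bin_2.4.10-10+deb8u12_amd64.deb.tar.gz

    # opt back in to metric lookups, EPSS columns and the metrics summary table
    cve-bin-tool --metrics apache2-bin_2.4.10-10+deb8u12_amd64.deb.tar.gz

Note that the EPSS percentile/probability filtering now happens inside the
check_metrics branch of CVEScanner.get_cves(), so --epss-percentile and
--epss-probability are only expected to take effect when --metrics is also
given.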