From 46b5a9b69174ed761d97f1513b057d149e8f8396 Mon Sep 17 00:00:00 2001
From: Jonas Witschel <diabonas@archlinux.org>
Date: Sat, 30 Oct 2021 14:04:17 +0200
Subject: [PATCH] extractors/oracle.py: add extractor for Oracle Critical Patch
 Updates

The descriptions are autogenerated by Oracle and need to be edited manually,
but that is still quicker than having to copy and paste everything by hand.
---
 README.md              | 18 +++++++++-
 extractors/__init__.py |  2 ++
 extractors/oracle.py   | 77 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 extractors/oracle.py

diff --git a/README.md b/README.md
index 9489ce5..9c1868d 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ Security Tracker](https://github.com/archlinux/arch-security-tracker) easier.
 
 * CVE entry extraction from multiple sources (currently
   [Chromium](#chromium), [GitLab](#gitlab), [Mozilla](#mozilla),
-  [NVD](#national-vulnerability-database-nvd) and [WebKitGTK](#webkitgtk)) into
+  [NVD](#national-vulnerability-database-nvd), [Oracle](#oracle) and [WebKitGTK](#webkitgtk)) into
   a JSON format consumable by the tracker
 * Automatic batch addition of the extracted CVE entries to the tracker
 
@@ -91,6 +91,22 @@ and attack vector are derived from the CVSS v3 if present (this usually takes a
 few day after the CVE has been published). The type of the vulnerability is
 always set to "Unknown" and needs to be filled by hand by the user.
 
+### Oracle
+
+```sh
+./trackertools extract oracle URL...
+```
+extracts CVEs issued by [Oracle](https://www.oracle.com/security-alerts/),
+where `URL` is the URL of the *verbose text form* of an Oracle Critical Patch
+Update (CPU), e.g.
+<https://www.oracle.com/security-alerts/cpuoct2021verbose.html>.
+
+The description is taken verbatim from the advisory (and is therefore quite low
+quality because these texts are autogenerated). Severity and attack vector are
+derived from the CVSS v3. The URL of the advisory is used as a reference. The
+type of the vulnerability is always set to "Unknown" and needs to be filled in
+by hand by the user.
+
 ### WebKitGTK
 
 ```sh
diff --git a/extractors/__init__.py b/extractors/__init__.py
index 8e3bbb3..e5fa0f9 100644
--- a/extractors/__init__.py
+++ b/extractors/__init__.py
@@ -6,6 +6,7 @@
 from .gitlab import gitlab
 from .mozilla import mozilla
 from .nvd import nvd
+from .oracle import oracle
 from .webkitgtk import webkitgtk
 
 
@@ -19,4 +20,5 @@ def extract():
 extract.add_command(gitlab)
 extract.add_command(mozilla)
 extract.add_command(nvd)
+extract.add_command(oracle)
 extract.add_command(webkitgtk)
diff --git a/extractors/oracle.py b/extractors/oracle.py
new file mode 100644
index 0000000..aff3818
--- /dev/null
+++ b/extractors/oracle.py
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: MIT
+
+import click
+import json
+import re
+import requests
+from lxml import etree
+
+
+def parse_severity(cvss):
+    base_score = re.search("Base Score ([0-9]+\.[0-9])", cvss)
+    if base_score is None:
+        return "Unknown"
+    else:
+        base_score = float(base_score.group(1))
+
+    if base_score < 4.0:
+        return "Low"
+    elif base_score < 7.0:
+        return "Medium"
+    elif base_score < 9.0:
+        return "High"
+    else:
+        return "Critical"
+
+
+def parse_vector(cvss):
+    vector = re.search("/AV:([NALP])/", cvss)
+    if vector is None:
+        return "Unknown"
+    else:
+        vector = vector.group(1)
+
+    if vector in ["L", "P"]:
+        return "Local"
+    else:
+        return "Remote"
+
+
+@click.command()
+@click.argument("url", nargs=-1)
+@click.option(
+    "--output",
+    type=click.File("w"),
+    default=click.get_text_stream("stdout"),
+    help="Output file with CVEs in JSON format (defaults to stdout)",
+)
+def oracle(url, output):
+    """Extract CVEs for Oracle from their Critical Patch Updates (CPUs)"""
+    cves = []
+
+    for advisory_url in url:
+        advisory = requests.get(advisory_url).content
+        advisory = etree.fromstring(advisory, etree.HTMLParser())
+
+        cve_ids = advisory.xpath('//a[starts-with(@id, "CVE-")]/text()')
+
+        cves += [
+            {
+                "name": cve,
+                "type": "Unknown",
+                "severity": parse_severity(
+                    cvss := advisory.xpath(
+                        f'string((//a[@id="{cve}"])[1]/parent::td/following-sibling::td/text()[contains(., "CVSS 3.1")])'
+                    )
+                ),
+                "vector": parse_vector(cvss),
+                "description": advisory.xpath(
+                    f'string((//a[@id="{cve}"])[1]/parent::td/following-sibling::td/text()[1])'
+                ),
+                "references": [f"{advisory_url}#{cve}"],
+                "notes": None,
+            }
+            for cve in cve_ids
+        ]
+
+    output.write(json.dumps(cves, indent=2, ensure_ascii=False))