From 57a9125b56bdd7371722bec4582e3011a37ab040 Mon Sep 17 00:00:00 2001
From: Rafa Faura
Date: Sat, 4 Nov 2023 19:15:45 +0100
Subject: [PATCH] Feature: Warn about unreliable analyses

An analysis is considered unreliable when the URL takes more than 5
seconds to respond.
---
 README.md           |   4 +-
 humble.py           | 104 ++++++++++++++++++++++++++++++++------------
 l10n/details.txt    |   6 +++
 l10n/details_es.txt |   6 +++
 4 files changed, 89 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index 31d949d..4228cb7 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
-
+
@@ -52,7 +52,7 @@ HTTP Headers Analyzer
<br />
:heavy_check_mark: Saves each analysis, showing (at the end) the improvements or deficiencies in relation to the last one.
:heavy_check_mark: Shows analysis statistics: either against a specific URL or all of them.
:heavy_check_mark: Shows fingerprint statistics: either against a specific term or the Top 20.
-:heavy_check_mark: Code reviewed via Flake8, SonarLint and Sourcery.
+:heavy_check_mark: Code reviewed via Flake8, SonarLint and Sourcery.<br>
:heavy_check_mark: Tested, one by one, on thousands of URLs.
:heavy_check_mark: Fully tested and working on Windows (10 20H2 - 19042.985) and Linux (Kali 2021.1).
:heavy_check_mark: All code under one of the most permissive licenses: MIT.<br>
diff --git a/humble.py b/humble.py
index 1bfa1e5..c7a3fe0 100644
--- a/humble.py
+++ b/humble.py
@@ -48,12 +48,14 @@
 import requests
 import contextlib
 import tldextract
+import concurrent.futures

 A_FILE = 'analysis_h.txt'
 BOLD_S = ("[0.", "HTTP R", "[1.", "[2.", "[3.", "[4.", "[5.", "[Cabeceras")
 BRI_R = Style.BRIGHT + Fore.RED
 CAN_S = ': https://caniuse.com/?search='
 CDN_E = [520, 521, 522, 523, 524, 525, 526, 527, 530]
+CLE_O = '\x1b[1A\x1b[2K\x1b[1A\x1b[2K\x1b[1A\x1b[2K'
 CLI_E = [400, 401, 402, 403, 405, 406, 409, 410, 411, 412, 413, 414, 415,
          416, 417, 421, 422, 423, 424, 425, 426, 428, 429, 431, 451]
 F_FILE = 'fingerprint.txt'
@@ -76,7 +78,7 @@
 export_date = datetime.now().strftime("%Y%m%d")
 now = datetime.now().strftime("%Y/%m/%d - %H:%M:%S")
-version = datetime.strptime('2023-11-02', '%Y-%m-%d').date()
+version = datetime.strptime('2023-11-04', '%Y-%m-%d').date()


 class PDF(FPDF):
@@ -569,11 +571,19 @@ def csp_print_warnings(csp_values, csp_title, csp_desc, csp_refs):

 def clean_output():
     # Kudos to Aniket Navlur!!!: https://stackoverflow.com/a/52590238
-    sys.stdout.write('\x1b[1A\x1b[2K\x1b[1A\x1b[2K\x1b[1A\x1b[2K')
+    sys.stdout.write(CLE_O)


-def print_path(filename):
-    clean_output()
+def clean_output_r():
+    sys.stdout.write(CLE_O)
+    sys.stdout.write('\x1b[1A\x1b[2K\x1b[1A\x1b[2K')
+
+
+def print_path(filename, reliable):
+    if reliable:
+        clean_output_r()
+    else:
+        clean_output()
     print("")
     print_detail_l('[report]')
     print(path.abspath(filename))
@@ -597,9 +607,12 @@ def print_header_fng(header):
     print(f"{BRI_R} {header}")


-def print_summary():
+def print_summary(reliable):
     if not args.output:
-        clean_output()
+        if reliable:
+            clean_output_r()
+        else:
+            clean_output()
         print("")
         banner = '''  _                     _     _
 | |__  _   _ _ __ ___ | |__ | | ___
@@ -622,6 +635,8 @@
     if detail := print_detail(id_mode, num_lines=0):
         print(detail)
     print(REF_SRV_E + str(status_code))
+    if reliable:
+        print(get_detail('[analysis_wait_note]'))


 def print_headers():
@@ -737,24 +752,24 @@ def generate_json(name_e, name_p):
         txt_content = source_txt.read()
         txt_sections = re.split(r'\[(\d+\.\s[^\]]+)\]\n', txt_content)[1:]
         data = {}
-        parse_main_txt_sections(txt_sections, data, section0, section5)
+        parse_txt_sections(txt_sections, data, section0, section5)
         json_data = json.dumps(data, indent=4, ensure_ascii=False)
         final_json.write(json_data)


-def parse_main_txt_sections(txt_sections, data, section0, section5):
+def parse_txt_sections(txt_sections, data, section0, section5):
     for i in range(0, len(txt_sections), 2):
         json_section = f"[{txt_sections[i]}]"
         json_content = txt_sections[i + 1].strip()
         if json_section == section5:
             json_content = json_content.split('.:')[0].strip()
         json_lines = json_content.split('\n')
-        json_data = write_main_json_sections(section0, section5, json_section,
-                                             json_lines)
+        json_data = write_json_sections(section0, section5, json_section,
+                                        json_lines)
         data[json_section] = json_data


-def write_main_json_sections(section0, section5, json_section, json_lines):
+def write_json_sections(section0, section5, json_section, json_lines):
     if json_section in (section0, section5):
         json_data = {}
         for line in json_lines:
@@ -800,17 +815,25 @@ def handle_http_error(http_code, id_mode):
     sys.exit()


-def request_exceptions():
-    headers = {}
-    status_c = None
+def make_http_request():
     try:
-        # Yes: Server certificates should be verified during SSL/TLS
-        # connections. Despite this, I think 'verify=False' would benefit
-        # analysis of URLs with self-signed certificates, associated with
-        # development environments, etc.
+        start_time = time()
         r = requests.get(URL, verify=False, headers=c_headers, timeout=15)
-        status_c = r.status_code
-        headers = r.headers
+        elapsed_time = time() - start_time
+        return r, elapsed_time
+    except requests.exceptions.RequestException:
+        return None, 0.0
+
+
+def wait_http_request(future):
+    with contextlib.suppress(concurrent.futures.TimeoutError):
+        future.result(timeout=5)
+
+
+def handle_http_exceptions(r, exception_d):
+    if r is None:
+        return
+    try:
         r.raise_for_status()
     except requests.exceptions.HTTPError as err_http:
         http_code = err_http.response.status_code
@@ -820,9 +843,32 @@
         ex = exception_d.get(type(e))
         if ex and (not callable(ex) or ex(e)):
             detail_exceptions(ex, e)
-    except requests.exceptions.RequestException as err:
-        raise SystemExit from err
-    return headers, status_c
+    except requests.exceptions.RequestException as e:
+        raise SystemExit from e
+
+
+def request_exceptions():
+    headers = {}
+    status_c = None
+    reliable = None
+    request_time = 0.0
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        future = executor.submit(make_http_request)
+        wait_http_request(future)
+
+        if not future.done():
+            print(get_detail('[analysis_wait]'))
+            reliable = 'No'
+
+        r, request_time = future.result()
+        handle_http_exceptions(r, exception_d)
+
+    if r is not None:
+        status_c = r.status_code
+        headers = r.headers
+
+    return headers, status_c, reliable


 def custom_help_formatter(prog):
@@ -937,7 +983,7 @@
 c_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
 AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'}

-headers, status_code = request_exceptions()
+headers, status_code, reliable = request_exceptions()

 # Export analysis
 ext = "t.txt" if args.output in ['html', 'json', 'pdf'] else ".txt"
@@ -952,7 +998,7 @@
     f = open(name_e, 'w', encoding='utf8')
     sys.stdout = f

-print_summary()
+print_summary(reliable)
 print_headers()

 # Report - 1. Missing HTTP Security Headers
@@ -1540,10 +1586,10 @@
     sys.stdout = orig_stdout
     f.close()
 if args.output == 'txt':
-    print_path(name_e)
+    print_path(name_e, reliable)
 elif args.output == 'json':
     generate_json(name_e, name_p)
-    print_path(name_p)
+    print_path(name_p, reliable)
     remove(name_e)
 elif args.output == 'pdf':
     pdf = PDF()
@@ -1566,7 +1612,7 @@
             pdf.multi_cell(197, 2.6, txt=x, align='L')

     pdf.output(name_p)
-    print_path(name_p)
+    print_path(name_p, reliable)
     f.close()
     remove(name_e)
 elif args.output == 'html':
@@ -1643,5 +1689,5 @@
             output.write(ln)

     output.write(footer)
-    print_path(name_p)
+    print_path(name_p, reliable)
     remove(name_e)
diff --git a/l10n/details.txt b/l10n/details.txt
index 63c25c8..9bcb8e3 100644
--- a/l10n/details.txt
+++ b/l10n/details.txt
@@ -629,6 +629,12 @@ HTTP Response Headers
 [analysis]
  Analyzing URL, please wait ...

+[analysis_wait]
+ (The analysis is taking longer than usual, perhaps due to network connectivity, a WAF, etc., and may not be reliable)
+
+[analysis_wait_note]
+ Note : The analysis may not be reliable, given how long the URL took to respond.
+
 [analysis_output]
  Analyzing URL and saving the report, please wait ...
diff --git a/l10n/details_es.txt b/l10n/details_es.txt
index 0115df4..3385c09 100644
--- a/l10n/details_es.txt
+++ b/l10n/details_es.txt
@@ -629,6 +629,12 @@ Cabeceras de respuesta HTTP
 [analysis]
  Analizando URL, espera por favor ...

+[analysis_wait]
+ (El análisis tarda más de lo habitual, quizá por conectividad de red, un WAF, etc., y puede no ser fiable)
+
+[analysis_wait_note]
+ Nota : El análisis puede no ser fiable por el tiempo que tardó en responder la URL.
+
 [analysis_output]
  Analizando URL y guardando el informe, espera por favor ...
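
The mechanism behind the new warning, for reference: request_exceptions() submits make_http_request() to a ThreadPoolExecutor, waits up to 5 seconds for the future, and flags the analysis as unreliable if the request is still pending; the request itself keeps running until the 15-second 'requests' timeout. Below is a minimal, self-contained sketch of that pattern, not code from humble.py: the URL, the fetch() helper and the printed message are placeholders.

    import concurrent.futures
    import contextlib

    import requests

    WARN_AFTER = 5     # seconds before the analysis is flagged as unreliable
    HARD_TIMEOUT = 15  # overall 'requests' timeout, as in the patch


    def fetch(url):
        # Like make_http_request(): swallow request errors and return None.
        try:
            return requests.get(url, timeout=HARD_TIMEOUT)
        except requests.exceptions.RequestException:
            return None


    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(fetch, 'https://example.com')  # placeholder
        # A TimeoutError here only means the request is still in flight,
        # so it is suppressed rather than treated as a failure.
        with contextlib.suppress(concurrent.futures.TimeoutError):
            future.result(timeout=WARN_AFTER)
        if not future.done():
            print('(Analysis is taking longer than usual; it may be unreliable)')
        response = future.result()  # blocks until the worker thread finishes

Note the design choice this implies: a slow request is flagged, never cancelled. concurrent.futures cannot abort a running thread, so the hard stop still comes from the 15-second timeout passed to requests.get().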
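
On the console-clearing side: in CLE_O, each '\x1b[1A' moves the cursor up one line and each '\x1b[2K' erases that entire line, so writing CLE_O removes the last three lines printed; clean_output_r() writes two further pairs, presumably to also erase the '[analysis_wait]' warning. A tiny standalone sketch of the effect, with placeholder output lines:

    import sys

    print('Analyzing URL, please wait ...')
    print('placeholder line 2')
    print('placeholder line 3')
    # '\x1b[1A' = cursor up one line, '\x1b[2K' = erase that whole line;
    # three repetitions clear the three lines above, like writing CLE_O.
    sys.stdout.write('\x1b[1A\x1b[2K' * 3)
    sys.stdout.flush()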