From 4bf057b32482c06ea8c45b7dc8f0371917064590 Mon Sep 17 00:00:00 2001
From: Rafa Faura <rafael.fcucalon@gmail.com>
Date: Sat, 25 Nov 2023 17:45:58 +0100
Subject: [PATCH] Feature: The export filename will now contain the port (if
 specified).

---
 README.md |  2 +-
 humble.py | 28 ++++++++++++++++------------
 2 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/README.md b/README.md
index 7d17389..90ddedc 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 <a target="_blank" href="https://www.python.org/downloads/" title="Minimum Python version required to run this tool"><img src="https://img.shields.io/badge/Python-%3E%3D3.9-blue?labelColor=343b41"></a>
 <a target="_blank" href="LICENSE" title="License of this tool"><img src="https://img.shields.io/badge/License-MIT-blue.svg?labelColor=343b41"></a>
 <a target="_blank" href="https://github.com/rfc-st/humble/releases" title="Latest release of this tool"><img src="https://img.shields.io/github/v/release/rfc-st/humble?display_name=release&label=Latest%20release&labelColor=343b41"></a>
-<a target="_blank" href="https://github.com/rfc-st/humble/commits/master" title="Latest commit of this tool"><img src="https://img.shields.io/badge/Latest_Commit-2023--11--24-blue.svg?labelColor=343b41"></a>
+<a target="_blank" href="https://github.com/rfc-st/humble/commits/master" title="Latest commit of this tool"><img src="https://img.shields.io/badge/Latest_Commit-2023--11--25-blue.svg?labelColor=343b41"></a>
 <a target="_blank" href="https://github.com/rfc-st/humble/actions?query=workflow%3ACodeQL" title="Results of the last analysis of this tool with CodeQL"><img src="https://github.com/rfc-st/humble/workflows/CodeQL/badge.svg"></a>
 <a target="_blank" href="https://owasp.org/www-project-secure-headers/#div-technical" title="Tool accepted as a technical resource for OWASP"><img src="https://img.shields.io/badge/OWASP-Resource-blue?labelColor=343b41"></a>
 <a target="_blank" href="https://www.kali.org/tools/humble/" title="Tool accepted in Kali"><img src="https://img.shields.io/badge/Kali%20Linux-Tool-blue?labelColor=343b41"></a>
diff --git a/humble.py b/humble.py
index 021ad5b..224125d 100644
--- a/humble.py
+++ b/humble.py
@@ -41,6 +41,7 @@
 from time import time
 from shlex import quote
 from datetime import datetime
+from urllib.parse import urlparse
 from os import linesep, path, remove
 from colorama import Fore, Style, init
 from collections import Counter, defaultdict
@@ -85,7 +86,7 @@
 
 export_date = datetime.now().strftime("%Y%m%d")
 now = datetime.now().strftime("%Y/%m/%d - %H:%M:%S")
-version = datetime.strptime('2023-11-24', '%Y-%m-%d').date()
+version = datetime.strptime('2023-11-25', '%Y-%m-%d').date()
 
 
 class PDF(FPDF):
@@ -906,9 +907,8 @@ def make_http_request():
     try:
         start_time = time()
         uri_safe = quote(URL)
-        # So dirty, I know!: if args.redirects ('-df' param) is not provided
-        # the last redirection will be the one analyzed (arg.redirects=True).
-        # If this is not the case, the exact URL indicated will be analyzed.
+        # If '-df' (args.redirects) param is provided the exact URL will be
+        # analyzed; otherwise the last redirected URL will be analyzed.
         r = requests.get(uri_safe, allow_redirects=not args.redirects,
                          verify=False, headers=c_headers, timeout=15)
         elapsed_time = time() - start_time
@@ -993,8 +993,8 @@ def custom_help_formatter(prog):
 displaying analyses, messages and errors (if omitted it will be in English)")
 parser.add_argument("-o", dest='output', choices=['html', 'json', 'pdf',
                                                   'txt'], help="save analysis \
-to 'URL_headers_yyyymmdd.ext' file (.json files will contain a brief analysis)\
-")
+to 'scheme_host_port_yyyymmdd.ext' file (.json files will contain a brief \
+analysis)")
 parser.add_argument("-r", dest='ret', action="store_true", help="show full \
  HTTP response headers and a detailed analysis")
 parser.add_argument('-u', type=str, dest='URL', help="schema and URL to \
@@ -1080,13 +1080,17 @@ def custom_help_formatter(prog):
 ext = "t.txt" if args.output in ['html', 'json', 'pdf'] else ".txt"
 
 if args.output:
+    # tldextract seems more reliable for the subdomain, domain and suffix;
+    # urlparse is used only for the scheme and the port (if specified).
     orig_stdout = sys.stdout
-    name_s = tldextract.extract(URL)
-    name_sch = URL.split(":", 1)[0]
-    name_sub = name_s.subdomain + '.' if name_s.subdomain else ''
-    name_dom = name_s.domain
-    name_tld = name_s.suffix
-    name_e = f"{name_sch}_{name_sub}{name_dom}.{name_tld}_{export_date}{ext}"
+    url_obj = tldextract.extract(URL)
+    url_sch = urlparse(URL).scheme
+    url_sub = f"_{url_obj.subdomain}." if url_obj.subdomain else '_'
+    url_dom = f"{url_obj.domain}."
+    url_tld = url_obj.suffix
+    url_prt = f"_{urlparse(URL).port}_" if urlparse(URL).port is not None \
+        else '_'
+    name_e = f"{url_sch}{url_sub}{url_dom}{url_tld}{url_prt}{export_date}{ext}"
     f = open(name_e, 'w', encoding='utf8')
     sys.stdout = f