From d74c8117918fcc85dff35d4402fe0b5bc7239c16 Mon Sep 17 00:00:00 2001
From: 4shen0ne <4shen.01@gmail.com>
Date: Sat, 7 Sep 2024 15:22:59 +0800
Subject: [PATCH 01/19] support asynchronous mode

---
 lib/connection/asynchronous/__init__.py  |   0
 lib/connection/asynchronous/requester.py | 226 ++++++++++++++++++++++
 lib/connection/asynchronous/response.py  |  67 +++++++
 lib/controller/controller.py             |  51 ++++-
 lib/core/asynchronous/__init__.py        |   0
 lib/core/asynchronous/fuzzer.py          | 230 +++++++++++++++++++++++
 lib/core/asynchronous/scanner.py         | 158 ++++++++++++++++
 lib/core/data.py                         |   1 +
 lib/core/options.py                      |   1 +
 lib/parse/cmdline.py                     |   6 +
 10 files changed, 739 insertions(+), 1 deletion(-)
 create mode 100644 lib/connection/asynchronous/__init__.py
 create mode 100644 lib/connection/asynchronous/requester.py
 create mode 100644 lib/connection/asynchronous/response.py
 create mode 100644 lib/core/asynchronous/__init__.py
 create mode 100644 lib/core/asynchronous/fuzzer.py
 create mode 100644 lib/core/asynchronous/scanner.py

diff --git a/lib/connection/asynchronous/__init__.py b/lib/connection/asynchronous/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lib/connection/asynchronous/requester.py b/lib/connection/asynchronous/requester.py
new file mode 100644
index 000000000..704ff12ab
--- /dev/null
+++ b/lib/connection/asynchronous/requester.py
@@ -0,0 +1,226 @@
+import random
+import re
+import socket
+from urllib.parse import urlparse
+
+import httpx
+
+from lib.connection.asynchronous.response import Response
+from lib.core.data import options
+from lib.core.decorators import cached
+from lib.core.exceptions import RequestException
+from lib.core.logger import logger
+from lib.core.settings import (
+    PROXY_SCHEMES,
+    RATE_UPDATE_DELAY,
+    READ_RESPONSE_ERROR_REGEX,
+    SCRIPT_PATH,
+)
+from lib.core.structures import CaseInsensitiveDict
+from lib.utils.common import safequote
+from lib.utils.file import FileUtils
+from lib.utils.mimetype import guess_mimetype
+
+
+class HTTPXBearerAuth(httpx.Auth):
+    def __init__(self, token: str) -> None:
+        self.token = token
+
+    def auth_flow(self, request: httpx.Request) -> any:
+        request.headers["Authorization"] = f"Bearer {self.token}"
+        yield request
+
+
+class Requester:
+    def __init__(self) -> None:
+        self._url = None
+        self._proxy_cred = None
+        self._rate = 0
+        self.headers = CaseInsensitiveDict(options["headers"])
+        self.agents = []
+
+        if options["random_agents"]:
+            self._fetch_agents()
+
+        # Guess the mime type of request data if not specified
+        if options["data"] and "content-type" not in self.headers:
+            self.set_header("content-type", guess_mimetype(options["data"]))
+
+        socket_options = []
+        if options["network_interface"]:
+            socket_options.append(
+                (
+                    socket.SOL_SOCKET,
+                    socket.SO_BINDTODEVICE,
+                    options["network_interface"].encode("utf-8"),
+                )
+            )
+
+        transport = httpx.AsyncHTTPTransport(
+            # FIXME: max_connections != thread_count
+            limits=httpx.Limits(max_connections=options["thread_count"]),
+            socket_options=socket_options,
+        )
+
+        cert = (options["cert_file"], options["key_file"])
+
+        self.session = httpx.AsyncClient(
+            verify=False,
+            cert=cert if cert[0] and cert[1] else None,
+            mounts={"http://": transport, "https://": transport},
+            timeout=httpx.Timeout(
+                timeout=options["timeout"],
+                pool=None,
+            ),
+        )
+
+    def _fetch_agents(self) -> None:
+        self.agents = FileUtils.get_lines(
+            FileUtils.build_path(SCRIPT_PATH, "db", "user-agents.txt")
+        )
+
+    def set_url(self, url: str) -> None:
+        self._url = url
+
+    def set_header(self, 
key: str, value: str) -> None: + self.headers[key] = value.lstrip() + + def set_auth(self, type: str, credential: str) -> None: + if type in ("bearer", "jwt"): + self.session.auth = HTTPXBearerAuth(credential) + else: + try: + user, password = credential.split(":", 1) + except ValueError: + user = credential + password = "" + + if type == "basic": + self.session.auth = httpx.BasicAuth(user, password) + elif type == "digest": + self.session.auth = httpx.DigestAuth(user, password) + else: + pass # TODO: HttpNtlmAuth + + def set_proxy(self, proxy: str) -> None: + if not proxy: + return + + if not proxy.startswith(PROXY_SCHEMES): + proxy = f"http://{proxy}" + + if self._proxy_cred and "@" not in proxy: + # socks5://localhost:9050 => socks5://[credential]@localhost:9050 + proxy = proxy.replace("://", f"://{self._proxy_cred}@", 1) + + self.session.proxies = {"https": proxy} + if not proxy.startswith("https://"): + self.session.proxies["http"] = proxy + + def set_proxy_auth(self, credential: str) -> None: + self._proxy_cred = credential + + # :path: is expected not to start with "/" + async def request(self, path: str, proxy: str = None) -> Response: + # TODO: request rate limit + # Pause if the request rate exceeded the maximum + # while self.is_rate_exceeded(): + # await asyncio.sleep(0.1) + + # self.increase_rate() + + err_msg = None + + # Safe quote all special characters to prevent them from being encoded + url = safequote(self._url + path if self._url else path) + parsed_url = urlparse(url) + + # Why using a loop instead of max_retries argument? Check issue #1009 + for _ in range(options["max_retries"] + 1): + try: + try: + proxy = proxy or random.choice(options["proxies"]) + self.set_proxy(proxy) + except IndexError: + pass + + if self.agents: + self.set_header("user-agent", random.choice(self.agents)) + + # Use "target" extension to avoid the URL path from being normalized + request = self.session.build_request( + options["http_method"], + # url.removesuffix(parsed_url.path), + url, + headers=self.headers, + data=options["data"], + ) + if p := parsed_url.path: + request.extensions = {"target": p.encode()} + + xresponse = await self.session.send( + request, + stream=True, + follow_redirects=options["follow_redirects"], + ) + response = await Response.create(xresponse) + await xresponse.aclose() + + log_msg = f'"{options["http_method"]} {response.url}" {response.status} - {response.length}B' + + if response.redirect: + log_msg += f" - LOCATION: {response.redirect}" + + logger.info(log_msg) + + return response + + except Exception as e: + logger.exception(e) + + if e == socket.gaierror: + err_msg = "Couldn't resolve DNS" + elif "SSLError" in str(e): + err_msg = "Unexpected SSL error" + elif "TooManyRedirects" in str(e): + err_msg = f"Too many redirects: {url}" + elif "ProxyError" in str(e): + if proxy: + err_msg = f"Error with the proxy: {proxy}" + else: + err_msg = "Error with the system proxy" + # Prevent from re-using it in the future + if proxy in options["proxies"] and len(options["proxies"]) > 1: + options["proxies"].remove(proxy) + elif "InvalidURL" in str(e): + err_msg = f"Invalid URL: {url}" + elif "InvalidProxyURL" in str(e): + err_msg = f"Invalid proxy URL: {proxy}" + elif "ConnectionError" in str(e): + err_msg = f"Cannot connect to: {urlparse(url).netloc}" + elif re.search(READ_RESPONSE_ERROR_REGEX, str(e)): + err_msg = f"Failed to read response body: {url}" + elif "Timeout" in str(e) or e in ( + httpx.ConnectTimeout, + httpx.ReadTimeout, + socket.timeout, + ): + err_msg = 
f"Request timeout: {url}" + else: + err_msg = f"There was a problem in the request to: {url}" + + raise RequestException(err_msg) + + def is_rate_exceeded(self): + return self._rate >= options["max_rate"] > 0 + + def decrease_rate(self): + self._rate -= 1 + + def increase_rate(self): + self._rate += 1 + + @property + @cached(RATE_UPDATE_DELAY) + def rate(self): + return self._rate diff --git a/lib/connection/asynchronous/response.py b/lib/connection/asynchronous/response.py new file mode 100644 index 000000000..6f97a9ab0 --- /dev/null +++ b/lib/connection/asynchronous/response.py @@ -0,0 +1,67 @@ +import httpx +from lib.core.settings import ( + DEFAULT_ENCODING, + ITER_CHUNK_SIZE, + MAX_RESPONSE_SIZE, + UNKNOWN, +) +from lib.parse.url import clean_path, parse_path +from lib.utils.common import is_binary + + +class Response: + def __init__(self, response: httpx.Response) -> None: + self.url = str(response.url) + self.full_path = parse_path(self.url) + self.path = clean_path(self.full_path) + self.status = response.status_code + self.headers = response.headers + self.redirect = self.headers.get("location") or "" + self.history = [str(res.url) for res in response.history] + self.content = "" + self.body = b"" + + @classmethod + async def create(cls, response: httpx.Response) -> "Response": + self = cls(response) + async for chunk in response.aiter_bytes(chunk_size=ITER_CHUNK_SIZE): + self.body += chunk + + if len(self.body) >= MAX_RESPONSE_SIZE or ( + "content-length" in self.headers and is_binary(self.body) + ): + break + + if not is_binary(self.body): + try: + self.content = self.body.decode( + response.encoding or DEFAULT_ENCODING, errors="ignore" + ) + except LookupError: + self.content = self.body.decode(DEFAULT_ENCODING, errors="ignore") + + return self + + @property + def type(self) -> str: + if "content-type" in self.headers: + return self.headers.get("content-type").split(";")[0] + + return UNKNOWN + + @property + def length(self) -> int: + try: + return int(self.headers.get("content-length")) + except TypeError: + return len(self.body) + + def __hash__(self) -> int: + return hash(self.body) + + def __eq__(self, other: object) -> bool: + return (self.status, self.body, self.redirect) == ( + other.status, + other.body, + other.redirect, + ) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 49207136f..673874639 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -16,8 +16,10 @@ # # Author: Mauro Soria +import asyncio import gc import os +import signal import psycopg import re import time @@ -69,6 +71,13 @@ from lib.utils.schemedet import detect_scheme from lib.view.terminal import interface +if options["async_mode"]: + from lib.connection.asynchronous.requester import Requester + from lib.core.asynchronous.fuzzer import Fuzzer +else: + from lib.connection.requester import Requester + from lib.core.fuzzer import Fuzzer + class Controller: def __init__(self): @@ -229,7 +238,10 @@ def run(self): if not self.old_session: interface.target(self.url) - self.start() + if options["async_mode"]: + self.astart() + else: + self.start() except ( InvalidURLException, @@ -285,6 +297,43 @@ def start(self): self.jobs_processed += 1 self.old_session = False + def astart(self): + loop = asyncio.new_event_loop() + loop.add_signal_handler(signal.SIGINT, self.handle_pause) + while self.directories: + try: + gc.collect() + + current_directory = self.directories[0] + + if not self.old_session: + current_time = time.strftime("%H:%M:%S") + msg = 
f"{NEW_LINE}[{current_time}] Starting: {current_directory}" + + interface.warning(msg) + + self.fuzzer.set_base_path(current_directory) + if (timeout := options["max_time"]) > 0: + loop.run_until_complete( + asyncio.wait_for(self.fuzzer.start(), timeout) + ) + else: + loop.run_until_complete(self.fuzzer.start()) + + except KeyboardInterrupt: + pass + except asyncio.TimeoutError: + raise SkipTargetInterrupt( + "Runtime exceeded the maximum set by the user" + ) + + finally: + self.dictionary.reset() + self.directories.pop(0) + + self.jobs_processed += 1 + self.old_session = False + def set_target(self, url): # If no scheme specified, unset it first if "://" not in url: diff --git a/lib/core/asynchronous/__init__.py b/lib/core/asynchronous/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/core/asynchronous/fuzzer.py b/lib/core/asynchronous/fuzzer.py new file mode 100644 index 000000000..ae2909bba --- /dev/null +++ b/lib/core/asynchronous/fuzzer.py @@ -0,0 +1,230 @@ +import asyncio +import re +import threading +from typing import Callable, Generator + +from lib.connection.asynchronous.requester import Requester +from lib.connection.asynchronous.response import Response +from lib.core.asynchronous.scanner import Scanner +from lib.core.data import blacklists, options +from lib.core.dictionary import Dictionary +from lib.core.exceptions import RequestException +from lib.core.logger import logger +from lib.core.settings import ( + DEFAULT_TEST_PREFIXES, + DEFAULT_TEST_SUFFIXES, + WILDCARD_TEST_POINT_MARKER, +) +from lib.parse.url import clean_path +from lib.utils.common import human_size, lstrip_once +from lib.utils.crawl import Crawler + + +class Fuzzer: + def __init__( + self, + requester: Requester, + dictionary: Dictionary, + *, + match_callbacks: tuple[Callable] = (), + not_found_callbacks: tuple[Callable] = (), + error_callbacks: tuple[Callable] = (), + ) -> None: + self._scanned = set() + self._requester = requester + self._dictionary = dictionary + self._play_event = asyncio.Event() + self._base_path = None + self.exc = None + self.match_callbacks = match_callbacks + self.not_found_callbacks = not_found_callbacks + self.error_callbacks = error_callbacks + self._background_tasks = set() + self.sem = asyncio.Semaphore(options["thread_count"]) + + async def setup_scanners(self) -> None: + self.scanners = { + "default": {}, + "prefixes": {}, + "suffixes": {}, + } + + # Default scanners (wildcard testers) + self.scanners["default"].update( + { + "index": await Scanner.create(self._requester, path=self._base_path), + "random": await Scanner.create( + self._requester, path=self._base_path + WILDCARD_TEST_POINT_MARKER + ), + } + ) + + if options["exclude_response"]: + self.scanners["default"]["custom"] = await Scanner.create( + self._requester, tested=self.scanners, path=options["exclude_response"] + ) + + for prefix in options["prefixes"] + DEFAULT_TEST_PREFIXES: + self.scanners["prefixes"][prefix] = await Scanner.create( + self._requester, + tested=self.scanners, + path=f"{self._base_path}{prefix}{WILDCARD_TEST_POINT_MARKER}", + context=f"/{self._base_path}{prefix}***", + ) + + for suffix in options["suffixes"] + DEFAULT_TEST_SUFFIXES: + self.scanners["suffixes"][suffix] = await Scanner.create( + self._requester, + tested=self.scanners, + path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}{suffix}", + context=f"/{self._base_path}***{suffix}", + ) + + for extension in options["extensions"]: + if "." 
+ extension not in self.scanners["suffixes"]: + self.scanners["suffixes"]["." + extension] = await Scanner.create( + self._requester, + tested=self.scanners, + path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}.{extension}", + context=f"/{self._base_path}***.{extension}", + ) + + def get_scanners_for(self, path: str) -> Generator: + # Clean the path, so can check for extensions/suffixes + path = clean_path(path) + + for prefix in self.scanners["prefixes"]: + if path.startswith(prefix): + yield self.scanners["prefixes"][prefix] + + for suffix in self.scanners["suffixes"]: + if path.endswith(suffix): + yield self.scanners["suffixes"][suffix] + + for scanner in self.scanners["default"].values(): + yield scanner + + async def start(self) -> None: + await self.setup_scanners() + self.play() + + for _ in range(len(self._dictionary)): + task = asyncio.create_task(self.task_proc()) + self._background_tasks.add(task) + task.add_done_callback(self._background_tasks.discard) + + await asyncio.gather(*self._background_tasks, return_exceptions=True) + + def is_finished(self) -> bool: + if self.exc: + raise self.exc + + return len(self._background_tasks) == 0 + + def play(self) -> None: + self._play_event.set() + + def pause(self) -> None: + self._play_event.clear() + + def quit(self) -> None: + for task in self._background_tasks: + task.cancel() + + async def scan(self, path: str, scanners: Generator) -> None: + # Avoid scanned paths from being re-scanned + if path in self._scanned: + return + else: + self._scanned.add(path) + + response = await self._requester.request(path) + + if self.is_excluded(response): + for callback in self.not_found_callbacks: + callback(response) + return + + for tester in scanners: + # Check if the response is unique, not wildcard + if not tester.check(path, response): + for callback in self.not_found_callbacks: + callback(response) + return + + try: + for callback in self.match_callbacks: + callback(response) + except Exception as e: + self.exc = e + + if options["crawl"]: + logger.info(f'THREAD-{threading.get_ident()}: crawling "/{path}"') + for path_ in Crawler.crawl(response): + if self._dictionary.is_valid(path_): + logger.info( + f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}' + ) + await self.scan(path_, self.get_scanners_for(path_)) + + def is_excluded(self, resp: Response) -> bool: + """Validate the response by different filters""" + + if resp.status in options["exclude_status_codes"]: + return True + + if ( + options["include_status_codes"] + and resp.status not in options["include_status_codes"] + ): + return True + + if resp.status in blacklists and any( + resp.path.endswith(lstrip_once(suffix, "/")) + for suffix in blacklists.get(resp.status) + ): + return True + + if human_size(resp.length).rstrip() in options["exclude_sizes"]: + return True + + if resp.length < options["minimum_response_size"]: + return True + + if resp.length > options["maximum_response_size"] > 0: + return True + + if any(text in resp.content for text in options["exclude_texts"]): + return True + + if options["exclude_regex"] and re.search( + options["exclude_regex"], resp.content + ): + return True + + if options["exclude_redirect"] and ( + options["exclude_redirect"] in resp.redirect + or re.search(options["exclude_redirect"], resp.redirect) + ): + return True + + return False + + def set_base_path(self, path: str) -> None: + self._base_path = path + + async def task_proc(self) -> None: + async with self.sem: + await self._play_event.wait() + + try: + path = 
next(self._dictionary)
+                scanners = self.get_scanners_for(path)
+                await self.scan(self._base_path + path, scanners)
+            except StopIteration:
+                pass
+            except RequestException as e:
+                for callback in self.error_callbacks:
+                    callback(e)
+            finally:
+                await asyncio.sleep(options["delay"])
diff --git a/lib/core/asynchronous/scanner.py b/lib/core/asynchronous/scanner.py
new file mode 100644
index 000000000..ac907e879
--- /dev/null
+++ b/lib/core/asynchronous/scanner.py
@@ -0,0 +1,158 @@
+import asyncio
+import re
+from typing import Optional
+from urllib.parse import unquote
+
+from lib.connection.asynchronous.requester import Requester
+from lib.connection.response import Response
+from lib.core.data import options
+from lib.core.logger import logger
+from lib.core.settings import (
+    REFLECTED_PATH_MARKER,
+    TEST_PATH_LENGTH,
+    WILDCARD_TEST_POINT_MARKER,
+)
+from lib.parse.url import clean_path
+from lib.utils.diff import DynamicContentParser, generate_matching_regex
+from lib.utils.random import rand_string
+
+
+class Scanner:
+    def __init__(
+        self, requester: Requester, path: str, tested: dict, context: str
+    ) -> None:
+        self.path = path
+        self.tested = tested
+        self.context = context
+        self.requester = requester
+        self.response = None
+        self.wildcard_redirect_regex = None
+
+    @classmethod
+    async def create(
+        cls,
+        requester: Requester,
+        *,
+        path: str = "",
+        tested: dict = {},
+        context: str = "all cases",
+    ) -> "Scanner":
+        self = cls(requester, path=path, tested=tested, context=context)
+        await self.setup()
+        return self
+
+    async def setup(self) -> None:
+        """
+        Generate wildcard response information containers; these will be
+        used to compare with other paths' responses
+        """
+
+        first_path = self.path.replace(
+            WILDCARD_TEST_POINT_MARKER,
+            rand_string(TEST_PATH_LENGTH),
+        )
+        first_response = await self.requester.request(first_path)
+        self.response = first_response
+        await asyncio.sleep(options["delay"])
+
+        duplicate = self._get_duplicate(first_response)
+        # Another test was performed before and has the same response as this
+        if duplicate:
+            self.content_parser = duplicate.content_parser
+            self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex
+            logger.debug(f'Skipped the second test for "{self.context}"')
+            return
+
+        second_path = self.path.replace(
+            WILDCARD_TEST_POINT_MARKER,
+            rand_string(TEST_PATH_LENGTH, omit=first_path),
+        )
+        second_response = await self.requester.request(second_path)
+        await asyncio.sleep(options["delay"])
+
+        if first_response.redirect and second_response.redirect:
+            self.wildcard_redirect_regex = self.generate_redirect_regex(
+                clean_path(first_response.redirect),
+                first_path,
+                clean_path(second_response.redirect),
+                second_path,
+            )
+            logger.debug(
+                f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}'
+            )
+
+        self.content_parser = DynamicContentParser(
+            first_response.content, second_response.content
+        )
+
+    def check(self, path: str, response: Response) -> bool:
+        """
+        Analyze the response to tell whether it is a wildcard response or not
+        """
+
+        if self.response.status != response.status:
+            return True
+
+        # See the generate_redirect_regex() docstring below for the workflow behind this check.
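+        # Example (hypothetical values for illustration; "MARK" stands in for
+        # the REFLECTED_PATH_MARKER placeholder): if setup() saw the redirects
+        # /rand1 -> /dir/rand1/ and /rand2 -> /dir/rand2/, the learned regex
+        # would be "^/dir/MARK/$". For the path "admin", regex_to_compare
+        # becomes "^/dir/admin/$": a redirect to "/dir/admin/" matches, so the
+        # response is treated as wildcard-like and falls through to the content
+        # check below, while a redirect to "/login" does not match and the path
+        # is reported as found right away.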
+ if self.wildcard_redirect_regex and response.redirect: + # - unquote(): Sometimes, some path characters get encoded or decoded in the response redirect + # but it's still a wildcard redirect, so unquote everything to prevent false positives + # - clean_path(): Get rid of queries and DOM in URL because of weird behaviours could happen + # with them, so messy that I give up on finding a way to test them + path = unquote(clean_path(path)) + redirect = unquote(clean_path(response.redirect)) + regex_to_compare = self.wildcard_redirect_regex.replace( + REFLECTED_PATH_MARKER, re.escape(path) + ) + is_wildcard_redirect = re.match(regex_to_compare, redirect, re.IGNORECASE) + + # If redirection doesn't match the rule, mark as found + if not is_wildcard_redirect: + logger.debug( + f'"{redirect}" doesn\'t match the regular expression "{regex_to_compare}", passing' + ) + return True + + if self._is_wildcard(response): + return False + + return True + + def _get_duplicate(self, response: Response) -> Optional["Scanner"]: + for category in self.tested: + for tester in self.tested[category].values(): + if response == tester.response: + return tester + + return None + + def _is_wildcard(self, response): + """Check if response is similar to wildcard response""" + + # Compare 2 binary responses (Response.content is empty if the body is binary) + if not self.response.content and not response.content: + return self.response.body == response.body + + return self.content_parser.compare_to(response.content) + + @staticmethod + def generate_redirect_regex(first_loc, first_path, second_loc, second_path): + """ + From 2 redirects of wildcard responses, generate a regexp that matches + every wildcard redirect. + + How it works: + 1. Replace path in 2 redirect URLs (if it gets reflected in) with a mark + (e.g. /path1 -> /foo/path1 and /path2 -> /foo/path2 will become /foo/[mark] for both) + 2. Compare 2 redirects and generate a regex that matches both + (e.g. /foo/[mark]?a=1 and /foo/[mark]?a=2 will have the regex: ^/foo/[mark]?a=(.*)$) + 3. Next time if it redirects, replace mark in regex with the path and check if it matches + (e.g. 
/path3 -> /foo/path3?a=5, the regex becomes ^/foo/path3?a=(.*)$, which matches) + """ + + if first_path: + first_loc = unquote(first_loc).replace(first_path, REFLECTED_PATH_MARKER) + if second_path: + second_loc = unquote(second_loc).replace(second_path, REFLECTED_PATH_MARKER) + + return generate_matching_regex(first_loc, second_loc) diff --git a/lib/core/data.py b/lib/core/data.py index 46292f4d4..9eff02734 100755 --- a/lib/core/data.py +++ b/lib/core/data.py @@ -83,6 +83,7 @@ "ip": None, "exit_on_error": False, "crawl": False, + "async_mode": False, "full_url": False, "redirects_history": False, "color": True, diff --git a/lib/core/options.py b/lib/core/options.py index 03119700d..0ebde0d2d 100755 --- a/lib/core/options.py +++ b/lib/core/options.py @@ -350,6 +350,7 @@ def parse_config(opt): # Advanced opt.crawl = opt.crawl or config.safe_getboolean("advanced", "crawl") + opt.async_mode = opt.async_mode or config.safe_getboolean("advanced", "async") # View opt.full_url = opt.full_url or config.safe_getboolean("view", "full-url") diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index fb71e9860..0f38ce39e 100755 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -468,6 +468,12 @@ def parse_arguments(): dest="crawl", help="Crawl for new paths in responses" ) + advanced.add_option( + "--async", + action="store_true", + dest="async_mode", + help="Enable asynchronous mode", + ) # View Settings view = OptionGroup(parser, "View Settings") From 9c3c3168f47e92f013deb72291aa879f5d0aaa5b Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Sat, 7 Sep 2024 17:41:19 +0800 Subject: [PATCH 02/19] handle pause in async mode --- lib/controller/controller.py | 87 ++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 673874639..16fe1a42c 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -149,6 +149,10 @@ def setup(self): self.errors = 0 self.consecutive_errors = 0 + if options["async_mode"]: + self.loop = asyncio.new_event_loop() + self.loop.add_signal_handler(signal.SIGINT, self.handle_pause) + if options["auth"]: self.requester.set_auth(options["auth_type"], options["auth"]) @@ -238,10 +242,7 @@ def run(self): if not self.old_session: interface.target(self.url) - if options["async_mode"]: - self.astart() - else: - self.start() + self.start() except ( InvalidURLException, @@ -284,8 +285,14 @@ def start(self): interface.warning(msg) self.fuzzer.set_base_path(current_directory) - self.fuzzer.start() - self.process() + if options["async_mode"]: + # use a future to get exceptions from handle_pause + # https://stackoverflow.com/a/64230941 + self.done_future = self.loop.create_future() + self.loop.run_until_complete(self._start_coroutines()) + else: + self.fuzzer.start() + self.process() except KeyboardInterrupt: pass @@ -297,42 +304,21 @@ def start(self): self.jobs_processed += 1 self.old_session = False - def astart(self): - loop = asyncio.new_event_loop() - loop.add_signal_handler(signal.SIGINT, self.handle_pause) - while self.directories: - try: - gc.collect() - - current_directory = self.directories[0] - - if not self.old_session: - current_time = time.strftime("%H:%M:%S") - msg = f"{NEW_LINE}[{current_time}] Starting: {current_directory}" - - interface.warning(msg) - - self.fuzzer.set_base_path(current_directory) - if (timeout := options["max_time"]) > 0: - loop.run_until_complete( - asyncio.wait_for(self.fuzzer.start(), timeout) - ) - 
else: - loop.run_until_complete(self.fuzzer.start()) - - except KeyboardInterrupt: - pass - except asyncio.TimeoutError: - raise SkipTargetInterrupt( - "Runtime exceeded the maximum set by the user" - ) + async def _start_coroutines(self): + task = self.loop.create_task(self.fuzzer.start()) + done, _ = await asyncio.wait( + [self.done_future, task], + timeout=options["max_time"] if options["max_time"] > 0 else None, + return_when=asyncio.FIRST_COMPLETED, + ) - finally: - self.dictionary.reset() - self.directories.pop(0) + if self.done_future.done(): + task.cancel() + await self.done_future # propagate the exception, if raised - self.jobs_processed += 1 - self.old_session = False + # TODO: find a better way to catch TimeoutError + if len(done) == 0: + raise SkipTargetInterrupt("Runtime exceeded the maximum set by the user") def set_target(self, url): # If no scheme specified, unset it first @@ -584,9 +570,19 @@ def handle_pause(self): ) self._export(session_file) - raise QuitInterrupt(f"Session saved to: {session_file}") + quitexc = QuitInterrupt(f"Session saved to: {session_file}") + if options["async_mode"]: + self.done_future.set_exception(quitexc) + break + else: + raise quitexc elif option.lower() == "q": - raise QuitInterrupt("Canceled by the user") + quitexc = QuitInterrupt("Canceled by the user") + if options["async_mode"]: + self.done_future.set_exception(quitexc) + break + else: + raise quitexc elif option.lower() == "c": self.fuzzer.play() @@ -597,7 +593,12 @@ def handle_pause(self): break elif option.lower() == "s" and len(options["urls"]) > 1: - raise SkipTargetInterrupt("Target skipped by the user") + skipexc = SkipTargetInterrupt("Target skipped by the user") + if options["async_mode"]: + self.done_future.set_exception(skipexc) + break + else: + raise skipexc def is_timed_out(self): return time.time() - self.start_time > options["max_time"] > 0 From 17d43c1d4ac5fe7890e6955672590933164a6219 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Sun, 8 Sep 2024 00:00:11 +0800 Subject: [PATCH 03/19] refactor --- lib/connection/asynchronous/__init__.py | 0 lib/connection/asynchronous/requester.py | 226 --------------- lib/connection/asynchronous/response.py | 67 ----- lib/connection/requester.py | 273 +++++++++++++----- lib/connection/response.py | 78 +++-- lib/controller/controller.py | 5 +- lib/core/asynchronous/__init__.py | 0 lib/core/asynchronous/fuzzer.py | 230 --------------- lib/core/asynchronous/scanner.py | 158 ---------- lib/core/fuzzer.py | 351 +++++++++++++++++------ lib/core/scanner.py | 230 ++++++++++----- 11 files changed, 695 insertions(+), 923 deletions(-) delete mode 100644 lib/connection/asynchronous/__init__.py delete mode 100644 lib/connection/asynchronous/requester.py delete mode 100644 lib/connection/asynchronous/response.py delete mode 100644 lib/core/asynchronous/__init__.py delete mode 100644 lib/core/asynchronous/fuzzer.py delete mode 100644 lib/core/asynchronous/scanner.py diff --git a/lib/connection/asynchronous/__init__.py b/lib/connection/asynchronous/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/lib/connection/asynchronous/requester.py b/lib/connection/asynchronous/requester.py deleted file mode 100644 index 704ff12ab..000000000 --- a/lib/connection/asynchronous/requester.py +++ /dev/null @@ -1,226 +0,0 @@ -import random -import re -import socket -from urllib.parse import urlparse - -import httpx - -from lib.connection.asynchronous.response import Response -from lib.core.data import options -from 
lib.core.decorators import cached -from lib.core.exceptions import RequestException -from lib.core.logger import logger -from lib.core.settings import ( - PROXY_SCHEMES, - RATE_UPDATE_DELAY, - READ_RESPONSE_ERROR_REGEX, - SCRIPT_PATH, -) -from lib.core.structures import CaseInsensitiveDict -from lib.utils.common import safequote -from lib.utils.file import FileUtils -from lib.utils.mimetype import guess_mimetype - - -class HTTPXBearerAuth(httpx.Auth): - def __init__(self, token: str) -> None: - self.token = token - - def auth_flow(self, request: httpx.Request) -> any: - request.headers["Authorization"] = f"Bearer {self.token}" - yield request - - -class Requester: - def __init__(self) -> None: - self._url = None - self._proxy_cred = None - self._rate = 0 - self.headers = CaseInsensitiveDict(options["headers"]) - self.agents = [] - - if options["random_agents"]: - self._fetch_agents() - - # Guess the mime type of request data if not specified - if options["data"] and "content-type" not in self.headers: - self.set_header("content-type", guess_mimetype(options["data"])) - - socket_options = [] - if options["network_interface"]: - socket_options.append( - ( - socket.SOL_SOCKET, - socket.SO_BINDTODEVICE, - options["network_interface"].encode("utf-8"), - ) - ) - - transport = httpx.AsyncHTTPTransport( - # FIXME: max_connections != thread_count - limits=httpx.Limits(max_connections=options["thread_count"]), - socket_options=socket_options, - ) - - cert = (options["cert_file"], options["key_file"]) - - self.session = httpx.AsyncClient( - verify=False, - cert=cert if cert[0] and cert[1] else None, - mounts={"http://": transport, "https://": transport}, - timeout=httpx.Timeout( - timeout=options["timeout"], - pool=None, - ), - ) - - def _fetch_agents(self) -> None: - self.agents = FileUtils.get_lines( - FileUtils.build_path(SCRIPT_PATH, "db", "user-agents.txt") - ) - - def set_url(self, url: str) -> None: - self._url = url - - def set_header(self, key: str, value: str) -> None: - self.headers[key] = value.lstrip() - - def set_auth(self, type: str, credential: str) -> None: - if type in ("bearer", "jwt"): - self.session.auth = HTTPXBearerAuth(credential) - else: - try: - user, password = credential.split(":", 1) - except ValueError: - user = credential - password = "" - - if type == "basic": - self.session.auth = httpx.BasicAuth(user, password) - elif type == "digest": - self.session.auth = httpx.DigestAuth(user, password) - else: - pass # TODO: HttpNtlmAuth - - def set_proxy(self, proxy: str) -> None: - if not proxy: - return - - if not proxy.startswith(PROXY_SCHEMES): - proxy = f"http://{proxy}" - - if self._proxy_cred and "@" not in proxy: - # socks5://localhost:9050 => socks5://[credential]@localhost:9050 - proxy = proxy.replace("://", f"://{self._proxy_cred}@", 1) - - self.session.proxies = {"https": proxy} - if not proxy.startswith("https://"): - self.session.proxies["http"] = proxy - - def set_proxy_auth(self, credential: str) -> None: - self._proxy_cred = credential - - # :path: is expected not to start with "/" - async def request(self, path: str, proxy: str = None) -> Response: - # TODO: request rate limit - # Pause if the request rate exceeded the maximum - # while self.is_rate_exceeded(): - # await asyncio.sleep(0.1) - - # self.increase_rate() - - err_msg = None - - # Safe quote all special characters to prevent them from being encoded - url = safequote(self._url + path if self._url else path) - parsed_url = urlparse(url) - - # Why using a loop instead of max_retries argument? 
Check issue #1009 - for _ in range(options["max_retries"] + 1): - try: - try: - proxy = proxy or random.choice(options["proxies"]) - self.set_proxy(proxy) - except IndexError: - pass - - if self.agents: - self.set_header("user-agent", random.choice(self.agents)) - - # Use "target" extension to avoid the URL path from being normalized - request = self.session.build_request( - options["http_method"], - # url.removesuffix(parsed_url.path), - url, - headers=self.headers, - data=options["data"], - ) - if p := parsed_url.path: - request.extensions = {"target": p.encode()} - - xresponse = await self.session.send( - request, - stream=True, - follow_redirects=options["follow_redirects"], - ) - response = await Response.create(xresponse) - await xresponse.aclose() - - log_msg = f'"{options["http_method"]} {response.url}" {response.status} - {response.length}B' - - if response.redirect: - log_msg += f" - LOCATION: {response.redirect}" - - logger.info(log_msg) - - return response - - except Exception as e: - logger.exception(e) - - if e == socket.gaierror: - err_msg = "Couldn't resolve DNS" - elif "SSLError" in str(e): - err_msg = "Unexpected SSL error" - elif "TooManyRedirects" in str(e): - err_msg = f"Too many redirects: {url}" - elif "ProxyError" in str(e): - if proxy: - err_msg = f"Error with the proxy: {proxy}" - else: - err_msg = "Error with the system proxy" - # Prevent from re-using it in the future - if proxy in options["proxies"] and len(options["proxies"]) > 1: - options["proxies"].remove(proxy) - elif "InvalidURL" in str(e): - err_msg = f"Invalid URL: {url}" - elif "InvalidProxyURL" in str(e): - err_msg = f"Invalid proxy URL: {proxy}" - elif "ConnectionError" in str(e): - err_msg = f"Cannot connect to: {urlparse(url).netloc}" - elif re.search(READ_RESPONSE_ERROR_REGEX, str(e)): - err_msg = f"Failed to read response body: {url}" - elif "Timeout" in str(e) or e in ( - httpx.ConnectTimeout, - httpx.ReadTimeout, - socket.timeout, - ): - err_msg = f"Request timeout: {url}" - else: - err_msg = f"There was a problem in the request to: {url}" - - raise RequestException(err_msg) - - def is_rate_exceeded(self): - return self._rate >= options["max_rate"] > 0 - - def decrease_rate(self): - self._rate -= 1 - - def increase_rate(self): - self._rate += 1 - - @property - @cached(RATE_UPDATE_DELAY) - def rate(self): - return self._rate diff --git a/lib/connection/asynchronous/response.py b/lib/connection/asynchronous/response.py deleted file mode 100644 index 6f97a9ab0..000000000 --- a/lib/connection/asynchronous/response.py +++ /dev/null @@ -1,67 +0,0 @@ -import httpx -from lib.core.settings import ( - DEFAULT_ENCODING, - ITER_CHUNK_SIZE, - MAX_RESPONSE_SIZE, - UNKNOWN, -) -from lib.parse.url import clean_path, parse_path -from lib.utils.common import is_binary - - -class Response: - def __init__(self, response: httpx.Response) -> None: - self.url = str(response.url) - self.full_path = parse_path(self.url) - self.path = clean_path(self.full_path) - self.status = response.status_code - self.headers = response.headers - self.redirect = self.headers.get("location") or "" - self.history = [str(res.url) for res in response.history] - self.content = "" - self.body = b"" - - @classmethod - async def create(cls, response: httpx.Response) -> "Response": - self = cls(response) - async for chunk in response.aiter_bytes(chunk_size=ITER_CHUNK_SIZE): - self.body += chunk - - if len(self.body) >= MAX_RESPONSE_SIZE or ( - "content-length" in self.headers and is_binary(self.body) - ): - break - - if not 
is_binary(self.body): - try: - self.content = self.body.decode( - response.encoding or DEFAULT_ENCODING, errors="ignore" - ) - except LookupError: - self.content = self.body.decode(DEFAULT_ENCODING, errors="ignore") - - return self - - @property - def type(self) -> str: - if "content-type" in self.headers: - return self.headers.get("content-type").split(";")[0] - - return UNKNOWN - - @property - def length(self) -> int: - try: - return int(self.headers.get("content-length")) - except TypeError: - return len(self.body) - - def __hash__(self) -> int: - return hash(self.body) - - def __eq__(self, other: object) -> bool: - return (self.status, self.body, self.redirect) == ( - other.status, - other.body, - other.redirect, - ) diff --git a/lib/connection/requester.py b/lib/connection/requester.py index 366253dba..c768c4d79 100755 --- a/lib/connection/requester.py +++ b/lib/connection/requester.py @@ -16,33 +16,35 @@ # # Author: Mauro Soria +import asyncio import http.client -import socket import random import re -import requests +import socket import threading import time +from urllib.parse import urlparse +import httpx +import requests from requests.auth import AuthBase, HTTPBasicAuth, HTTPDigestAuth from requests.packages import urllib3 from requests_ntlm import HttpNtlmAuth from requests_toolbelt.adapters.socket_options import SocketOptionsAdapter -from urllib.parse import urlparse +from lib.connection.dns import cached_getaddrinfo +from lib.connection.response import AsyncResponse, Response from lib.core.data import options from lib.core.decorators import cached from lib.core.exceptions import RequestException from lib.core.logger import logger from lib.core.settings import ( + PROXY_SCHEMES, RATE_UPDATE_DELAY, READ_RESPONSE_ERROR_REGEX, SCRIPT_PATH, - PROXY_SCHEMES, ) from lib.core.structures import CaseInsensitiveDict -from lib.connection.dns import cached_getaddrinfo -from lib.connection.response import Response from lib.utils.common import safequote from lib.utils.file import FileUtils from lib.utils.mimetype import guess_mimetype @@ -62,19 +64,37 @@ def __call__(self, request): return request -class Requester: +class HTTPXBearerAuth(httpx.Auth): + def __init__(self, token: str) -> None: + self.token = token + + def auth_flow(self, request: httpx.Request) -> any: + request.headers["Authorization"] = f"Bearer {self.token}" + yield request + + +class BaseRequester: def __init__(self): self._url = None self._proxy_cred = None self._rate = 0 self.headers = CaseInsensitiveDict(options["headers"]) self.agents = [] - self.session = requests.Session() - self.session.verify = False - self.session.cert = ( - options["cert_file"], - options["key_file"], - ) + self.session = None + + self._cert = None + if options["cert_file"] and options["key_file"]: + self._cert = (options["cert_file"], options["key_file"]) + + self._socket_options = [] + if options["network_interface"]: + self._socket_options.append( + ( + socket.SOL_SOCKET, + socket.SO_BINDTODEVICE, + options["network_interface"].encode("utf-8"), + ) + ) if options["random_agents"]: self._fetch_agents() @@ -83,11 +103,58 @@ def __init__(self): if options["data"] and "content-type" not in self.headers: self.set_header("content-type", guess_mimetype(options["data"])) - socket_options = [] - if options["network_interface"]: - socket_options.append( - (socket.SOL_SOCKET, socket.SO_BINDTODEVICE, options["network_interface"].encode("utf-8")) - ) + def _fetch_agents(self) -> None: + self.agents = FileUtils.get_lines( + FileUtils.build_path(SCRIPT_PATH, 
"db", "user-agents.txt") + ) + + def set_url(self, url: str) -> None: + self._url = url + + def set_header(self, key: str, value: str) -> None: + self.headers[key] = value.lstrip() + + def set_proxy(self, proxy: str) -> None: + if not proxy: + return + + if not proxy.startswith(PROXY_SCHEMES): + proxy = f"http://{proxy}" + + if self._proxy_cred and "@" not in proxy: + # socks5://localhost:9050 => socks5://[credential]@localhost:9050 + proxy = proxy.replace("://", f"://{self._proxy_cred}@", 1) + + self.session.proxies = {"https": proxy} + if not proxy.startswith("https://"): + self.session.proxies["http"] = proxy + + def set_proxy_auth(self, credential: str) -> None: + self._proxy_cred = credential + + def is_rate_exceeded(self): + return self._rate >= options["max_rate"] > 0 + + def decrease_rate(self): + self._rate -= 1 + + def increase_rate(self): + self._rate += 1 + threading.Timer(1, self.decrease_rate).start() + + @property + @cached(RATE_UPDATE_DELAY) + def rate(self): + return self._rate + + +class Requester(BaseRequester): + def __init__(self): + super().__init__() + + self.session = requests.Session() + self.session.verify = False + self.session.cert = self._cert for scheme in ("http://", "https://"): self.session.mount( @@ -95,21 +162,10 @@ def __init__(self): SocketOptionsAdapter( max_retries=0, pool_maxsize=options["thread_count"], - socket_options=socket_options, - ) + socket_options=self._socket_options, + ), ) - def _fetch_agents(self): - self.agents = FileUtils.get_lines( - FileUtils.build_path(SCRIPT_PATH, "db", "user-agents.txt") - ) - - def set_url(self, url): - self._url = url - - def set_header(self, key, value): - self.headers[key] = value.lstrip() - def set_auth(self, type, credential): if type in ("bearer", "jwt"): self.session.auth = HTTPBearerAuth(credential) @@ -127,24 +183,6 @@ def set_auth(self, type, credential): else: self.session.auth = HttpNtlmAuth(user, password) - def set_proxy(self, proxy): - if not proxy: - return - - if not proxy.startswith(PROXY_SCHEMES): - proxy = f"http://{proxy}" - - if self._proxy_cred and "@" not in proxy: - # socks5://localhost:9050 => socks5://[credential]@localhost:9050 - proxy = proxy.replace("://", f"://{self._proxy_cred}@", 1) - - self.session.proxies = {"https": proxy} - if not proxy.startswith("https://"): - self.session.proxies["http"] = proxy - - def set_proxy_auth(self, credential): - self._proxy_cred = credential - # :path: is expected not to start with "/" def request(self, path, proxy=None): # Pause if the request rate exceeded the maximum @@ -211,7 +249,7 @@ def request(self, path, proxy=None): if proxy: err_msg = f"Error with the proxy: {proxy}" else: - err_msg = f"Error with the system proxy" + err_msg = "Error with the system proxy" # Prevent from re-using it in the future if proxy in options["proxies"] and len(options["proxies"]) > 1: options["proxies"].remove(proxy) @@ -229,23 +267,130 @@ def request(self, path, proxy=None): ): err_msg = f"Request timeout: {url}" else: - err_msg = ( - f"There was a problem in the request to: {url}" - ) + err_msg = f"There was a problem in the request to: {url}" raise RequestException(err_msg) - def is_rate_exceeded(self): - return self._rate >= options["max_rate"] > 0 - def decrease_rate(self): - self._rate -= 1 +class AsyncRequester(BaseRequester): + def __init__(self): + super().__init__() - def increase_rate(self): - self._rate += 1 - threading.Timer(1, self.decrease_rate).start() + transport = httpx.AsyncHTTPTransport( + # FIXME: max_connections != thread_count + 
limits=httpx.Limits(max_connections=options["thread_count"]), + socket_options=self._socket_options, + ) - @property - @cached(RATE_UPDATE_DELAY) - def rate(self): - return self._rate + self.session = httpx.AsyncClient( + verify=False, + cert=self._cert, + mounts={"http://": transport, "https://": transport}, + timeout=httpx.Timeout(options["timeout"]), + ) + + def set_auth(self, type: str, credential: str) -> None: + if type in ("bearer", "jwt"): + self.session.auth = HTTPXBearerAuth(credential) + else: + try: + user, password = credential.split(":", 1) + except ValueError: + user = credential + password = "" + + if type == "basic": + self.session.auth = httpx.BasicAuth(user, password) + elif type == "digest": + self.session.auth = httpx.DigestAuth(user, password) + else: + pass # TODO: HttpNtlmAuth + + # :path: is expected not to start with "/" + async def request(self, path: str, proxy: str = None) -> AsyncResponse: + while self.is_rate_exceeded(): + await asyncio.sleep(0.1) + + self.increase_rate() + + err_msg = None + + # Safe quote all special characters to prevent them from being encoded + url = safequote(self._url + path if self._url else path) + parsed_url = urlparse(url) + + # Why using a loop instead of max_retries argument? Check issue #1009 + for _ in range(options["max_retries"] + 1): + try: + try: + proxy = proxy or random.choice(options["proxies"]) + self.set_proxy(proxy) + except IndexError: + pass + + if self.agents: + self.set_header("user-agent", random.choice(self.agents)) + + # Use "target" extension to avoid the URL path from being normalized + request = self.session.build_request( + options["http_method"], + # url.removesuffix(parsed_url.path), + url, + headers=self.headers, + data=options["data"], + ) + if p := parsed_url.path: + request.extensions = {"target": p.encode()} + + xresponse = await self.session.send( + request, + stream=True, + follow_redirects=options["follow_redirects"], + ) + response = await AsyncResponse.create(xresponse) + await xresponse.aclose() + + log_msg = f'"{options["http_method"]} {response.url}" {response.status} - {response.length}B' + + if response.redirect: + log_msg += f" - LOCATION: {response.redirect}" + + logger.info(log_msg) + + return response + + except Exception as e: + logger.exception(e) + + if e == socket.gaierror: + err_msg = "Couldn't resolve DNS" + elif "SSLError" in str(e): + err_msg = "Unexpected SSL error" + elif "TooManyRedirects" in str(e): + err_msg = f"Too many redirects: {url}" + elif "ProxyError" in str(e): + if proxy: + err_msg = f"Error with the proxy: {proxy}" + else: + err_msg = "Error with the system proxy" + # Prevent from re-using it in the future + if proxy in options["proxies"] and len(options["proxies"]) > 1: + options["proxies"].remove(proxy) + elif "InvalidURL" in str(e): + err_msg = f"Invalid URL: {url}" + elif "InvalidProxyURL" in str(e): + err_msg = f"Invalid proxy URL: {proxy}" + elif "ConnectionError" in str(e): + err_msg = f"Cannot connect to: {urlparse(url).netloc}" + elif re.search(READ_RESPONSE_ERROR_REGEX, str(e)): + err_msg = f"Failed to read response body: {url}" + elif "Timeout" in str(e) or e in ( + httpx.ConnectTimeout, + httpx.ReadTimeout, + socket.timeout, + ): + err_msg = f"Request timeout: {url}" + else: + err_msg = f"There was a problem in the request to: {url}" + + raise RequestException(err_msg) diff --git a/lib/connection/response.py b/lib/connection/response.py index 40823cb77..b9a56fbc9 100755 --- a/lib/connection/response.py +++ b/lib/connection/response.py @@ -16,44 +16,30 
@@ # # Author: Mauro Soria +import httpx + from lib.core.settings import ( - DEFAULT_ENCODING, ITER_CHUNK_SIZE, - MAX_RESPONSE_SIZE, UNKNOWN, + DEFAULT_ENCODING, + ITER_CHUNK_SIZE, + MAX_RESPONSE_SIZE, + UNKNOWN, ) from lib.parse.url import clean_path, parse_path from lib.utils.common import is_binary -class Response: +class BaseResponse: def __init__(self, response): - self.url = response.url - self.full_path = parse_path(response.url) + self.url = str(response.url) + self.full_path = parse_path(self.url) self.path = clean_path(self.full_path) self.status = response.status_code self.headers = response.headers self.redirect = self.headers.get("location") or "" - self.history = [res.url for res in response.history] + self.history = [str(res.url) for res in response.history] self.content = "" self.body = b"" - for chunk in response.iter_content(chunk_size=ITER_CHUNK_SIZE): - self.body += chunk - - if len(self.body) >= MAX_RESPONSE_SIZE or ( - "content-length" in self.headers and is_binary(self.body) - ): - break - - if not is_binary(self.body): - try: - self.content = self.body.decode( - response.encoding or DEFAULT_ENCODING, errors="ignore" - ) - except LookupError: - self.content = self.body.decode( - DEFAULT_ENCODING, errors="ignore" - ) - @property def type(self): if "content-type" in self.headers: @@ -77,3 +63,47 @@ def __eq__(self, other): other.body, other.redirect, ) + + +class Response(BaseResponse): + def __init__(self, response): + super().__init__(response) + + for chunk in response.iter_content(chunk_size=ITER_CHUNK_SIZE): + self.body += chunk + + if len(self.body) >= MAX_RESPONSE_SIZE or ( + "content-length" in self.headers and is_binary(self.body) + ): + break + + if not is_binary(self.body): + try: + self.content = self.body.decode( + response.encoding or DEFAULT_ENCODING, errors="ignore" + ) + except LookupError: + self.content = self.body.decode(DEFAULT_ENCODING, errors="ignore") + + +class AsyncResponse(BaseResponse): + @classmethod + async def create(cls, response: httpx.Response) -> "AsyncResponse": + self = cls(response) + async for chunk in response.aiter_bytes(chunk_size=ITER_CHUNK_SIZE): + self.body += chunk + + if len(self.body) >= MAX_RESPONSE_SIZE or ( + "content-length" in self.headers and is_binary(self.body) + ): + break + + if not is_binary(self.body): + try: + self.content = self.body.decode( + response.encoding or DEFAULT_ENCODING, errors="ignore" + ) + except LookupError: + self.content = self.body.decode(DEFAULT_ENCODING, errors="ignore") + + return self diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 16fe1a42c..b6b4802e0 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -28,7 +28,6 @@ from urllib.parse import urlparse from lib.connection.dns import cache_dns -from lib.connection.requester import Requester from lib.core.data import blacklists, options from lib.core.decorators import locked from lib.core.dictionary import Dictionary, get_blacklists @@ -72,8 +71,8 @@ from lib.view.terminal import interface if options["async_mode"]: - from lib.connection.asynchronous.requester import Requester - from lib.core.asynchronous.fuzzer import Fuzzer + from lib.connection.requester import AsyncRequester as Requester + from lib.core.fuzzer import AsyncFuzzer as Fuzzer else: from lib.connection.requester import Requester from lib.core.fuzzer import Fuzzer diff --git a/lib/core/asynchronous/__init__.py b/lib/core/asynchronous/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git 
a/lib/core/asynchronous/fuzzer.py b/lib/core/asynchronous/fuzzer.py deleted file mode 100644 index ae2909bba..000000000 --- a/lib/core/asynchronous/fuzzer.py +++ /dev/null @@ -1,230 +0,0 @@ -import asyncio -import re -import threading -from typing import Callable, Generator - -from lib.connection.asynchronous.requester import Requester -from lib.connection.asynchronous.response import Response -from lib.core.asynchronous.scanner import Scanner -from lib.core.data import blacklists, options -from lib.core.dictionary import Dictionary -from lib.core.exceptions import RequestException -from lib.core.logger import logger -from lib.core.settings import ( - DEFAULT_TEST_PREFIXES, - DEFAULT_TEST_SUFFIXES, - WILDCARD_TEST_POINT_MARKER, -) -from lib.parse.url import clean_path -from lib.utils.common import human_size, lstrip_once -from lib.utils.crawl import Crawler - - -class Fuzzer: - def __init__( - self, - requester: Requester, - dictionary: Dictionary, - *, - match_callbacks: tuple[Callable] = (), - not_found_callbacks: tuple[Callable] = (), - error_callbacks: tuple[Callable] = (), - ) -> None: - self._scanned = set() - self._requester = requester - self._dictionary = dictionary - self._play_event = asyncio.Event() - self._base_path = None - self.exc = None - self.match_callbacks = match_callbacks - self.not_found_callbacks = not_found_callbacks - self.error_callbacks = error_callbacks - self._background_tasks = set() - self.sem = asyncio.Semaphore(options["thread_count"]) - - async def setup_scanners(self) -> None: - self.scanners = { - "default": {}, - "prefixes": {}, - "suffixes": {}, - } - - # Default scanners (wildcard testers) - self.scanners["default"].update( - { - "index": await Scanner.create(self._requester, path=self._base_path), - "random": await Scanner.create( - self._requester, path=self._base_path + WILDCARD_TEST_POINT_MARKER - ), - } - ) - - if options["exclude_response"]: - self.scanners["default"]["custom"] = await Scanner.create( - self._requester, tested=self.scanners, path=options["exclude_response"] - ) - - for prefix in options["prefixes"] + DEFAULT_TEST_PREFIXES: - self.scanners["prefixes"][prefix] = await Scanner.create( - self._requester, - tested=self.scanners, - path=f"{self._base_path}{prefix}{WILDCARD_TEST_POINT_MARKER}", - context=f"/{self._base_path}{prefix}***", - ) - - for suffix in options["suffixes"] + DEFAULT_TEST_SUFFIXES: - self.scanners["suffixes"][suffix] = await Scanner.create( - self._requester, - tested=self.scanners, - path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}{suffix}", - context=f"/{self._base_path}***{suffix}", - ) - - for extension in options["extensions"]: - if "." + extension not in self.scanners["suffixes"]: - self.scanners["suffixes"]["." 
+ extension] = await Scanner.create( - self._requester, - tested=self.scanners, - path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}.{extension}", - context=f"/{self._base_path}***.{extension}", - ) - - def get_scanners_for(self, path: str) -> Generator: - # Clean the path, so can check for extensions/suffixes - path = clean_path(path) - - for prefix in self.scanners["prefixes"]: - if path.startswith(prefix): - yield self.scanners["prefixes"][prefix] - - for suffix in self.scanners["suffixes"]: - if path.endswith(suffix): - yield self.scanners["suffixes"][suffix] - - for scanner in self.scanners["default"].values(): - yield scanner - - async def start(self) -> None: - await self.setup_scanners() - self.play() - - for _ in range(len(self._dictionary)): - task = asyncio.create_task(self.task_proc()) - self._background_tasks.add(task) - task.add_done_callback(self._background_tasks.discard) - - await asyncio.gather(*self._background_tasks, return_exceptions=True) - - def is_finished(self) -> bool: - if self.exc: - raise self.exc - - return len(self._background_tasks) == 0 - - def play(self) -> None: - self._play_event.set() - - def pause(self) -> None: - self._play_event.clear() - - def quit(self) -> None: - for task in self._background_tasks: - task.cancel() - - async def scan(self, path: str, scanners: Generator) -> None: - # Avoid scanned paths from being re-scanned - if path in self._scanned: - return - else: - self._scanned.add(path) - - response = await self._requester.request(path) - - if self.is_excluded(response): - for callback in self.not_found_callbacks: - callback(response) - return - - for tester in scanners: - # Check if the response is unique, not wildcard - if not tester.check(path, response): - for callback in self.not_found_callbacks: - callback(response) - return - - try: - for callback in self.match_callbacks: - callback(response) - except Exception as e: - self.exc = e - - if options["crawl"]: - logger.info(f'THREAD-{threading.get_ident()}: crawling "/{path}"') - for path_ in Crawler.crawl(response): - if self._dictionary.is_valid(path_): - logger.info( - f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}' - ) - await self.scan(path_, self.get_scanners_for(path_)) - - def is_excluded(self, resp: Response) -> bool: - """Validate the response by different filters""" - - if resp.status in options["exclude_status_codes"]: - return True - - if ( - options["include_status_codes"] - and resp.status not in options["include_status_codes"] - ): - return True - - if resp.status in blacklists and any( - resp.path.endswith(lstrip_once(suffix, "/")) - for suffix in blacklists.get(resp.status) - ): - return True - - if human_size(resp.length).rstrip() in options["exclude_sizes"]: - return True - - if resp.length < options["minimum_response_size"]: - return True - - if resp.length > options["maximum_response_size"] > 0: - return True - - if any(text in resp.content for text in options["exclude_texts"]): - return True - - if options["exclude_regex"] and re.search( - options["exclude_regex"], resp.content - ): - return True - - if options["exclude_redirect"] and ( - options["exclude_redirect"] in resp.redirect - or re.search(options["exclude_redirect"], resp.redirect) - ): - return True - - return False - - def set_base_path(self, path: str) -> None: - self._base_path = path - - async def task_proc(self) -> None: - async with self.sem: - await self._play_event.wait() - - try: - path = next(self._dictionary) - scanners = self.get_scanners_for(path) - await 
self.scan(self._base_path + path, scanners) - except StopIteration: - pass - except RequestException as e: - for callback in self.error_callbacks: - callback(e) - finally: - await asyncio.sleep(options["delay"]) diff --git a/lib/core/asynchronous/scanner.py b/lib/core/asynchronous/scanner.py deleted file mode 100644 index ac907e879..000000000 --- a/lib/core/asynchronous/scanner.py +++ /dev/null @@ -1,158 +0,0 @@ -import asyncio -import re -from typing import Optional -from urllib.parse import unquote - -from lib.connection.asynchronous.requester import Requester -from lib.connection.response import Response -from lib.core.data import options -from lib.core.logger import logger -from lib.core.settings import ( - REFLECTED_PATH_MARKER, - TEST_PATH_LENGTH, - WILDCARD_TEST_POINT_MARKER, -) -from lib.parse.url import clean_path -from lib.utils.diff import DynamicContentParser, generate_matching_regex -from lib.utils.random import rand_string - - -class Scanner: - def __init__( - self, requester: Requester, path: str, tested: dict, context: str - ) -> None: - self.path = path - self.tested = tested - self.context = context - self.requester = requester - self.response = None - self.wildcard_redirect_regex = None - - @classmethod - async def create( - cls, - requester: Requester, - *, - path: str = "", - tested: dict = {}, - context: str = "all cases", - ) -> "Scanner": - self = cls(requester, path=path, tested=tested, context=context) - await self.setup() - return self - - async def setup(self) -> None: - """ - Generate wildcard response information containers, this will be - used to compare with other path responses - """ - - first_path = self.path.replace( - WILDCARD_TEST_POINT_MARKER, - rand_string(TEST_PATH_LENGTH), - ) - first_response = await self.requester.request(first_path) - self.response = first_response - await asyncio.sleep(options["delay"]) - - duplicate = self._get_duplicate(first_response) - # Another test was performed before and has the same response as this - if duplicate: - self.content_parser = duplicate.content_parser - self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex - logger.debug(f'Skipped the second test for "{self.context}"') - return - - second_path = self.path.replace( - WILDCARD_TEST_POINT_MARKER, - rand_string(TEST_PATH_LENGTH, omit=first_path), - ) - second_response = await self.requester.request(second_path) - await asyncio.sleep(options["delay"]) - - if first_response.redirect and second_response.redirect: - self.wildcard_redirect_regex = self.generate_redirect_regex( - clean_path(first_response.redirect), - first_path, - clean_path(second_response.redirect), - second_path, - ) - logger.debug( - f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}' - ) - - self.content_parser = DynamicContentParser( - first_response.content, second_response.content - ) - - def check(self, path: str, response: Response) -> bool: - """ - Perform analyzing to see if the response is wildcard or not - """ - - if self.response.status != response.status: - return True - - # Read from line 129 to 138 to understand the workflow of this. 
- if self.wildcard_redirect_regex and response.redirect: - # - unquote(): Sometimes, some path characters get encoded or decoded in the response redirect - # but it's still a wildcard redirect, so unquote everything to prevent false positives - # - clean_path(): Get rid of queries and DOM in URL because of weird behaviours could happen - # with them, so messy that I give up on finding a way to test them - path = unquote(clean_path(path)) - redirect = unquote(clean_path(response.redirect)) - regex_to_compare = self.wildcard_redirect_regex.replace( - REFLECTED_PATH_MARKER, re.escape(path) - ) - is_wildcard_redirect = re.match(regex_to_compare, redirect, re.IGNORECASE) - - # If redirection doesn't match the rule, mark as found - if not is_wildcard_redirect: - logger.debug( - f'"{redirect}" doesn\'t match the regular expression "{regex_to_compare}", passing' - ) - return True - - if self._is_wildcard(response): - return False - - return True - - def _get_duplicate(self, response: Response) -> Optional["Scanner"]: - for category in self.tested: - for tester in self.tested[category].values(): - if response == tester.response: - return tester - - return None - - def _is_wildcard(self, response): - """Check if response is similar to wildcard response""" - - # Compare 2 binary responses (Response.content is empty if the body is binary) - if not self.response.content and not response.content: - return self.response.body == response.body - - return self.content_parser.compare_to(response.content) - - @staticmethod - def generate_redirect_regex(first_loc, first_path, second_loc, second_path): - """ - From 2 redirects of wildcard responses, generate a regexp that matches - every wildcard redirect. - - How it works: - 1. Replace path in 2 redirect URLs (if it gets reflected in) with a mark - (e.g. /path1 -> /foo/path1 and /path2 -> /foo/path2 will become /foo/[mark] for both) - 2. Compare 2 redirects and generate a regex that matches both - (e.g. /foo/[mark]?a=1 and /foo/[mark]?a=2 will have the regex: ^/foo/[mark]?a=(.*)$) - 3. Next time if it redirects, replace mark in regex with the path and check if it matches - (e.g. 
/path3 -> /foo/path3?a=5, the regex becomes ^/foo/path3?a=(.*)$, which matches) - """ - - if first_path: - first_loc = unquote(first_loc).replace(first_path, REFLECTED_PATH_MARKER) - if second_path: - second_loc = unquote(second_loc).replace(second_path, REFLECTED_PATH_MARKER) - - return generate_matching_regex(first_loc, second_loc) diff --git a/lib/core/fuzzer.py b/lib/core/fuzzer.py index 5e78ccd37..821daba18 100755 --- a/lib/core/fuzzer.py +++ b/lib/core/fuzzer.py @@ -16,14 +16,19 @@ # # Author: Mauro Soria +import asyncio import re import threading import time +from typing import Callable, Generator +from lib.connection.requester import BaseRequester +from lib.connection.response import BaseResponse from lib.core.data import blacklists, options +from lib.core.dictionary import Dictionary from lib.core.exceptions import RequestException from lib.core.logger import logger -from lib.core.scanner import Scanner +from lib.core.scanner import AsyncScanner, Scanner from lib.core.settings import ( DEFAULT_TEST_PREFIXES, DEFAULT_TEST_SUFFIXES, @@ -34,20 +39,108 @@ from lib.utils.crawl import Crawler -class Fuzzer: - def __init__(self, requester, dictionary, **kwargs): - self._threads = [] +class BaseFuzzer: + def __init__( + self, + requester: BaseRequester, + dictionary: Dictionary, + *, + match_callbacks: tuple[Callable] = (), + not_found_callbacks: tuple[Callable] = (), + error_callbacks: tuple[Callable] = (), + ) -> None: self._scanned = set() self._requester = requester self._dictionary = dictionary + self._base_path = None + self.exc = None + self.match_callbacks = match_callbacks + self.not_found_callbacks = not_found_callbacks + self.error_callbacks = error_callbacks + + def set_base_path(self, path: str) -> None: + self._base_path = path + + def get_scanners_for(self, path: str) -> Generator: + # Clean the path, so can check for extensions/suffixes + path = clean_path(path) + + for prefix in self.scanners["prefixes"]: + if path.startswith(prefix): + yield self.scanners["prefixes"][prefix] + + for suffix in self.scanners["suffixes"]: + if path.endswith(suffix): + yield self.scanners["suffixes"][suffix] + + for scanner in self.scanners["default"].values(): + yield scanner + + def is_excluded(self, resp: BaseResponse) -> bool: + """Validate the response by different filters""" + + if resp.status in options["exclude_status_codes"]: + return True + + if ( + options["include_status_codes"] + and resp.status not in options["include_status_codes"] + ): + return True + + if resp.status in blacklists and any( + resp.path.endswith(lstrip_once(suffix, "/")) + for suffix in blacklists.get(resp.status) + ): + return True + + if human_size(resp.length).rstrip() in options["exclude_sizes"]: + return True + + if resp.length < options["minimum_response_size"]: + return True + + if resp.length > options["maximum_response_size"] > 0: + return True + + if any(text in resp.content for text in options["exclude_texts"]): + return True + + if options["exclude_regex"] and re.search( + options["exclude_regex"], resp.content + ): + return True + + if options["exclude_redirect"] and ( + options["exclude_redirect"] in resp.redirect + or re.search(options["exclude_redirect"], resp.redirect) + ): + return True + + return False + + +class Fuzzer(BaseFuzzer): + def __init__( + self, + requester: BaseRequester, + dictionary: Dictionary, + *, + match_callbacks: tuple[Callable] = (), + not_found_callbacks: tuple[Callable] = (), + error_callbacks: tuple[Callable] = (), + ) -> None: + super().__init__( + requester, + 
dictionary, + match_callbacks=match_callbacks, + not_found_callbacks=not_found_callbacks, + error_callbacks=error_callbacks, + ) + self._threads = [] self._play_event = threading.Event() self._quit_event = threading.Event() self._pause_semaphore = threading.Semaphore(0) - self._base_path = None - self.exc = None - self.match_callbacks = kwargs.get("match_callbacks", []) - self.not_found_callbacks = kwargs.get("not_found_callbacks", []) - self.error_callbacks = kwargs.get("error_callbacks", []) def setup_scanners(self): self.scanners = { @@ -57,10 +150,14 @@ def setup_scanners(self): } # Default scanners (wildcard testers) - self.scanners["default"].update({ - "index": Scanner(self._requester, path=self._base_path), - "random": Scanner(self._requester, path=self._base_path + WILDCARD_TEST_POINT_MARKER), - }) + self.scanners["default"].update( + { + "index": Scanner(self._requester, path=self._base_path), + "random": Scanner( + self._requester, path=self._base_path + WILDCARD_TEST_POINT_MARKER + ), + } + ) if options["exclude_response"]: self.scanners["default"]["custom"] = Scanner( @@ -69,14 +166,16 @@ def setup_scanners(self): for prefix in options["prefixes"] + DEFAULT_TEST_PREFIXES: self.scanners["prefixes"][prefix] = Scanner( - self._requester, tested=self.scanners, + self._requester, + tested=self.scanners, path=f"{self._base_path}{prefix}{WILDCARD_TEST_POINT_MARKER}", context=f"/{self._base_path}{prefix}***", ) for suffix in options["suffixes"] + DEFAULT_TEST_SUFFIXES: self.scanners["suffixes"][suffix] = Scanner( - self._requester, tested=self.scanners, + self._requester, + tested=self.scanners, path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}{suffix}", context=f"/{self._base_path}***{suffix}", ) @@ -84,7 +183,8 @@ def setup_scanners(self): for extension in options["extensions"]: if "." + extension not in self.scanners["suffixes"]: self.scanners["suffixes"]["." 
+ extension] = Scanner( - self._requester, tested=self.scanners, + self._requester, + tested=self.scanners, path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}.{extension}", context=f"/{self._base_path}***.{extension}", ) @@ -98,21 +198,6 @@ def setup_threads(self): new_thread.daemon = True self._threads.append(new_thread) - def get_scanners_for(self, path): - # Clean the path, so can check for extensions/suffixes - path = clean_path(path) - - for prefix in self.scanners["prefixes"]: - if path.startswith(prefix): - yield self.scanners["prefixes"][prefix] - - for suffix in self.scanners["suffixes"]: - if path.endswith(suffix): - yield self.scanners["suffixes"][suffix] - - for scanner in self.scanners["default"].values(): - yield scanner - def start(self): self.setup_scanners() self.setup_threads() @@ -176,59 +261,11 @@ def scan(self, path, scanners): logger.info(f'THREAD-{threading.get_ident()}: crawling "/{path}"') for path_ in Crawler.crawl(response): if self._dictionary.is_valid(path_): - logger.info(f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}') + logger.info( + f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}' + ) self.scan(path_, self.get_scanners_for(path_)) - def is_excluded(self, resp): - """Validate the response by different filters""" - - if resp.status in options["exclude_status_codes"]: - return True - - if ( - options["include_status_codes"] - and resp.status not in options["include_status_codes"] - ): - return True - - if ( - resp.status in blacklists - and any( - resp.path.endswith(lstrip_once(suffix, "/")) - for suffix in blacklists.get(resp.status) - ) - ): - return True - - if human_size(resp.length).rstrip() in options["exclude_sizes"]: - return True - - if resp.length < options["minimum_response_size"]: - return True - - if resp.length > options["maximum_response_size"] > 0: - return True - - if any(text in resp.content for text in options["exclude_texts"]): - return True - - if options["exclude_regex"] and re.search(options["exclude_regex"], resp.content): - return True - - if ( - options["exclude_redirect"] - and ( - options["exclude_redirect"] in resp.redirect - or re.search(options["exclude_redirect"], resp.redirect) - ) - ): - return True - - return False - - def set_base_path(self, path): - self._base_path = path - def thread_proc(self): logger.info(f'THREAD-{threading.get_ident()} started"') @@ -258,3 +295,153 @@ def thread_proc(self): if self._quit_event.is_set(): break + + +class AsyncFuzzer(BaseFuzzer): + def __init__( + self, + requester: BaseRequester, + dictionary: Dictionary, + *, + match_callbacks: tuple[Callable] = (), + not_found_callbacks: tuple[Callable] = (), + error_callbacks: tuple[Callable] = (), + ) -> None: + super().__init__( + requester, + dictionary, + match_callbacks=match_callbacks, + not_found_callbacks=not_found_callbacks, + error_callbacks=error_callbacks, + ) + self._play_event = asyncio.Event() + self._background_tasks = set() + self.sem = asyncio.Semaphore(options["thread_count"]) + + async def setup_scanners(self) -> None: + self.scanners = { + "default": {}, + "prefixes": {}, + "suffixes": {}, + } + + # Default scanners (wildcard testers) + self.scanners["default"].update( + { + "index": await AsyncScanner.create( + self._requester, path=self._base_path + ), + "random": await AsyncScanner.create( + self._requester, path=self._base_path + WILDCARD_TEST_POINT_MARKER + ), + } + ) + + if options["exclude_response"]: + self.scanners["default"]["custom"] = await AsyncScanner.create( + 
self._requester, tested=self.scanners, path=options["exclude_response"] + ) + + for prefix in options["prefixes"] + DEFAULT_TEST_PREFIXES: + self.scanners["prefixes"][prefix] = await AsyncScanner.create( + self._requester, + tested=self.scanners, + path=f"{self._base_path}{prefix}{WILDCARD_TEST_POINT_MARKER}", + context=f"/{self._base_path}{prefix}***", + ) + + for suffix in options["suffixes"] + DEFAULT_TEST_SUFFIXES: + self.scanners["suffixes"][suffix] = await AsyncScanner.create( + self._requester, + tested=self.scanners, + path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}{suffix}", + context=f"/{self._base_path}***{suffix}", + ) + + for extension in options["extensions"]: + if "." + extension not in self.scanners["suffixes"]: + self.scanners["suffixes"]["." + extension] = await AsyncScanner.create( + self._requester, + tested=self.scanners, + path=f"{self._base_path}{WILDCARD_TEST_POINT_MARKER}.{extension}", + context=f"/{self._base_path}***.{extension}", + ) + + async def start(self) -> None: + await self.setup_scanners() + self.play() + + for _ in range(len(self._dictionary)): + task = asyncio.create_task(self.task_proc()) + self._background_tasks.add(task) + task.add_done_callback(self._background_tasks.discard) + + await asyncio.gather(*self._background_tasks, return_exceptions=True) + + def is_finished(self) -> bool: + if self.exc: + raise self.exc + + return len(self._background_tasks) == 0 + + def play(self) -> None: + self._play_event.set() + + def pause(self) -> None: + self._play_event.clear() + + def quit(self) -> None: + for task in self._background_tasks: + task.cancel() + + async def scan(self, path: str, scanners: Generator) -> None: + # Avoid scanned paths from being re-scanned + if path in self._scanned: + return + else: + self._scanned.add(path) + + response = await self._requester.request(path) + + if self.is_excluded(response): + for callback in self.not_found_callbacks: + callback(response) + return + + for tester in scanners: + # Check if the response is unique, not wildcard + if not tester.check(path, response): + for callback in self.not_found_callbacks: + callback(response) + return + + try: + for callback in self.match_callbacks: + callback(response) + except Exception as e: + self.exc = e + + if options["crawl"]: + logger.info(f'THREAD-{threading.get_ident()}: crawling "/{path}"') + for path_ in Crawler.crawl(response): + if self._dictionary.is_valid(path_): + logger.info( + f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}' + ) + await self.scan(path_, self.get_scanners_for(path_)) + + async def task_proc(self) -> None: + async with self.sem: + await self._play_event.wait() + + try: + path = next(self._dictionary) + scanners = self.get_scanners_for(path) + await self.scan(self._base_path + path, scanners) + except StopIteration: + pass + except RequestException as e: + for callback in self.error_callbacks: + callback(e) + finally: + await asyncio.sleep(options["delay"]) diff --git a/lib/core/scanner.py b/lib/core/scanner.py index 82ade5e9e..ba25c81da 100755 --- a/lib/core/scanner.py +++ b/lib/core/scanner.py @@ -16,11 +16,14 @@ # # Author: Mauro Soria +import asyncio import re import time - +from typing import Optional from urllib.parse import unquote +from lib.connection.requester import AsyncRequester, BaseRequester, Requester +from lib.connection.response import BaseResponse from lib.core.data import options from lib.core.logger import logger from lib.core.settings import ( @@ -29,80 +32,26 @@ WILDCARD_TEST_POINT_MARKER, ) from 
lib.parse.url import clean_path -from lib.utils.diff import generate_matching_regex, DynamicContentParser +from lib.utils.diff import DynamicContentParser, generate_matching_regex from lib.utils.random import rand_string -class Scanner: - def __init__(self, requester, **kwargs): - self.path = kwargs.get("path", "") - self.tested = kwargs.get("tested", []) - self.context = kwargs.get("context", "all cases") +class BaseScanner: + def __init__( + self, + requester: BaseRequester, + path: str = "", + tested: dict = {}, + context: str = "all cases", + ) -> None: + self.path = path + self.tested = tested + self.context = context self.requester = requester self.response = None self.wildcard_redirect_regex = None - self.setup() - - def setup(self): - """ - Generate wildcard response information containers, this will be - used to compare with other path responses - """ - - first_path = self.path.replace( - WILDCARD_TEST_POINT_MARKER, - rand_string(TEST_PATH_LENGTH), - ) - first_response = self.requester.request(first_path) - self.response = first_response - time.sleep(options["delay"]) - - duplicate = self.get_duplicate(first_response) - # Another test was performed before and has the same response as this - if duplicate: - self.content_parser = duplicate.content_parser - self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex - logger.debug(f'Skipped the second test for "{self.context}"') - return - - second_path = self.path.replace( - WILDCARD_TEST_POINT_MARKER, - rand_string(TEST_PATH_LENGTH, omit=first_path), - ) - second_response = self.requester.request(second_path) - time.sleep(options["delay"]) - - if first_response.redirect and second_response.redirect: - self.wildcard_redirect_regex = self.generate_redirect_regex( - clean_path(first_response.redirect), - first_path, - clean_path(second_response.redirect), - second_path, - ) - logger.debug(f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}') - - self.content_parser = DynamicContentParser( - first_response.content, second_response.content - ) - - def get_duplicate(self, response): - for category in self.tested: - for tester in self.tested[category].values(): - if response == tester.response: - return tester - - return None - def is_wildcard(self, response): - """Check if response is similar to wildcard response""" - - # Compare 2 binary responses (Response.content is empty if the body is binary) - if not self.response.content and not response.content: - return self.response.body == response.body - - return self.content_parser.compare_to(response.content) - - def check(self, path, response): + def check(self, path: str, response: BaseResponse) -> bool: """ Perform analyzing to see if the response is wildcard or not """ @@ -125,7 +74,9 @@ def check(self, path, response): # If redirection doesn't match the rule, mark as found if not is_wildcard_redirect: - logger.debug(f'"{redirect}" doesn\'t match the regular expression "{regex_to_compare}", passing') + logger.debug( + f'"{redirect}" doesn\'t match the regular expression "{regex_to_compare}", passing' + ) return True if self.is_wildcard(response): @@ -133,6 +84,23 @@ def check(self, path, response): return True + def get_duplicate(self, response: BaseResponse) -> Optional["BaseScanner"]: + for category in self.tested: + for tester in self.tested[category].values(): + if response == tester.response: + return tester + + return None + + def is_wildcard(self, response): + """Check if response is similar to wildcard response""" + + # 
Compare 2 binary responses (Response.content is empty if the body is binary) + if not self.response.content and not response.content: + return self.response.body == response.body + + return self.content_parser.compare_to(response.content) + @staticmethod def generate_redirect_regex(first_loc, first_path, second_loc, second_path): """ @@ -154,3 +122,127 @@ def generate_redirect_regex(first_loc, first_path, second_loc, second_path): second_loc = unquote(second_loc).replace(second_path, REFLECTED_PATH_MARKER) return generate_matching_regex(first_loc, second_loc) + + +class Scanner(BaseScanner): + def __init__( + self, + requester: Requester, + path: str = "", + tested: dict = {}, + context: str = "all cases", + ) -> None: + super().__init__(requester, path, tested, context) + self.setup() + + def setup(self): + """ + Generate wildcard response information containers, this will be + used to compare with other path responses + """ + + first_path = self.path.replace( + WILDCARD_TEST_POINT_MARKER, + rand_string(TEST_PATH_LENGTH), + ) + first_response = self.requester.request(first_path) + self.response = first_response + time.sleep(options["delay"]) + + duplicate = self.get_duplicate(first_response) + # Another test was performed before and has the same response as this + if duplicate: + self.content_parser = duplicate.content_parser + self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex + logger.debug(f'Skipped the second test for "{self.context}"') + return + + second_path = self.path.replace( + WILDCARD_TEST_POINT_MARKER, + rand_string(TEST_PATH_LENGTH, omit=first_path), + ) + second_response = self.requester.request(second_path) + time.sleep(options["delay"]) + + if first_response.redirect and second_response.redirect: + self.wildcard_redirect_regex = self.generate_redirect_regex( + clean_path(first_response.redirect), + first_path, + clean_path(second_response.redirect), + second_path, + ) + logger.debug( + f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}' + ) + + self.content_parser = DynamicContentParser( + first_response.content, second_response.content + ) + + +class AsyncScanner(BaseScanner): + def __init__( + self, + requester: AsyncRequester, + path: str = "", + tested: dict = {}, + context: str = "all cases", + ) -> None: + super().__init__(requester, path, tested, context) + + @classmethod + async def create( + cls, + requester: AsyncRequester, + *, + path: str = "", + tested: dict = {}, + context: str = "all cases", + ) -> "Scanner": + self = cls(requester, path=path, tested=tested, context=context) + await self.setup() + return self + + async def setup(self) -> None: + """ + Generate wildcard response information containers, this will be + used to compare with other path responses + """ + + first_path = self.path.replace( + WILDCARD_TEST_POINT_MARKER, + rand_string(TEST_PATH_LENGTH), + ) + first_response = await self.requester.request(first_path) + self.response = first_response + await asyncio.sleep(options["delay"]) + + duplicate = self.get_duplicate(first_response) + # Another test was performed before and has the same response as this + if duplicate: + self.content_parser = duplicate.content_parser + self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex + logger.debug(f'Skipped the second test for "{self.context}"') + return + + second_path = self.path.replace( + WILDCARD_TEST_POINT_MARKER, + rand_string(TEST_PATH_LENGTH, omit=first_path), + ) + second_response = await self.requester.request(second_path) 
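+        # NOTE: two random paths are sampled so that DynamicContentParser below
+        # can tell the static parts of the wildcard response apart from the
+        # parts that change between requests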
+ await asyncio.sleep(options["delay"]) + + if first_response.redirect and second_response.redirect: + self.wildcard_redirect_regex = self.generate_redirect_regex( + clean_path(first_response.redirect), + first_path, + clean_path(second_response.redirect), + second_path, + ) + logger.debug( + f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}' + ) + + self.content_parser = DynamicContentParser( + first_response.content, second_response.content + ) From ee94fa1c5b226b9be5279daf7dc1d92aea1f4b19 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Sun, 8 Sep 2024 00:24:32 +0800 Subject: [PATCH 04/19] add httpx to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0d30a9117..92f77bf94 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,4 @@ mysql-connector-python>=8.0.20 psycopg[binary]>=3.0 requests-toolbelt>=1.0.0 setuptools>=66.0.0 +httpx>=0.27.2 From f01725bf70d679a5835f2920a74546c2383893ef Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Sun, 8 Sep 2024 11:07:45 +0800 Subject: [PATCH 05/19] support HTTP NTLM authentication in httpx --- lib/connection/requester.py | 3 ++- requirements.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/connection/requester.py b/lib/connection/requester.py index c768c4d79..b7aee41ac 100755 --- a/lib/connection/requester.py +++ b/lib/connection/requester.py @@ -30,6 +30,7 @@ from requests.auth import AuthBase, HTTPBasicAuth, HTTPDigestAuth from requests.packages import urllib3 from requests_ntlm import HttpNtlmAuth +from httpx_ntlm import HttpNtlmAuth as HttpxNtlmAuth from requests_toolbelt.adapters.socket_options import SocketOptionsAdapter from lib.connection.dns import cached_getaddrinfo @@ -304,7 +305,7 @@ def set_auth(self, type: str, credential: str) -> None: elif type == "digest": self.session.auth = httpx.DigestAuth(user, password) else: - pass # TODO: HttpNtlmAuth + self.session.auth = HttpxNtlmAuth(user, password) # :path: is expected not to start with "/" async def request(self, path: str, proxy: str = None) -> AsyncResponse: diff --git a/requirements.txt b/requirements.txt index 92f77bf94..be90c2aef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ psycopg[binary]>=3.0 requests-toolbelt>=1.0.0 setuptools>=66.0.0 httpx>=0.27.2 +httpx-ntlm>=1.4.0 From 01ff69c31f8110d418efc0c6add79c8f030957fe Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Sun, 8 Sep 2024 11:23:00 +0800 Subject: [PATCH 06/19] update change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9460881a..4677bafa5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Support non-default network interface - Remove unused dependencies (urllib3, cryptography, cffi, idna, chardet) - Load targets from a Nmap XML report +- Add `--async` option to enable asynchronous mode (use coroutines instead of threads) ## [0.4.3] - October 2nd, 2022 - Automatically detect the URI scheme (`http` or `https`) if no scheme is provided
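What the new option means in practice: instead of one operating-system thread per in-flight request, asynchronous mode runs coroutines on a single event loop and bounds concurrency with a semaphore. A minimal, self-contained sketch of that pattern (illustrative only: the host, the paths, and the limit of 10 are invented here; dirsearch's real logic lives in AsyncFuzzer and AsyncRequester, not in this snippet):

import asyncio

import httpx


async def probe(client: httpx.AsyncClient, sem: asyncio.Semaphore, path: str) -> None:
    # The semaphore caps in-flight requests, playing the role of thread_count
    async with sem:
        response = await client.get(f"https://example.com/{path}")
        print(path, response.status_code)


async def main() -> None:
    sem = asyncio.Semaphore(10)
    async with httpx.AsyncClient() as client:
        # One lightweight task per wordlist entry, all sharing a single session
        await asyncio.gather(*(probe(client, sem, p) for p in ("admin", "backup", "login")))


asyncio.run(main())

From 96ed5d8e436d85bd58c2493c1867c744e1442ff7 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Sun, 8 Sep 2024 13:23:57 +0800 Subject: [PATCH 07/19] add AsyncRequester's increase_rate --- lib/connection/requester.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/connection/requester.py b/lib/connection/requester.py index b7aee41ac..6606179d8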
100755 --- a/lib/connection/requester.py +++ b/lib/connection/requester.py @@ -335,7 +335,6 @@ async def request(self, path: str, proxy: str = None) -> AsyncResponse: # Use "target" extension to avoid the URL path from being normalized request = self.session.build_request( options["http_method"], - # url.removesuffix(parsed_url.path), url, headers=self.headers, data=options["data"], @@ -395,3 +394,7 @@ async def request(self, path: str, proxy: str = None) -> AsyncResponse: err_msg = f"There was a problem in the request to: {url}" raise RequestException(err_msg) + + def increase_rate(self) -> None: + self._rate += 1 + asyncio.get_running_loop().call_later(1, self.decrease_rate) From 558f959d9e789bba00c88772391a6d9e773ccf88 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Sun, 8 Sep 2024 14:17:46 +0800 Subject: [PATCH 08/19] fix bugs --- lib/connection/requester.py | 33 +++++++++++++++++++++++++-------- lib/controller/controller.py | 25 +++++++++++++++---------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/lib/connection/requester.py b/lib/connection/requester.py index 6606179d8..4aff0ed78 100755 --- a/lib/connection/requester.py +++ b/lib/connection/requester.py @@ -277,19 +277,39 @@ class AsyncRequester(BaseRequester): def __init__(self): super().__init__() + proxy = None + try: + proxy = self.parse_proxy(random.choice(options["proxies"])) + except IndexError: + pass + transport = httpx.AsyncHTTPTransport( - # FIXME: max_connections != thread_count + verify=False, + cert=self._cert, limits=httpx.Limits(max_connections=options["thread_count"]), + # FIXME: proxy will not change when retry request + proxy=proxy, socket_options=self._socket_options, ) self.session = httpx.AsyncClient( - verify=False, - cert=self._cert, mounts={"http://": transport, "https://": transport}, timeout=httpx.Timeout(options["timeout"]), ) + def parse_proxy(self, proxy: str): + if not proxy: + return None + + if not proxy.startswith(PROXY_SCHEMES): + proxy = f"http://{proxy}" + + if self._proxy_cred and "@" not in proxy: + # socks5://localhost:9050 => socks5://[credential]@localhost:9050 + proxy = proxy.replace("://", f"://{self._proxy_cred}@", 1) + + return proxy + def set_auth(self, type: str, credential: str) -> None: if type in ("bearer", "jwt"): self.session.auth = HTTPXBearerAuth(credential) @@ -323,11 +343,8 @@ async def request(self, path: str, proxy: str = None) -> AsyncResponse: # Why using a loop instead of max_retries argument? 
Check issue #1009 for _ in range(options["max_retries"] + 1): try: - try: - proxy = proxy or random.choice(options["proxies"]) - self.set_proxy(proxy) - except IndexError: - pass + # FIXME: set proxy here is not work + # https://github.com/encode/httpx/discussions/3183 if self.agents: self.set_header("user-agent", random.choice(self.agents)) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index b6b4802e0..37e41da03 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -305,20 +305,20 @@ def start(self): async def _start_coroutines(self): task = self.loop.create_task(self.fuzzer.start()) - done, _ = await asyncio.wait( - [self.done_future, task], - timeout=options["max_time"] if options["max_time"] > 0 else None, - return_when=asyncio.FIRST_COMPLETED, - ) + max_time = options["max_time"] if options["max_time"] > 0 else None + try: + async with asyncio.timeout(max_time): + await asyncio.wait( + [self.done_future, task], + return_when=asyncio.FIRST_COMPLETED, + ) + except asyncio.TimeoutError: + raise SkipTargetInterrupt("Runtime exceeded the maximum set by the user") if self.done_future.done(): task.cancel() await self.done_future # propagate the exception, if raised - # TODO: find a better way to catch TimeoutError - if len(done) == 0: - raise SkipTargetInterrupt("Runtime exceeded the maximum set by the user") - def set_target(self, url): # If no scheme specified, unset it first if "://" not in url: @@ -497,7 +497,12 @@ def match_callback(self, response): if options["replay_proxy"]: # Replay the request with new proxy - self.requester.request(response.full_path, proxy=options["replay_proxy"]) + if options["async_mode"]: + # FIXME: httpx does not currently allow setting proxies per-request + # self.loop.create_task(self.requester.request(response.full_path, proxy=options["replay_proxy"])) + pass + else: + self.requester.request(response.full_path, proxy=options["replay_proxy"]) if self.report: self.results.append(response) From d8522ad1c4d4b447059992d4ac0e848db4e8b871 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Tue, 10 Sep 2024 09:48:45 +0800 Subject: [PATCH 09/19] work with python3.9 --- lib/controller/controller.py | 8 +++++--- lib/core/fuzzer.py | 26 ++++++++++++++------------ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 37e41da03..efe49050f 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -307,11 +307,13 @@ async def _start_coroutines(self): task = self.loop.create_task(self.fuzzer.start()) max_time = options["max_time"] if options["max_time"] > 0 else None try: - async with asyncio.timeout(max_time): - await asyncio.wait( + await asyncio.wait_for( + asyncio.wait( [self.done_future, task], return_when=asyncio.FIRST_COMPLETED, - ) + ), + timeout=max_time, + ) except asyncio.TimeoutError: raise SkipTargetInterrupt("Runtime exceeded the maximum set by the user") diff --git a/lib/core/fuzzer.py b/lib/core/fuzzer.py index 821daba18..f692726f2 100755 --- a/lib/core/fuzzer.py +++ b/lib/core/fuzzer.py @@ -20,7 +20,7 @@ import re import threading import time -from typing import Callable, Generator +from typing import Callable, Generator, Tuple from lib.connection.requester import BaseRequester from lib.connection.response import BaseResponse @@ -45,9 +45,9 @@ def __init__( requester: BaseRequester, dictionary: Dictionary, *, - match_callbacks: tuple[Callable] = (), - not_found_callbacks: tuple[Callable] = (), 
- error_callbacks: tuple[Callable] = (), + match_callbacks: Tuple[Callable] = (), + not_found_callbacks: Tuple[Callable] = (), + error_callbacks: Tuple[Callable] = (), ) -> None: self._scanned = set() self._requester = requester @@ -126,9 +126,9 @@ def __init__( requester: BaseRequester, dictionary: Dictionary, *, - match_callbacks: tuple[Callable] = (), - not_found_callbacks: tuple[Callable] = (), - error_callbacks: tuple[Callable] = (), + match_callbacks: Tuple[Callable] = (), + not_found_callbacks: Tuple[Callable] = (), + error_callbacks: Tuple[Callable] = (), ) -> None: super().__init__( requester, @@ -303,9 +303,9 @@ def __init__( requester: BaseRequester, dictionary: Dictionary, *, - match_callbacks: tuple[Callable] = (), - not_found_callbacks: tuple[Callable] = (), - error_callbacks: tuple[Callable] = (), + match_callbacks: Tuple[Callable] = (), + not_found_callbacks: Tuple[Callable] = (), + error_callbacks: Tuple[Callable] = (), ) -> None: super().__init__( requester, @@ -316,7 +316,6 @@ def __init__( ) self._play_event = asyncio.Event() self._background_tasks = set() - self.sem = asyncio.Semaphore(options["thread_count"]) async def setup_scanners(self) -> None: self.scanners = { @@ -368,6 +367,9 @@ async def setup_scanners(self) -> None: ) async def start(self) -> None: + # In Python 3.9, initialize the Semaphore within the coroutine + # to avoid binding to a different event loop. + self.sem = asyncio.Semaphore(options["thread_count"]) await self.setup_scanners() self.play() @@ -376,7 +378,7 @@ async def start(self) -> None: self._background_tasks.add(task) task.add_done_callback(self._background_tasks.discard) - await asyncio.gather(*self._background_tasks, return_exceptions=True) + await asyncio.gather(*self._background_tasks) def is_finished(self) -> bool: if self.exc: From ab630e8fd437dc3ea27f0743f431d978741002f2 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Tue, 10 Sep 2024 11:24:23 +0800 Subject: [PATCH 10/19] lint --- lib/controller/controller.py | 3 +-- lib/parse/cmdline.py | 1 - lib/parse/nmap.py | 3 ++- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index efe49050f..76531fba9 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -39,7 +39,6 @@ QuitInterrupt, UnpicklingError, ) -from lib.core.fuzzer import Fuzzer from lib.core.logger import enable_logging, logger from lib.core.settings import ( BANNER, @@ -174,7 +173,7 @@ def setup(self): ) exit(1) - if options["autosave_report"] and not options["output"] : + if options["autosave_report"] and not options["output"]: self.report_path = options["output_path"] or FileUtils.build_path( SCRIPT_PATH, "reports" ) diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index 0f38ce39e..5de7bab3e 100755 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -459,7 +459,6 @@ def parse_arguments(): connection.add_option("--ip", action="store", dest="ip", help="Server IP address") connection.add_option("--interface", action="store", dest="network_interface", help="Network interface to use") - # Advanced Settings advanced = OptionGroup(parser, "Advanced Settings") advanced.add_option( diff --git a/lib/parse/nmap.py b/lib/parse/nmap.py index 7ff9809ec..b1223fc27 100644 --- a/lib/parse/nmap.py +++ b/lib/parse/nmap.py @@ -1,5 +1,6 @@ import xml.etree.ElementTree as ET + def parse_nmap(file): root = ET.parse(file).getroot() targets = [] @@ -12,7 +13,7 @@ def parse_nmap(file): f"{hostname}:{port.get('portid')}" for 
port in host.find("ports").iter("port") if ( - port.get("protocol") == "tcp" # UDP is not used in HTTP because it is not a "reliable transport" + port.get("protocol") == "tcp" # UDP is not used in HTTP because it is not a "reliable transport" and port.find("state").get("state") == "open" and port.find("service").get("name") in ["http", "unknown"] ) From 70f3496c4c792c16667a47d7401d2404e46d9c87 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Tue, 10 Sep 2024 14:25:02 +0800 Subject: [PATCH 11/19] fix exception propagation --- lib/controller/controller.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 76531fba9..1d5dd07ee 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -304,14 +304,14 @@ def start(self): async def _start_coroutines(self): task = self.loop.create_task(self.fuzzer.start()) - max_time = options["max_time"] if options["max_time"] > 0 else None + try: await asyncio.wait_for( asyncio.wait( [self.done_future, task], return_when=asyncio.FIRST_COMPLETED, ), - timeout=max_time, + timeout=options["max_time"] if options["max_time"] > 0 else None, ) except asyncio.TimeoutError: raise SkipTargetInterrupt("Runtime exceeded the maximum set by the user") @@ -320,6 +320,8 @@ async def _start_coroutines(self): task.cancel() await self.done_future # propagate the exception, if raised + await task # propagate the exception, if raised + def set_target(self, url): # If no scheme specified, unset it first if "://" not in url: From 0f91c8e331f785ea21635ce3e1c96cffa596a5fe Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Thu, 12 Sep 2024 18:13:53 +0800 Subject: [PATCH 12/19] clean and style --- lib/connection/requester.py | 136 ++++++++++++++++------------------- lib/controller/controller.py | 23 +++--- lib/core/fuzzer.py | 30 ++++---- 3 files changed, 85 insertions(+), 104 deletions(-) diff --git a/lib/connection/requester.py b/lib/connection/requester.py index 4aff0ed78..81dc2adde 100755 --- a/lib/connection/requester.py +++ b/lib/connection/requester.py @@ -23,6 +23,7 @@ import socket import threading import time +from typing import Generator from urllib.parse import urlparse import httpx @@ -69,7 +70,7 @@ class HTTPXBearerAuth(httpx.Auth): def __init__(self, token: str) -> None: self.token = token - def auth_flow(self, request: httpx.Request) -> any: + def auth_flow(self, request: httpx.Request) -> Generator: request.headers["Authorization"] = f"Bearer {self.token}" yield request @@ -287,17 +288,19 @@ def __init__(self): verify=False, cert=self._cert, limits=httpx.Limits(max_connections=options["thread_count"]), - # FIXME: proxy will not change when retry request + # httpx doesn't let you choose different proxy for each request + # https://github.com/encode/httpx/discussions/3183 proxy=proxy, + retries=options["max_retries"], socket_options=self._socket_options, ) self.session = httpx.AsyncClient( - mounts={"http://": transport, "https://": transport}, + mounts={"all://": transport}, timeout=httpx.Timeout(options["timeout"]), ) - def parse_proxy(self, proxy: str): + def parse_proxy(self, proxy: str) -> str: if not proxy: return None @@ -328,7 +331,7 @@ def set_auth(self, type: str, credential: str) -> None: self.session.auth = HttpxNtlmAuth(user, password) # :path: is expected not to start with "/" - async def request(self, path: str, proxy: str = None) -> AsyncResponse: + async def request(self, path: str) -> AsyncResponse: while 
self.is_rate_exceeded(): await asyncio.sleep(0.1) @@ -340,75 +343,62 @@ async def request(self, path: str, proxy: str = None) -> AsyncResponse: url = safequote(self._url + path if self._url else path) parsed_url = urlparse(url) - # Why using a loop instead of max_retries argument? Check issue #1009 - for _ in range(options["max_retries"] + 1): - try: - # FIXME: set proxy here is not work - # https://github.com/encode/httpx/discussions/3183 - - if self.agents: - self.set_header("user-agent", random.choice(self.agents)) - - # Use "target" extension to avoid the URL path from being normalized - request = self.session.build_request( - options["http_method"], - url, - headers=self.headers, - data=options["data"], - ) - if p := parsed_url.path: - request.extensions = {"target": p.encode()} - - xresponse = await self.session.send( - request, - stream=True, - follow_redirects=options["follow_redirects"], - ) - response = await AsyncResponse.create(xresponse) - await xresponse.aclose() - - log_msg = f'"{options["http_method"]} {response.url}" {response.status} - {response.length}B' - - if response.redirect: - log_msg += f" - LOCATION: {response.redirect}" - - logger.info(log_msg) - - return response - - except Exception as e: - logger.exception(e) + try: + if self.agents: + self.set_header("user-agent", random.choice(self.agents)) + + # Use "target" extension to avoid the URL path from being normalized + request = self.session.build_request( + options["http_method"], + url, + headers=self.headers, + data=options["data"], + ) + if p := parsed_url.path: + request.extensions = {"target": p.encode()} - if e == socket.gaierror: - err_msg = "Couldn't resolve DNS" - elif "SSLError" in str(e): - err_msg = "Unexpected SSL error" - elif "TooManyRedirects" in str(e): - err_msg = f"Too many redirects: {url}" - elif "ProxyError" in str(e): - if proxy: - err_msg = f"Error with the proxy: {proxy}" - else: - err_msg = "Error with the system proxy" - # Prevent from re-using it in the future - if proxy in options["proxies"] and len(options["proxies"]) > 1: - options["proxies"].remove(proxy) - elif "InvalidURL" in str(e): - err_msg = f"Invalid URL: {url}" - elif "InvalidProxyURL" in str(e): - err_msg = f"Invalid proxy URL: {proxy}" - elif "ConnectionError" in str(e): - err_msg = f"Cannot connect to: {urlparse(url).netloc}" - elif re.search(READ_RESPONSE_ERROR_REGEX, str(e)): - err_msg = f"Failed to read response body: {url}" - elif "Timeout" in str(e) or e in ( - httpx.ConnectTimeout, - httpx.ReadTimeout, - socket.timeout, - ): - err_msg = f"Request timeout: {url}" - else: - err_msg = f"There was a problem in the request to: {url}" + xresponse = await self.session.send( + request, + stream=True, + follow_redirects=options["follow_redirects"], + ) + response = await AsyncResponse.create(xresponse) + await xresponse.aclose() + + log_msg = f'"{options["http_method"]} {response.url}" {response.status} - {response.length}B' + + if response.redirect: + log_msg += f" - LOCATION: {response.redirect}" + + logger.info(log_msg) + + return response + + except Exception as e: + logger.exception(e) + + if e == socket.gaierror: + err_msg = "Couldn't resolve DNS" + elif "SSLError" in str(e): + err_msg = "Unexpected SSL error" + elif "TooManyRedirects" in str(e): + err_msg = f"Too many redirects: {url}" + elif "ProxyError" in str(e): + err_msg = "Error with the system proxy" + elif "InvalidURL" in str(e): + err_msg = f"Invalid URL: {url}" + elif "ConnectionError" in str(e): + err_msg = f"Cannot connect to: {urlparse(url).netloc}" + 
elif re.search(READ_RESPONSE_ERROR_REGEX, str(e)): + err_msg = f"Failed to read response body: {url}" + elif "Timeout" in str(e) or e in ( + httpx.ConnectTimeout, + httpx.ReadTimeout, + socket.timeout, + ): + err_msg = f"Request timeout: {url}" + else: + err_msg = f"There was a problem in the request to: {url}" raise RequestException(err_msg) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 1d5dd07ee..fd98ffda2 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -286,7 +286,7 @@ def start(self): if options["async_mode"]: # use a future to get exceptions from handle_pause # https://stackoverflow.com/a/64230941 - self.done_future = self.loop.create_future() + self.pause_future = self.loop.create_future() self.loop.run_until_complete(self._start_coroutines()) else: self.fuzzer.start() @@ -308,7 +308,7 @@ async def _start_coroutines(self): try: await asyncio.wait_for( asyncio.wait( - [self.done_future, task], + [self.pause_future, task], return_when=asyncio.FIRST_COMPLETED, ), timeout=options["max_time"] if options["max_time"] > 0 else None, @@ -316,9 +316,9 @@ async def _start_coroutines(self): except asyncio.TimeoutError: raise SkipTargetInterrupt("Runtime exceeded the maximum set by the user") - if self.done_future.done(): + if self.pause_future.done(): task.cancel() - await self.done_future # propagate the exception, if raised + await self.pause_future # propagate the exception, if raised await task # propagate the exception, if raised @@ -498,14 +498,9 @@ def match_callback(self, response): if added_to_queue: interface.new_directories(added_to_queue) - if options["replay_proxy"]: + if options["replay_proxy"] and not options["async_mode"]: # Replay the request with new proxy - if options["async_mode"]: - # FIXME: httpx does not currently allow setting proxies per-request - # self.loop.create_task(self.requester.request(response.full_path, proxy=options["replay_proxy"])) - pass - else: - self.requester.request(response.full_path, proxy=options["replay_proxy"]) + self.requester.request(response.full_path, proxy=options["replay_proxy"]) if self.report: self.results.append(response) @@ -579,14 +574,14 @@ def handle_pause(self): self._export(session_file) quitexc = QuitInterrupt(f"Session saved to: {session_file}") if options["async_mode"]: - self.done_future.set_exception(quitexc) + self.pause_future.set_exception(quitexc) break else: raise quitexc elif option.lower() == "q": quitexc = QuitInterrupt("Canceled by the user") if options["async_mode"]: - self.done_future.set_exception(quitexc) + self.pause_future.set_exception(quitexc) break else: raise quitexc @@ -602,7 +597,7 @@ def handle_pause(self): elif option.lower() == "s" and len(options["urls"]) > 1: skipexc = SkipTargetInterrupt("Target skipped by the user") if options["async_mode"]: - self.done_future.set_exception(skipexc) + self.pause_future.set_exception(skipexc) break else: raise skipexc diff --git a/lib/core/fuzzer.py b/lib/core/fuzzer.py index f692726f2..29a1519fe 100755 --- a/lib/core/fuzzer.py +++ b/lib/core/fuzzer.py @@ -28,7 +28,7 @@ from lib.core.dictionary import Dictionary from lib.core.exceptions import RequestException from lib.core.logger import logger -from lib.core.scanner import AsyncScanner, Scanner +from lib.core.scanner import AsyncScanner, BaseScanner, Scanner from lib.core.settings import ( DEFAULT_TEST_PREFIXES, DEFAULT_TEST_SUFFIXES, @@ -58,10 +58,16 @@ def __init__( self.not_found_callbacks = not_found_callbacks self.error_callbacks = error_callbacks + 
self.scanners = { + "default": {}, + "prefixes": {}, + "suffixes": {}, + } + def set_base_path(self, path: str) -> None: self._base_path = path - def get_scanners_for(self, path: str) -> Generator: + def get_scanners_for(self, path: str) -> Generator[BaseScanner, None, None]: # Clean the path, so can check for extensions/suffixes path = clean_path(path) @@ -76,7 +82,8 @@ def get_scanners_for(self, path: str) -> Generator: for scanner in self.scanners["default"].values(): yield scanner - def is_excluded(self, resp: BaseResponse) -> bool: + @staticmethod + def is_excluded(resp: BaseResponse) -> bool: """Validate the response by different filters""" if resp.status in options["exclude_status_codes"]: @@ -143,12 +150,6 @@ def __init__( self._pause_semaphore = threading.Semaphore(0) def setup_scanners(self): - self.scanners = { - "default": {}, - "prefixes": {}, - "suffixes": {}, - } - # Default scanners (wildcard testers) self.scanners["default"].update( { @@ -318,12 +319,6 @@ def __init__( self._background_tasks = set() async def setup_scanners(self) -> None: - self.scanners = { - "default": {}, - "prefixes": {}, - "suffixes": {}, - } - # Default scanners (wildcard testers) self.scanners["default"].update( { @@ -424,11 +419,12 @@ async def scan(self, path: str, scanners: Generator) -> None: self.exc = e if options["crawl"]: - logger.info(f'THREAD-{threading.get_ident()}: crawling "/{path}"') + task = asyncio.current_task() + logger.info(f'{task.get_name()}: crawling "/{path}"') for path_ in Crawler.crawl(response): if self._dictionary.is_valid(path_): logger.info( - f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}' + f'{task.get_name()}: found new path "/{path_}" in /{path}' ) await self.scan(path_, self.get_scanners_for(path_)) From 50a25ad6fa15c19286f390f7abdded8db77868ef Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Fri, 13 Sep 2024 10:05:37 +0800 Subject: [PATCH 13/19] handle exceptions --- lib/connection/requester.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/lib/connection/requester.py b/lib/connection/requester.py index 81dc2adde..260cd2201 100755 --- a/lib/connection/requester.py +++ b/lib/connection/requester.py @@ -21,6 +21,7 @@ import random import re import socket +from ssl import SSLError import threading import time from typing import Generator @@ -377,26 +378,23 @@ async def request(self, path: str) -> AsyncResponse: except Exception as e: logger.exception(e) - if e == socket.gaierror: - err_msg = "Couldn't resolve DNS" - elif "SSLError" in str(e): + if isinstance(e, httpx.ConnectError): + if str(e).startswith("[Errno -2]"): + err_msg = "Couldn't resolve DNS" + else: + err_msg = f"Cannot connect to: {urlparse(url).netloc}" + elif isinstance(e, SSLError): err_msg = "Unexpected SSL error" - elif "TooManyRedirects" in str(e): + elif isinstance(e, httpx.TooManyRedirects): err_msg = f"Too many redirects: {url}" - elif "ProxyError" in str(e): - err_msg = "Error with the system proxy" - elif "InvalidURL" in str(e): + elif isinstance(e, httpx.ProxyError): + err_msg = "Cannot establish the proxy connection" + elif isinstance(e, httpx.InvalidURL): err_msg = f"Invalid URL: {url}" - elif "ConnectionError" in str(e): - err_msg = f"Cannot connect to: {urlparse(url).netloc}" - elif re.search(READ_RESPONSE_ERROR_REGEX, str(e)): - err_msg = f"Failed to read response body: {url}" - elif "Timeout" in str(e) or e in ( - httpx.ConnectTimeout, - httpx.ReadTimeout, - socket.timeout, - ): + elif 
isinstance(e, httpx.TimeoutException): err_msg = f"Request timeout: {url}" + elif isinstance(e, httpx.ReadError) or isinstance(e, httpx.DecodingError): # not sure this catches every read failure + err_msg = f"Failed to read response body: {url}" else: err_msg = f"There was a problem in the request to: {url}" From 115de35325ae684b1f628d2ee4512ae9d1f3ec86 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Fri, 13 Sep 2024 10:25:20 +0800 Subject: [PATCH 14/19] requires Python 3.8+ --- .github/workflows/ci.yml | 2 +- README.md | 2 +- dirsearch.py | 4 ++-- setup.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39cc9a1ae..8551da363 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.9] + python-version: [3.8, 3.9] os: ['ubuntu-latest', 'windows-latest'] steps: diff --git a/README.md b/README.md index c8d3b4a74..a21a3879e 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Table of Contents Installation & Usage ------------ -**Requirement: python 3.7 or higher** +**Requirement: python 3.8 or higher** Choose one of these installation options: diff --git a/dirsearch.py b/dirsearch.py index 60cef5161..1b0a6f4b7 100755 --- a/dirsearch.py +++ b/dirsearch.py @@ -28,8 +28,8 @@ from lib.core.settings import OPTIONS_FILE from lib.parse.config import ConfigParser -if sys.version_info < (3, 7): - sys.stderr.write("Sorry, dirsearch requires Python 3.7 or higher\n") +if sys.version_info < (3, 8): + sys.stderr.write("Sorry, dirsearch requires Python 3.8 or higher\n") sys.exit(1) diff --git a/setup.py b/setup.py index b0941d64e..bc7cfc127 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ entry_points={"console_scripts": ["dirsearch=dirsearch.dirsearch:main"]}, package_data={"dirsearch": ["*", "db/*"]}, include_package_data=True, - python_requires=">=3.7", + python_requires=">=3.8", install_requires=get_dependencies(), classifiers=[ "Programming Language :: Python", @@ -39,7 +39,7 @@ "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", "Operating System :: OS Independent", "Topic :: Security", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", ], keywords=["infosec", "bug bounty", "pentesting", "security"], ) From 6888a358c7aea0fe44474e7ea253ba20e97ab53b Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Fri, 13 Sep 2024 14:37:48 +0800 Subject: [PATCH 15/19] silence pkg_resources deprecation warnings --- dirsearch.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dirsearch.py b/dirsearch.py index 1b0a6f4b7..05af81c1f 100755 --- a/dirsearch.py +++ b/dirsearch.py @@ -19,8 +19,7 @@ # Author: Mauro Soria import sys - -from pkg_resources import DistributionNotFound, VersionConflict +import warnings from lib.core.data import options from lib.core.exceptions import FailedDependenciesInstallation @@ -32,6 +31,10 @@ sys.stderr.write("Sorry, dirsearch requires Python 3.8 or higher\n") sys.exit(1) +# silence pkg_resources deprecation warnings +warnings.simplefilter("ignore", DeprecationWarning) +from pkg_resources import DistributionNotFound, VersionConflict # noqa: E402 + def main(): config = ConfigParser()
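One caveat with the filter added above: warnings.simplefilter at module level mutes every DeprecationWarning for the rest of the process, not just the one raised by pkg_resources. A narrower variant (a sketch of an alternative, not what the patch does) scopes the suppression to the offending import and restores the filters afterwards:

import warnings

# Ignore DeprecationWarning only while pkg_resources is imported;
# the previous warning filters are restored when the block exits
with warnings.catch_warnings():
    warnings.simplefilter("ignore", DeprecationWarning)
    from pkg_resources import DistributionNotFound, VersionConflict  # noqa: E402

From 1b0df56db065bec6f17bf1cf05f276b4c05925ad Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Fri, 13 Sep 2024 15:16:11 +0800 Subject: [PATCH 16/19] use uvloop if possible --- lib/controller/controller.py | 6 ++++++ 1 file changed, 6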
insertions(+) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index fd98ffda2..f7b446c75 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -72,6 +72,12 @@ if options["async_mode"]: from lib.connection.requester import AsyncRequester as Requester from lib.core.fuzzer import AsyncFuzzer as Fuzzer + + try: + import uvloop + asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) + except ImportError: + pass else: from lib.connection.requester import Requester from lib.core.fuzzer import Fuzzer From 7076d5d80457171ffea89045a98f0a57b09c3187 Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Fri, 13 Sep 2024 16:27:21 +0800 Subject: [PATCH 17/19] ignore some features in async mode --- lib/controller/controller.py | 8 ++++++++ lib/core/options.py | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index f7b446c75..4fe4759e9 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -564,6 +564,14 @@ def handle_pause(self): option = input() if option.lower() == "q": + if options["async_mode"]: + quitexc = QuitInterrupt("Canceled by the user") + if options["async_mode"]: + self.pause_future.set_exception(quitexc) + break + else: + raise quitexc + interface.in_line("[s]ave / [q]uit without saving: ") option = input() diff --git a/lib/core/options.py b/lib/core/options.py index 0ebde0d2d..c541d3f0c 100755 --- a/lib/core/options.py +++ b/lib/core/options.py @@ -35,6 +35,9 @@ def parse_options(): opt = parse_config(parse_arguments()) if opt.session_file: + if opt.async_mode: + print("Cannot resume a session in asynchronous mode") + exit(1) return vars(opt) opt.http_method = opt.http_method.upper() @@ -200,6 +203,9 @@ def parse_options(): ) exit(1) + if opt.async_mode and opt.replay_proxy: + print("WARNING: --replay-proxy doesn't work in asynchronous mode") + return vars(opt) From bbdb60d9310956689d717e76aa3764e2e6fb96aa Mon Sep 17 00:00:00 2001 From: 4shen0ne <4shen.01@gmail.com> Date: Thu, 19 Sep 2024 21:35:03 +0800 Subject: [PATCH 18/19] rotate proxies between requests --- lib/connection/requester.py | 163 +++++++++++++++++++----------- 1 file changed, 86 insertions(+), 77 deletions(-) diff --git a/lib/connection/requester.py b/lib/connection/requester.py index 260cd2201..df3ac7605 100755 --- a/lib/connection/requester.py +++ b/lib/connection/requester.py @@ -58,24 +58,6 @@ socket.getaddrinfo = cached_getaddrinfo -class HTTPBearerAuth(AuthBase): - def __init__(self, token): - self.token = token - - def __call__(self, request): - request.headers["Authorization"] = f"Bearer {self.token}" - return request - - -class HTTPXBearerAuth(httpx.Auth): - def __init__(self, token: str) -> None: - self.token = token - - def auth_flow(self,
From bbdb60d9310956689d717e76aa3764e2e6fb96aa Mon Sep 17 00:00:00 2001
From: 4shen0ne <4shen.01@gmail.com>
Date: Thu, 19 Sep 2024 21:35:03 +0800
Subject: [PATCH 18/19] rotate proxies between requests

---
 lib/connection/requester.py | 163 +++++++++++++++++++-----------------
 1 file changed, 86 insertions(+), 77 deletions(-)

diff --git a/lib/connection/requester.py b/lib/connection/requester.py
index 260cd2201..df3ac7605 100755
--- a/lib/connection/requester.py
+++ b/lib/connection/requester.py
@@ -58,24 +58,6 @@
 socket.getaddrinfo = cached_getaddrinfo
 
 
-class HTTPBearerAuth(AuthBase):
-    def __init__(self, token):
-        self.token = token
-
-    def __call__(self, request):
-        request.headers["Authorization"] = f"Bearer {self.token}"
-        return request
-
-
-class HTTPXBearerAuth(httpx.Auth):
-    def __init__(self, token: str) -> None:
-        self.token = token
-
-    def auth_flow(self, request: httpx.Request) -> Generator:
-        request.headers["Authorization"] = f"Bearer {self.token}"
-        yield request
-
-
 class BaseRequester:
     def __init__(self):
         self._url = None
@@ -151,6 +133,15 @@ def rate(self):
         return self._rate
 
 
+class HTTPBearerAuth(AuthBase):
+    def __init__(self, token):
+        self.token = token
+
+    def __call__(self, request):
+        request.headers["Authorization"] = f"Bearer {self.token}"
+        return request
+
+
 class Requester(BaseRequester):
     def __init__(self):
         super().__init__()
@@ -275,25 +266,42 @@ def request(self, path, proxy=None):
         raise RequestException(err_msg)
 
 
+class HTTPXBearerAuth(httpx.Auth):
+    def __init__(self, token: str) -> None:
+        self.token = token
+
+    def auth_flow(self, request: httpx.Request) -> Generator:
+        request.headers["Authorization"] = f"Bearer {self.token}"
+        yield request
+
+
+class ProxyRotatingTransport(httpx.AsyncBaseTransport):
+    def __init__(self, proxies, **kwargs) -> None:
+        self._transports = [
+            httpx.AsyncHTTPTransport(proxy=proxy, **kwargs) for proxy in proxies
+        ]
+
+    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
+        transport = random.choice(self._transports)
+        return await transport.handle_async_request(request)
+
+
 class AsyncRequester(BaseRequester):
     def __init__(self):
         super().__init__()
 
-        proxy = None
-        try:
-            proxy = self.parse_proxy(random.choice(options["proxies"]))
-        except IndexError:
-            pass
-
-        transport = httpx.AsyncHTTPTransport(
-            verify=False,
-            cert=self._cert,
-            limits=httpx.Limits(max_connections=options["thread_count"]),
-            # httpx doesn't let you choose different proxy for each request
-            # https://github.com/encode/httpx/discussions/3183
-            proxy=proxy,
-            retries=options["max_retries"],
-            socket_options=self._socket_options,
+        tpargs = {
+            "verify": False,
+            "cert": self._cert,
+            "limits": httpx.Limits(max_connections=options["thread_count"]),
+            "socket_options": self._socket_options,
+        }
+        transport = (
+            ProxyRotatingTransport(
+                list(map(self.parse_proxy, options["proxies"])), **tpargs
+            )
+            if options["proxies"]
+            else httpx.AsyncHTTPTransport(**tpargs)
         )
 
         self.session = httpx.AsyncClient(
@@ -344,59 +352,60 @@ async def request(self, path: str) -> AsyncResponse:
         url = safequote(self._url + path if self._url else path)
         parsed_url = urlparse(url)
 
-        try:
-            if self.agents:
-                self.set_header("user-agent", random.choice(self.agents))
+        for _ in range(options["max_retries"] + 1):
+            try:
+                if self.agents:
+                    self.set_header("user-agent", random.choice(self.agents))
 
-            # Use "target" extension to avoid the URL path from being normalized
-            request = self.session.build_request(
-                options["http_method"],
-                url,
-                headers=self.headers,
-                data=options["data"],
-            )
-            if p := parsed_url.path:
-                request.extensions = {"target": p.encode()}
+                # Use "target" extension to avoid the URL path from being normalized
+                request = self.session.build_request(
+                    options["http_method"],
+                    url,
+                    headers=self.headers,
+                    data=options["data"],
+                )
+                if p := parsed_url.path:
+                    request.extensions = {"target": p.encode()}
 
-            xresponse = await self.session.send(
-                request,
-                stream=True,
-                follow_redirects=options["follow_redirects"],
-            )
-            response = await AsyncResponse.create(xresponse)
-            await xresponse.aclose()
+                xresponse = await self.session.send(
+                    request,
+                    stream=True,
+                    follow_redirects=options["follow_redirects"],
+                )
+                response = await AsyncResponse.create(xresponse)
+                await xresponse.aclose()
 
-            log_msg = f'"{options["http_method"]} {response.url}" {response.status} - {response.length}B'
+                log_msg = f'"{options["http_method"]} {response.url}" {response.status} - {response.length}B'
 
-            if response.redirect:
-                log_msg += f" - LOCATION: {response.redirect}"
+                if response.redirect:
+                    log_msg += f" - LOCATION: {response.redirect}"
 
-            logger.info(log_msg)
+                logger.info(log_msg)
 
-            return response
+                return response
 
-        except Exception as e:
-            logger.exception(e)
+            except Exception as e:
+                logger.exception(e)
 
-            if isinstance(e, httpx.ConnectError):
-                if str(e).startswith("[Errno -2]"):
-                    err_msg = "Couldn't resolve DNS"
-                else:
-                    err_msg = f"Cannot connect to: {urlparse(url).netloc}"
-            elif isinstance(e, SSLError):
-                err_msg = "Unexpected SSL error"
-            elif isinstance(e, httpx.TooManyRedirects):
-                err_msg = f"Too many redirects: {url}"
-            elif isinstance(e, httpx.ProxyError):
-                err_msg = "Cannot establish the proxy connection"
-            elif isinstance(e, httpx.InvalidURL):
-                err_msg = f"Invalid URL: {url}"
-            elif isinstance(e, httpx.TimeoutException):
-                err_msg = f"Request timeout: {url}"
-            elif isinstance(e, httpx.ReadError) or isinstance(e, httpx.DecodingError):  # not sure
-                err_msg = f"Failed to read response body: {url}"
-            else:
-                err_msg = f"There was a problem in the request to: {url}"
+                if isinstance(e, httpx.ConnectError):
+                    if str(e).startswith("[Errno -2]"):
+                        err_msg = "Couldn't resolve DNS"
+                    else:
+                        err_msg = f"Cannot connect to: {urlparse(url).netloc}"
+                elif isinstance(e, SSLError):
+                    err_msg = "Unexpected SSL error"
+                elif isinstance(e, httpx.TooManyRedirects):
+                    err_msg = f"Too many redirects: {url}"
+                elif isinstance(e, httpx.ProxyError):
+                    err_msg = "Cannot establish the proxy connection"
+                elif isinstance(e, httpx.InvalidURL):
+                    err_msg = f"Invalid URL: {url}"
+                elif isinstance(e, httpx.TimeoutException):
+                    err_msg = f"Request timeout: {url}"
+                elif isinstance(e, httpx.ReadError) or isinstance(e, httpx.DecodingError):  # not sure
+                    err_msg = f"Failed to read response body: {url}"
+                else:
+                    err_msg = f"There was a problem in the request to: {url}"
 
         raise RequestException(err_msg)
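
Why patch 18 needs a custom transport at all: httpx binds a proxy to a transport when the
transport is constructed, not per request (see the httpx discussion linked in the removed
comment), so rotating proxies means pre-building one transport per proxy and picking one
for each request. A compact standalone sketch of the same idea, with placeholder proxy
URLs rather than anything from the patch:

    import random

    import httpx


    class RotatingProxyTransport(httpx.AsyncBaseTransport):
        def __init__(self, proxies, **kwargs) -> None:
            # One underlying transport (and connection pool) per proxy
            self._transports = [
                httpx.AsyncHTTPTransport(proxy=proxy, **kwargs) for proxy in proxies
            ]

        async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
            # Each request rides a randomly chosen proxy's pool
            return await random.choice(self._transports).handle_async_request(request)


    client = httpx.AsyncClient(
        mounts={"all://": RotatingProxyTransport(
            ["http://127.0.0.1:8080", "http://127.0.0.1:8081"]
        )}
    )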
From e1793329b0f276028bda83fd95143187d88a7998 Mon Sep 17 00:00:00 2001
From: 4shen0ne <4shen.01@gmail.com>
Date: Sun, 22 Sep 2024 20:38:52 +0800
Subject: [PATCH 19/19] support replay_proxy in async mode

---
 lib/connection/requester.py  | 27 +++++++++++++++++++++++----
 lib/controller/controller.py |  7 +++++--
 lib/core/options.py          |  3 ---
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/lib/connection/requester.py b/lib/connection/requester.py
index df3ac7605..c1fb386ce 100755
--- a/lib/connection/requester.py
+++ b/lib/connection/requester.py
@@ -24,7 +24,7 @@
 from ssl import SSLError
 import threading
 import time
-from typing import Generator
+from typing import Generator, Optional
 from urllib.parse import urlparse
 
 import httpx
@@ -308,6 +308,7 @@
             mounts={"all://": transport},
             timeout=httpx.Timeout(options["timeout"]),
         )
+        self.replay_session = None
 
     def parse_proxy(self, proxy: str) -> str:
         if not proxy:
@@ -339,8 +340,25 @@ def set_auth(self, type: str, credential: str) -> None:
         else:
             self.session.auth = HttpxNtlmAuth(user, password)
 
+    async def replay_request(self, path: str, proxy: str):
+        if self.replay_session is None:
+            transport = httpx.AsyncHTTPTransport(
+                verify=False,
+                cert=self._cert,
+                limits=httpx.Limits(max_connections=options["thread_count"]),
+                proxy=self.parse_proxy(proxy),
+                socket_options=self._socket_options,
+            )
+            self.replay_session = httpx.AsyncClient(
+                mounts={"all://": transport},
+                timeout=httpx.Timeout(options["timeout"]),
+            )
+        return await self.request(path, self.replay_session)
+
     # :path: is expected not to start with "/"
-    async def request(self, path: str) -> AsyncResponse:
+    async def request(
+        self, path: str, session: Optional[httpx.AsyncClient] = None
+    ) -> AsyncResponse:
         while self.is_rate_exceeded():
             await asyncio.sleep(0.1)
@@ -352,13 +370,14 @@ async def request(
         url = safequote(self._url + path if self._url else path)
         parsed_url = urlparse(url)
+        session = session or self.session
 
         for _ in range(options["max_retries"] + 1):
             try:
                 if self.agents:
                     self.set_header("user-agent", random.choice(self.agents))
 
                 # Use "target" extension to avoid the URL path from being normalized
-                request = self.session.build_request(
+                request = session.build_request(
                     options["http_method"],
                     url,
                     headers=self.headers,
@@ -367,7 +386,7 @@ async def request(
                 if p := parsed_url.path:
                     request.extensions = {"target": p.encode()}
 
-                xresponse = await self.session.send(
+                xresponse = await session.send(
                     request,
                     stream=True,
                     follow_redirects=options["follow_redirects"],
diff --git a/lib/controller/controller.py b/lib/controller/controller.py
index 4fe4759e9..8abae1871 100755
--- a/lib/controller/controller.py
+++ b/lib/controller/controller.py
@@ -504,9 +504,12 @@ def match_callback(self, response):
         if added_to_queue:
             interface.new_directories(added_to_queue)
 
-        if options["replay_proxy"] and not options["async_mode"]:
+        if options["replay_proxy"]:
             # Replay the request with new proxy
-            self.requester.request(response.full_path, proxy=options["replay_proxy"])
+            if options["async_mode"]:
+                self.loop.create_task(self.requester.replay_request(response.full_path, proxy=options["replay_proxy"]))
+            else:
+                self.requester.request(response.full_path, proxy=options["replay_proxy"])
 
         if self.report:
             self.results.append(response)
diff --git a/lib/core/options.py b/lib/core/options.py
index c541d3f0c..88dbbb306 100755
--- a/lib/core/options.py
+++ b/lib/core/options.py
@@ -203,9 +203,6 @@
         )
         exit(1)
 
-    if opt.async_mode and opt.replay_proxy:
-        print("WARNING: --replay-proxy doesn't work in asynchronous mode")
-
     return vars(opt)
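
A closing caveat on the controller change in patch 19: loop.create_task() schedules the
replay fire-and-forget, and asyncio keeps only a weak reference to running tasks, so an
unreferenced replay task can in principle be garbage-collected before it finishes, and any
exception it raises surfaces only when the task is destroyed. A common defensive pattern
(illustrative, not part of the patch) is to hold a strong reference until the task is done:

    import asyncio

    background_tasks = set()


    def spawn(loop: asyncio.AbstractEventLoop, coro) -> None:
        # Keep a strong reference so the task survives until completion,
        # then let the done-callback drop it.
        task = loop.create_task(coro)
        background_tasks.add(task)
        task.add_done_callback(background_tasks.discard)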