From b20bef5ebbf7d00f80a8f4d922d269a2c570a4b3 Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Wed, 23 Oct 2024 00:43:30 +0700 Subject: [PATCH] Improve performance and close #1209 --- lib/controller/controller.py | 13 ++++++++--- lib/core/dictionary.py | 30 +++++++++++++++++--------- lib/core/fuzzer.py | 42 +++++------------------------------- 3 files changed, 35 insertions(+), 50 deletions(-) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 4a070429b..56620b4fd 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -63,6 +63,7 @@ from lib.parse.url import clean_path, parse_path from lib.report.manager import ReportManager from lib.utils.common import lstrip_once +from lib.utils.crawl import Crawler from lib.utils.file import FileUtils from lib.utils.schemedet import detect_scheme from lib.view.terminal import interface @@ -269,7 +270,7 @@ def start(self) -> None: if not self.old_session: current_time = time.strftime("%H:%M:%S") - msg = f"{NEW_LINE}[{current_time}] Starting: {current_directory}" + msg = f"{NEW_LINE}[{current_time}] Scanning: {current_directory}" interface.warning(msg) @@ -400,6 +401,13 @@ def match_callback(self, response: BaseResponse) -> None: else: self.requester.request(response.full_path, proxy=options["replay_proxy"]) + if options["crawl"]: + for path in Crawler.crawl(response): + if not self.dictionary.is_valid(path): + continue + path = lstrip_once(path, self.base_path) + self.dictionary.add_extra(path) + def update_progress_bar(self, response: BaseResponse) -> None: jobs_count = ( # Jobs left for unscanned targets @@ -507,14 +515,13 @@ def process(self) -> None: raise SkipTargetInterrupt( "Runtime exceeded the maximum set by the user" ) + time.sleep(0.5) break except KeyboardInterrupt: self.handle_pause() - time.sleep(0.3) - def add_directory(self, path: str) -> None: """Add directory to the recursion queue""" diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index 1424de298..d4d7dd8d1 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -62,6 +62,9 @@ class Dictionary: def __init__(self, **kwargs: Any) -> None: self._index = 0 self._items = self.generate(**kwargs) + # Items in self._extra will be cleared when self.reset() is called + self._extra_index = 0 + self._extra = [] @property def index(self) -> int: @@ -69,23 +72,23 @@ def index(self) -> int: @locked def __next__(self) -> str: - try: - path = self._items[self._index] - except IndexError: + if len(self._extra) > self._extra_index: + self._extra_index += 1 + return self._extra[self._extra_index - 1] + elif len(self._items) > self._index: + self._index += 1 + return self._items[self._index - 1] + else: raise StopIteration - self._index += 1 - - return path - def __contains__(self, item: str) -> bool: return item in self._items def __getstate__(self) -> tuple[list[str], int]: - return self._items, self._index + return self._items, self._index, self._extra, self._extra_index def __setstate__(self, state: tuple[list[str], int]) -> None: - self._items, self._index = state + self._items, self._index, self._extra, self._extra_index = state def __iter__(self) -> Iterator[str]: return iter(self._items) @@ -209,5 +212,12 @@ def is_valid(self, path: str) -> bool: return True + def add_extra(self, path) -> None: + if path in self._items or path in self._extra: + return + + self._extra.append(path) + def reset(self) -> None: - self._index = 0 + self._index = self._extra_index = 0 + self._extra.clear() diff --git a/lib/core/fuzzer.py b/lib/core/fuzzer.py index 2ce421747..95e07b048 100755 --- a/lib/core/fuzzer.py +++ b/lib/core/fuzzer.py @@ -38,7 +38,6 @@ ) from lib.parse.url import clean_path from lib.utils.common import get_readable_size, lstrip_once -from lib.utils.crawl import Crawler class BaseFuzzer: @@ -51,7 +50,6 @@ def __init__( not_found_callbacks: tuple[Callable[[BaseResponse], Any], ...], error_callbacks: tuple[Callable[[RequestException], Any], ...], ) -> None: - self._scanned = set() self._requester = requester self._dictionary = dictionary self._base_path: str = "" @@ -237,13 +235,8 @@ def quit(self) -> None: self._quit_event.set() self.play() - def scan(self, path: str, scanners: Generator[Scanner, None, None]) -> None: - # Avoid scanned paths from being re-scanned - if path in self._scanned: - return - else: - self._scanned.add(path) - + def scan(self, path: str) -> None: + scanners = self.get_scanners_for(path) response = self._requester.request(path) if self.is_excluded(response): @@ -264,23 +257,13 @@ def scan(self, path: str, scanners: Generator[Scanner, None, None]) -> None: except Exception as e: self.exc = e - if options["crawl"]: - logger.info(f'THREAD-{threading.get_ident()}: crawling "/{path}"') - for path_ in Crawler.crawl(response): - if self._dictionary.is_valid(path_): - logger.info( - f'THREAD-{threading.get_ident()}: found new path "/{path_}" in /{path}' - ) - self.scan(path_, self.get_scanners_for(path_)) - def thread_proc(self) -> None: logger.info(f'THREAD-{threading.get_ident()} started"') while True: try: path = next(self._dictionary) - scanners = self.get_scanners_for(path) - self.scan(self._base_path + path, scanners) + self.scan(self._base_path + path) except StopIteration: break @@ -397,13 +380,8 @@ def quit(self) -> None: for task in self._background_tasks: task.cancel() - async def scan(self, path: str, scanners: Generator) -> None: - # Avoid scanned paths from being re-scanned - if path in self._scanned: - return - else: - self._scanned.add(path) - + async def scan(self, path: str) -> None: + scanners = self.get_scanners_for(path) response = await self._requester.request(path) if self.is_excluded(response): @@ -424,16 +402,6 @@ async def scan(self, path: str, scanners: Generator) -> None: except Exception as e: self.exc = e - if options["crawl"]: - task = asyncio.current_task() - logger.info(f'{task.get_name()}: crawling "/{path}"') - for path_ in Crawler.crawl(response): - if self._dictionary.is_valid(path_): - logger.info( - f'{task.get_name()}: found new path "/{path_}" in /{path}' - ) - await self.scan(path_, self.get_scanners_for(path_)) - async def task_proc(self) -> None: async with self.sem: await self._play_event.wait()