Skip to content

Commit

Permalink
Merge pull request #1840 from blacklanternsecurity/shodan-pagination
Browse files Browse the repository at this point in the history
Misc improvements
  • Loading branch information
TheTechromancer authored Oct 18, 2024
2 parents e7e5c6a + 0f7c266 commit 37ae382
Show file tree
Hide file tree
Showing 19 changed files with 455 additions and 367 deletions.
11 changes: 10 additions & 1 deletion bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
is_domain,
is_subdomain,
is_ip,
is_ip_type,
is_ptr,
is_uri,
url_depth,
Expand Down Expand Up @@ -352,6 +353,12 @@ def port(self):
return 80
return self._port

@property
def netloc(self):
    """
    Return the network location ("host" or "host:port") for this event,
    or None when there is no host or the host is an IP *network* object
    (is_ip_type(..., network=False) rejects only network types).
    """
    host = self.host
    if not host:
        return None
    if not is_ip_type(host, network=False):
        return None
    return make_netloc(host, self.port)

@property
def host_stem(self):
"""
Expand Down Expand Up @@ -741,7 +748,7 @@ def json(self, mode="json", siem_friendly=False):
"""
j = dict()
# type, ID, scope description
for i in ("type", "id", "uuid", "scope_description"):
for i in ("type", "id", "uuid", "scope_description", "netloc"):
v = getattr(self, i, "")
if v:
j.update({i: str(v)})
Expand All @@ -760,6 +767,8 @@ def json(self, mode="json", siem_friendly=False):
j["host"] = str(self.host)
j["resolved_hosts"] = sorted(str(h) for h in self.resolved_hosts)
j["dns_children"] = {k: list(v) for k, v in self.dns_children.items()}
if isinstance(self.port, int):
j["port"] = self.port
# web spider distance
web_spider_distance = getattr(self, "web_spider_distance", None)
if web_spider_distance is not None:
Expand Down
13 changes: 10 additions & 3 deletions bbot/core/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,12 +621,13 @@ def is_ip(d, version=None):
return False


def is_ip_type(i, network=None):
    """
    Checks if the given object is an instance of an IPv4/IPv6 type from the ipaddress module.

    Args:
        i: The object to check (typically an ipaddress address or network object).
        network (bool, optional): If True, return whether `i` is a network type
            (e.g. IPv4Network/IPv6Network). If False, return whether `i` is NOT a
            network type — note this is True for any non-network object, including
            objects that are not ipaddress types at all. Defaults to None, which
            performs the plain "is an ipaddress type" check.

    Returns:
        bool: For network=None, True if the object derives from
            ipaddress._IPAddressBase (addresses and networks), False otherwise.
            For network=True/False, whether the object is / is not a network type.

    Examples:
        >>> is_ip_type(ipaddress.ip_address("192.168.1.1"))
        True
        >>> is_ip_type(ipaddress.ip_network("192.168.1.0/24"))
        True
        >>> is_ip_type(ipaddress.ip_network("192.168.1.0/24"), network=True)
        True
        >>> is_ip_type(ipaddress.ip_address("192.168.1.1"), network=True)
        False
        >>> is_ip_type("192.168.1.0/24")
        False
    """
    if network is not None:
        is_network = ipaddress._BaseNetwork in i.__class__.__mro__
        if network:
            return is_network
        return not is_network
    return ipaddress._IPAddressBase in i.__class__.__mro__


Expand Down Expand Up @@ -1260,7 +1267,7 @@ def gen_numbers(n, padding=2):
return results


def make_netloc(host, port):
def make_netloc(host, port=None):
"""Constructs a network location string from a given host and port.
Args:
Expand Down Expand Up @@ -1289,7 +1296,7 @@ def make_netloc(host, port):
if is_ip(host, version=6):
host = f"[{host}]"
if port is None:
return host
return str(host)
return f"{host}:{port}"


Expand Down
5 changes: 4 additions & 1 deletion bbot/modules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,8 +984,11 @@ def _incoming_dedup_hash(self, event):
def _outgoing_dedup_hash(self, event):
"""
Determines the criteria for what is considered to be a duplicate event if `suppress_dupes` is True.
We take into account the `internal` attribute we don't want an internal event (which isn't distributed to output modules)
to inadvertently suppress a non-internal event.
"""
return hash((event, self.name))
return hash((event, self.name, event.internal, event.always_emit))

def get_per_host_hash(self, event):
"""
Expand Down
6 changes: 0 additions & 6 deletions bbot/modules/internal/dnsresolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@ class HostModule(BaseModule):
_name = "host"
_type = "internal"

def _outgoing_dedup_hash(self, event):
# this exists to ensure a second, more interesting host isn't passed up
# because its ugly cousin spent its one dedup token before it arrived
# by removing those race conditions, this makes for more consistent results
return hash((event, self.name, event.always_emit))

@property
def module_threads(self):
return self.dns_config.get("threads", 25)
Expand Down
2 changes: 2 additions & 0 deletions bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,8 @@ async def setup(self):
yara_rules_combined = "\n".join(self.yara_rules_dict.values())
try:
self.info(f"Compiling {len(self.yara_rules_dict):,} YARA rules")
for rule_name, rule_content in self.yara_rules_dict.items():
self.debug(f" - {rule_name}")
self.yara_rules = yara.compile(source=yara_rules_combined)
except yara.SyntaxError as e:
self.debug(yara_rules_combined)
Expand Down
7 changes: 3 additions & 4 deletions bbot/modules/internal/speculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,15 @@ async def handle_event(self, event):
speculate_open_ports = self.emit_open_ports and event_in_scope_distance

# URL --> OPEN_TCP_PORT
if event.type == "URL" or (event.type == "URL_UNVERIFIED" and self.open_port_consumers):
event_is_url = event.type == "URL"
if event_is_url or (event.type == "URL_UNVERIFIED" and self.open_port_consumers):
# only speculate port from a URL if it wouldn't be speculated naturally from the host
if event.host and (event.port not in self.ports or not speculate_open_ports):
await self.emit_event(
self.helpers.make_netloc(event.host, event.port),
"OPEN_TCP_PORT",
parent=event,
internal=True,
quick=(event.type == "URL"),
internal=not event_is_url, # if the URL is verified, the port is definitely open
context=f"speculated {{event.type}} from {event.type}: {{event.data}}",
)

Expand Down Expand Up @@ -169,7 +169,6 @@ async def handle_event(self, event):
"OPEN_TCP_PORT",
parent=event,
internal=True,
quick=True,
context="speculated {event.type}: {event.data}",
)

Expand Down
16 changes: 7 additions & 9 deletions bbot/modules/shodan_dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@ class shodan_dns(shodan):

base_url = "https://api.shodan.io"

async def request_url(self, query):
url = f"{self.base_url}/dns/domain/{self.helpers.quote(query)}?key={{api_key}}"
response = await self.api_request(url)
return response
async def handle_event(self, event):
    # Delegate to the paginated handler provided by the subdomain_enum template,
    # which pages through the API until an empty result batch is returned.
    await self.handle_event_paginated(event)

def parse_results(self, r, query):
json = r.json()
if json:
for hostname in json.get("subdomains", []):
yield f"{hostname}.{query}"
def make_url(self, query):
    """Build the Shodan DNS endpoint URL for `query`, leaving {api_key}/{page} as template placeholders."""
    quoted = self.helpers.quote(query)
    return f"{self.base_url}/dns/domain/{quoted}?key={{api_key}}&page={{page}}"

def parse_results(self, json, query):
    """Fully qualify each subdomain label from the Shodan DNS response under `query`."""
    # NOTE: `json` is the decoded response dict here, not the json module
    subdomains = json.get("subdomains", [])
    return [f"{sub}.{query}" for sub in subdomains]
47 changes: 43 additions & 4 deletions bbot/modules/templates/subdomain_enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ class subdomain_enum(BaseModule):
# "lowest_parent": dedupe by lowest parent (lowest parent of www.api.test.evilcorp.com is api.test.evilcorp.com)
dedup_strategy = "highest_parent"

# how many results to request per API call
page_size = 100
# arguments to pass to api_page_iter
api_page_iter_kwargs = {}

@property
def source_pretty_name(self):
return f"{self.__class__.__name__} API"
Expand Down Expand Up @@ -61,10 +66,31 @@ async def handle_event(self, event):
context=f'{{module}} searched {self.source_pretty_name} for "{query}" and found {{event.type}}: {{event.data}}',
)

async def handle_event_paginated(self, event):
    """
    Paginated variant of handle_event(): streams result batches from query_paginated()
    and emits a DNS_NAME for every valid subdomain of the query.
    """
    query = self.make_query(event)
    async for result_batch in self.query_paginated(query):
        # dedupe within the batch before validating
        for raw_host in set(result_batch):
            try:
                validated = self.helpers.validators.validate_host(raw_host)
            except ValueError as e:
                self.verbose(e)
                continue
            # only emit real subdomains of the query, and never echo the event itself
            if not validated:
                continue
            if not validated.endswith(f".{query}") or validated == event.data:
                continue
            await self.emit_event(
                validated,
                "DNS_NAME",
                event,
                abort_if=self.abort_if,
                context=f'{{module}} searched {self.source_pretty_name} for "{query}" and found {{event.type}}: {{event.data}}',
            )

async def request_url(self, query):
    """Fetch the raw API response for `query` from the URL built by make_url()."""
    # span contained diff residue (superseded hard-coded URL line); keep only the
    # final make_url()-based implementation
    url = self.make_url(query)
    return await self.api_request(url)

def make_url(self, query):
    """Build the default subdomain-enumeration endpoint URL for `query`."""
    quoted = self.helpers.quote(query)
    return f"{self.base_url}/subdomains/{quoted}"

def make_query(self, event):
query = event.data
parents = list(self.helpers.domain_parents(event.data))
Expand All @@ -86,11 +112,11 @@ def parse_results(self, r, query=None):
for hostname in json:
yield hostname

async def query(self, query, parse_fn=None, request_fn=None):
if parse_fn is None:
parse_fn = self.parse_results
async def query(self, query, request_fn=None, parse_fn=None):
if request_fn is None:
request_fn = self.request_url
if parse_fn is None:
parse_fn = self.parse_results
try:
response = await request_fn(query)
if response is None:
Expand All @@ -113,6 +139,19 @@ async def query(self, query, parse_fn=None, request_fn=None):
except Exception as e:
self.info(f"Error retrieving results for {query}: {e}", trace=True)

async def query_paginated(self, query):
    """
    Yield batches of parsed results for `query`, one API page at a time.

    Stops requesting further pages as soon as a page parses to no results.
    """
    url = self.make_url(query)
    agen = self.api_page_iter(url, page_size=self.page_size, **self.api_page_iter_kwargs)
    try:
        async for response in agen:
            # materialize: the base-class parse_results() is a generator, which
            # would break len() and the emptiness check below
            subdomains = list(self.parse_results(response, query))
            self.verbose(f'Got {len(subdomains):,} subdomains for "{query}"')
            if not subdomains:
                break
            yield subdomains
    finally:
        # aclose() is a coroutine; without await it never actually runs and the
        # page iterator is never closed
        await agen.aclose()

async def _is_wildcard(self, query):
rdtypes = ("A", "AAAA", "CNAME")
if self.helpers.is_dns_name(query):
Expand Down
32 changes: 4 additions & 28 deletions bbot/modules/trickest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,39 +28,15 @@ def prepare_api_request(self, url, kwargs):
return url, kwargs

async def handle_event(self, event):
    # span contained diff residue (the deleted inline implementation alongside its
    # replacement); keep only the final version, which delegates to the
    # subdomain_enum template's paginated handler
    await self.handle_event_paginated(event)

def make_url(self, query):
    """
    Build the paginated Trickest dataset-view URL for `query`.

    {page_size} and {offset} are left as template placeholders for api_page_iter.
    (Span contained diff residue — the deleted query() pagination loop — which now
    lives in the subdomain_enum template; only the URL builder remains here.)
    """
    url = f"{self.base_url}/view?q=hostname%20~%20%22.{self.helpers.quote(query)}%22"
    url += f"&dataset_id={self.dataset_id}"
    url += "&limit={page_size}&offset={offset}&select=hostname&orderby=hostname"
    return url

def parse_results(self, j):
def parse_results(self, j, query):
results = j.get("results", [])
subdomains = set()
for item in results:
Expand Down
18 changes: 4 additions & 14 deletions bbot/modules/virustotal.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ class virustotal(subdomain_enum_apikey):
options_desc = {"api_key": "VirusTotal API Key"}

base_url = "https://www.virustotal.com/api/v3"
api_page_iter_kwargs = {"json": False, "next_key": lambda r: r.json().get("links", {}).get("next", "")}

def make_url(self, query):
    """Return the VirusTotal v3 subdomains endpoint for `query`."""
    quoted = self.helpers.quote(query)
    return f"{self.base_url}/domains/{quoted}/subdomains"

def prepare_api_request(self, url, kwargs):
kwargs["headers"]["x-apikey"] = self.api_key
Expand All @@ -28,17 +32,3 @@ def parse_results(self, r, query):
if match.endswith(query):
results.add(match)
return results

async def query(self, query):
results = set()
url = f"{self.base_url}/domains/{self.helpers.quote(query)}/subdomains"
agen = self.api_page_iter(url, json=False, next_key=lambda r: r.json().get("links", {}).get("next", ""))
try:
async for response in agen:
r = self.parse_results(response, query)
if not r:
break
results.update(r)
finally:
agen.aclose()
return results
24 changes: 13 additions & 11 deletions bbot/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,20 +130,22 @@ def __init__(
else:
self.id = f"SCAN:{sha1(rand_string(20)).hexdigest()}"

preset = kwargs.pop("preset", None)
custom_preset = kwargs.pop("preset", None)
kwargs["_log"] = True

from .preset import Preset

if preset is None:
preset = Preset(*targets, **kwargs)
else:
if not isinstance(preset, Preset):
raise ValidationError(f'Preset must be of type Preset, not "{type(preset).__name__}"')
self.preset = preset.bake(self)
base_preset = Preset(*targets, **kwargs)

if custom_preset is not None:
if not isinstance(custom_preset, Preset):
raise ValidationError(f'Preset must be of type Preset, not "{type(custom_preset).__name__}"')
base_preset.merge(custom_preset)

self.preset = base_preset.bake(self)

# scan name
if preset.scan_name is None:
if self.preset.scan_name is None:
tries = 0
while 1:
if tries > 5:
Expand All @@ -158,16 +160,16 @@ def __init__(
break
tries += 1
else:
scan_name = str(preset.scan_name)
scan_name = str(self.preset.scan_name)
self.name = scan_name

# make sure the preset has a description
if not self.preset.description:
self.preset.description = self.name

# scan output dir
if preset.output_dir is not None:
self.home = Path(preset.output_dir).resolve() / self.name
if self.preset.output_dir is not None:
self.home = Path(self.preset.output_dir).resolve() / self.name
else:
self.home = self.preset.bbot_home / "scans" / self.name

Expand Down
Loading

0 comments on commit 37ae382

Please sign in to comment.