Merge pull request #1313 from yogeshojha/1287-bug-clocked-scan-and-periodic-scan-is-not-working-fix

Fixes for Clocked and Periodic Scans. Fixes #1287. Fixes #1015.
yogeshojha authored Jul 20, 2024
2 parents 49bbb75 + 5667893 commit 063c82e
Showing 4 changed files with 203 additions and 173 deletions.
31 changes: 30 additions & 1 deletion web/reNgine/common_func.py
@@ -1028,4 +1028,33 @@ def parse_llm_vulnerability_report(report):
     except Exception as e:
         return data
 
-    return data
+    return data
+
+
+def create_scan_object(host_id, engine_id, initiated_by_id=None):
+    '''
+    create task with pending status so that celery task will execute when
+    threads are free
+    Args:
+        host_id: int: id of Domain model
+        engine_id: int: id of EngineType model
+        initiated_by_id: int: id of User model (Optional)
+    '''
+    # get current time
+    current_scan_time = timezone.now()
+    # fetch engine and domain object
+    engine = EngineType.objects.get(pk=engine_id)
+    domain = Domain.objects.get(pk=host_id)
+    scan = ScanHistory()
+    scan.scan_status = INITIATED_TASK
+    scan.domain = domain
+    scan.scan_type = engine
+    scan.start_scan_date = current_scan_time
+    if initiated_by_id:
+        user = User.objects.get(pk=initiated_by_id)
+        scan.initiated_by = user
+    scan.save()
+    # save last scan date for domain model
+    domain.start_scan_date = current_scan_time
+    domain.save()
+    return scan.id
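For reference, a minimal sketch of exercising the new helper from a Django shell (python manage.py shell inside the web container). The import paths and primary keys below are assumptions for illustration, not part of this diff:

# Illustrative only: create the pending scan record, then fetch it the same
# way initiate_scan later does. Assumes ScanHistory lives in startScan.models
# as in upstream reNgine.
from reNgine.common_func import create_scan_object
from startScan.models import ScanHistory

scan_history_id = create_scan_object(
    host_id=1,           # Domain pk (illustrative)
    engine_id=2,         # EngineType pk (illustrative)
    initiated_by_id=None,
)
scan = ScanHistory.objects.get(pk=scan_history_id)
print(scan.scan_status)  # stays INITIATED_TASK until a Celery worker picks the scan up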
4 changes: 0 additions & 4 deletions web/reNgine/settings.py
@@ -154,10 +154,6 @@
 USE_L10N = True
 USE_TZ = True
 
-# Temporary fix for celery beat crash
-# See https://github.com/yogeshojha/rengine/issues/971
-DJANGO_CELERY_BEAT_TZ_AWARE = False
-
 MEDIA_URL = '/media/'
 MEDIA_ROOT = '/usr/src/scan_results/'
 FILE_UPLOAD_MAX_MEMORY_SIZE = 100000000
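Removing the DJANGO_CELERY_BEAT_TZ_AWARE = False workaround means django-celery-beat now runs timezone-aware, so clocked and periodic schedules should be created with aware datetimes. A minimal sketch using django-celery-beat's documented models; the task name, kwargs, and constant values are illustrative assumptions, not taken from this commit:

# Sketch: register a one-off ("clocked") scan with django-celery-beat.
# Only the ClockedSchedule/PeriodicTask API comes from the library itself;
# the task path and kwargs below are assumptions for illustration.
import json
from datetime import timedelta

from django.utils import timezone
from django_celery_beat.models import ClockedSchedule, PeriodicTask

clocked, _ = ClockedSchedule.objects.get_or_create(
    clocked_time=timezone.now() + timedelta(hours=1)  # timezone-aware datetime
)
PeriodicTask.objects.create(
    clocked=clocked,
    one_off=True,                  # clocked schedules must be one-off
    name='Clocked scan: example.com',
    task='initiate_scan',          # assumed Celery task name for the scan task
    kwargs=json.dumps({
        'scan_history_id': 0,
        'domain_id': 1,
        'engine_id': 2,
        'scan_type': 1,            # assumed value of SCHEDULED_SCAN
        'initiated_by_id': None,
    }),
)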
257 changes: 137 additions & 120 deletions web/reNgine/tasks.py
@@ -57,6 +57,7 @@ def initiate_scan(
         results_dir=RENGINE_RESULTS,
         imported_subdomains=[],
         out_of_scope_subdomains=[],
+        initiated_by_id=None,
         url_filter=''):
     """Initiate a new scan.
@@ -68,137 +69,153 @@ def initiate_scan(
         results_dir (str): Results directory.
         imported_subdomains (list): Imported subdomains.
         out_of_scope_subdomains (list): Out-of-scope subdomains.
-        url_filter (str): URL path. Default: ''
+        url_filter (str): URL path. Default: ''.
+        initiated_by (int): User ID initiating the scan.
     """
     logger.info('Initiating scan on celery')
+    scan = None
+    try:
+        # Get scan engine
+        engine_id = engine_id or scan.scan_type.id # scan history engine_id
+        engine = EngineType.objects.get(pk=engine_id)
+
+        # Get YAML config
+        config = yaml.safe_load(engine.yaml_configuration)
+        enable_http_crawl = config.get(ENABLE_HTTP_CRAWL, DEFAULT_ENABLE_HTTP_CRAWL)
+        gf_patterns = config.get(GF_PATTERNS, [])
+
+        # Get domain and set last_scan_date
+        domain = Domain.objects.get(pk=domain_id)
+        domain.last_scan_date = timezone.now()
+        domain.save()
+
+        # Get path filter
+        url_filter = url_filter.rstrip('/')
+
+        # for live scan scan history id is passed as scan_history_id
+        # and no need to create scan_history object
+
+        if scan_type == SCHEDULED_SCAN: # scheduled
+            # we need to create scan_history object for each scheduled scan
+            scan_history_id = create_scan_object(
+                host_id=domain_id,
+                engine_id=engine_id,
+                initiated_by_id=initiated_by_id,
+            )
+
+        # Get scan history
+        scan = ScanHistory.objects.get(pk=scan_history_id)
 
-    # Get scan engine
-    engine_id = engine_id or scan.scan_type.id # scan history engine_id
-    engine = EngineType.objects.get(pk=engine_id)
-
-    # Get YAML config
-    config = yaml.safe_load(engine.yaml_configuration)
-    enable_http_crawl = config.get(ENABLE_HTTP_CRAWL, DEFAULT_ENABLE_HTTP_CRAWL)
-    gf_patterns = config.get(GF_PATTERNS, [])
-
-    # Get domain and set last_scan_date
-    domain = Domain.objects.get(pk=domain_id)
-    domain.last_scan_date = timezone.now()
-    domain.save()
-
-    # Get path filter
-    url_filter = url_filter.rstrip('/')
-
-    # Get or create ScanHistory() object
-    if scan_type == LIVE_SCAN: # immediate
-        scan = ScanHistory.objects.get(pk=scan_history_id)
-        scan.scan_status = RUNNING_TASK
-    elif scan_type == SCHEDULED_SCAN: # scheduled
-        scan = ScanHistory()
-        scan.scan_status = INITIATED_TASK
-    scan.scan_type = engine
-    scan.celery_ids = [initiate_scan.request.id]
-    scan.domain = domain
-    scan.start_scan_date = timezone.now()
-    scan.tasks = engine.tasks
-    scan.results_dir = f'{results_dir}/{domain.name}_{scan.id}'
-    add_gf_patterns = gf_patterns and 'fetch_url' in engine.tasks
-    if add_gf_patterns:
-        scan.used_gf_patterns = ','.join(gf_patterns)
-    scan.save()
-
-    # Create scan results dir
-    os.makedirs(scan.results_dir)
-
-    # Build task context
-    ctx = {
-        'scan_history_id': scan_history_id,
-        'engine_id': engine_id,
-        'domain_id': domain.id,
-        'results_dir': scan.results_dir,
-        'url_filter': url_filter,
-        'yaml_configuration': config,
-        'out_of_scope_subdomains': out_of_scope_subdomains
-    }
-    ctx_str = json.dumps(ctx, indent=2)
-
-    # Send start notif
-    logger.warning(f'Starting scan {scan_history_id} with context:\n{ctx_str}')
-    send_scan_notif.delay(
-        scan_history_id,
-        subscan_id=None,
-        engine_id=engine_id,
-        status=CELERY_TASK_STATUS_MAP[scan.scan_status])
-
-    # Save imported subdomains in DB
-    save_imported_subdomains(imported_subdomains, ctx=ctx)
-
-    # Create initial subdomain in DB: make a copy of domain as a subdomain so
-    # that other tasks using subdomains can use it.
-    subdomain_name = domain.name
-    subdomain, _ = save_subdomain(subdomain_name, ctx=ctx)
+        scan.scan_type = engine
+        scan.celery_ids = [initiate_scan.request.id]
+        scan.domain = domain
+        scan.start_scan_date = timezone.now()
+        scan.tasks = engine.tasks
+        scan.results_dir = f'{results_dir}/{domain.name}_{scan.id}'
+        add_gf_patterns = gf_patterns and 'fetch_url' in engine.tasks
+        if add_gf_patterns:
+            scan.used_gf_patterns = ','.join(gf_patterns)
+        scan.save()
+
+        # Create scan results dir
+        os.makedirs(scan.results_dir)
+
+        # Build task context
+        ctx = {
+            'scan_history_id': scan_history_id,
+            'engine_id': engine_id,
+            'domain_id': domain.id,
+            'results_dir': scan.results_dir,
+            'url_filter': url_filter,
+            'yaml_configuration': config,
+            'out_of_scope_subdomains': out_of_scope_subdomains
+        }
+        ctx_str = json.dumps(ctx, indent=2)
+
+        # Send start notif
+        logger.warning(f'Starting scan {scan_history_id} with context:\n{ctx_str}')
+        send_scan_notif.delay(
+            scan_history_id,
+            subscan_id=None,
+            engine_id=engine_id,
+            status=CELERY_TASK_STATUS_MAP[scan.scan_status])
+
+        # Save imported subdomains in DB
+        save_imported_subdomains(imported_subdomains, ctx=ctx)
+
+        # Create initial subdomain in DB: make a copy of domain as a subdomain so
+        # that other tasks using subdomains can use it.
+        subdomain_name = domain.name
+        subdomain, _ = save_subdomain(subdomain_name, ctx=ctx)
 
-    # If enable_http_crawl is set, create an initial root HTTP endpoint so that
-    # HTTP crawling can start somewhere
-    http_url = f'{domain.name}{url_filter}' if url_filter else domain.name
-    endpoint, _ = save_endpoint(
-        http_url,
-        ctx=ctx,
-        crawl=enable_http_crawl,
-        is_default=True,
-        subdomain=subdomain
-    )
-    if endpoint and endpoint.is_alive:
-        # TODO: add `root_endpoint` property to subdomain and simply do
-        # subdomain.root_endpoint = endpoint instead
-        logger.warning(f'Found subdomain root HTTP URL {endpoint.http_url}')
-        subdomain.http_url = endpoint.http_url
-        subdomain.http_status = endpoint.http_status
-        subdomain.response_time = endpoint.response_time
-        subdomain.page_title = endpoint.page_title
-        subdomain.content_type = endpoint.content_type
-        subdomain.content_length = endpoint.content_length
-        for tech in endpoint.techs.all():
-            subdomain.technologies.add(tech)
-        subdomain.save()
+        # If enable_http_crawl is set, create an initial root HTTP endpoint so that
+        # HTTP crawling can start somewhere
+        http_url = f'{domain.name}{url_filter}' if url_filter else domain.name
+        endpoint, _ = save_endpoint(
+            http_url,
+            ctx=ctx,
+            crawl=enable_http_crawl,
+            is_default=True,
+            subdomain=subdomain
+        )
+        if endpoint and endpoint.is_alive:
+            # TODO: add `root_endpoint` property to subdomain and simply do
+            # subdomain.root_endpoint = endpoint instead
+            logger.warning(f'Found subdomain root HTTP URL {endpoint.http_url}')
+            subdomain.http_url = endpoint.http_url
+            subdomain.http_status = endpoint.http_status
+            subdomain.response_time = endpoint.response_time
+            subdomain.page_title = endpoint.page_title
+            subdomain.content_type = endpoint.content_type
+            subdomain.content_length = endpoint.content_length
+            for tech in endpoint.techs.all():
+                subdomain.technologies.add(tech)
+            subdomain.save()
 
 
-    # Build Celery tasks, crafted according to the dependency graph below:
-    # subdomain_discovery --> port_scan --> fetch_url --> dir_file_fuzz
-    # osint                                               vulnerability_scan
-    # osint                                               dalfox xss scan
-    #                                                     screenshot
-    #                                                     waf_detection
-    workflow = chain(
-        group(
-            subdomain_discovery.si(ctx=ctx, description='Subdomain discovery'),
-            osint.si(ctx=ctx, description='OS Intelligence')
-        ),
-        port_scan.si(ctx=ctx, description='Port scan'),
-        fetch_url.si(ctx=ctx, description='Fetch URL'),
-        group(
-            dir_file_fuzz.si(ctx=ctx, description='Directories & files fuzz'),
-            vulnerability_scan.si(ctx=ctx, description='Vulnerability scan'),
-            screenshot.si(ctx=ctx, description='Screenshot'),
-            waf_detection.si(ctx=ctx, description='WAF detection')
-        )
-    )
+        # Build Celery tasks, crafted according to the dependency graph below:
+        # subdomain_discovery --> port_scan --> fetch_url --> dir_file_fuzz
+        # osint                                               vulnerability_scan
+        # osint                                               dalfox xss scan
+        #                                                     screenshot
+        #                                                     waf_detection
+        workflow = chain(
+            group(
+                subdomain_discovery.si(ctx=ctx, description='Subdomain discovery'),
+                osint.si(ctx=ctx, description='OS Intelligence')
+            ),
+            port_scan.si(ctx=ctx, description='Port scan'),
+            fetch_url.si(ctx=ctx, description='Fetch URL'),
+            group(
+                dir_file_fuzz.si(ctx=ctx, description='Directories & files fuzz'),
+                vulnerability_scan.si(ctx=ctx, description='Vulnerability scan'),
+                screenshot.si(ctx=ctx, description='Screenshot'),
+                waf_detection.si(ctx=ctx, description='WAF detection')
+            )
+        )
 
-    # Build callback
-    callback = report.si(ctx=ctx).set(link_error=[report.si(ctx=ctx)])
+        # Build callback
+        callback = report.si(ctx=ctx).set(link_error=[report.si(ctx=ctx)])
 
-    # Run Celery chord
-    logger.info(f'Running Celery workflow with {len(workflow.tasks) + 1} tasks')
-    task = chain(workflow, callback).on_error(callback).delay()
-    scan.celery_ids.append(task.id)
-    scan.save()
+        # Run Celery chord
+        logger.info(f'Running Celery workflow with {len(workflow.tasks) + 1} tasks')
+        task = chain(workflow, callback).on_error(callback).delay()
+        scan.celery_ids.append(task.id)
+        scan.save()
 
-    return {
-        'success': True,
-        'task_id': task.id
-    }
+        return {
+            'success': True,
+            'task_id': task.id
+        }
+    except Exception as e:
+        logger.exception(e)
+        if scan:
+            scan.scan_status = FAILED_TASK
+            scan.error_message = str(e)
+            scan.save()
+        return {
+            'success': False,
+            'error': str(e)
+        }
 
 
 @app.task(name='initiate_subscan', bind=False, queue='subscan_queue')

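The workflow built in initiate_scan relies on Celery's chain/group composition with an error callback. Below is a self-contained sketch of that pattern with placeholder tasks and an assumed Redis broker URL (actually running it end to end requires a broker and a worker); it is not reNgine's task set:

# Standalone sketch of the chain(group(...), ...) + callback pattern used above.
# Task bodies are placeholders; only the composition pattern (immutable
# signatures, group fan-out, error callback) is the point.
from celery import Celery, chain, group

app = Celery('sketch', broker='redis://localhost:6379/0')  # assumed broker URL

@app.task
def discover(ctx=None, description=None):
    return 'discover done'

@app.task
def scan_ports(ctx=None, description=None):
    return 'ports done'

@app.task
def report(ctx=None, description=None):
    return 'report written'

ctx = {'scan_history_id': 1}

# .si() builds an immutable signature: parent results are not passed along,
# which is why each task takes a shared ctx dict instead.
workflow = chain(
    group(
        discover.si(ctx=ctx, description='Subdomain discovery'),
    ),
    scan_ports.si(ctx=ctx, description='Port scan'),
)

# Run the report both on success (end of chain) and on failure (link_error /
# on_error), mirroring how initiate_scan always produces a report.
callback = report.si(ctx=ctx).set(link_error=[report.si(ctx=ctx)])
result = chain(workflow, callback).on_error(callback).delay()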