From 489cfcd6306655f1a14201a3b9b0acfa0e642b07 Mon Sep 17 00:00:00 2001 From: AlanCoding Date: Wed, 29 Jan 2025 16:47:53 -0500 Subject: [PATCH] Document, implement, and test remaining indirect host audit fields --- awx/main/migrations/0201_eventquery.py | 25 ---- .../0201_indirect_managed_node_audit.py | 85 +++++++++++ .../0202_indirectmanagednodeaudit.py | 47 ------ .../0203_job_event_queries_processed.py | 18 --- awx/main/models/event_query.py | 7 +- .../models/indirect_managed_node_audit.py | 22 +-- awx/main/tasks/host_indirect.py | 55 ++++++- .../functional/tasks/test_host_indirect.py | 141 +++++++++++++++++- 8 files changed, 285 insertions(+), 115 deletions(-) delete mode 100644 awx/main/migrations/0201_eventquery.py create mode 100644 awx/main/migrations/0201_indirect_managed_node_audit.py delete mode 100644 awx/main/migrations/0202_indirectmanagednodeaudit.py delete mode 100644 awx/main/migrations/0203_job_event_queries_processed.py diff --git a/awx/main/migrations/0201_eventquery.py b/awx/main/migrations/0201_eventquery.py deleted file mode 100644 index 09fd4e371b21..000000000000 --- a/awx/main/migrations/0201_eventquery.py +++ /dev/null @@ -1,25 +0,0 @@ -# Generated by Django 4.2.16 on 2025-01-27 12:19 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('main', '0200_delete_token_cleanup_job'), - ] - - operations = [ - migrations.CreateModel( - name='EventQuery', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('fqcn', models.CharField(max_length=255)), - ('collection_version', models.CharField(max_length=32)), - ('event_query', models.JSONField(default=dict)), - ], - options={ - 'unique_together': {('fqcn', 'collection_version')}, - }, - ), - ] diff --git a/awx/main/migrations/0201_indirect_managed_node_audit.py b/awx/main/migrations/0201_indirect_managed_node_audit.py new file mode 100644 index 000000000000..b4ce65f052a6 --- 
/dev/null +++ b/awx/main/migrations/0201_indirect_managed_node_audit.py @@ -0,0 +1,85 @@ +# Generated by Django 4.2.16 on 2025-01-29 20:13 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('main', '0200_delete_token_cleanup_job'), + ] + + operations = [ + migrations.AddField( + model_name='job', + name='event_queries_processed', + field=models.BooleanField(default=False, help_text='Events of this job have been queried for indirect host information'), + ), + migrations.CreateModel( + name='EventQuery', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('fqcn', models.CharField(help_text='Fully-qualified collection name.', max_length=255)), + ('collection_version', models.CharField(help_text='Version of the collection this data applies to.', max_length=32)), + ('event_query', models.JSONField(default=dict, help_text='The extensions/audit/event_query.yml file content scraped from the collection.')), + ], + options={ + 'unique_together': {('fqcn', 'collection_version')}, + }, + ), + migrations.CreateModel( + name='IndirectManagedNodeAudit', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created', models.DateTimeField(auto_now_add=True)), + ('name', models.CharField(help_text='The Ansible name of the host that this audit record is for.', max_length=255)), + ('canonical_facts', models.JSONField(default=dict, help_text='Facts about the host that will be used for managed node deduplication.')), + ('facts', models.JSONField(default=dict, help_text='Non canonical facts having additional info about the managed node.')), + ('events', models.JSONField(default=list, help_text='List of fully-qualified names of modules that ran against the host in the job.')), + ('count', models.PositiveIntegerField(default=0, help_text='Counter of how many times 
registered modules were invoked on the host.')), + ( + 'host', + models.ForeignKey( + help_text='The host this audit record is for.', + null=True, + on_delete=django.db.models.deletion.DO_NOTHING, + related_name='host_indirect_host_audits', + to='main.host', + ), + ), + ( + 'inventory', + models.ForeignKey( + help_text='The inventory the related job ran against, and which the related host is in.', + null=True, + on_delete=django.db.models.deletion.DO_NOTHING, + related_name='inventory_indirect_host_audits', + to='main.inventory', + ), + ), + ( + 'job', + models.ForeignKey( + editable=False, + help_text='Data saved in this record only applies to this specified job.', + on_delete=django.db.models.deletion.DO_NOTHING, + related_name='job_indirect_host_audits', + to='main.job', + ), + ), + ( + 'organization', + models.ForeignKey( + help_text='Applicable organization, inferred from the related job.', + on_delete=django.db.models.deletion.DO_NOTHING, + related_name='organization_indirect_host_audits', + to='main.organization', + ), + ), + ], + options={ + 'unique_together': {('name', 'job')}, + }, + ), + ] diff --git a/awx/main/migrations/0202_indirectmanagednodeaudit.py b/awx/main/migrations/0202_indirectmanagednodeaudit.py deleted file mode 100644 index eb1cdd8cf66d..000000000000 --- a/awx/main/migrations/0202_indirectmanagednodeaudit.py +++ /dev/null @@ -1,47 +0,0 @@ -# Generated by Django 4.2.16 on 2025-01-29 12:23 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('main', '0201_eventquery'), - ] - - operations = [ - migrations.CreateModel( - name='IndirectManagedNodeAudit', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('created', models.DateTimeField(auto_now_add=True)), - ('name', models.CharField(max_length=255)), - ('canonical_facts', models.JSONField(default=dict)), - ('facts', 
models.JSONField(default=dict)), - ('events', models.JSONField(default=list)), - ('count', models.PositiveIntegerField(default=0)), - ( - 'host', - models.ForeignKey(null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='host_indirect_host_audits', to='main.host'), - ), - ( - 'inventory', - models.ForeignKey( - null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='inventory_indirect_host_audits', to='main.inventory' - ), - ), - ( - 'job', - models.ForeignKey(editable=False, on_delete=django.db.models.deletion.DO_NOTHING, related_name='job_indirect_host_audits', to='main.job'), - ), - ( - 'organization', - models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name='organization_indirect_host_audits', to='main.organization'), - ), - ], - options={ - 'unique_together': {('name', 'job')}, - }, - ), - ] diff --git a/awx/main/migrations/0203_job_event_queries_processed.py b/awx/main/migrations/0203_job_event_queries_processed.py deleted file mode 100644 index 1985aeac3c4e..000000000000 --- a/awx/main/migrations/0203_job_event_queries_processed.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 4.2.16 on 2025-01-29 12:26 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('main', '0202_indirectmanagednodeaudit'), - ] - - operations = [ - migrations.AddField( - model_name='job', - name='event_queries_processed', - field=models.BooleanField(default=False, help_text='Events of this job have been queried for indirect host information'), - ), - ] diff --git a/awx/main/models/event_query.py b/awx/main/models/event_query.py index 49110f08fc67..b608fea2ea68 100644 --- a/awx/main/models/event_query.py +++ b/awx/main/models/event_query.py @@ -1,5 +1,6 @@ from django.core.exceptions import ValidationError from django.db import models +from django.utils.translation import gettext_lazy as _ from awx.main.models import BaseModel @@ -14,9 +15,9 @@ class Meta: 
app_label = 'main' unique_together = ['fqcn', 'collection_version'] - fqcn = models.CharField(max_length=255) - collection_version = models.CharField(max_length=32) - event_query = models.JSONField(default=dict) + fqcn = models.CharField(max_length=255, help_text=_('Fully-qualified collection name.')) + collection_version = models.CharField(max_length=32, help_text=_('Version of the collection this data applies to.')) + event_query = models.JSONField(default=dict, help_text=_('The extensions/audit/event_query.yml file content scraped from the collection.')) def validate_unique(self, exclude=None): try: diff --git a/awx/main/models/indirect_managed_node_audit.py b/awx/main/models/indirect_managed_node_audit.py index 624f2f6b4c11..08e18403eadd 100644 --- a/awx/main/models/indirect_managed_node_audit.py +++ b/awx/main/models/indirect_managed_node_audit.py @@ -2,6 +2,8 @@ from django.db.models.fields import DateTimeField, CharField, PositiveIntegerField from django.db.models.fields.json import JSONField from django.db.models.fields.related import ForeignKey +from django.utils.translation import gettext_lazy as _ + from awx.main.models import BaseModel @@ -21,12 +23,14 @@ class Meta: related_name='job_indirect_host_audits', on_delete=DO_NOTHING, editable=False, + help_text=_('Data saved in this record only applies to this specified job.'), ) organization = ForeignKey( 'Organization', related_name='organization_indirect_host_audits', on_delete=DO_NOTHING, + help_text=_('Applicable organization, inferred from the related job.'), ) inventory = ForeignKey( @@ -34,21 +38,17 @@ class Meta: related_name='inventory_indirect_host_audits', null=True, on_delete=DO_NOTHING, + help_text=_('The inventory the related job ran against, and which the related host is in.'), ) - host = ForeignKey( - 'Host', - related_name='host_indirect_host_audits', - null=True, - on_delete=DO_NOTHING, - ) + host = ForeignKey('Host', related_name='host_indirect_host_audits', null=True, 
class UnhashableFacts(RuntimeError):
    """Raised when a value cannot be converted into a hashable equivalent."""


def get_hashable_form(python_dict: Union[dict, list, Tuple, int, float, str, bool, None]) -> Union[Tuple, int, float, str, bool, None]:
    """Return a hashable value that carries the same data as a JSON-like structure.

    Scalars (int, float, str, bool) and None are returned unchanged.
    A dict becomes a sorted tuple of (key, value) pairs, so two dicts with the
    same content produce the same result regardless of insertion order.
    A list or tuple becomes a tuple with the original ordering preserved.
    Conversion is applied recursively, so containers nested at any depth
    (including lists inside lists and dicts inside lists) end up hashable.

    Raises:
        UnhashableFacts: if a value of any other type (for example a set or
            an arbitrary object) is encountered at any nesting level.
    """
    # None is what JSON null deserializes to and is already hashable
    if python_dict is None or isinstance(python_dict, (int, float, str, bool)):
        return python_dict  # return scalars as-is
    if isinstance(python_dict, dict):
        # Can't hash a dict, so make it a tuple of (key, value) tuples.
        # Sorting makes the result independent of key insertion order.
        return tuple(sorted((get_hashable_form(k), get_hashable_form(v)) for k, v in python_dict.items()))
    if isinstance(python_dict, (list, tuple)):
        # Every item must be converted too: a tuple that still contains a list
        # or dict would raise TypeError as soon as the caller tries to hash it.
        return tuple(get_hashable_form(item) for item in python_dict)
    raise UnhashableFacts(f'Cannonical facts contains a {type(python_dict)} type which can not be hashed.')
IndirectManagedNodeAudit( + canonical_facts=canonical_facts, + facts=facts, + job=job, + organization=job.organization, + name=event.host_name, + ) + results[hashable_facts] = audit_record + + # Increment rolling count fields + if event.task not in audit_record.events: + audit_record.events.append(event.task) + audit_record.count += 1 + + return list(results.values()) def fetch_job_event_query(job) -> dict[str, str]: @@ -59,3 +106,5 @@ def save_indirect_host_entries(job_id): job_event_queries = fetch_job_event_query(job) records = build_indirect_host_data(job, job_event_queries) IndirectManagedNodeAudit.objects.bulk_create(records) + job.event_queries_processed = True + job.save(update_fields=['event_queries_processed']) diff --git a/awx/main/tests/functional/tasks/test_host_indirect.py b/awx/main/tests/functional/tasks/test_host_indirect.py index 385c1ace3239..0b617a202d73 100644 --- a/awx/main/tests/functional/tasks/test_host_indirect.py +++ b/awx/main/tests/functional/tasks/test_host_indirect.py @@ -2,7 +2,7 @@ import pytest -from awx.main.tasks.host_indirect import build_indirect_host_data, fetch_job_event_query, save_indirect_host_entries +from awx.main.tasks.host_indirect import build_indirect_host_data, fetch_job_event_query, save_indirect_host_entries, get_hashable_form from awx.main.models.event_query import EventQuery from awx.main.models.indirect_managed_node_audit import IndirectManagedNodeAudit @@ -16,23 +16,33 @@ @pytest.fixture def bare_job(job_factory): job = job_factory() - job.installed_collections = {'demo.query.example': {'version': '1.0.1'}} + job.installed_collections = {'demo.query': {'version': '1.0.1'}, 'demo2.query': {'version': '1.0.1'}} job.save(update_fields=['installed_collections']) return job +def create_registered_event(job, task_name='demo.query.example'): + return job.job_events.create(task=task_name, event_data={'res': {'direct_host_name': 'foo_host'}}) + + @pytest.fixture def job_with_counted_event(bare_job): - 
bare_job.job_events.create(task='demo.query.example', event_data={'res': {'direct_host_name': 'foo_host'}}) + create_registered_event(bare_job) return bare_job +def create_event_query(fqcn='demo.query'): + module_name = f'{fqcn}.example' + return EventQuery.objects.create(fqcn=fqcn, collection_version='1.0.1', event_query=yaml.dump({module_name: TEST_JQ}, default_flow_style=False)) + + @pytest.fixture def event_query(): "This is ordinarily created by the artifacts callback" - return EventQuery.objects.create( - fqcn='demo.query.example', collection_version='1.0.1', event_query=yaml.dump({'demo.query.example': TEST_JQ}, default_flow_style=False) - ) + return create_event_query() + + +# ---- end fixtures ---- @pytest.mark.django_db @@ -50,15 +60,130 @@ def test_collect_an_event(job_with_counted_event): @pytest.mark.django_db -def test_fetch_job_event_query(job_with_counted_event, event_query): - assert fetch_job_event_query(job_with_counted_event) == {'demo.query.example': TEST_JQ} +def test_fetch_job_event_query(bare_job, event_query): + assert fetch_job_event_query(bare_job) == {'demo.query.example': TEST_JQ} + + +@pytest.mark.django_db +def test_fetch_multiple_job_event_query(bare_job): + create_event_query(fqcn='demo.query') + create_event_query(fqcn='demo2.query') + assert fetch_job_event_query(bare_job) == {'demo.query.example': TEST_JQ, 'demo2.query.example': TEST_JQ} @pytest.mark.django_db def test_save_indirect_host_entries(job_with_counted_event, event_query): + assert job_with_counted_event.event_queries_processed is False save_indirect_host_entries(job_with_counted_event.id) + job_with_counted_event.refresh_from_db() + assert job_with_counted_event.event_queries_processed is True assert IndirectManagedNodeAudit.objects.filter(job=job_with_counted_event).count() == 1 host_audit = IndirectManagedNodeAudit.objects.filter(job=job_with_counted_event).first() + assert host_audit.count == 1 assert host_audit.canonical_facts == {'host_name': 'foo_host'} assert 
host_audit.facts == {'another_host_name': 'foo_host'} assert host_audit.organization == job_with_counted_event.organization + + +@pytest.mark.django_db +def test_multiple_events_same_module_same_host(bare_job, event_query): + "This tests that the count field gives correct answers" + create_registered_event(bare_job) + create_registered_event(bare_job) + create_registered_event(bare_job) + + save_indirect_host_entries(bare_job.id) + + assert IndirectManagedNodeAudit.objects.filter(job=bare_job).count() == 1 + host_audit = IndirectManagedNodeAudit.objects.filter(job=bare_job).first() + + assert host_audit.count == 3 + assert host_audit.events == ['demo.query.example'] + + +@pytest.mark.django_db +def test_multiple_registered_modules(bare_job): + "This tests that the events will list multiple modules if more than 1 module from different collections is registered and used" + create_registered_event(bare_job, task_name='demo.query.example') + create_registered_event(bare_job, task_name='demo2.query.example') + + # These take the place of using the event_query fixture + create_event_query(fqcn='demo.query') + create_event_query(fqcn='demo2.query') + + save_indirect_host_entries(bare_job.id) + + assert IndirectManagedNodeAudit.objects.filter(job=bare_job).count() == 1 + host_audit = IndirectManagedNodeAudit.objects.filter(job=bare_job).first() + + assert host_audit.count == 2 + assert set(host_audit.events) == {'demo.query.example', 'demo2.query.example'} + + +@pytest.mark.django_db +def test_multiple_registered_modules_same_collection(bare_job): + "This tests that the events will list multiple modules if more than 1 module in same collection is registered and used" + create_registered_event(bare_job, task_name='demo.query.example') + create_registered_event(bare_job, task_name='demo.query.example2') + + # Takes place of event_query fixture, doing manually here + EventQuery.objects.create( + fqcn='demo.query', + collection_version='1.0.1', + event_query=yaml.dump( + { + 
class TestHashableForm:
    """Checks for get_hashable_form.

    Equal inputs must convert to equal values, different inputs to different
    values, and the converted value must actually be accepted by hash().
    """

    def test_same_dict(self):
        assert get_hashable_form({'a': 'b'}) == get_hashable_form({'a': 'b'})

    def test_same_list(self):
        assert get_hashable_form(['a', 'b']) == get_hashable_form(['a', 'b'])
        assert get_hashable_form(('a', 'b')) == get_hashable_form(('a', 'b'))

    def test_different_list(self):
        assert get_hashable_form(['a', 'b']) != get_hashable_form(['a', 'c'])
        assert get_hashable_form(('a', 'b')) != get_hashable_form(('a', 'c'))

    def test_values_different(self):
        assert get_hashable_form({'a': 'b'}) != get_hashable_form({'a': 'c'})

    def test_has_extra_key(self):
        assert get_hashable_form({'a': 'b'}) != get_hashable_form({'a': 'b', 'c': 'd'})

    def test_nested_dictionaries_different(self):
        assert get_hashable_form({'a': {'b': 'c'}}) != get_hashable_form({'a': {'b': 'd'}})

    def test_nested_dictionaries_same(self):
        assert get_hashable_form({'a': {'b': 'c'}}) == get_hashable_form({'a': {'b': 'c'}})

    def test_nested_lists_different(self):
        assert get_hashable_form({'a': ['b', 'c']}) != get_hashable_form({'a': ['b', 'd']})
        assert get_hashable_form({'a': ('b', 'c')}) != get_hashable_form({'a': ('b', 'd')})

    def test_nested_lists_same(self):
        assert get_hashable_form({'a': ['b', 'c']}) == get_hashable_form({'a': ['b', 'c']})
        assert get_hashable_form({'a': ('b', 'c')}) == get_hashable_form({'a': ('b', 'c')})

    def test_list_nested_lists_different(self):
        assert get_hashable_form(['a', ['b', 'c']]) != get_hashable_form(['a', ['b', 'd']])
        assert get_hashable_form(['a', ('b', 'c')]) != get_hashable_form(['a', ('b', 'd')])

    def test_list_nested_lists_same(self):
        assert get_hashable_form(['a', ['b', 'c']]) == get_hashable_form(['a', ['b', 'c']])
        assert get_hashable_form(['a', ('b', 'c')]) == get_hashable_form(['a', ('b', 'c')])

    def test_key_order_does_not_matter(self):
        # Two dicts with the same items are the same facts, whatever order the keys were inserted in
        assert get_hashable_form({'a': 'b', 'c': 'd'}) == get_hashable_form({'c': 'd', 'a': 'b'})

    def test_result_can_be_hashed(self):
        # Comparing with == does not prove the value can serve as a dict key, so call hash() directly
        for data in ({'a': 'b'}, {'a': {'b': 'c'}}, {'a': ['b', 'c']}, ['a', 'b']):
            assert isinstance(hash(get_hashable_form(data)), int)