From 4fa291037d57c6ce55febbb4ae6e695de750fa23 Mon Sep 17 00:00:00 2001 From: David Kubek Date: Thu, 18 Apr 2024 11:51:46 +0200 Subject: [PATCH] Split metadata data --- leapp/utils/audit/__init__.py | 97 +++++++++++++------ res/schema/audit-layout.sql | 8 +- .../0002-add-metadata-dialog-tables.sql | 9 +- tests/scripts/test_metadata.py | 46 +++++++-- 4 files changed, 115 insertions(+), 45 deletions(-) diff --git a/leapp/utils/audit/__init__.py b/leapp/utils/audit/__init__.py index fd3a0ef4b..16db10796 100644 --- a/leapp/utils/audit/__init__.py +++ b/leapp/utils/audit/__init__.py @@ -3,6 +3,7 @@ import json import os import sqlite3 +import hashlib from leapp.config import get_config from leapp.compat import string_types @@ -221,9 +222,31 @@ def do_store(self, connection): self._data_source_id = cursor.fetchone()[0] -class Metadata(Host): +class Metadata(Storable): """ - Metadata of an entity (e.g. actor, workflow) + Metadata of an Entity + """ + + def __init__(self, metadata=None, hash_id=None): + """ + :param metadata: Entity metadata + :type metadata: str + :param hash_id: SHA256 hash in hexadecimal representation of data + :type hash_id: str + """ + super(Metadata, self).__init__() + self.metadata = metadata + self.hash_id = hash_id + + def do_store(self, connection): + super(Metadata, self).do_store(connection) + connection.execute('INSERT OR IGNORE INTO metadata (hash, metadata) VALUES(?, ?)', + (self.hash_id, self.metadata)) + + +class Entity(Host): + """ + Leapp framework entity (e.g. actor, workflow) """ def __init__(self, context=None, hostname=None, kind=None, metadata=None, name=None): @@ -234,34 +257,35 @@ def __init__(self, context=None, hostname=None, kind=None, metadata=None, name=N :type hostname: str :param kind: Kind of the entity for which metadata is stored :type kind: str - :param metadata: Actual metadata - :type metadata: dict + :param metadata: Entity metadata + :type metadata: :py:class:`leapp.utils.audit.Metadata` :param name: Name of the entity :type name: str """ - super(Metadata, self).__init__(context=context, hostname=hostname) + super(Entity, self).__init__(context=context, hostname=hostname) self.kind = kind self.name = name self.metadata = metadata - self._metadata_id = None + self._entity_id = None @property - def metadata_id(self): + def entity_id(self): """ Returns the id of the entry, which is only set when already stored. :return: Integer id or None """ - return self._metadata_id + return self._entity_id def do_store(self, connection): - super(Metadata, self).do_store(connection) + super(Entity, self).do_store(connection) + self.metadata.do_store(connection) connection.execute( - 'INSERT OR IGNORE INTO metadata (context, kind, name, metadata) VALUES(?, ?, ?, ?)', - (self.context, self.kind, self.name, json.dumps(self.metadata))) + 'INSERT OR IGNORE INTO entity (context, kind, name, metadata_hash) VALUES(?, ?, ?, ?)', + (self.context, self.kind, self.name, self.metadata.hash_id)) cursor = connection.execute( - 'SELECT id FROM metadata WHERE context = ? AND kind = ? AND name = ?', + 'SELECT id FROM entity WHERE context = ? AND kind = ? AND name = ?', (self.context, self.kind, self.name)) - self._metadata_id = cursor.fetchone()[0] + self._entity_id = cursor.fetchone()[0] class Message(DataSource): @@ -601,10 +625,16 @@ def store_workflow_metadata(workflow): :type workflow: :py:class:`leapp.workflows.Workflow` """ - metadata = type(workflow).serialize() - md = Metadata(kind='workflow', name=workflow.name, context=os.environ['LEAPP_EXECUTION_ID'], - hostname=os.environ['LEAPP_HOSTNAME'], metadata=metadata) - md.store() + metadata = json.dumps(type(workflow).serialize(), sort_keys=True) + metadata_hash_id = hashlib.sha256(metadata.encode('utf-8')).hexdigest() + + md = Metadata(metadata=metadata, hash_id=metadata_hash_id) + ent = Entity(kind='workflow', + name=workflow.name, + context=os.environ['LEAPP_EXECUTION_ID'], + hostname=os.environ['LEAPP_HOSTNAME'], + metadata=md) + ent.store() def store_actor_metadata(actor_definition, phase): @@ -616,19 +646,22 @@ def store_actor_metadata(actor_definition, phase): :type actor_definition: :py:class:`leapp.repository.actor_definition.ActorDefinition` """ - metadata = dict(actor_definition.discover()) - metadata.update({ - 'consumes': [model.__name__ for model in metadata.get('consumes', ())], - 'produces': [model.__name__ for model in metadata.get('produces', ())], - 'tags': [tag.__name__ for tag in metadata.get('tags', ())], + _metadata = dict(actor_definition.discover()) + _metadata.update({ + 'consumes': sorted(model.__name__ for model in _metadata.get('consumes', ())), + 'produces': sorted(model.__name__ for model in _metadata.get('produces', ())), + 'tags': sorted(tag.__name__ for tag in _metadata.get('tags', ())), }) - metadata['phase'] = phase - - actor_metadata_fields = ( - 'class_name', 'name', 'description', 'phase', 'tags', 'consumes', 'produces', 'path' - ) - md = Metadata(kind='actor', name=actor_definition.name, - context=os.environ['LEAPP_EXECUTION_ID'], - hostname=os.environ['LEAPP_HOSTNAME'], - metadata={field: metadata[field] for field in actor_metadata_fields}) - md.store() + _metadata['phase'] = phase + + actor_metadata_fields = ('class_name', 'name', 'description', 'phase', 'tags', 'consumes', 'produces', 'path') + metadata = json.dumps({field: _metadata[field] for field in actor_metadata_fields}, sort_keys=True) + metadata_hash_id = hashlib.sha256(metadata.encode('utf-8')).hexdigest() + + md = Metadata(metadata=metadata, hash_id=metadata_hash_id) + ent = Entity(kind='actor', + name=actor_definition.name, + context=os.environ['LEAPP_EXECUTION_ID'], + hostname=os.environ['LEAPP_HOSTNAME'], + metadata=md) + ent.store() diff --git a/res/schema/audit-layout.sql b/res/schema/audit-layout.sql index eb2b19f6a..d567ce494 100644 --- a/res/schema/audit-layout.sql +++ b/res/schema/audit-layout.sql @@ -42,13 +42,17 @@ CREATE TABLE IF NOT EXISTS message ( message_data_hash VARCHAR(64) NOT NULL REFERENCES message_data (hash) ); - CREATE TABLE IF NOT EXISTS metadata ( + hash VARCHAR(64) PRIMARY KEY NOT NULL, + metadata TEXT +); + +CREATE TABLE IF NOT EXISTS entity ( id INTEGER PRIMARY KEY NOT NULL, context VARCHAR(36) NOT NULL REFERENCES execution (context), kind VARCHAR(256) NOT NULL DEFAULT '', name VARCHAR(1024) NOT NULL DEFAULT '', - metadata TEXT DEFAULT NULL, + metadata_hash VARCHAR(64) NOT NULL REFERENCES metadata (hash), UNIQUE (context, kind, name) ); diff --git a/res/schema/migrations/0002-add-metadata-dialog-tables.sql b/res/schema/migrations/0002-add-metadata-dialog-tables.sql index bf3260eb3..d6c0a9fdd 100644 --- a/res/schema/migrations/0002-add-metadata-dialog-tables.sql +++ b/res/schema/migrations/0002-add-metadata-dialog-tables.sql @@ -1,11 +1,16 @@ BEGIN; -CREATE TABLE metadata ( +CREATE TABLE IF NOT EXISTS metadata ( + hash VARCHAR(64) PRIMARY KEY NOT NULL, + metadata TEXT +); + +CREATE TABLE IF NOT EXISTS entity ( id INTEGER PRIMARY KEY NOT NULL, context VARCHAR(36) NOT NULL REFERENCES execution (context), kind VARCHAR(256) NOT NULL DEFAULT '', name VARCHAR(1024) NOT NULL DEFAULT '', - metadata TEXT DEFAULT NULL, + metadata_hash VARCHAR(64) NOT NULL REFERENCES metadata (hash), UNIQUE (context, kind, name) ); diff --git a/tests/scripts/test_metadata.py b/tests/scripts/test_metadata.py index c850fdb09..9b5c07f62 100644 --- a/tests/scripts/test_metadata.py +++ b/tests/scripts/test_metadata.py @@ -1,6 +1,7 @@ import os import json import logging +import hashlib import mock import py @@ -8,7 +9,8 @@ from leapp.repository.scan import scan_repo from leapp.repository.actor_definition import ActorDefinition -from leapp.utils.audit import get_connection, dict_factory, Metadata, store_actor_metadata, store_workflow_metadata +from leapp.utils.audit import (get_connection, dict_factory, Metadata, Entity, store_actor_metadata, + store_workflow_metadata) from leapp.config import get_config _HOSTNAME = 'test-host.example.com' @@ -94,21 +96,38 @@ def setup(): def test_save_empty_metadata(): - e = Metadata( + hash_id = hashlib.sha256('test-empty-metadata'.encode('utf-8')).hexdigest() + md = Metadata(hash_id=hash_id, metadata='') + md.store() + + entry = None + with get_connection(None) as conn: + cursor = conn.execute('SELECT * FROM metadata WHERE hash = ?;', (hash_id,)) + cursor.row_factory = dict_factory + entry = cursor.fetchone() + + assert entry is not None + assert entry['metadata'] == '' + + +def test_save_empty_entity(): + hash_id = hashlib.sha256('test-empty-entity'.encode('utf-8')).hexdigest() + md = Metadata(hash_id=hash_id, metadata='') + e = Entity( name='test-name', - metadata=None, + metadata=md, kind='test-kind', context=_CONTEXT_NAME, hostname=_HOSTNAME, ) e.store() - assert e.metadata_id + assert e.entity_id assert e.host_id entry = None with get_connection(None) as conn: - cursor = conn.execute('SELECT * FROM metadata WHERE id = ?;', (e.metadata_id,)) + cursor = conn.execute('SELECT * FROM entity WHERE id = ?;', (e.entity_id,)) cursor.row_factory = dict_factory entry = cursor.fetchone() @@ -116,7 +135,7 @@ def test_save_empty_metadata(): assert entry['kind'] == 'test-kind' assert entry['name'] == 'test-name' assert entry['context'] == _CONTEXT_NAME - assert entry['metadata'] == 'null' + assert entry['metadata_hash'] == hash_id def test_store_actor_metadata(monkeypatch, repository_dir): @@ -142,7 +161,11 @@ def test_store_actor_metadata(monkeypatch, repository_dir): # --- entry = None with get_connection(None) as conn: - cursor = conn.execute('SELECT * FROM metadata WHERE name="test-actor";') + cursor = conn.execute('SELECT * ' + 'FROM entity ' + 'JOIN metadata ' + 'ON entity.metadata_hash = metadata.hash ' + 'WHERE name="test-actor";') cursor.row_factory = dict_factory entry = cursor.fetchone() @@ -180,8 +203,13 @@ def test_workflow_metadata(monkeypatch, repository): entry = None with get_connection(None) as conn: cursor = conn.execute( - 'SELECT * FROM metadata WHERE kind == "workflow" AND context = ? ORDER BY id DESC LIMIT 1;', - (_CONTEXT_NAME,)) + 'SELECT * ' + 'FROM entity ' + 'JOIN metadata ' + 'ON entity.metadata_hash = metadata.hash ' + 'WHERE kind == "workflow" AND context = ? ' + 'ORDER BY id DESC ' + 'LIMIT 1;', (_CONTEXT_NAME,)) cursor.row_factory = dict_factory entry = cursor.fetchone()