From 5caa3e075cb0ae225d3610802e487863dbba2b7e Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 27 Jun 2024 16:34:57 +0200 Subject: [PATCH 01/16] ENH: Add possibility to delay generating MISP Feed Generating MISP feed on every incoming message slows down processing. The new config option lets us decide to save them in batches. Cached events are stored in a cache list in Redis. In addition, code related to Python 3.6 was removed as we do not support this version any more. --- intelmq/bots/outputs/misp/output_feed.py | 106 +++++++++++------- intelmq/lib/bot.py | 4 + intelmq/lib/mixins/cache.py | 18 ++- .../bots/outputs/misp/test_output_feed.py | 54 ++++++++- 4 files changed, 135 insertions(+), 47 deletions(-) diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index cbeeec09ea..49829d9ed1 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -5,12 +5,13 @@ # -*- coding: utf-8 -*- import datetime import json +import re from pathlib import Path from uuid import uuid4 -import re from intelmq.lib.bot import OutputBot from intelmq.lib.exceptions import MissingDependencyError +from intelmq.lib.mixins import CacheMixin from intelmq.lib.utils import parse_relative try: @@ -19,19 +20,14 @@ except ImportError: # catching SyntaxError because of https://github.com/MISP/PyMISP/issues/501 MISPEvent = None - import_fail_reason = 'import' -except SyntaxError: - # catching SyntaxError because of https://github.com/MISP/PyMISP/issues/501 - MISPEvent = None - import_fail_reason = 'syntax' - + import_fail_reason = "import" -# NOTE: This module is compatible with Python 3.6+ - -class MISPFeedOutputBot(OutputBot): +class MISPFeedOutputBot(OutputBot, CacheMixin): """Generate an output in the MISP Feed format""" + interval_event: str = "1 hour" + delay_save_event_count: int = None misp_org_name = None misp_org_uuid = None output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # 
TODO: should be path @@ -45,13 +41,8 @@ def check_output_dir(dirname): return True def init(self): - if MISPEvent is None and import_fail_reason == 'syntax': - raise MissingDependencyError("pymisp", - version='>=2.4.117.3', - additional_text="Python versions below 3.6 are " - "only supported by pymisp <= 2.4.119.1.") - elif MISPEvent is None: - raise MissingDependencyError('pymisp', version='>=2.4.117.3') + if MISPEvent is None: + raise MissingDependencyError("pymisp", version=">=2.4.117.3") self.current_event = None @@ -71,59 +62,90 @@ def init(self): try: with (self.output_dir / '.current').open() as f: self.current_file = Path(f.read()) - self.current_event = MISPEvent() - self.current_event.load_file(self.current_file) - - last_min_time, last_max_time = re.findall('IntelMQ event (.*) - (.*)', self.current_event.info)[0] - last_min_time = datetime.datetime.strptime(last_min_time, '%Y-%m-%dT%H:%M:%S.%f') - last_max_time = datetime.datetime.strptime(last_max_time, '%Y-%m-%dT%H:%M:%S.%f') - if last_max_time < datetime.datetime.now(): - self.min_time_current = datetime.datetime.now() - self.max_time_current = self.min_time_current + self.timedelta - self.current_event = None - else: - self.min_time_current = last_min_time - self.max_time_current = last_max_time + + if self.current_file.exists(): + self.current_event = MISPEvent() + self.current_event.load_file(self.current_file) + + last_min_time, last_max_time = re.findall( + "IntelMQ event (.*) - (.*)", self.current_event.info + )[0] + last_min_time = datetime.datetime.strptime( + last_min_time, "%Y-%m-%dT%H:%M:%S.%f" + ) + last_max_time = datetime.datetime.strptime( + last_max_time, "%Y-%m-%dT%H:%M:%S.%f" + ) + if last_max_time < datetime.datetime.now(): + self.min_time_current = datetime.datetime.now() + self.max_time_current = self.min_time_current + self.timedelta + self.current_event = None + else: + self.min_time_current = last_min_time + self.max_time_current = last_max_time except: - 
self.logger.exception("Loading current event %s failed. Skipping it.", self.current_event) + self.logger.exception( + "Loading current event %s failed. Skipping it.", self.current_event + ) self.current_event = None else: self.min_time_current = datetime.datetime.now() self.max_time_current = self.min_time_current + self.timedelta def process(self): - if not self.current_event or datetime.datetime.now() > self.max_time_current: self.min_time_current = datetime.datetime.now() self.max_time_current = self.min_time_current + self.timedelta self.current_event = MISPEvent() - self.current_event.info = ('IntelMQ event {begin} - {end}' - ''.format(begin=self.min_time_current.isoformat(), - end=self.max_time_current.isoformat())) + self.current_event.info = "IntelMQ event {begin} - {end}" "".format( + begin=self.min_time_current.isoformat(), + end=self.max_time_current.isoformat(), + ) self.current_event.set_date(datetime.date.today()) self.current_event.Orgc = self.misp_org self.current_event.uuid = str(uuid4()) - self.current_file = self.output_dir / f'{self.current_event.uuid}.json' - with (self.output_dir / '.current').open('w') as f: + self.current_file = self.output_dir / f"{self.current_event.uuid}.json" + with (self.output_dir / ".current").open("w") as f: f.write(str(self.current_file)) + # On startup or when timeout occurs, clean the queue to ensure we do not + # keep events forever because there was not enough generated + self._generate_feed() + event = self.receive_message().to_dict(jsondict_as_string=True) - obj = self.current_event.add_object(name='intelmq_event') - for object_relation, value in event.items(): + cache_size = None + if self.delay_save_event_count: + cache_size = self.cache_put(event) + + if cache_size is None: + self._generate_feed(event) + elif cache_size >= self.delay_save_event_count: + self._generate_feed() + + self.acknowledge_message() + + def _add_message_to_feed(self, message: dict): + obj = 
self.current_event.add_object(name="intelmq_event") + for object_relation, value in message.items(): try: obj.add_attribute(object_relation, value=value) except NewAttributeError: # This entry isn't listed in the harmonization file, ignoring. pass - feed_output = self.current_event.to_feed(with_meta=False) + def _generate_feed(self, message: dict = None): + if message: + self._add_message_to_feed(message) + + while message := self.cache_pop(): + self._add_message_to_feed(message) - with self.current_file.open('w') as f: + feed_output = self.current_event.to_feed(with_meta=False) + with self.current_file.open("w") as f: json.dump(feed_output, f) feed_meta_generator(self.output_dir) - self.acknowledge_message() @staticmethod def check(parameters): diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index f1b0ed3335..ef09f51a3f 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -279,6 +279,10 @@ def catch_shutdown(): def harmonization(self): return self._harmonization + @property + def bot_id(self): + return self.__bot_id_full + def __handle_sigterm_signal(self, signum: int, stack: Optional[object]): """ Calls when a SIGTERM is received. Stops the bot. diff --git a/intelmq/lib/mixins/cache.py b/intelmq/lib/mixins/cache.py index 3cf5365023..9565175402 100644 --- a/intelmq/lib/mixins/cache.py +++ b/intelmq/lib/mixins/cache.py @@ -1,4 +1,4 @@ -""" CacheMixin for IntelMQ +"""CacheMixin for IntelMQ SPDX-FileCopyrightText: 2021 Sebastian Waldbauer SPDX-License-Identifier: AGPL-3.0-or-later @@ -6,6 +6,7 @@ CacheMixin is used for caching/storing data in redis. 
""" +import json from typing import Any, Optional import redis import intelmq.lib.utils as utils @@ -31,7 +32,9 @@ def __init__(self, **kwargs): "socket_timeout": 5, } - self.__redis = redis.Redis(db=self.redis_cache_db, password=self.redis_cache_password, **kwargs) + self.__redis = redis.Redis( + db=self.redis_cache_db, password=self.redis_cache_password, **kwargs + ) super().__init__() def cache_exists(self, key: str): @@ -51,6 +54,17 @@ def cache_set(self, key: str, value: Any, ttl: Optional[int] = None): if self.redis_cache_ttl: self.__redis.expire(key, self.redis_cache_ttl) + def cache_put(self, value: dict) -> int: + # Returns the length of the list after pushing + size = self.__redis.lpush(self.bot_id, json.dumps(value)) + return size + + def cache_pop(self) -> dict: + data = self.__redis.rpop(self.bot_id) + if data is None: + return None + return json.loads(data) + def cache_flush(self): """ Flushes the currently opened database by calling FLUSHDB. diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index 783f2bfa94..1627e29c4c 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -3,8 +3,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- +import json import unittest -import sys +from pathlib import Path from tempfile import TemporaryDirectory import intelmq.lib.test as test @@ -37,9 +38,9 @@ @test.skip_exotic() class TestMISPFeedOutputBot(test.BotTestCase, unittest.TestCase): - @classmethod def set_bot(cls): + cls.use_cache = True cls.bot_reference = MISPFeedOutputBot cls.default_input_message = EXAMPLE_EVENT cls.directory = TemporaryDirectory() @@ -51,10 +52,57 @@ def set_bot(cls): def test_event(self): self.run_bot() + current_event = open(f"{self.directory.name}/.current").read() + with open(current_event) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + assert len(objects) 
== 1 + + def test_accumulating_events(self): + self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT] + self.run_bot(iterations=2, parameters={"delay_save_event_count": 3}) + + current_event = open(f"{self.directory.name}/.current").read() + + # First, the feed is empty - not enough events came + with open(current_event) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + assert len(objects) == 0 + + self.input_message = [EXAMPLE_EVENT] + self.run_bot(parameters={"delay_save_event_count": 3}) + + # When enough events were collected, save them + with open(current_event) as f: + objects = json.load(f)["Event"]["Object"] + assert len(objects) == 3 + + self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT, EXAMPLE_EVENT] + self.run_bot(iterations=3, parameters={"delay_save_event_count": 3}) + + # We continue saving to the same file until interval timeout + with open(current_event) as f: + objects = json.load(f)["Event"]["Object"] + assert len(objects) == 6 + + # Simulating leftovers in the queue when it's time to generate new event + Path(f"{self.directory.name}/.current").unlink() + self.bot.cache_put(EXAMPLE_EVENT) + self.run_bot(parameters={"delay_save_event_count": 3}) + + new_event = open(f"{self.directory.name}/.current").read() + with open(new_event) as f: + objects = json.load(f)["Event"]["Object"] + assert len(objects) == 1 + + + def tearDown(self): + self.cache.delete(self.bot_id) + super().tearDown() + @classmethod def tearDownClass(cls): cls.directory.cleanup() -if __name__ == '__main__': # pragma: no cover +if __name__ == "__main__": # pragma: no cover unittest.main() From efb761de86c213a9d3b26baf324928d1f62223c4 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 3 Jul 2024 15:46:59 +0200 Subject: [PATCH 02/16] Add documentation. 
Fix code compatibility --- CHANGELOG.md | 4 +- docs/user/bots.md | 7 ++ intelmq/bots/outputs/misp/output_feed.py | 10 ++- intelmq/lib/mixins/cache.py | 11 +++ .../bots/outputs/misp/test_output_feed.py | 76 ++++++++++--------- 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6fb896a73..4c9a7df569 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,7 +40,9 @@ - Treat value `false` for parameter `filter_regex` as false (PR#2499 by Sebastian Wagner). #### Outputs -- `intelmq.bots.outputs.misp.output_feed`: Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski). +- `intelmq.bots.outputs.misp.output_feed`: + - Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski). + - Allow saving messages in bulks instead of refreshing the feed immediately (PR#2505 by Kamil Mankowski). - `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar). ### Documentation diff --git a/docs/user/bots.md b/docs/user/bots.md index 5b826843db..3e5edba136 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4609,6 +4609,13 @@ The PyMISP library >= 2.4.119.1 is required, see () The output bot creates one event per each interval, all data in this time frame is part of this event. Default "1 hour", string. +**`bulk_save_count`** + +(optional, int) If set to a non-0 value, the bot won't refresh the MISP feed immeadiately, but will cache +incoming messages until the given number of them. Use it if your bot proceeds a high number of messages +and constant saving to the disk is a problem. Reloading or restarting bot as well as generating +a new MISP event based on `interval_event` triggers saving regardless of the cache size. + **Usage in MISP** Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. 
diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 49829d9ed1..53c6556795 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -27,7 +27,7 @@ class MISPFeedOutputBot(OutputBot, CacheMixin): """Generate an output in the MISP Feed format""" interval_event: str = "1 hour" - delay_save_event_count: int = None + bulk_save_count: int = None misp_org_name = None misp_org_uuid = None output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path @@ -115,12 +115,12 @@ def process(self): event = self.receive_message().to_dict(jsondict_as_string=True) cache_size = None - if self.delay_save_event_count: + if self.bulk_save_count: cache_size = self.cache_put(event) if cache_size is None: self._generate_feed(event) - elif cache_size >= self.delay_save_event_count: + elif cache_size >= self.bulk_save_count: self._generate_feed() self.acknowledge_message() @@ -138,8 +138,10 @@ def _generate_feed(self, message: dict = None): if message: self._add_message_to_feed(message) - while message := self.cache_pop(): + message = self.cache_pop() + while message: self._add_message_to_feed(message) + message = self.cache_pop() feed_output = self.current_event.to_feed(with_meta=False) with self.current_file.open("w") as f: diff --git a/intelmq/lib/mixins/cache.py b/intelmq/lib/mixins/cache.py index 9565175402..ee945fbb53 100644 --- a/intelmq/lib/mixins/cache.py +++ b/intelmq/lib/mixins/cache.py @@ -13,6 +13,17 @@ class CacheMixin: + """Provides caching possibilities for bots + + For key-value cache, use methods: + cache_exists + cache_get + cache_set + + To store dict elements in a cache queue named after bot id, use methods: + cache_put + cache_pop + """ __redis: redis.Redis = None redis_cache_host: str = "127.0.0.1" redis_cache_port: int = 6379 diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index 
1627e29c4c..631b7b7bd4 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -11,29 +11,30 @@ import intelmq.lib.test as test from intelmq.bots.outputs.misp.output_feed import MISPFeedOutputBot -EXAMPLE_EVENT = {"classification.type": "infected-system", - "destination.port": 9796, - "feed.accuracy": 100.0, - "destination.ip": "52.18.196.169", - "malware.name": "salityp2p", - "event_description.text": "Sinkhole attempted connection", - "time.source": "2016-04-19T23:16:08+00:00", - "source.ip": "152.166.119.2", - "feed.url": "http://alerts.bitsighttech.com:8080/stream?", - "source.geolocation.country": "Dominican Republic", - "time.observation": "2016-04-19T23:16:08+00:00", - "source.port": 65118, - "__type": "Event", - "feed.name": "BitSight", - "extra.non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166", - "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" - "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" - "BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" - "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" - "NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" - "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", - "__type": "Event", - } +EXAMPLE_EVENT = { + "classification.type": "infected-system", + "destination.port": 9796, + "feed.accuracy": 100.0, + "destination.ip": "52.18.196.169", + "malware.name": "salityp2p", + "event_description.text": "Sinkhole attempted connection", + "time.source": "2016-04-19T23:16:08+00:00", + "source.ip": "152.166.119.2", + "feed.url": "http://alerts.bitsighttech.com:8080/stream?", + "source.geolocation.country": "Dominican Republic", + "time.observation": "2016-04-19T23:16:08+00:00", + "source.port": 65118, + "__type": "Event", + "feed.name": "BitSight", + "extra.non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166", + "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" + "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" + 
"BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" + "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" + "NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" + "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", + "__type": "Event", +} @test.skip_exotic() @@ -43,11 +44,16 @@ def set_bot(cls): cls.use_cache = True cls.bot_reference = MISPFeedOutputBot cls.default_input_message = EXAMPLE_EVENT - cls.directory = TemporaryDirectory() - cls.sysconfig = {"misp_org_name": 'IntelMQTestOrg', - "misp_org_uuid": "b89da4c2-0f74-11ea-96a1-6fa873a0eb4d", - "output_dir": cls.directory.name, - "interval_event": '1 hour'} + cls.sysconfig = { + "misp_org_name": "IntelMQTestOrg", + "misp_org_uuid": "b89da4c2-0f74-11ea-96a1-6fa873a0eb4d", + "interval_event": "1 hour", + } + + def setUp(self) -> None: + super().setUp() + self.directory = TemporaryDirectory() + self.sysconfig["output_dir"] = self.directory.name def test_event(self): self.run_bot() @@ -59,7 +65,7 @@ def test_event(self): def test_accumulating_events(self): self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT] - self.run_bot(iterations=2, parameters={"delay_save_event_count": 3}) + self.run_bot(iterations=2, parameters={"bulk_save_count": 3}) current_event = open(f"{self.directory.name}/.current").read() @@ -69,7 +75,7 @@ def test_accumulating_events(self): assert len(objects) == 0 self.input_message = [EXAMPLE_EVENT] - self.run_bot(parameters={"delay_save_event_count": 3}) + self.run_bot(parameters={"bulk_save_count": 3}) # When enough events were collected, save them with open(current_event) as f: @@ -77,7 +83,7 @@ def test_accumulating_events(self): assert len(objects) == 3 self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT, EXAMPLE_EVENT] - self.run_bot(iterations=3, parameters={"delay_save_event_count": 3}) + self.run_bot(iterations=3, parameters={"bulk_save_count": 3}) # We continue saving to the same file until interval timeout with open(current_event) as f: @@ -87,22 +93,18 @@ def test_accumulating_events(self): # 
Simulating leftovers in the queue when it's time to generate new event Path(f"{self.directory.name}/.current").unlink() self.bot.cache_put(EXAMPLE_EVENT) - self.run_bot(parameters={"delay_save_event_count": 3}) + self.run_bot(parameters={"bulk_save_count": 3}) new_event = open(f"{self.directory.name}/.current").read() with open(new_event) as f: objects = json.load(f)["Event"]["Object"] assert len(objects) == 1 - def tearDown(self): self.cache.delete(self.bot_id) + self.directory.cleanup() super().tearDown() - @classmethod - def tearDownClass(cls): - cls.directory.cleanup() - if __name__ == "__main__": # pragma: no cover unittest.main() From b38bbf7312cff92034f9a125262b7779f00f9a73 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 3 Jul 2024 15:51:30 +0200 Subject: [PATCH 03/16] Fix spelling --- docs/user/bots.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/bots.md b/docs/user/bots.md index 3e5edba136..49f43dc792 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4611,7 +4611,7 @@ hour", string. **`bulk_save_count`** -(optional, int) If set to a non-0 value, the bot won't refresh the MISP feed immeadiately, but will cache +(optional, int) If set to a non-0 value, the bot won't refresh the MISP feed immediately, but will cache incoming messages until the given number of them. Use it if your bot proceeds a high number of messages and constant saving to the disk is a problem. Reloading or restarting bot as well as generating a new MISP event based on `interval_event` triggers saving regardless of the cache size. 
From 659b941951e4dea7fdcba04e5944e2504a61baf1 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 4 Jul 2024 13:32:59 +0200 Subject: [PATCH 04/16] ENH: Add attribute mapping The bot can now construct an event much more aligned to custom needs, allowing setting comments and selecting just a subset of fields to export --- CHANGELOG.md | 1 + docs/user/bots.md | 25 ++++++ intelmq/bots/outputs/misp/output_feed.py | 78 ++++++++++++++++--- .../bots/outputs/misp/test_output_feed.py | 47 ++++++++++- 4 files changed, 138 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c9a7df569..2ccc2486a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ - `intelmq.bots.outputs.misp.output_feed`: - Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski). - Allow saving messages in bulks instead of refreshing the feed immediately (PR#2505 by Kamil Mankowski). + - Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR by Kamil Mankowski). - `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar). ### Documentation diff --git a/docs/user/bots.md b/docs/user/bots.md index 49f43dc792..76c48790ad 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4616,6 +4616,31 @@ incoming messages until the given number of them. Use it if your bot proceeds a and constant saving to the disk is a problem. Reloading or restarting bot as well as generating a new MISP event based on `interval_event` triggers saving regardless of the cache size. +**`attribute_mapping`** + +(optional, dict) If set, allows selecting which IntelMQ event fields are mapped to MISP attributes +as well as attribute parameters (like e.g. a comment). 
The expected format is a *dictonary of dictionaries*: +first-level key represents an IntelMQ field that will be directly translated to a MISP attribute; nested +dictionary represents addditional parameters PyMISP can take when creating an attribute. They can use +names of other IntelMQ fields (then the value of such field will be used), or static values. If not needed, +leave empty dict. + +For example: + +```yaml +attribute_mapping: + source.ip: + feed.name: + comment: event_description.text + destination.ip: + to_ids: False +``` + +would create a MISP object with three attributes `source.ip`, `feed.name` and `destination.ip` +and set their values as in the IntelMQ event. In addition, the `feed.name` would have a comment +as given in the `event_description.text` from IntelMQ event, and `destination.ip` would be set +as not usable for IDS. + **Usage in MISP** Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 53c6556795..878858ceac 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -9,8 +9,11 @@ from pathlib import Path from uuid import uuid4 +import pymisp + from intelmq.lib.bot import OutputBot from intelmq.lib.exceptions import MissingDependencyError +from ....lib.message import Message, MessageFactory from intelmq.lib.mixins import CacheMixin from intelmq.lib.utils import parse_relative @@ -30,8 +33,11 @@ class MISPFeedOutputBot(OutputBot, CacheMixin): bulk_save_count: int = None misp_org_name = None misp_org_uuid = None - output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path + output_dir: str = ( + "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path + ) _is_multithreadable: bool = False + attribute_mapping: dict = None @staticmethod def check_output_dir(dirname): @@ -56,11 +62,13 @@ def 
init(self): if self.interval_event is None: self.timedelta = datetime.timedelta(hours=1) else: - self.timedelta = datetime.timedelta(minutes=parse_relative(self.interval_event)) + self.timedelta = datetime.timedelta( + minutes=parse_relative(self.interval_event) + ) - if (self.output_dir / '.current').exists(): + if (self.output_dir / ".current").exists(): try: - with (self.output_dir / '.current').open() as f: + with (self.output_dir / ".current").open() as f: self.current_file = Path(f.read()) if self.current_file.exists(): @@ -127,12 +135,49 @@ def process(self): def _add_message_to_feed(self, message: dict): obj = self.current_event.add_object(name="intelmq_event") + if not self.attribute_mapping: + self._default_mapping(obj, message) + else: + self._custom_mapping(obj, message) + + def _default_mapping(self, obj: pymisp.MISPObject, message: dict): for object_relation, value in message.items(): try: obj.add_attribute(object_relation, value=value) except NewAttributeError: # This entry isn't listed in the harmonization file, ignoring. - pass + self.logger.warning( + "Object relation %s not exists in MISP definition, ignoring", + object_relation, + ) + + def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dict: + # For caching and default mapping, the serialized version is the right format to work on. + # However, for any custom mapping the Message object is more sufficient as it handles + # subfields. 
+ message = MessageFactory.from_dict( + message, harmonization=self.harmonization, default_type="Event" + ) + result = {} + for parameter, value in definition.items(): + # Check if the value is a harmonization key or a static value + if isinstance(value, str) and ( + value in self.harmonization["event"] + or value.split(".", 1)[0] in self.harmonization["event"] + ): + result[parameter] = message.get(value) + else: + result[parameter] = value + return result + + def _custom_mapping(self, obj: pymisp.MISPObject, message: dict): + for object_relation, definition in self.attribute_mapping.items(): + obj.add_attribute( + object_relation, + value=message[object_relation], + **self._extract_misp_attribute_kwargs(message, definition), + ) + # In case of manual mapping, we want to fail if it produces incorrect values def _generate_feed(self, message: dict = None): if message: @@ -151,18 +196,27 @@ def _generate_feed(self, message: dict = None): @staticmethod def check(parameters): - if 'output_dir' not in parameters: + if "output_dir" not in parameters: return [["error", "Parameter 'output_dir' not given."]] try: - created = MISPFeedOutputBot.check_output_dir(parameters['output_dir']) + created = MISPFeedOutputBot.check_output_dir(parameters["output_dir"]) except OSError: - return [["error", - "Directory %r of parameter 'output_dir' does not exist and could not be created." % parameters['output_dir']]] + return [ + [ + "error", + "Directory %r of parameter 'output_dir' does not exist and could not be created." + % parameters["output_dir"], + ] + ] else: if created: - return [["info", - "Directory %r of parameter 'output_dir' did not exist, but has now been created." - "" % parameters['output_dir']]] + return [ + [ + "info", + "Directory %r of parameter 'output_dir' did not exist, but has now been created." 
+ "" % parameters["output_dir"], + ] + ] BOT = MISPFeedOutputBot diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index 631b7b7bd4..abb4b9c368 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -8,6 +8,7 @@ from pathlib import Path from tempfile import TemporaryDirectory +from .....lib.message import Message, MessageFactory import intelmq.lib.test as test from intelmq.bots.outputs.misp.output_feed import MISPFeedOutputBot @@ -92,7 +93,7 @@ def test_accumulating_events(self): # Simulating leftovers in the queue when it's time to generate new event Path(f"{self.directory.name}/.current").unlink() - self.bot.cache_put(EXAMPLE_EVENT) + self.bot.cache_put(MessageFactory.from_dict(EXAMPLE_EVENT).to_dict(jsondict_as_string=True)) self.run_bot(parameters={"bulk_save_count": 3}) new_event = open(f"{self.directory.name}/.current").read() @@ -100,6 +101,50 @@ def test_accumulating_events(self): objects = json.load(f)["Event"]["Object"] assert len(objects) == 1 + def test_attribute_mapping(self): + self.run_bot( + parameters={ + "attribute_mapping": { + "source.ip": {}, + "feed.name": {"comment": "event_description.text"}, + "destination.ip": {"to_ids": False}, + "malware.name": {"comment": "extra.non_ascii"} + } + } + ) + + current_event = open(f"{self.directory.name}/.current").read() + with open(current_event) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + + assert len(objects) == 1 + attributes = objects[0].get("Attribute") + assert len(attributes) == 4 + source_ip = next( + attr for attr in attributes if attr.get("object_relation") == "source.ip" + ) + assert source_ip["value"] == "152.166.119.2" + assert source_ip["comment"] == "" + + feed_name = next( + attr for attr in attributes if attr.get("object_relation") == "feed.name" + ) + assert feed_name["value"] == EXAMPLE_EVENT["feed.name"] + assert 
feed_name["comment"] == EXAMPLE_EVENT["event_description.text"] + + destination_ip = next( + attr for attr in attributes if attr.get("object_relation") == "destination.ip" + ) + assert destination_ip["value"] == EXAMPLE_EVENT["destination.ip"] + assert destination_ip["to_ids"] is False + + malware_name = next( + attr for attr in attributes if attr.get("object_relation") == "malware.name" + ) + assert malware_name["value"] == EXAMPLE_EVENT["malware.name"] + assert malware_name["comment"] == EXAMPLE_EVENT["extra.non_ascii"] + + def tearDown(self): self.cache.delete(self.bot_id) self.directory.cleanup() From 62a87e32a2365cc356217b0ae28665ad9ae9c4d7 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 4 Jul 2024 16:32:30 +0200 Subject: [PATCH 05/16] ENG: Add support for creating separated MISP Events With `event_separator` parameter, user can decide to create more than one MISP event in the output bot and group incoming messages based on given field. In addition, the message library was fixed not to modify the parameter directly. --- CHANGELOG.md | 5 +- docs/user/bots.md | 10 +- intelmq/bots/outputs/misp/output_feed.py | 148 +++++++++++------- intelmq/lib/message.py | 13 +- .../bots/outputs/misp/test_output_feed.py | 89 +++++++++-- 5 files changed, 191 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ccc2486a4..0159c17563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,8 +42,9 @@ #### Outputs - `intelmq.bots.outputs.misp.output_feed`: - Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski). - - Allow saving messages in bulks instead of refreshing the feed immediately (PR#2505 by Kamil Mankowski). - - Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR by Kamil Mankowski). - Allow saving messages in bulks instead of refreshing the feed immediately (PR#2509 by Kamil Mankowski). 
+ - Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR#2509 by Kamil Mankowski). + - Add `event_separator` parameter to allow keeping IntelMQ events in separated MISP Events based on a given field (PR#2509 by Kamil Mankowski). - `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar). ### Documentation diff --git a/docs/user/bots.md b/docs/user/bots.md index 76c48790ad..0ec9ed6957 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4619,9 +4619,9 @@ a new MISP event based on `interval_event` triggers saving regardless of the cac **`attribute_mapping`** (optional, dict) If set, allows selecting which IntelMQ event fields are mapped to MISP attributes -as well as attribute parameters (like e.g. a comment). The expected format is a *dictonary of dictionaries*: +as well as attribute parameters (like e.g. a comment). The expected format is a *dictionary of dictionaries*: first-level key represents an IntelMQ field that will be directly translated to a MISP attribute; nested -dictionary represents addditional parameters PyMISP can take when creating an attribute. They can use +dictionary represents additional parameters PyMISP can take when creating an attribute. They can use names of other IntelMQ fields (then the value of such field will be used), or static values. If not needed, leave empty dict. @@ -4641,6 +4641,12 @@ and set their values as in the IntelMQ event. In addition, the `feed.name` would as given in the `event_description.text` from IntelMQ event, and `destination.ip` would be set as not usable for IDS. +**`event_separator` + +(optional, string): If set to a field name from IntelMQ event, the bot will group incoming messages +in separated MISP events, based on the value of this field. The `interval_event` parameter acts +for all grouping events together. 
+ **Usage in MISP** Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 878858ceac..a0ef882399 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -9,22 +9,22 @@ from pathlib import Path from uuid import uuid4 -import pymisp - from intelmq.lib.bot import OutputBot from intelmq.lib.exceptions import MissingDependencyError -from ....lib.message import Message, MessageFactory +from ....lib.message import MessageFactory from intelmq.lib.mixins import CacheMixin from intelmq.lib.utils import parse_relative try: - from pymisp import MISPEvent, MISPOrganisation, NewAttributeError + from pymisp import MISPEvent, MISPObject, MISPOrganisation, NewAttributeError from pymisp.tools import feed_meta_generator except ImportError: # catching SyntaxError because of https://github.com/MISP/PyMISP/issues/501 MISPEvent = None import_fail_reason = "import" +DEFAULT_KEY = "default" + class MISPFeedOutputBot(OutputBot, CacheMixin): """Generate an output in the MISP Feed format""" @@ -38,6 +38,7 @@ class MISPFeedOutputBot(OutputBot, CacheMixin): ) _is_multithreadable: bool = False attribute_mapping: dict = None + event_separator: str = None @staticmethod def check_output_dir(dirname): @@ -50,7 +51,8 @@ def init(self): if MISPEvent is None: raise MissingDependencyError("pymisp", version=">=2.4.117.3") - self.current_event = None + self.current_events = {} + self.current_files = {} self.misp_org = MISPOrganisation() self.misp_org.name = self.misp_org_name @@ -66,58 +68,57 @@ def init(self): minutes=parse_relative(self.interval_event) ) + self.min_time_current = datetime.datetime.max + self.max_time_current = datetime.datetime.min + if (self.output_dir / ".current").exists(): try: with (self.output_dir / ".current").open() as f: - self.current_file = Path(f.read()) - - if 
self.current_file.exists(): - self.current_event = MISPEvent() - self.current_event.load_file(self.current_file) - - last_min_time, last_max_time = re.findall( - "IntelMQ event (.*) - (.*)", self.current_event.info - )[0] - last_min_time = datetime.datetime.strptime( - last_min_time, "%Y-%m-%dT%H:%M:%S.%f" - ) - last_max_time = datetime.datetime.strptime( - last_max_time, "%Y-%m-%dT%H:%M:%S.%f" - ) - if last_max_time < datetime.datetime.now(): - self.min_time_current = datetime.datetime.now() - self.max_time_current = self.min_time_current + self.timedelta - self.current_event = None - else: - self.min_time_current = last_min_time - self.max_time_current = last_max_time - except: + current = f.read() + + if not self.event_separator: + self.current_files[DEFAULT_KEY] = Path(current) + else: + self.current_files = { + k: Path(v) for k, v in json.loads(current).items() + } + + for key, path in self.current_files.items(): + self._load_event(path, key) + except Exception: self.logger.exception( - "Loading current event %s failed. Skipping it.", self.current_event + "Loading current events %s failed. 
Skipping it.", self.current_files ) - self.current_event = None - else: + self.current_events = {} + + if not self.current_files or self.max_time_current < datetime.datetime.now(): self.min_time_current = datetime.datetime.now() self.max_time_current = self.min_time_current + self.timedelta + self.current_events = {} + + def _load_event(self, file_path: Path, key: str): + if file_path.exists(): + self.current_events[key] = MISPEvent() + self.current_events[key].load_file(file_path) + + last_min_time, last_max_time = re.findall( + "IntelMQ event (.*) - (.*)", self.current_events[key].info + )[0] + last_min_time = datetime.datetime.strptime( + last_min_time, "%Y-%m-%dT%H:%M:%S.%f" + ) + last_max_time = datetime.datetime.strptime( + last_max_time, "%Y-%m-%dT%H:%M:%S.%f" + ) + + self.min_time_current = min(last_min_time, self.min_time_current) + self.max_time_current = max(last_max_time, self.max_time_current) def process(self): - if not self.current_event or datetime.datetime.now() > self.max_time_current: + if datetime.datetime.now() > self.max_time_current: self.min_time_current = datetime.datetime.now() self.max_time_current = self.min_time_current + self.timedelta - self.current_event = MISPEvent() - self.current_event.info = "IntelMQ event {begin} - {end}" "".format( - begin=self.min_time_current.isoformat(), - end=self.max_time_current.isoformat(), - ) - self.current_event.set_date(datetime.date.today()) - self.current_event.Orgc = self.misp_org - self.current_event.uuid = str(uuid4()) - self.current_file = self.output_dir / f"{self.current_event.uuid}.json" - with (self.output_dir / ".current").open("w") as f: - f.write(str(self.current_file)) - - # On startup or when timeout occurs, clean the queue to ensure we do not - # keep events forever because there was not enough generated + self._generate_feed() event = self.receive_message().to_dict(jsondict_as_string=True) @@ -128,19 +129,57 @@ def process(self): if cache_size is None: self._generate_feed(event) + 
elif not self.current_events: + # Always create the first event so we can keep track of the interval. + # It also ensures cleaning the queue after startup in case of awaiting + # messages from the previous run + self._generate_feed() elif cache_size >= self.bulk_save_count: self._generate_feed() self.acknowledge_message() + def _generate_new_event(self, key): + self.current_events[key] = MISPEvent() + self.current_events[key].info = "IntelMQ event {begin} - {end}" "".format( + begin=self.min_time_current.isoformat(), + end=self.max_time_current.isoformat(), + ) + self.current_events[key].set_date(datetime.date.today()) + self.current_events[key].Orgc = self.misp_org + self.current_events[key].uuid = str(uuid4()) + self.current_files[key] = ( + self.output_dir / f"{self.current_events[key].uuid}.json" + ) + with (self.output_dir / ".current").open("w") as f: + if not self.event_separator: + f.write(str(self.current_files[key])) + else: + json.dump({k: str(v) for k, v in self.current_files.items()}, f) + return self.current_events[key] + def _add_message_to_feed(self, message: dict): - obj = self.current_event.add_object(name="intelmq_event") + if not self.event_separator: + key = DEFAULT_KEY + else: + # For proper handling of nested fields + message_obj = MessageFactory.from_dict( + message, harmonization=self.harmonization, default_type="Event" + ) + key = message_obj.get(self.event_separator) or DEFAULT_KEY + + if key in self.current_events: + event = self.current_events[key] + else: + event = self._generate_new_event(key) + + obj = event.add_object(name="intelmq_event") if not self.attribute_mapping: self._default_mapping(obj, message) else: self._custom_mapping(obj, message) - def _default_mapping(self, obj: pymisp.MISPObject, message: dict): + def _default_mapping(self, obj: "MISPObject", message: dict): for object_relation, value in message.items(): try: obj.add_attribute(object_relation, value=value) @@ -162,15 +201,15 @@ def 
_extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dic for parameter, value in definition.items(): # Check if the value is a harmonization key or a static value if isinstance(value, str) and ( - value in self.harmonization["event"] - or value.split(".", 1)[0] in self.harmonization["event"] + value in self.harmonization["event"] or + value.split(".", 1)[0] in self.harmonization["event"] ): result[parameter] = message.get(value) else: result[parameter] = value return result - def _custom_mapping(self, obj: pymisp.MISPObject, message: dict): + def _custom_mapping(self, obj: "MISPObject", message: dict): for object_relation, definition in self.attribute_mapping.items(): obj.add_attribute( object_relation, @@ -188,9 +227,10 @@ def _generate_feed(self, message: dict = None): self._add_message_to_feed(message) message = self.cache_pop() - feed_output = self.current_event.to_feed(with_meta=False) - with self.current_file.open("w") as f: - json.dump(feed_output, f) + for key, event in self.current_events.items(): + feed_output = event.to_feed(with_meta=False) + with self.current_files[key].open("w") as f: + json.dump(feed_output, f) feed_meta_generator(self.output_dir) diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index e99e227313..84ee60a528 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -48,17 +48,18 @@ def from_dict(message: dict, harmonization=None, MessageFactory.unserialize MessageFactory.serialize """ - if default_type and "__type" not in message: - message["__type"] = default_type + # don't modify the parameter + message_copy = message.copy() + + if default_type and "__type" not in message_copy: + message_copy["__type"] = default_type try: - class_reference = getattr(intelmq.lib.message, message["__type"]) + class_reference = getattr(intelmq.lib.message, message_copy["__type"]) except AttributeError: raise exceptions.InvalidArgument('__type', - got=message["__type"], + got=message_copy["__type"], 
expected=VALID_MESSSAGE_TYPES, docs=HARMONIZATION_CONF_FILE) - # don't modify the parameter - message_copy = message.copy() del message_copy["__type"] return class_reference(message_copy, auto=True, harmonization=harmonization) diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index abb4b9c368..31172a81bb 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -70,18 +70,19 @@ def test_accumulating_events(self): current_event = open(f"{self.directory.name}/.current").read() - # First, the feed is empty - not enough events came + # The first event is always immediately dumped to the MISP feed + # But the second wait until bulk saving size is achieved with open(current_event) as f: objects = json.load(f).get("Event", {}).get("Object", []) - assert len(objects) == 0 + assert len(objects) == 1 - self.input_message = [EXAMPLE_EVENT] - self.run_bot(parameters={"bulk_save_count": 3}) + self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT] + self.run_bot(iterations=2, parameters={"bulk_save_count": 3}) # When enough events were collected, save them with open(current_event) as f: objects = json.load(f)["Event"]["Object"] - assert len(objects) == 3 + assert len(objects) == 4 self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT, EXAMPLE_EVENT] self.run_bot(iterations=3, parameters={"bulk_save_count": 3}) @@ -89,17 +90,19 @@ def test_accumulating_events(self): # We continue saving to the same file until interval timeout with open(current_event) as f: objects = json.load(f)["Event"]["Object"] - assert len(objects) == 6 + assert len(objects) == 7 # Simulating leftovers in the queue when it's time to generate new event Path(f"{self.directory.name}/.current").unlink() - self.bot.cache_put(MessageFactory.from_dict(EXAMPLE_EVENT).to_dict(jsondict_as_string=True)) + self.bot.cache_put( + 
MessageFactory.from_dict(EXAMPLE_EVENT).to_dict(jsondict_as_string=True) + ) self.run_bot(parameters={"bulk_save_count": 3}) new_event = open(f"{self.directory.name}/.current").read() with open(new_event) as f: objects = json.load(f)["Event"]["Object"] - assert len(objects) == 1 + assert len(objects) == 2 def test_attribute_mapping(self): self.run_bot( @@ -108,7 +111,7 @@ def test_attribute_mapping(self): "source.ip": {}, "feed.name": {"comment": "event_description.text"}, "destination.ip": {"to_ids": False}, - "malware.name": {"comment": "extra.non_ascii"} + "malware.name": {"comment": "extra.non_ascii"}, } } ) @@ -133,7 +136,9 @@ def test_attribute_mapping(self): assert feed_name["comment"] == EXAMPLE_EVENT["event_description.text"] destination_ip = next( - attr for attr in attributes if attr.get("object_relation") == "destination.ip" + attr + for attr in attributes + if attr.get("object_relation") == "destination.ip" ) assert destination_ip["value"] == EXAMPLE_EVENT["destination.ip"] assert destination_ip["to_ids"] is False @@ -144,6 +149,70 @@ def test_attribute_mapping(self): assert malware_name["value"] == EXAMPLE_EVENT["malware.name"] assert malware_name["comment"] == EXAMPLE_EVENT["extra.non_ascii"] + def test_event_separation(self): + self.input_message = [ + EXAMPLE_EVENT, + {**EXAMPLE_EVENT, "malware.name": "another_malware"}, + EXAMPLE_EVENT, + ] + self.run_bot(iterations=3, parameters={"event_separator": "malware.name"}) + + current_events = json.loads(open(f"{self.directory.name}/.current").read()) + assert len(current_events) == 2 + + with open(current_events["salityp2p"]) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + assert len(objects) == 2 + malware_name = next( + attr["value"] + for attr in objects[0]["Attribute"] + if attr.get("object_relation") == "malware.name" + ) + assert malware_name == "salityp2p" + + with open(current_events["another_malware"]) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + 
assert len(objects) == 1 + malware_name = next( + attr["value"] + for attr in objects[0]["Attribute"] + if attr.get("object_relation") == "malware.name" + ) + assert malware_name == "another_malware" + + def test_event_separation_with_extra_and_bulk_save(self): + self.input_message = [ + {**EXAMPLE_EVENT, "extra.some_key": "another_malware"}, + {**EXAMPLE_EVENT, "extra.some_key": "first_malware"}, + {**EXAMPLE_EVENT, "extra.some_key": "another_malware"}, + ] + self.run_bot( + iterations=3, + parameters={"event_separator": "extra.some_key", "bulk_save_count": 3}, + ) + + # Only the initial event is saved, the rest is cached + current_events = json.loads(open(f"{self.directory.name}/.current").read()) + assert len(current_events) == 1 + with open(current_events["another_malware"]) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + assert len(objects) == 1 + + self.input_message = {**EXAMPLE_EVENT, "extra.some_key": "first_malware"} + self.run_bot( + parameters={"event_separator": "extra.some_key", "bulk_save_count": 3}, + ) + + # Now everything is saved + current_events = json.loads(open(f"{self.directory.name}/.current").read()) + assert len(current_events) == 2 + with open(current_events["another_malware"]) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + assert len(objects) == 2 + + with open(current_events["first_malware"]) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + assert len(objects) == 2 def tearDown(self): self.cache.delete(self.bot_id) From 2b8b61714e1a43b19055e584b823e1dfc7739541 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Mon, 8 Jul 2024 13:31:48 +0200 Subject: [PATCH 06/16] Revert early copying message A lot of tests depend on that, so it looks currently risky to change. 
--- intelmq/bots/outputs/misp/output_feed.py | 9 +++++---- intelmq/lib/message.py | 12 ++++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index a0ef882399..9005103530 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -185,10 +185,11 @@ def _default_mapping(self, obj: "MISPObject", message: dict): obj.add_attribute(object_relation, value=value) except NewAttributeError: # This entry isn't listed in the harmonization file, ignoring. - self.logger.warning( - "Object relation %s not exists in MISP definition, ignoring", - object_relation, - ) + if object_relation != "__type": + self.logger.warning( + "Object relation %s not exists in MISP definition, ignoring", + object_relation, + ) def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dict: # For caching and default mapping, the serialized version is the right format to work on. 
diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index 84ee60a528..4353dd5682 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -48,18 +48,18 @@ def from_dict(message: dict, harmonization=None, MessageFactory.unserialize MessageFactory.serialize """ - # don't modify the parameter - message_copy = message.copy() - if default_type and "__type" not in message_copy: - message_copy["__type"] = default_type + if default_type and "__type" not in message: + message["__type"] = default_type try: - class_reference = getattr(intelmq.lib.message, message_copy["__type"]) + class_reference = getattr(intelmq.lib.message, message["__type"]) except AttributeError: raise exceptions.InvalidArgument('__type', - got=message_copy["__type"], + got=message["__type"], expected=VALID_MESSSAGE_TYPES, docs=HARMONIZATION_CONF_FILE) + # don't modify the parameter + message_copy = message.copy() del message_copy["__type"] return class_reference(message_copy, auto=True, harmonization=harmonization) From 3006220caad0f70ce23eb37e0c6b84f608158671 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Mon, 8 Jul 2024 13:59:23 +0200 Subject: [PATCH 07/16] FIX: Handle not existing fields with manual mapping --- intelmq/bots/outputs/misp/output_feed.py | 13 ++++++----- .../bots/outputs/misp/test_output_feed.py | 22 +++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 9005103530..2b431521ed 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -212,12 +212,13 @@ def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dic def _custom_mapping(self, obj: "MISPObject", message: dict): for object_relation, definition in self.attribute_mapping.items(): - obj.add_attribute( - object_relation, - value=message[object_relation], - **self._extract_misp_attribute_kwargs(message, definition), - ) - # 
In case of manual mapping, we want to fail if it produces incorrect values + if object_relation in message: + obj.add_attribute( + object_relation, + value=message[object_relation], + **self._extract_misp_attribute_kwargs(message, definition), + ) + # In case of manual mapping, we want to fail if it produces incorrect values def _generate_feed(self, message: dict = None): if message: diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index 31172a81bb..c2b69e37b6 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -149,6 +149,28 @@ def test_attribute_mapping(self): assert malware_name["value"] == EXAMPLE_EVENT["malware.name"] assert malware_name["comment"] == EXAMPLE_EVENT["extra.non_ascii"] + def test_attribute_mapping_empty_field(self): + self.run_bot( + parameters={ + "attribute_mapping": { + "source.ip": {}, + "source.fqdn": {}, # not exists in the message + } + } + ) + + current_event = open(f"{self.directory.name}/.current").read() + with open(current_event) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + + assert len(objects) == 1 + attributes = objects[0].get("Attribute") + assert len(attributes) == 1 + source_ip = next( + attr for attr in attributes if attr.get("object_relation") == "source.ip" + ) + assert source_ip["value"] == "152.166.119.2" + def test_event_separation(self): self.input_message = [ EXAMPLE_EVENT, From 6b2f8b7ee31fe52c542a33a73b8a711675f1b031 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 9 Jul 2024 15:43:15 +0200 Subject: [PATCH 08/16] ENH: Add option to extend default info --- CHANGELOG.md | 1 + docs/user/bots.md | 27 ++++++++++++++++--- intelmq/bots/outputs/misp/output_feed.py | 11 +++++--- .../bots/outputs/misp/test_output_feed.py | 21 +++++++++++++++ 4 files changed, 53 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
0159c17563..cfd6e0203b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ - Allow saving messages in bulks instead of refreshing the feed immediately (PR#2509 by Kamil Mankowski). - Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR#2509 by Kamil Mankowski). - Add `event_separator` parameter to allow keeping IntelMQ events in separated MISP Events based on a given field (PR#2509 by Kamil Mankowski). + - Add `additional_info` parameter to extend the default description of MISP Events (PR#2509 by Kamil Mankowski). - `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar). ### Documentation diff --git a/docs/user/bots.md b/docs/user/bots.md index 0ec9ed6957..b7088c6375 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4643,9 +4643,30 @@ as not usable for IDS. **`event_separator` -(optional, string): If set to a field name from IntelMQ event, the bot will group incoming messages -in separated MISP events, based on the value of this field. The `interval_event` parameter acts -for all grouping events together. +(optional, string): If set to a field name from IntelMQ event, the bot will work in parallel on a few +events instead of saving all incomming messages to a one. Each unique value from the field will +use its own MISP Event. This is useful if your feed provides data about multiple entities you would +like to group, for example IPs of C2 servers from different botnets. For a given value, the bot will +use the same MISP Event as long as it's allowed by the `interval_event`. + +**`additional_info` + +(optional, string): If set, the generated MISP Event will use it in the `info` field of the event, +in addition to the standard IntelMQ description with the time frame (you cannot remove it as the bot +depends of datetimes saved there). 
If you use `event_separator`, you may want to use `{separator}` +placeholder which will be then replaced with the value of the separator. + +For example, the following configuration can be used to create MISP Feed with IPs of C2 servers +of different botnets, having each botnet in a separated MISP Events with an appropiate description. +Each MISP Event will contain objects with the `source.ip` field only, and the events' info will look +like *C2 Servers for botnet-1. IntelMQ event 2024-07-09T14:51:10.825123 - 2024-07-10T14:51:10.825123* + +```yaml +event_separator: malware.name +additional_info: C2 Servers for {separator}. +attribute_mapping: + source.ip: +``` **Usage in MISP** diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 2b431521ed..9f82ae8d04 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -33,12 +33,11 @@ class MISPFeedOutputBot(OutputBot, CacheMixin): bulk_save_count: int = None misp_org_name = None misp_org_uuid = None - output_dir: str = ( - "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path - ) + output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path _is_multithreadable: bool = False attribute_mapping: dict = None event_separator: str = None + additional_info: str = None @staticmethod def check_output_dir(dirname): @@ -141,10 +140,14 @@ def process(self): def _generate_new_event(self, key): self.current_events[key] = MISPEvent() - self.current_events[key].info = "IntelMQ event {begin} - {end}" "".format( + info = "IntelMQ event {begin} - {end}" "".format( begin=self.min_time_current.isoformat(), end=self.max_time_current.isoformat(), ) + if self.additional_info: + info = f"{self.additional_info.format(separator=key)} {info}" + + self.current_events[key].info = info self.current_events[key].set_date(datetime.date.today()) self.current_events[key].Orgc = self.misp_org self.current_events[key].uuid = 
str(uuid4()) diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index c2b69e37b6..0c175177d9 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -64,6 +64,27 @@ def test_event(self): objects = json.load(f).get("Event", {}).get("Object", []) assert len(objects) == 1 + def test_additional_info(self): + self.run_bot(parameters={"additional_info": "This is my custom info."}) + + current_event = open(f"{self.directory.name}/.current").read() + with open(current_event) as f: + info: str = json.load(f).get("Event", {}).get("info", "") + assert info.startswith("This is my custom info. IntelMQ event ") + + def test_additional_info_with_separator(self): + self.run_bot( + parameters={ + "additional_info": "Event related to {separator}.", + "event_separator": "malware.name", + } + ) + + current_events = json.loads(open(f"{self.directory.name}/.current").read()) + with open(current_events["salityp2p"]) as f: + info: str = json.load(f).get("Event", {}).get("info", "") + assert info.startswith("Event related to salityp2p. IntelMQ event ") + def test_accumulating_events(self): self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT] self.run_bot(iterations=2, parameters={"bulk_save_count": 3}) From 3c8698093298592da1e2e0ecba52818bf13a71d5 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 9 Jul 2024 15:46:38 +0200 Subject: [PATCH 09/16] Fix typos --- docs/user/bots.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user/bots.md b/docs/user/bots.md index b7088c6375..dc8588ea26 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4644,7 +4644,7 @@ as not usable for IDS. **`event_separator` (optional, string): If set to a field name from IntelMQ event, the bot will work in parallel on a few -events instead of saving all incomming messages to a one. 
Each unique value from the field will +events instead of saving all incoming messages to a one. Each unique value from the field will use its own MISP Event. This is useful if your feed provides data about multiple entities you would like to group, for example IPs of C2 servers from different botnets. For a given value, the bot will use the same MISP Event as long as it's allowed by the `interval_event`. @@ -4657,7 +4657,7 @@ depends of datetimes saved there). If you use `event_separator`, you may want to placeholder which will be then replaced with the value of the separator. For example, the following configuration can be used to create MISP Feed with IPs of C2 servers -of different botnets, having each botnet in a separated MISP Events with an appropiate description. +of different botnets, having each botnet in a separated MISP Events with an appropriate description. Each MISP Event will contain objects with the `source.ip` field only, and the events' info will look like *C2 Servers for botnet-1. 
IntelMQ event 2024-07-09T14:51:10.825123 - 2024-07-10T14:51:10.825123* From d014e93cbe04055909b71a60a11336f4257e39c1 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 10 Jul 2024 11:25:05 +0200 Subject: [PATCH 10/16] ENH: add support for tagging --- intelmq/bots/outputs/misp/output_feed.py | 29 ++++++++++++++++++- .../bots/outputs/misp/test_output_feed.py | 13 +++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 9f82ae8d04..98274cce86 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -16,7 +16,7 @@ from intelmq.lib.utils import parse_relative try: - from pymisp import MISPEvent, MISPObject, MISPOrganisation, NewAttributeError + from pymisp import MISPEvent, MISPObject, MISPOrganisation, MISPTag, NewAttributeError from pymisp.tools import feed_meta_generator except ImportError: # catching SyntaxError because of https://github.com/MISP/PyMISP/issues/501 @@ -38,6 +38,10 @@ class MISPFeedOutputBot(OutputBot, CacheMixin): attribute_mapping: dict = None event_separator: str = None additional_info: str = None + tagging: dict = None + # A structure like: + # __all__: list of tag kwargs for all events + # : list of tag kwargs per separator key @staticmethod def check_output_dir(dirname): @@ -95,6 +99,18 @@ def init(self): self.max_time_current = self.min_time_current + self.timedelta self.current_events = {} + self._tagging_objects = {} + if self.tagging: + for key, tag_list in self.tagging.items(): + self._tagging_objects[key] = list() + for kw in tag_list: + # For some reason, PyMISP do not uses classmethod, and from_dict requires + # unpacking. So this is really the way to initialize tag objects. 
+ tag = MISPTag() + tag.from_dict(**kw) + self._tagging_objects[key].append(tag) + self.logger.debug("Generated tags: %r.", self._tagging_objects) + def _load_event(self, file_path: Path, key: str): if file_path.exists(): self.current_events[key] = MISPEvent() @@ -140,6 +156,14 @@ def process(self): def _generate_new_event(self, key): self.current_events[key] = MISPEvent() + + tags: list[MISPTag] = [] + if "__all__" in self._tagging_objects: + tags.extend(self._tagging_objects["__all__"]) + if key in self._tagging_objects: + tags.extend(self._tagging_objects[key]) + self.current_events[key].tags = tags + info = "IntelMQ event {begin} - {end}" "".format( begin=self.min_time_current.isoformat(), end=self.max_time_current.isoformat(), @@ -195,6 +219,9 @@ def _default_mapping(self, obj: "MISPObject", message: dict): ) def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dict: + """ + Creates a + """ # For caching and default mapping, the serialized version is the right format to work on. # However, for any custom mapping the Message object is more sufficient as it handles # subfields. 
diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index 0c175177d9..d3b04442cf 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -257,6 +257,19 @@ def test_event_separation_with_extra_and_bulk_save(self): objects = json.load(f).get("Event", {}).get("Object", []) assert len(objects) == 2 + def test_tagging(self): + self.run_bot( + parameters={ + "tagging": {"__all__": [{"name": "tlp:unclear", "colour": "#7e7eae"}]} + } + ) + + current_event = open(f"{self.directory.name}/.current").read() + with open(current_event) as f: + objects = json.load(f).get("Event", {}).get("Object", []) + assert len(objects) == 1 + + def tearDown(self): self.cache.delete(self.bot_id) self.directory.cleanup() From b1ac8e2ebf8dc317159082584e55bf87e6de775c Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 10 Jul 2024 15:08:45 +0200 Subject: [PATCH 11/16] Fix generating on restart --- intelmq/bots/outputs/misp/output_feed.py | 4 ++++ intelmq/lib/mixins/cache.py | 21 +++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 98274cce86..d213e5a5b1 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -111,6 +111,10 @@ def init(self): self._tagging_objects[key].append(tag) self.logger.debug("Generated tags: %r.", self._tagging_objects) + if self.current_events and self.cache_length(): + # Ensure we do generate feed on reload / restart + self._generate_feed() + def _load_event(self, file_path: Path, key: str): if file_path.exists(): self.current_events[key] = MISPEvent() diff --git a/intelmq/lib/mixins/cache.py b/intelmq/lib/mixins/cache.py index ee945fbb53..5919d67535 100644 --- a/intelmq/lib/mixins/cache.py +++ b/intelmq/lib/mixins/cache.py @@ -15,15 +15,17 @@ class CacheMixin: 
"""Provides caching possibilities for bots - For key-value cache, use methods: - cache_exists - cache_get - cache_set - - To store dict elements in a cache queue named after bot id, use methods: - cache_put - cache_pop + For key-value cache, use methods: + cache_exists + cache_get + cache_set + + To store dict elements in a cache queue named after bot id, use methods: + cache_put + cache_pop + cache_length """ + __redis: redis.Redis = None redis_cache_host: str = "127.0.0.1" redis_cache_port: int = 6379 @@ -70,6 +72,9 @@ def cache_put(self, value: dict) -> int: size = self.__redis.lpush(self.bot_id, json.dumps(value)) return size + def cache_length(self) -> int: + return self.__redis.llen(self.bot_id) + def cache_pop(self) -> dict: data = self.__redis.rpop(self.bot_id) if data is None: From 85ddfb3a11ff2ccf12b13a63c213ad17b43542d0 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 16 Jul 2024 13:56:03 +0200 Subject: [PATCH 12/16] ENH: Add tagging, check, and improved docs --- CHANGELOG.md | 1 + docs/user/bots.md | 46 ++- intelmq/bots/outputs/misp/output_feed.py | 288 ++++++++++++++---- .../bots/outputs/misp/test_output_feed.py | 116 ++++++- 4 files changed, 384 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfd6e0203b..0e4379d7d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ - Allow saving messages in bulks instead of refreshing the feed immediately (PR#2509 by Kamil Mankowski). - Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR#2509 by Kamil Mankowski). - Add `event_separator` parameter to allow keeping IntelMQ events in separated MISP Events based on a given field (PR#2509 by Kamil Mankowski). + - Add `tagging` parameter to allow adding tags to MISP events (PR#2509 by Kamil Mankowski). - Add `additional_info` parameter to extend the default description of MISP Events (PR#2509 by Kamil Mankowski). 
- `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar). diff --git a/docs/user/bots.md b/docs/user/bots.md index dc8588ea26..324b9608f1 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4585,6 +4585,12 @@ Create a directory layout in the MISP Feed format. The PyMISP library >= 2.4.119.1 is required, see [REQUIREMENTS.txt](https://github.com/certtools/intelmq/blob/master/intelmq/bots/outputs/misp/REQUIREMENTS.txt). +Note: please test the produced feed before using in production. This bot allows you to do an +extensive customisation of the MISP feed, including creating multiple events and tags, but it can +be tricky to configure properly. Misconfiguration can prevent the bot from starting or have bad +consequences for your MISP Instance (e.g. spamming with events). Use the `intelmqctl check` command +to validate your configuration against common mistakes. + **Module:** `intelmq.bots.outputs.misp.output_feed` **Parameters:** @@ -4614,7 +4620,7 @@ hour", string. (optional, int) If set to a non-0 value, the bot won't refresh the MISP feed immediately, but will cache incoming messages until the given number of them. Use it if your bot proceeds a high number of messages and constant saving to the disk is a problem. Reloading or restarting bot as well as generating -a new MISP event based on `interval_event` triggers saving regardless of the cache size. +a new MISP event based on `interval_event` triggers regenerating MISP feed regardless of the cache size. **`attribute_mapping`** @@ -4625,6 +4631,10 @@ dictionary represents additional parameters PyMISP can take when creating an att names of other IntelMQ fields (then the value of such field will be used), or static values. If not needed, leave empty dict. +For available attribute parameters, refer to the +[PyMISP documentation](https://pymisp.readthedocs.io/en/latest/_modules/pymisp/mispevent.html#MISPObjectAttribute) +for the `MISPObjectAttribute`. 
+ For example: ```yaml @@ -4668,6 +4678,40 @@ attribute_mapping: source.ip: ``` +**`tagging`** + +(optional, dict): Allows setting MISP tags to MISP events. The structure is a *dict of list of dicts*. +The keys refers to which MISP events you want to tag. If you want to tag all of them, use `__all__`. +If you use `event_separator` and want to add additional tags to some events, use the expected values +of the separation field. The *list of dicts* defines MISP tags as parameters to create `MISPTag` +objects from. Each dictonary has to have at least `name`. For all available parameters refer to the +[PyMISP documentation](https://pymisp.readthedocs.io/en/latest/_modules/pymisp/abstract.html#MISPTag) +for `MISPTag`. + +Note: setting `name` is enough for MISP to match a correct tag from the global collection. You may +see it lacking the colour in the MISP Feed view, but it will be retrieved after importing to your +instance. + +Example 1 - set two tags for every MISP event: + +```yaml +tagging: + __all__: + - name: tlp:red + - name: source:intelmq +``` + +Example 2 - create separated events based on `malware.name` and set additional family tag: + +```yaml +event_separator: malware.name +tagging: + __all__: + - name: tlp:red + njrat: + - name: njrat +``` + **Usage in MISP** Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. 
diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index d213e5a5b1..9d56f097de 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2019 Sebastian Wagner +# SPDX-FileCopyrightText: 2019 Sebastian Wagner, 2024 CERT.at GmbH # # SPDX-License-Identifier: AGPL-3.0-or-later @@ -9,19 +9,25 @@ from pathlib import Path from uuid import uuid4 + from intelmq.lib.bot import OutputBot from intelmq.lib.exceptions import MissingDependencyError -from ....lib.message import MessageFactory +from intelmq.lib.message import Event, Message, MessageFactory from intelmq.lib.mixins import CacheMixin from intelmq.lib.utils import parse_relative try: - from pymisp import MISPEvent, MISPObject, MISPOrganisation, MISPTag, NewAttributeError + from pymisp import ( + MISPEvent, + MISPObject, + MISPOrganisation, + MISPTag, + MISPObjectAttribute, + NewAttributeError, + ) from pymisp.tools import feed_meta_generator except ImportError: - # catching SyntaxError because of https://github.com/MISP/PyMISP/issues/501 MISPEvent = None - import_fail_reason = "import" DEFAULT_KEY = "default" @@ -30,18 +36,44 @@ class MISPFeedOutputBot(OutputBot, CacheMixin): """Generate an output in the MISP Feed format""" interval_event: str = "1 hour" - bulk_save_count: int = None misp_org_name = None misp_org_uuid = None - output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path - _is_multithreadable: bool = False - attribute_mapping: dict = None - event_separator: str = None + output_dir: str = ( + "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path + ) + # Enables regenerating the MISP feed after collecting given number of messages + bulk_save_count: int = None + + # Additional information to be added to the MISP event description additional_info: str = None + + # An optional field used to create multiple MISP events from incoming messages 
+ event_separator: str = None + + # Optional non-standard mapping of message fields to MISP object attributes + # The structure is like: + # {: {}} + # For example: + # {"source.ip": {"comment": "This is the source of the event"}} + # will include only the "source.ip" field in the MISP object attributes, + # and set the comment + attribute_mapping: dict = None + + # Optional definition to add tags to the MISP event. It should be a dict where keys are + # '__all__' (to add tags for every event) or, if the event_separator is used, the separator + # values. For each key, there should be a list of dicts defining parameters for the MISPTag + # object, but only the "name" is required to set. + # For example: + # {"__all__": [{"name": "tag1"}, {"name": "tag2"}]} + # will add two tags to every event + # {"infostealer": [{"name": "type:infostealer"}], "__all__": [{"name": "tag1"}]} + # will add two tags to every event separated by "infostealer", and + # one tag to every other event tagging: dict = None - # A structure like: - # __all__: list of tag kwargs for all events - # : list of tag kwargs per separator key + + # Delaying reloading would delay saving eventually long-awaiting messages + _sighup_delay = False + _is_multithreadable: bool = False @staticmethod def check_output_dir(dirname): @@ -109,11 +141,13 @@ def init(self): tag = MISPTag() tag.from_dict(**kw) self._tagging_objects[key].append(tag) - self.logger.debug("Generated tags: %r.", self._tagging_objects) - if self.current_events and self.cache_length(): - # Ensure we do generate feed on reload / restart - self._generate_feed() + # Ensure we do generate feed on reload / restart, so awaiting messages won't wait forever + if self.cache_length() and not getattr(self, "testing", False): + self.logger.debug( + "Found %s awaiting messages. 
Generating feed.", self.cache_length() + ) + self._generate_misp_feed() def _load_event(self, file_path: Path, key: str): if file_path.exists(): @@ -138,7 +172,7 @@ def process(self): self.min_time_current = datetime.datetime.now() self.max_time_current = self.min_time_current + self.timedelta - self._generate_feed() + self._generate_misp_feed() event = self.receive_message().to_dict(jsondict_as_string=True) @@ -147,18 +181,16 @@ def process(self): cache_size = self.cache_put(event) if cache_size is None: - self._generate_feed(event) + self._generate_misp_feed(event) elif not self.current_events: # Always create the first event so we can keep track of the interval. - # It also ensures cleaning the queue after startup in case of awaiting - # messages from the previous run - self._generate_feed() + self._generate_misp_feed() elif cache_size >= self.bulk_save_count: - self._generate_feed() + self._generate_misp_feed() self.acknowledge_message() - def _generate_new_event(self, key): + def _generate_new_misp_event(self, key): self.current_events[key] = MISPEvent() tags: list[MISPTag] = [] @@ -189,26 +221,29 @@ def _generate_new_event(self, key): json.dump({k: str(v) for k, v in self.current_files.items()}, f) return self.current_events[key] - def _add_message_to_feed(self, message: dict): + def _add_message_to_misp_event(self, message: dict): + # For proper handling of nested fields, we need the object + message_obj = MessageFactory.from_dict( + message, harmonization=self.harmonization, default_type="Event" + ) if not self.event_separator: key = DEFAULT_KEY else: - # For proper handling of nested fields - message_obj = MessageFactory.from_dict( - message, harmonization=self.harmonization, default_type="Event" - ) key = message_obj.get(self.event_separator) or DEFAULT_KEY if key in self.current_events: event = self.current_events[key] else: - event = self._generate_new_event(key) + event = self._generate_new_misp_event(key) obj = event.add_object(name="intelmq_event") + 
# For caching and default mapping, the serialized version is the right format to work on. + # However, for any custom mapping the Message object is more sufficient as it handles + # subfields. if not self.attribute_mapping: self._default_mapping(obj, message) else: - self._custom_mapping(obj, message) + self._custom_mapping(obj, message_obj) def _default_mapping(self, obj: "MISPObject", message: dict): for object_relation, value in message.items(): @@ -223,28 +258,21 @@ def _default_mapping(self, obj: "MISPObject", message: dict): ) def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dict: - """ - Creates a - """ - # For caching and default mapping, the serialized version is the right format to work on. - # However, for any custom mapping the Message object is more sufficient as it handles - # subfields. - message = MessageFactory.from_dict( - message, harmonization=self.harmonization, default_type="Event" - ) + """Creates the a dict with arguments to create a MISPObjectAttribute.""" result = {} for parameter, value in definition.items(): # Check if the value is a harmonization key or a static value if isinstance(value, str) and ( - value in self.harmonization["event"] or - value.split(".", 1)[0] in self.harmonization["event"] + value in self.harmonization["event"] + or value.split(".", 1)[0] in self.harmonization["event"] ): result[parameter] = message.get(value) else: result[parameter] = value return result - def _custom_mapping(self, obj: "MISPObject", message: dict): + def _custom_mapping(self, obj: "MISPObject", message: Message): + """Map the IntelMQ event to the MISP Object using the custom mapping definition.""" for object_relation, definition in self.attribute_mapping.items(): if object_relation in message: obj.add_attribute( @@ -252,15 +280,15 @@ def _custom_mapping(self, obj: "MISPObject", message: dict): value=message[object_relation], **self._extract_misp_attribute_kwargs(message, definition), ) - # In case of manual mapping, 
we want to fail if it produces incorrect values + # In case of custom mapping, we want to fail if it produces incorrect values - def _generate_feed(self, message: dict = None): + def _generate_misp_feed(self, message: dict = None): if message: - self._add_message_to_feed(message) + self._add_message_to_misp_event(message) message = self.cache_pop() while message: - self._add_message_to_feed(message) + self._add_message_to_misp_event(message) message = self.cache_pop() for key, event in self.current_events.items(): @@ -272,27 +300,163 @@ def _generate_feed(self, message: dict = None): @staticmethod def check(parameters): + results = [] if "output_dir" not in parameters: - return [["error", "Parameter 'output_dir' not given."]] - try: - created = MISPFeedOutputBot.check_output_dir(parameters["output_dir"]) - except OSError: - return [ + results.append(["error", "Parameter 'output_dir' not given."]) + else: + try: + created = MISPFeedOutputBot.check_output_dir(parameters["output_dir"]) + except OSError: + results.append( + [ + "error", + "Directory %r of parameter 'output_dir' does not exist and could not be created." + % parameters["output_dir"], + ] + ) + else: + if created: + results.append( + [ + "info", + "Directory %r of parameter 'output_dir' did not exist, but has now been created." + "" % parameters["output_dir"], + ] + ) + + bulk_save_count = parameters.get("bulk_save_count") + if bulk_save_count and not isinstance(bulk_save_count, int): + results.append( + ["error", "Parameter 'bulk_save_count' has to be int if set."] + ) + + sanity_event = Event({}) + event_separator = parameters.get("event_separator") + if ( + event_separator + and not sanity_event._Message__is_valid_key(event_separator)[0] + ): + results.append( [ "error", - "Directory %r of parameter 'output_dir' does not exist and could not be created." 
- % parameters["output_dir"], + f"Value {event_separator} in 'event_separator' is not a valid event key.", ] - ] - else: - if created: - return [ + ) + + not_feed_field_warning = ( + "Parameter '{parameter}' of {context} looks like not being a field exportable to" + " MISP Feed. It may be a valid PyMISP parameter, but won't be exported to the feed." + " Please ensure it's intended and consult PyMISP documentation at https://pymisp.readthedocs.io/" + " for valid parameters for the {object}." + ) + attribute_mapping = parameters.get("attribute_mapping") + if attribute_mapping: + if not isinstance(attribute_mapping, dict): + results.append( + ["error", "Parameter 'attribute_mapping has to be a dictionary."] + ) + else: + for key, value in attribute_mapping.items(): + if not sanity_event._Message__is_valid_key(key)[0]: + results.append( + [ + "error", + f"The key '{key}' in attribute_mapping is not a valid IDF field.", + ] + ) + if not isinstance(value, dict): + results.append( + [ + "error", + f"The config attribute_mapping['{key}'] should be a " + "dict with parameters for MISPObjectAttribute.", + ] + ) + else: + for parameter in value.keys(): + if parameter not in MISPObjectAttribute._fields_for_feed: + results.append( + [ + "warning", + not_feed_field_warning.format( + parameter=parameter, + context=f"attribute_mapping['{key}']", + object="MISPObjectAttribute", + ), + ] + ) + + tagging = parameters.get("tagging") + if tagging: + tagging_error = ( + "should be a list of dictionaries with parameters for the MISPTag object." + " Please consult PyMISP documentation at https://pymisp.readthedocs.io/" + " to find valid fields." + ) + if not isinstance(tagging, dict): + results.append( [ - "info", - "Directory %r of parameter 'output_dir' did not exist, but has now been created." - "" % parameters["output_dir"], + "error", + ( + "Parameter 'tagging' has to be a dictionary with keys as '__all__' " + "or possible 'event_separator' values. 
Each dictionary value " + + tagging_error, + ), ] - ] + ) + else: + if not event_separator and ( + "__all__" not in tagging or len(tagging.keys()) > 1 + ): + results.append( + [ + "error", + ( + "Tagging configuration expects custom values, but the 'event_separator'" + " parameter is not set. If you want to just tag all events, use only" + " the '__all__' key." + ), + ] + ) + for key, value in tagging.items(): + if not isinstance(value, list): + results.append( + [ + "error", + f"The config tagging['{key}'] {tagging_error}", + ] + ) + else: + for tag in value: + if not isinstance(tag, dict): + results.append( + [ + "error", + f"The config tagging['{key}'] {tagging_error}", + ] + ) + else: + if "name" not in tag: + results.append( + [ + "error", + f"The config tagging['{key}'] contains a tag without 'name'.", + ] + ) + for parameter in tag.keys(): + if parameter not in MISPTag._fields_for_feed: + results.append( + [ + "warning", + not_feed_field_warning.format( + parameter=parameter, + context=f"tagging['{key}']", + object="MISPTag", + ), + ] + ) + + return results or None BOT = MISPFeedOutputBot diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index d3b04442cf..f27b367164 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -4,6 +4,7 @@ # -*- coding: utf-8 -*- import json +import select import unittest from pathlib import Path from tempfile import TemporaryDirectory @@ -86,6 +87,9 @@ def test_additional_info_with_separator(self): assert info.startswith("Event related to salityp2p. IntelMQ event ") def test_accumulating_events(self): + """Ensures bot first collects events and then saves them in bulks to MISP feed, + and also respects the event interval to create a new event periodically. 
+ """ self.input_message = [EXAMPLE_EVENT, EXAMPLE_EVENT] self.run_bot(iterations=2, parameters={"bulk_save_count": 3}) @@ -126,6 +130,8 @@ def test_accumulating_events(self): assert len(objects) == 2 def test_attribute_mapping(self): + """Tests custom attribute mapping that selects just a subset of fields to export + and allows including custom parameters for MISPObjectAttribute, like comments.""" self.run_bot( parameters={ "attribute_mapping": { @@ -170,7 +176,7 @@ def test_attribute_mapping(self): assert malware_name["value"] == EXAMPLE_EVENT["malware.name"] assert malware_name["comment"] == EXAMPLE_EVENT["extra.non_ascii"] - def test_attribute_mapping_empty_field(self): + def test_attribute_mapping_omitted_when_field_is_empty(self): self.run_bot( parameters={ "attribute_mapping": { @@ -193,6 +199,8 @@ def test_attribute_mapping_empty_field(self): assert source_ip["value"] == "152.166.119.2" def test_event_separation(self): + """Tests that based on the value of the given field, incoming messages are put in separated + MISP events.""" self.input_message = [ EXAMPLE_EVENT, {**EXAMPLE_EVENT, "malware.name": "another_malware"}, @@ -258,17 +266,117 @@ def test_event_separation_with_extra_and_bulk_save(self): assert len(objects) == 2 def test_tagging(self): + """Ensures MISP events get correct MISP tags""" self.run_bot( parameters={ - "tagging": {"__all__": [{"name": "tlp:unclear", "colour": "#7e7eae"}]} + "tagging": { + "__all__": [ + {"name": "tlp:unclear", "colour": "#7e7eae"}, + {"name": "source:intelmq"}, + ] + } } ) current_event = open(f"{self.directory.name}/.current").read() with open(current_event) as f: - objects = json.load(f).get("Event", {}).get("Object", []) - assert len(objects) == 1 + tags = json.load(f).get("Event", {}).get("Tag", []) + assert len(tags) == 2 + + tlp = next(t for t in tags if t["name"] == "tlp:unclear") + assert tlp["colour"] == "#7e7eae" + + def test_tagging_and_event_separation(self): + """When separating events, it is possible to 
add different MISP tags to specific MISP + events.""" + self.input_message = [ + EXAMPLE_EVENT, + {**EXAMPLE_EVENT, "malware.name": "another_malware"}, + ] + self.run_bot( + iterations=2, + parameters={ + "event_separator": "malware.name", + "tagging": { + "__all__": [{"name": "source:intelmq"}], + "salityp2p": [{"name": "family:salityp2p"}], + "another_malware": [{"name": "family:malware_2"}], + }, + }, + ) + + current_events = json.loads(open(f"{self.directory.name}/.current").read()) + assert len(current_events) == 2 + + with open(current_events["salityp2p"]) as f: + tags = json.load(f).get("Event", {}).get("Tag", []) + assert len(tags) == 2 + assert next(t for t in tags if t["name"] == "source:intelmq") + assert next(t for t in tags if t["name"] == "family:salityp2p") + with open(current_events["another_malware"]) as f: + tags = json.load(f).get("Event", {}).get("Tag", []) + assert len(tags) == 2 + assert next(t for t in tags if t["name"] == "source:intelmq") + assert next(t for t in tags if t["name"] == "family:malware_2") + + def test_parameter_check_correct(self): + result = self.bot_reference.check( + { + **self.sysconfig, + "attribute_mapping": { + "source.ip": {}, + "feed.name": {"comment": "event_description.text"}, + "destination.ip": {"to_ids": False, "comment": "Possible FP"}, + "malware.name": {"comment": "extra.non_ascii"}, + }, + "event_separator": "extra.botnet", + "bulk_save_count": 10, + "tagging": { + "__all__": [{"name": "source:feed", "colour": "#000000"}], + "abotnet": [{"name": "type:botnet"}], + }, + } + ) + assert result is None + + def test_parameter_check_errors(self): + cases = [ + {"bulk_save_count": "not-a-number"}, + {"event_separator": "not-a-field"}, + {"attribute_mapping": "not-a-dict"}, + {"attribute_mapping": {"not-a-field": {}}}, + {"attribute_mapping": {"source.ip": "not-a-dict"}}, + {"tagging": {"not-all": []}}, # without event_separator, only __all__ is allowed + {"tagging": {"__all__": [], "other": []}}, + 
{"event_separator": "malware.name", "tagging": ["not", "a", "dict"]}, + { + "event_separator": "malware.name", + "tagging": {"case": "must-be-list-of-dicts"}, + }, + { + "event_separator": "malware.name", + "tagging": {"case": ["must-be-list-of-dicts"]}, + }, + { + "event_separator": "malware.name", + "tagging": {"case": [{"must": "have a name"}]}, + }, + ] + for case in cases: + with self.subTest(): + result = self.bot_reference.check({**self.sysconfig, **case}) + assert len(list(r for r in result if r[0] == "error")) == 1 + + def test_parameter_check_warnings(self): + cases = [ + {"attribute_mapping": {"source.ip": {"not-a-feed-arg": "any"}}}, + {"tagging": {"case": [{"name": "", "not-a-feed-arg": "any"}]}}, + ] + for case in cases: + with self.subTest(): + result = self.bot_reference.check({**self.sysconfig, **case}) + assert len(list(r for r in result if r[0] == "warning")) == 1 def tearDown(self): self.cache.delete(self.bot_id) From 7c3396270746995b7574d010c7c5cc3a9c662128 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 16 Jul 2024 14:25:10 +0200 Subject: [PATCH 13/16] DOC: Update documentation about CacheMixin --- CHANGELOG.md | 2 ++ docs/dev/bot-development.md | 14 +++++++++++++- intelmq/lib/mixins/cache.py | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e4379d7d8..ebac8ecf5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ ### Core - `intelmq.lib.utils.drop_privileges`: When IntelMQ is called as `root` and dropping the privileges to user `intelmq`, also set the non-primary groups associated with the `intelmq` user. Makes the behaviour of running intelmqctl as `root` closer to the behaviour of `sudo -u intelmq ...` (PR#2507 by Mikk Margus Möll). +- `intelmq.lib.mixins.cache.CacheMixin` was extended to support temporary storing messages in a cache queue + (PR#2509 by Kamil Mankowski). 
### Development diff --git a/docs/dev/bot-development.md b/docs/dev/bot-development.md index 39253c8cf8..89d6414f65 100644 --- a/docs/dev/bot-development.md +++ b/docs/dev/bot-development.md @@ -197,7 +197,7 @@ The `CacheMixin` provides methods to cache values for bots in a Redis database. - `redis_cache_ttl: int = 15` - `redis_cache_password: Optional[str] = None` -and provides the methods: +and provides the methods to cache key-value pairs: - `cache_exists` - `cache_get` @@ -205,6 +205,18 @@ and provides the methods: - `cache_flush` - `cache_get_redis_instance` +and following methods to cache objects in a queue: + +- `cache_put` +- `cache_pop` +- `cache_length`. + +Caching key-value pairs and queue caching are two separated mechanisms. The first is designed + for arbitrary values, the second one is focused on temporarily storing messages (but can handle other + data). You won't see caches from one in the other. For example, if adding a key-value pair using + `cache_set`, it does not change the value from `cache_length`, and if adding an element using + `cache_put` you cannot use `cache_exists` to look for it. 
+ ### Pipeline Interactions We can call three methods related to the pipeline: diff --git a/intelmq/lib/mixins/cache.py b/intelmq/lib/mixins/cache.py index 5919d67535..01465ae3df 100644 --- a/intelmq/lib/mixins/cache.py +++ b/intelmq/lib/mixins/cache.py @@ -13,7 +13,7 @@ class CacheMixin: - """Provides caching possibilities for bots + """Provides caching possibilities for bots, see also https://docs.intelmq.org/latest/dev/bot-development/#mixins For key-value cache, use methods: cache_exists From 05f4aef29934e88f015db71de5936151b15830c9 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 16 Jul 2024 14:11:02 +0200 Subject: [PATCH 14/16] Adjust to pycodestyle --- intelmq/bots/outputs/misp/output_feed.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py index 9d56f097de..beb0ded5fb 100644 --- a/intelmq/bots/outputs/misp/output_feed.py +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -263,8 +263,8 @@ def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dic for parameter, value in definition.items(): # Check if the value is a harmonization key or a static value if isinstance(value, str) and ( - value in self.harmonization["event"] - or value.split(".", 1)[0] in self.harmonization["event"] + value in self.harmonization["event"] or + value.split(".", 1)[0] in self.harmonization["event"] ): result[parameter] = message.get(value) else: @@ -333,8 +333,8 @@ def check(parameters): sanity_event = Event({}) event_separator = parameters.get("event_separator") if ( - event_separator - and not sanity_event._Message__is_valid_key(event_separator)[0] + event_separator and not + sanity_event._Message__is_valid_key(event_separator)[0] ): results.append( [ @@ -399,8 +399,8 @@ def check(parameters): "error", ( "Parameter 'tagging' has to be a dictionary with keys as '__all__' " - "or possible 'event_separator' values. 
Each dictionary value " - + tagging_error, + "or possible 'event_separator' values. Each dictionary value " + + tagging_error, ), ] ) From c57121a63e1909b42652ae5108b7823e18589585 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 16 Jul 2024 14:26:16 +0200 Subject: [PATCH 15/16] Fix typo --- docs/user/bots.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/bots.md b/docs/user/bots.md index 324b9608f1..a36fc05051 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -4684,7 +4684,7 @@ attribute_mapping: The keys refers to which MISP events you want to tag. If you want to tag all of them, use `__all__`. If you use `event_separator` and want to add additional tags to some events, use the expected values of the separation field. The *list of dicts* defines MISP tags as parameters to create `MISPTag` -objects from. Each dictonary has to have at least `name`. For all available parameters refer to the +objects from. Each dictionary has to have at least `name`. For all available parameters refer to the [PyMISP documentation](https://pymisp.readthedocs.io/en/latest/_modules/pymisp/abstract.html#MISPTag) for `MISPTag`. 
From 2b8fac77e970a608d945849f68a869a23e1e10d6 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 16 Jul 2024 14:46:50 +0200 Subject: [PATCH 16/16] Clean up imports in tests --- intelmq/tests/bots/outputs/misp/test_output_feed.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py index f27b367164..5fedb657f7 100644 --- a/intelmq/tests/bots/outputs/misp/test_output_feed.py +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -4,14 +4,13 @@ # -*- coding: utf-8 -*- import json -import select import unittest from pathlib import Path from tempfile import TemporaryDirectory -from .....lib.message import Message, MessageFactory import intelmq.lib.test as test from intelmq.bots.outputs.misp.output_feed import MISPFeedOutputBot +from intelmq.lib.message import MessageFactory EXAMPLE_EVENT = { "classification.type": "infected-system", @@ -347,7 +346,9 @@ def test_parameter_check_errors(self): {"attribute_mapping": "not-a-dict"}, {"attribute_mapping": {"not-a-field": {}}}, {"attribute_mapping": {"source.ip": "not-a-dict"}}, - {"tagging": {"not-all": []}}, # without event_separator, only __all__ is allowed + { + "tagging": {"not-all": []} + }, # without event_separator, only __all__ is allowed {"tagging": {"__all__": [], "other": []}}, {"event_separator": "malware.name", "tagging": ["not", "a", "dict"]}, {