diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0dc569df8be..913e464abb7 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,3 +1,2 @@ -# These owners will be the default owners for everything in -# the repo. Unless a later match takes precedence, * @minvws/kat-managers +.github/CODEOWNERS @minvws/irealisatie-operations diff --git a/.github/workflows/build-rdo-package.yml b/.github/workflows/build-rdo-package.yml index 032e7709b5e..911642089f0 100644 --- a/.github/workflows/build-rdo-package.yml +++ b/.github/workflows/build-rdo-package.yml @@ -203,7 +203,7 @@ jobs: - name: Octopoes Upload whl package uses: actions/upload-artifact@v4 with: - name: "octopoes-${{env.RELEASE_VERSION}}" + name: "octopoes-${{env.RELEASE_VERSION}}_python${{ matrix.python_version }}" path: "${{ github.workspace }}/octopoes/dist/octopoes*.whl" - name: Octopoes Upload venv tar diff --git a/boefjes/boefjes/app.py b/boefjes/boefjes/app.py index af30d6319da..6a12ca463cf 100644 --- a/boefjes/boefjes/app.py +++ b/boefjes/boefjes/app.py @@ -85,60 +85,33 @@ def _fill_queue(self, task_queue: Queue, queue_type: WorkerManager.Queue) -> Non time.sleep(self.settings.worker_heartbeat) return + logger.debug("Popping from queue %s", queue_type.value) + try: - queues = self.scheduler_client.get_queues() - except HTTPError: - # Scheduler is having issues, so make note of it and try again - logger.exception("Getting the queues from the scheduler failed") - time.sleep(self.settings.poll_interval) # But not immediately + p_item = self.scheduler_client.pop_item(queue_type.value) + except (HTTPError, ValidationError): + logger.exception("Popping task from scheduler failed, sleeping 10 seconds") + time.sleep(self.settings.worker_heartbeat) return - # We do not target a specific queue since we start one runtime for all organisations - # and queue ids contain the organisation_id - queues = [q for q in queues if q.id.startswith(queue_type.value) and q.size > 0] - - logger.debug("Found queues: 
%s", [queue.id for queue in queues]) - - all_queues_empty = True - - for queue in queues: - logger.debug("Popping from queue %s", queue.id) - - try: - p_item = self.scheduler_client.pop_item(queue.id) - except (HTTPError, ValidationError): - logger.exception("Popping task from scheduler failed, sleeping 10 seconds") - time.sleep(10) - continue - - if not p_item: - logger.debug("Queue %s empty", queue.id) - continue + if p_item is None: + time.sleep(self.settings.worker_heartbeat) + return - all_queues_empty = False + logger.info("Handling task[%s]", p_item.data.id) - logger.info("Handling task[%s]", p_item.data.id) + try: + task_queue.put(p_item) + logger.info("Dispatched task[%s]", p_item.data.id) + except: # noqa + logger.exception("Exiting worker...") + logger.info("Patching scheduler task[id=%s] to %s", p_item.data.id, TaskStatus.FAILED.value) try: - task_queue.put(p_item) - logger.info("Dispatched task[%s]", p_item.data.id) - except: # noqa - logger.exception("Exiting worker...") - logger.info("Patching scheduler task[id=%s] to %s", p_item.data.id, TaskStatus.FAILED.value) - - try: - self.scheduler_client.patch_task(p_item.id, TaskStatus.FAILED) - logger.info( - "Set task status to %s in the scheduler for task[id=%s]", TaskStatus.FAILED, p_item.data.id - ) - except HTTPError: - logger.exception("Could not patch scheduler task to %s", TaskStatus.FAILED.value) - - raise - - if all_queues_empty: - logger.debug("All queues empty, sleeping %f seconds", self.settings.poll_interval) - time.sleep(self.settings.poll_interval) + self.scheduler_client.patch_task(p_item.id, TaskStatus.FAILED) + logger.info("Set task status to %s in the scheduler for task[id=%s]", TaskStatus.FAILED, p_item.data.id) + except HTTPError: + logger.exception("Could not patch scheduler task to %s", TaskStatus.FAILED.value) def _check_workers(self) -> None: new_workers = [] @@ -279,9 +252,4 @@ def get_runtime_manager(settings: Settings, queue: WorkerManager.Queue, log_leve 
LocalNormalizerJobRunner(local_repository), bytes_api_client, settings.scan_profile_whitelist ) - return SchedulerWorkerManager( - item_handler, - SchedulerAPIClient(str(settings.scheduler_api)), # Do not share a session between workers - settings, - log_level, - ) + return SchedulerWorkerManager(item_handler, SchedulerAPIClient(str(settings.scheduler_api)), settings, log_level) diff --git a/boefjes/boefjes/clients/scheduler_client.py b/boefjes/boefjes/clients/scheduler_client.py index 095804b6001..edb5c02ac5c 100644 --- a/boefjes/boefjes/clients/scheduler_client.py +++ b/boefjes/boefjes/clients/scheduler_client.py @@ -1,6 +1,7 @@ import datetime import uuid from enum import Enum +from typing import Any from httpx import Client, HTTPTransport, Response from pydantic import BaseModel, TypeAdapter @@ -29,7 +30,8 @@ class TaskStatus(Enum): class Task(BaseModel): id: uuid.UUID scheduler_id: str - schedule_id: str | None + schedule_id: uuid.UUID | None = None + organisation: str priority: int status: TaskStatus type: str @@ -39,11 +41,21 @@ class Task(BaseModel): modified_at: datetime.datetime +class PaginatedTasksResponse(BaseModel): + count: int + next: str | None = None + previous: str | None = None + results: list[Task] + + class SchedulerClientInterface: def get_queues(self) -> list[Queue]: raise NotImplementedError() - def pop_item(self, queue_id: str) -> Task | None: + def pop_item(self, scheduler_id: str) -> Task | None: + raise NotImplementedError() + + def pop_items(self, scheduler_id: str, filters: dict[str, Any]) -> PaginatedTasksResponse | None: raise NotImplementedError() def patch_task(self, task_id: uuid.UUID, status: TaskStatus) -> None: @@ -66,20 +78,24 @@ def __init__(self, base_url: str): def _verify_response(response: Response) -> None: response.raise_for_status() - def get_queues(self) -> list[Queue]: - response = self._session.get("/queues") + def pop_item(self, scheduler_id: str) -> Task | None: + response = 
self._session.post(f"/schedulers/{scheduler_id}/pop?limit=1") self._verify_response(response) - return TypeAdapter(list[Queue]).validate_json(response.content) + page = TypeAdapter(PaginatedTasksResponse | None).validate_json(response.content) + if page.count == 0: + return None + + return page.results[0] - def pop_item(self, queue_id: str) -> Task | None: - response = self._session.post(f"/queues/{queue_id}/pop") + def pop_items(self, scheduler_id: str, filters: dict[str, Any]) -> PaginatedTasksResponse | None: + response = self._session.post(f"/schedulers/{scheduler_id}/pop", json=filters) self._verify_response(response) - return TypeAdapter(Task | None).validate_json(response.content) + return TypeAdapter(PaginatedTasksResponse | None).validate_json(response.content) def push_item(self, p_item: Task) -> None: - response = self._session.post(f"/queues/{p_item.scheduler_id}/push", content=p_item.model_dump_json()) + response = self._session.post(f"/schedulers/{p_item.scheduler_id}/push", content=p_item.model_dump_json()) self._verify_response(response) def patch_task(self, task_id: uuid.UUID, status: TaskStatus) -> None: diff --git a/boefjes/boefjes/config.py b/boefjes/boefjes/config.py index a3947ed399c..7ccf3f78e1f 100644 --- a/boefjes/boefjes/config.py +++ b/boefjes/boefjes/config.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Any, Literal -from pydantic import AmqpDsn, AnyHttpUrl, Field, FilePath, IPvAnyAddress, PostgresDsn, conint +from pydantic import AnyHttpUrl, Field, FilePath, IPvAnyAddress, PostgresDsn, conint from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict from pydantic_settings.sources import EnvSettingsSource @@ -63,9 +63,6 @@ class Settings(BaseSettings): examples=['{"kat_external_db_normalize": 3, "kat_dns_normalize": 1}'], ) - # Queue configuration - queue_uri: AmqpDsn = Field(..., description="KAT queue URI", examples=["amqp://"], validation_alias="QUEUE_URI") - katalogus_db_uri: 
PostgresDsn = Field( ..., examples=["postgresql://xx:xx@host:5432/katalogus"], diff --git a/boefjes/packaging/deb/data/etc/kat/boefjes.conf b/boefjes/packaging/deb/data/etc/kat/boefjes.conf index 8baa0e64fd4..6d986354bfa 100644 --- a/boefjes/packaging/deb/data/etc/kat/boefjes.conf +++ b/boefjes/packaging/deb/data/etc/kat/boefjes.conf @@ -1,5 +1,3 @@ -QUEUE_URI= - # OCTOPOES_API=http://localhost:8001 # BYTES_API=http://localhost:8002 BYTES_USERNAME=bytes diff --git a/boefjes/pyproject.toml b/boefjes/pyproject.toml index 223bf59937f..1a5073aad75 100644 --- a/boefjes/pyproject.toml +++ b/boefjes/pyproject.toml @@ -97,7 +97,6 @@ markers = ["slow: marks tests as slow"] addopts = "-m 'not slow'" env = [ "D:KATALOGUS_DB_URI=postgresql://postgres:postgres@ci_katalogus-db:5432/ci_katalogus", - "D:QUEUE_URI=amqp://placeholder", "D:BOEFJES_API=http://placeholder:8006", "D:KATALOGUS_API=http://placeholder:8000", "D:OCTOPOES_API=http://placeholder:8001", diff --git a/boefjes/tests/conftest.py b/boefjes/tests/conftest.py index 202d3d0a778..5b598f36907 100644 --- a/boefjes/tests/conftest.py +++ b/boefjes/tests/conftest.py @@ -15,7 +15,7 @@ from boefjes.app import SchedulerWorkerManager from boefjes.clients.bytes_client import BytesAPIClient -from boefjes.clients.scheduler_client import Queue, SchedulerClientInterface, Task, TaskStatus +from boefjes.clients.scheduler_client import PaginatedTasksResponse, SchedulerClientInterface, Task, TaskStatus from boefjes.config import Settings, settings from boefjes.dependencies.plugins import PluginService, get_plugin_service from boefjes.job_handler import bytes_api_client @@ -50,7 +50,6 @@ class MockSchedulerClient(SchedulerClientInterface): def __init__( self, - queue_response: bytes, boefje_responses: list[bytes], normalizer_responses: list[bytes], log_path: Path, @@ -58,7 +57,6 @@ def __init__( iterations_to_wait_for_exception: int = 0, sleep_time: float = 0.1, ): - self.queue_response = queue_response self.boefje_responses = 
boefje_responses self.normalizer_responses = normalizer_responses @@ -73,26 +71,25 @@ def __init__( self._popped_items: dict[str, Task] = multiprocessing.Manager().dict() self._pushed_items: dict[str, Task] = multiprocessing.Manager().dict() - def get_queues(self) -> list[Queue]: - time.sleep(self.sleep_time) - return TypeAdapter(list[Queue]).validate_json(self.queue_response) - def pop_item(self, queue: str) -> Task | None: time.sleep(self.sleep_time) try: if WorkerManager.Queue.BOEFJES.value in queue: - p_item = TypeAdapter(Task).validate_json(self.boefje_responses.pop(0)) + response = TypeAdapter(PaginatedTasksResponse).validate_json(self.boefje_responses.pop(0)) + p_item = response.results[0] self._popped_items[str(p_item.id)] = p_item self._tasks[str(p_item.id)] = self._task_from_id(p_item.id) return p_item if WorkerManager.Queue.NORMALIZERS.value in queue: - p_item = TypeAdapter(Task).validate_json(self.normalizer_responses.pop(0)) + response = TypeAdapter(PaginatedTasksResponse).validate_json(self.normalizer_responses.pop(0)) + p_item = response.results[0] self._popped_items[str(p_item.id)] = p_item self._tasks[str(p_item.id)] = self._task_from_id(p_item.id) return p_item except IndexError: + time.sleep(3 * self.sleep_time) raise self.raise_on_empty_queue def patch_task(self, task_id: UUID, status: TaskStatus) -> None: @@ -126,7 +123,8 @@ def __init__(self, exception=Exception): def handle(self, item: BoefjeMeta | NormalizerMeta): time.sleep(self.sleep_time) - if str(item.id) == "9071c9fd-2b9f-440f-a524-ef1ca4824fd4": + if str(item.id) in ["9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "2071c9fd-2b9f-440f-a524-ef1ca4824fd4"]: + time.sleep(self.sleep_time) raise self.exception() self.queue.put(item) @@ -151,7 +149,6 @@ def item_handler(tmp_path: Path): @pytest.fixture def manager(item_handler: MockHandler, tmp_path: Path) -> SchedulerWorkerManager: scheduler_client = MockSchedulerClient( - queue_response=get_dummy_data("scheduler/queues_response.json"), 
boefje_responses=[ get_dummy_data("scheduler/pop_response_boefje.json"), get_dummy_data("scheduler/pop_response_boefje_2.json"), diff --git a/boefjes/tests/examples/scheduler/pop_response_boefje.json b/boefjes/tests/examples/scheduler/pop_response_boefje.json index 29e7d5dfb72..4f668582726 100644 --- a/boefjes/tests/examples/scheduler/pop_response_boefje.json +++ b/boefjes/tests/examples/scheduler/pop_response_boefje.json @@ -1,25 +1,33 @@ { - "id": "70da7d4f-f41f-4940-901b-d98a92e9014b", - "priority": 1, - "scheduler_id": "boefje-_dev", - "schedule_id": null, - "status": "dispatched", - "type": "boefje", - "hash": "70da7d4f-f41f-4940-901b-d98a92e9014b", - "data": { - "id": "70da7d4f-f41f-4940-901b-d98a92e9014b", - "boefje": { - "id": "dns-records", - "version": null - }, - "input_ooi": "Hostname|internet|test.test", - "organization": "_dev", - "arguments": {}, - "started_at": null, - "runnable_hash": null, - "environment": null, - "ended_at": null - }, - "created_at": "2021-06-29T14:00:00", - "modified_at": "2021-06-29T14:00:00" + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "id": "70da7d4f-f41f-4940-901b-d98a92e9014b", + "priority": 1, + "scheduler_id": "boefje", + "organisation": "_dev", + "schedule_id": null, + "status": "dispatched", + "type": "boefje", + "hash": "70da7d4f-f41f-4940-901b-d98a92e9014b", + "data": { + "id": "70da7d4f-f41f-4940-901b-d98a92e9014b", + "boefje": { + "id": "dns-records", + "version": null + }, + "input_ooi": "Hostname|internet|test.test", + "organization": "_dev", + "arguments": {}, + "started_at": null, + "runnable_hash": null, + "environment": null, + "ended_at": null + }, + "created_at": "2021-06-29T14:00:00", + "modified_at": "2021-06-29T14:00:00" + } + ] } diff --git a/boefjes/tests/examples/scheduler/pop_response_boefje_2.json b/boefjes/tests/examples/scheduler/pop_response_boefje_2.json index 762be767cc9..8c40569c9c2 100644 --- a/boefjes/tests/examples/scheduler/pop_response_boefje_2.json +++ 
b/boefjes/tests/examples/scheduler/pop_response_boefje_2.json @@ -1,25 +1,33 @@ { - "id": "70da7d4f-f41f-4940-901b-d98a92e9014c", - "priority": 1, - "scheduler_id": "boefje-_dev", - "schedule_id": null, - "status": "dispatched", - "type": "boefje", - "hash": "70da7d4f-f41f-4940-901b-d98a92e9014c", - "data": { - "id": "70da7d4f-f41f-4940-901b-d98a92e9014c", - "boefje": { - "id": "dns-records", - "version": null - }, - "input_ooi": "Hostname|internet|test.test", - "organization": "_dev", - "arguments": {}, - "started_at": null, - "runnable_hash": null, - "environment": null, - "ended_at": null - }, - "created_at": "2021-06-29T14:00:00", - "modified_at": "2021-06-29T14:00:00" + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "id": "70da7d4f-f41f-4940-901b-d98a92e9014c", + "priority": 1, + "scheduler_id": "boefje", + "organisation": "_dev", + "schedule_id": null, + "status": "dispatched", + "type": "boefje", + "hash": "70da7d4f-f41f-4940-901b-d98a92e9014c", + "data": { + "id": "70da7d4f-f41f-4940-901b-d98a92e9014c", + "boefje": { + "id": "dns-records", + "version": null + }, + "input_ooi": "Hostname|internet|test.test", + "organization": "_dev", + "arguments": {}, + "started_at": null, + "runnable_hash": null, + "environment": null, + "ended_at": null + }, + "created_at": "2021-06-29T14:00:00", + "modified_at": "2021-06-29T14:00:00" + } + ] } diff --git a/boefjes/tests/examples/scheduler/pop_response_boefje_no_ooi.json b/boefjes/tests/examples/scheduler/pop_response_boefje_no_ooi.json index 5ce2ad45c9e..522737bbd27 100644 --- a/boefjes/tests/examples/scheduler/pop_response_boefje_no_ooi.json +++ b/boefjes/tests/examples/scheduler/pop_response_boefje_no_ooi.json @@ -1,25 +1,33 @@ { - "id": "70da7d4f-f41f-4940-901b-d98a92e9014b", - "priority": 1, - "scheduler_id": "boefje-_dev", - "schedule_id": null, - "status": "dispatched", - "type": "boefje", - "hash": "70da7d4f-f41f-4940-901b-d98a92e9014b", - "data": { - "id": 
"70da7d4f-f41f-4940-901b-d98a92e9014b", - "boefje": { - "id": "dns-records", - "version": null - }, - "input_ooi": "", - "organization": "_dev", - "arguments": {}, - "started_at": null, - "runnable_hash": null, - "environment": null, - "ended_at": null - }, - "created_at": "2021-06-29T14:00:00", - "modified_at": "2021-06-29T14:00:00" + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "id": "70da7d4f-f41f-4940-901b-d98a92e9014b", + "priority": 1, + "scheduler_id": "boefje", + "organisation": "_dev", + "schedule_id": null, + "status": "dispatched", + "type": "boefje", + "hash": "70da7d4f-f41f-4940-901b-d98a92e9014b", + "data": { + "id": "70da7d4f-f41f-4940-901b-d98a92e9014b", + "boefje": { + "id": "dns-records", + "version": null + }, + "input_ooi": "", + "organization": "_dev", + "arguments": {}, + "started_at": null, + "runnable_hash": null, + "environment": null, + "ended_at": null + }, + "created_at": "2021-06-29T14:00:00", + "modified_at": "2021-06-29T14:00:00" + } + ] } diff --git a/boefjes/tests/examples/scheduler/pop_response_normalizer.json b/boefjes/tests/examples/scheduler/pop_response_normalizer.json index 274a9798c28..f197961c532 100644 --- a/boefjes/tests/examples/scheduler/pop_response_normalizer.json +++ b/boefjes/tests/examples/scheduler/pop_response_normalizer.json @@ -1,57 +1,65 @@ { - "id": "60da7d4ff41f4940901bd98a92e9014b", - "priority": 1, - "scheduler_id": "normalizer-_dev", - "schedule_id": null, - "status": "dispatched", - "type": "normalizer", - "hash": "7e698c377cfd85015c0d7086b76b76b4", - "data": { - "id": "60da7d4ff41f4940901bd98a92e9014b", - "raw_data": { - "id": "60da7d4ff41f4940901bd98a92e9014a", - "boefje_meta": { - "id": "70da7d4ff41f4940901bd98a92e9014b", - "boefje": { - "id": "dns-records", - "name": "DnsRecords", - "description": "Fetch the DNS record(s) of a hostname", - "version": null, - "scan_level": 1, - "consumes": [ - "Hostname" - ], - "produces": [ - "DNSAAAARecord", - "IPAddressV6", - "NXDOMAIN", - 
"Hostname", - "Network", - "DNSNSRecord", - "DNSTXTRecord", - "IPAddressV4", - "DNSMXRecord", - "DNSZone", - "DNSARecord", - "DNSSOARecord", - "DNSCNAMERecord" - ], - "dispatches": null + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "id": "60da7d4ff41f4940901bd98a92e9014b", + "priority": 1, + "scheduler_id": "normalizer", + "organisation": "_dev", + "schedule_id": null, + "status": "dispatched", + "type": "normalizer", + "hash": "7e698c377cfd85015c0d7086b76b76b4", + "data": { + "id": "60da7d4ff41f4940901bd98a92e9014b", + "raw_data": { + "id": "60da7d4ff41f4940901bd98a92e9014a", + "boefje_meta": { + "id": "70da7d4ff41f4940901bd98a92e9014b", + "boefje": { + "id": "dns-records", + "name": "DnsRecords", + "description": "Fetch the DNS record(s) of a hostname", + "version": null, + "scan_level": 1, + "consumes": [ + "Hostname" + ], + "produces": [ + "DNSAAAARecord", + "IPAddressV6", + "NXDOMAIN", + "Hostname", + "Network", + "DNSNSRecord", + "DNSTXTRecord", + "IPAddressV4", + "DNSMXRecord", + "DNSZone", + "DNSARecord", + "DNSSOARecord", + "DNSCNAMERecord" + ], + "dispatches": null + }, + "input_ooi": "Hostname|internet|test.test", + "organization": "_dev", + "dispatches": [] + }, + "mime_types": [ + { + "value": "boefje/dns-records" + } + ] }, - "input_ooi": "Hostname|internet|test.test", - "organization": "_dev", - "dispatches": [] - }, - "mime_types": [ - { - "value": "boefje/dns-records" + "normalizer": { + "id": "kat_dns_normalize" } - ] - }, - "normalizer": { - "id": "kat_dns_normalize" + }, + "created_at": "2021-06-29T14:00:00", + "modified_at": "2021-06-29T14:00:00" } - }, - "created_at": "2021-06-29T14:00:00", - "modified_at": "2021-06-29T14:00:00" + ] } diff --git a/boefjes/tests/examples/scheduler/queues_response.json b/boefjes/tests/examples/scheduler/queues_response.json deleted file mode 100644 index ae5f68f8ab0..00000000000 --- a/boefjes/tests/examples/scheduler/queues_response.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "id": 
"boefje-_dev", - "size": 1, - "maxsize": 1000, - "allow_replace": false, - "allow_updates": false, - "allow_priority_updates": true, - "pq": [] - }, - { - "id": "normalizer-_dev", - "size": 1, - "maxsize": 1000, - "allow_replace": false, - "allow_updates": false, - "allow_priority_updates": true, - "pq": [] - } -] diff --git a/boefjes/tests/examples/scheduler/should_crash.json b/boefjes/tests/examples/scheduler/should_crash.json index f267bf5fc57..ca3ba0f35e8 100644 --- a/boefjes/tests/examples/scheduler/should_crash.json +++ b/boefjes/tests/examples/scheduler/should_crash.json @@ -1,43 +1,51 @@ { - "id": "9071c9fd-2b9f-440f-a524-ef1ca4824fd4", - "priority": 1, - "scheduler_id": "boefje-_dev", - "schedule_id": null, - "status": "dispatched", - "type": "boefje", - "hash": "7e698c377cfd85015c0d7086b76b76b4", - "data": { - "id": "9071c9fd-2b9f-440f-a524-ef1ca4824fd4", - "boefje": { - "id": "dns-records", - "name": "DnsRecords", - "description": "Fetch the DNS record(s) of a hostname", - "version": null, - "scan_level": 1, - "consumes": [ - "Hostname" - ], - "produces": [ - "DNSAAAARecord", - "IPAddressV6", - "NXDOMAIN", - "Hostname", - "Network", - "DNSNSRecord", - "DNSTXTRecord", - "IPAddressV4", - "DNSMXRecord", - "DNSZone", - "DNSARecord", - "DNSSOARecord", - "DNSCNAMERecord" - ], - "dispatches": null - }, - "input_ooi": "Hostname|internet|test.test", - "organization": "_dev", - "dispatches": [] - }, - "created_at": "2021-06-29T14:00:00", - "modified_at": "2021-06-29T14:00:00" + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "id": "9071c9fd-2b9f-440f-a524-ef1ca4824fd4", + "priority": 1, + "scheduler_id": "boefje", + "organisation": "_dev", + "schedule_id": null, + "status": "dispatched", + "type": "boefje", + "hash": "7e698c377cfd85015c0d7086b76b76b4", + "data": { + "id": "9071c9fd-2b9f-440f-a524-ef1ca4824fd4", + "boefje": { + "id": "dns-records", + "name": "DnsRecords", + "description": "Fetch the DNS record(s) of a hostname", + "version": 
null, + "scan_level": 1, + "consumes": [ + "Hostname" + ], + "produces": [ + "DNSAAAARecord", + "IPAddressV6", + "NXDOMAIN", + "Hostname", + "Network", + "DNSNSRecord", + "DNSTXTRecord", + "IPAddressV4", + "DNSMXRecord", + "DNSZone", + "DNSARecord", + "DNSSOARecord", + "DNSCNAMERecord" + ], + "dispatches": null + }, + "input_ooi": "Hostname|internet|test.test", + "organization": "_dev", + "dispatches": [] + }, + "created_at": "2021-06-29T14:00:00", + "modified_at": "2021-06-29T14:00:00" + } + ] } diff --git a/boefjes/tests/examples/scheduler/should_crash_2.json b/boefjes/tests/examples/scheduler/should_crash_2.json new file mode 100644 index 00000000000..913ea5c86a0 --- /dev/null +++ b/boefjes/tests/examples/scheduler/should_crash_2.json @@ -0,0 +1,51 @@ +{ + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "id": "2071c9fd-2b9f-440f-a524-ef1ca4824fd4", + "priority": 1, + "scheduler_id": "boefje", + "organisation": "_dev", + "schedule_id": null, + "status": "dispatched", + "type": "boefje", + "hash": "7e698c377cfd85015c0d7086b76b76b4", + "data": { + "id": "2071c9fd-2b9f-440f-a524-ef1ca4824fd4", + "boefje": { + "id": "dns-records", + "name": "DnsRecords", + "description": "Fetch the DNS record(s) of a hostname", + "version": null, + "scan_level": 1, + "consumes": [ + "Hostname" + ], + "produces": [ + "DNSAAAARecord", + "IPAddressV6", + "NXDOMAIN", + "Hostname", + "Network", + "DNSNSRecord", + "DNSTXTRecord", + "IPAddressV4", + "DNSMXRecord", + "DNSZone", + "DNSARecord", + "DNSSOARecord", + "DNSCNAMERecord" + ], + "dispatches": null + }, + "input_ooi": "Hostname|internet|test.test", + "organization": "_dev", + "dispatches": [] + }, + "created_at": "2021-06-29T14:00:00", + "modified_at": "2021-06-29T14:00:00" + } + ] +} diff --git a/boefjes/tests/test_api.py b/boefjes/tests/test_api.py index 71287655f1d..4d6aedaf835 100644 --- a/boefjes/tests/test_api.py +++ b/boefjes/tests/test_api.py @@ -11,7 +11,6 @@ def _mocked_scheduler_client(tmp_path: Path): 
return MockSchedulerClient( - queue_response=get_dummy_data("scheduler/queues_response.json"), boefje_responses=[get_dummy_data("scheduler/pop_response_boefje_no_ooi.json")], normalizer_responses=[], log_path=tmp_path / "patch_task_log", diff --git a/boefjes/tests/test_app.py b/boefjes/tests/test_app.py index 8cd124940f6..26bcd41cfb6 100644 --- a/boefjes/tests/test_app.py +++ b/boefjes/tests/test_app.py @@ -56,7 +56,6 @@ def test_two_processes(manager: SchedulerWorkerManager, item_handler: MockHandle def test_two_processes_exception(manager: SchedulerWorkerManager, item_handler: MockHandler, tmp_path) -> None: manager.scheduler_client = MockSchedulerClient( - get_dummy_data("scheduler/queues_response.json"), [get_dummy_data("scheduler/should_crash.json")], [get_dummy_data("scheduler/pop_response_normalizer.json")], tmp_path / "patch_task_log", @@ -72,12 +71,16 @@ def test_two_processes_exception(manager: SchedulerWorkerManager, item_handler: def test_two_processes_handler_exception(manager: SchedulerWorkerManager, item_handler: MockHandler, tmp_path) -> None: manager.scheduler_client = MockSchedulerClient( - get_dummy_data("scheduler/queues_response.json"), - [get_dummy_data("scheduler/pop_response_boefje.json")] + 2 * [get_dummy_data("scheduler/should_crash.json")], + [ + get_dummy_data("scheduler/pop_response_boefje.json"), + get_dummy_data("scheduler/should_crash.json"), + get_dummy_data("scheduler/should_crash_2.json"), + ], [get_dummy_data("scheduler/pop_response_normalizer.json")], tmp_path / "patch_task_log", ) + item_handler.sleep_time = 0.1 manager.settings.pool_size = 2 manager.task_queue = Manager().Queue() with pytest.raises(KeyboardInterrupt): @@ -96,20 +99,17 @@ def test_two_processes_handler_exception(manager: SchedulerWorkerManager, item_h # We expect the first two patches to set the task status to running of both task and then process 1 to finish, as # the exception has been set up with a small delay. 
assert len(patched_tasks) == 6 - assert sorted(patched_tasks[:3]) == sorted( - [ - ("70da7d4f-f41f-4940-901b-d98a92e9014b", "running"), # Process 1 - ("70da7d4f-f41f-4940-901b-d98a92e9014b", "completed"), # Process 1 - ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "running"), # Process 2 - ] + assert sorted(patched_tasks[:2]) == sorted( + [("70da7d4f-f41f-4940-901b-d98a92e9014b", "running"), ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "running")] ) # The process completing status then to be set to completed/failed for both tasks. - assert sorted(patched_tasks[3:]) == sorted( + assert sorted(patched_tasks[2:]) == sorted( [ - ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "running"), # Process 1 - ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "failed"), # Process 2 - ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "failed"), # Process 1 + ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "failed"), + ("2071c9fd-2b9f-440f-a524-ef1ca4824fd4", "running"), + ("2071c9fd-2b9f-440f-a524-ef1ca4824fd4", "failed"), + ("70da7d4f-f41f-4940-901b-d98a92e9014b", "completed"), ] ) @@ -126,10 +126,7 @@ def test_two_processes_cleanup_unfinished_tasks( """ manager.scheduler_client = MockSchedulerClient( - get_dummy_data("scheduler/queues_response.json"), - 3 * [get_dummy_data("scheduler/pop_response_boefje.json")], - [], - tmp_path / "patch_task_log", + 3 * [get_dummy_data("scheduler/pop_response_boefje.json")], [], tmp_path / "patch_task_log" ) manager.settings.pool_size = 2 manager.task_queue = Manager().Queue() @@ -153,10 +150,11 @@ def test_two_processes_cleanup_unfinished_tasks( } # Tasks (one with the same id) was still unhandled the queue and pushed back to the scheduler by the main process - assert manager.scheduler_client._pushed_items["70da7d4f-f41f-4940-901b-d98a92e9014b"].scheduler_id == "boefje-_dev" - assert json.loads( - manager.scheduler_client._pushed_items["70da7d4f-f41f-4940-901b-d98a92e9014b"].json() - ) == json.loads(get_dummy_data("scheduler/pop_response_boefje.json")) + assert 
manager.scheduler_client._pushed_items["70da7d4f-f41f-4940-901b-d98a92e9014b"].scheduler_id == "boefje" + assert ( + json.loads(manager.scheduler_client._pushed_items["70da7d4f-f41f-4940-901b-d98a92e9014b"].json()) + == json.loads(get_dummy_data("scheduler/pop_response_boefje.json")).get("results")[0] + ) def test_normalizer_queue(manager: SchedulerWorkerManager, item_handler: MockHandler) -> None: @@ -170,7 +168,6 @@ def test_normalizer_queue(manager: SchedulerWorkerManager, item_handler: MockHan def test_null(manager: SchedulerWorkerManager, tmp_path: Path, item_handler: MockHandler): manager.scheduler_client = MockSchedulerClient( - get_dummy_data("scheduler/queues_response.json"), 3 * [get_dummy_data("scheduler/pop_response_boefje.json")], [get_dummy_data("scheduler/pop_response_normalizer.json")], tmp_path / "patch_task_log", diff --git a/bytes/bytes/rabbitmq.py b/bytes/bytes/rabbitmq.py index 8ae5b83f446..f40a042ef1f 100644 --- a/bytes/bytes/rabbitmq.py +++ b/bytes/bytes/rabbitmq.py @@ -53,7 +53,7 @@ def _check_connection(self) -> None: @staticmethod def _queue_name(event: Event) -> str: - return f"{event.organization}__{event.event_id}" + return event.event_id class NullManager(EventManager): diff --git a/bytes/tests/conftest.py b/bytes/tests/conftest.py index f30b18e8544..049cb2c81c1 100644 --- a/bytes/tests/conftest.py +++ b/bytes/tests/conftest.py @@ -105,5 +105,8 @@ def raw_repository(tmp_path: Path) -> FileRawRepository: @pytest.fixture -def event_manager(settings: Settings) -> RabbitMQEventManager: - return RabbitMQEventManager(str(settings.queue_uri)) +def event_manager(settings: Settings) -> Iterator[RabbitMQEventManager]: + manager = RabbitMQEventManager(str(settings.queue_uri)) + manager.channel.queue_delete("raw_file_received") + + yield manager diff --git a/bytes/tests/integration/test_bytes_api.py b/bytes/tests/integration/test_bytes_api.py index 046a0d74919..143592ba3ee 100644 --- a/bytes/tests/integration/test_bytes_api.py +++ 
b/bytes/tests/integration/test_bytes_api.py @@ -227,7 +227,7 @@ def test_raw(bytes_api_client: BytesAPIClient, event_manager: RabbitMQEventManag assert retrieved_raw == raw - method, properties, body = event_manager.connection.channel().basic_get("test__raw_file_received") + method, properties, body = event_manager.connection.channel().basic_get("raw_file_received") event_manager.connection.channel().basic_ack(method.delivery_tag) assert str(boefje_meta.id) in body.decode() @@ -244,7 +244,7 @@ def test_raw_big(bytes_api_client: BytesAPIClient, event_manager: RabbitMQEventM assert retrieved_raw == raw - method, properties, body = event_manager.connection.channel().basic_get("test__raw_file_received") + method, properties, body = event_manager.connection.channel().basic_get("raw_file_received") event_manager.connection.channel().basic_ack(method.delivery_tag) assert str(boefje_meta.id) in body.decode() diff --git a/bytes/tests/integration/test_event.py b/bytes/tests/integration/test_event.py index 015f1f3f811..706c7f48edc 100644 --- a/bytes/tests/integration/test_event.py +++ b/bytes/tests/integration/test_event.py @@ -7,7 +7,7 @@ def test_event_published_successfully(event_manager: RabbitMQEventManager) -> None: - test_organization = "event-test" + test_organization = "test" raw_data_meta = get_raw_data_meta() # We use an isolated queue this way to not conflict with other integration tests @@ -23,5 +23,5 @@ def test_event_published_successfully(event_manager: RabbitMQEventManager) -> No event_manager.connection.channel().basic_ack(method.delivery_tag) assert response["organization"] == test_organization - assert response["raw_data"] == json.loads(event.raw_data.json()) + assert response["raw_data"] == json.loads(event.raw_data.model_dump_json()) assert response["created_at"] == "2000-10-10T10:00:00" diff --git a/docs/source/release-notes/1.18.rst b/docs/source/release-notes/1.18.rst index 735efb0334b..5036623fd72 100644 --- a/docs/source/release-notes/1.18.rst +++ 
b/docs/source/release-notes/1.18.rst @@ -1,26 +1,336 @@ ============================================ -OpenKAT 1.18 +OpenKAT 1.18 - Sneeuwkat ============================================ -New Features -============ +This release adds report scheduling, which implements periodic report generation: by adding an interval to a report, it will automatically update with the latest information. With our new Dashboarding feature, these reports can be added to custom dashboards. +Dashboard and Report data also have historical versions available. Future versions of the user interface will include moving back and forth in time and comparing these historic versions, highlighting changes and trends. +We also included a new HTTP export boefje that you can use to export all objects in the graph to an external API either on an interval (e.g. every hour), or when the OOI is either created or changed, using our new Run-On functionality. This can be used to alert that findings have been created or their score has been updated. +There's also a new S3 backend for Bytes and various new boefjes, normalizers and fixes to bits from our growing community. Thanks! Docs on how to set up S3 for your (new) install can be found here: https://docs.openkat.nl/installation-and-deployment/s3-buckets.html -Bug fixes -========= +The language Tamil has been added via the hard work of a community volunteer. Since we have not yet tested it ourselves, it's currently only available if you add it to the languages list manually. +If you want to add a language to OpenKAT, or just help translate smaller parts, please take a look at our Weblate (https://hosted.weblate.org/projects/openkat/nl-kat-coordination/). Any help is much appreciated! + +The Keiko module (formerly used to generate reports via LaTeX) has been removed as we are now using full HTML reports that can also be exported as PDF. + +In total 30 contributors have made 267 commits to main, in which 1,332 files were changed. 
+ +New Features and Bug fixes +========================== + +* Feature: improve settings and environment logic and phase out redundant environment keys by @Donnype in #3384 +* feat: adds notification styling and icons by @HeleenSG in #3461 +* Make the "name" field for plugins mandatory by @Donnype in #3471 +* Feature/upload multiple files at once to bytes by @Donnype in #3476 +* Add report scheduler functionality to scheduler by @jpbruinsslot in #3352 +* Add json download to report export by @Rieven in #3460 +* feat: multi select dropdown by @HeleenSG in #3446 +* Add timezone to valid time by @noamblitz in #3429 +* Exclude OOIs creation from the OOI add form by OOI-types by @Rieven in #3490 +* Hotfix for normalizer API bug by @Donnype in #3475 +* fix: toggle styling by @HeleenSG in #3449 +* Dont yield all snyk findings when no version was found by @noamblitz in #3431 +* Handle empty normalizer results by @Donnype in #3482 +* Fix enabling normalizers from Rocky by @Donnype in #3481 +* Fix report types selection not being overridden by @Rieven in #3436 +* Add new Boefje by @madelondohmen in #3400 +* Fix hanging upload of large files by @noamblitz in #3489 +* Check if the task is still running according to the scheduler before changing the status by @Donnype in #3506 +* Use the right variable name in the template's if-statement by @Donnype in #3519 +* Add regex pattern check to PORTS setting of nmap-ports by @Donnype in #3516 +* Update xtdb-http-multinode to the latest version by @dekkers in #3523 +* Updated findings in the findings database by @stephanie0x00 in #3427 +* remove unneeded column from filtered plugin table view by @underdarknl in #3515 +* Also delete self-affirming or self-infered objects by @originalsouth in #3498 +* Support valid_time and the like for queries in xtdb tools by @originalsouth in #3430 +* Chore: use only Pytest in the boefjes by @Donnype in #3536 +* Invert findings, add source URLs. 
by @stephanie0x00 in #3538 +* Fix JSON line logging by @ammar92 in #3511 +* Fix xtdb-cli by @originalsouth in #3543 +* Create boefje variant by @madelondohmen in #3456 +* make session commit less chatty by @underdarknl in #3544 +* Fix duplicate OOI references in result in origin by @originalsouth in #3531 +* a bit more detailed erroring in the scheduler client. by @underdarknl in #3546 +* Show proper error message instead of stacktrace if boefje API is unreachable by @dekkers in #3550 +* Fix headings by @madelondohmen in #3528 +* Feat/bit domain ownership pending by @underdarknl in #3290 +* Improve boefje runner error messages on container failure by @dekkers in #3548 +* Translations update from Hosted Weblate by @weblate in #3567 +* Clean more stale origins by @originalsouth in #3561 +* Fix Pydantic warnings by @ammar92 in #3557 +* Prevent race conditions between Octopoes' event manager and the scheduler from recreating already deleted OOIs through affirmations by @originalsouth in #3564 +* burpsuite fix by @underdarknl in #3381 +* Fix boefje schema on Boefje Setup page by @madelondohmen in #3574 +* Set default findingtype risk in model instead of in bit by @noamblitz in #3562 +* Add permission that grants access to all organizations by @dekkers in #3532 +* Add unique constraint to database plugin names by @Donnype in #3556 +* Feature/add boefje scheduling fields by @Donnype in #3555 +* Refactor and fix faulty save_origin exception code by @originalsouth in #3577 +* Dont show manual tasks in normalizer list by @noamblitz in #3580 +* Update a Boefje by @madelondohmen in #3521 +* Explicitly use the fork context for multiprocessing to fix running boefjes on macOS by @Donnype in #3576 +* fix: button style by @HeleenSG in #3565 +* Use stdlib instead of dateutil to parse ISO datetime by @dekkers in #3590 +* Do not store the hypothetically produced mime-types always by @Donnype in #3583 +* Remove old Findings Report by @madelondohmen in #3560 +* Add 'set start date' 
functionality to scheduler by @jpbruinsslot in #3589 +* Make API usable by non-admin users and check specific permissions by @dekkers in #3571 +* fix: button styling by @HeleenSG in #3591 +* Add interval to Boefje by @madelondohmen in #3579 +* Add boefje interval and cron check for deadline in scheduler by @jpbruinsslot in #3529 +* Always redirect to katalogus when enabling plugins by @noamblitz in #3584 +* Fixes notification alignment by @HeleenSG in #3522 +* REST API to recalculate bits and clone katalogus settings by @dekkers in #3572 +* fix: form styling by @HeleenSG in #3588 +* Remove an erroneously generated request body from an object history GET call in Octopoes' router by @originalsouth in #3605 +* RFD 0002: Code of Conduct: Code Reviews by @jpbruinsslot in #3425 +* Fix grace period is being used instead of interval for boefjes that have interval specified in scheduler by @jpbruinsslot in #3602 +* Use identifiers on modal triggers and modal component instead of integral trigger by @TwistMeister in #3541 +* Refactoring for Report Recipe, Report Task Runner and Scheduling by @Rieven in #3597 +* Handle existing Boefje name by @madelondohmen in #3573 +* Update boefje interval texts to make functionality more clear by @stephanie0x00 in #3609 +* Translations update from Hosted Weblate by @weblate in #3610 +* Feature/sort ooi type clearance level by @HeleenSG in #3300 +* Feature/report runner integration by @Donnype in #3607 +* Report Schedules List by @Rieven in #3608 +* Add s3 functionality in Bytes by @Souf149 in #3505 +* Implement SonarCloud integrations by @ammar92 in #3001 +* Fixed references in SonarCloud workflow by @ammar92 in #3620 +* Update filter unit and integration tests by @jpbruinsslot in #3595 +* Enable ruff format skip-magic-trailing-comma by @dekkers in #2975 +* Fixes for xtdb-cli by @originalsouth in #3624 +* Give python-docker-boefjes the possibility to use modules that are not part of OpenKAT by @Souf149 in #3621 +* fix tagging list in 
scheduled_reports_table.html by @underdarknl in #3615 +* Revert 1b4aed6 by @originalsouth in #3647 +* Add audit trail logging to boefje crud actions in boefje by @madelondohmen in #3613 +* use correct error mimetype by @noamblitz in #3646 +* Update katalogus client, input sanitization / validation by @underdarknl in #3396 +* Bug fixes for the reports flow by @Rieven in #3630 +* Remove source link in Findings Report when source is none by @madelondohmen in #3642 +* add CA bundle env var to dadb boefje schema. by @underdarknl in #3618 +* Fix nmap-ports regex pattern not allowing 80 by @Donnype in #3651 +* Fix boefje container image url by @madelondohmen in #3622 +* Fix description on plugin page when all plugins are enabled by @madelondohmen in #3644 +* Fix for downloading PDF by @madelondohmen in #3664 +* Fix sorting plugins list by @Rieven in #3659 +* fix the boefje id check for uuid's. A cleaner match regex would probably be better. by @underdarknl in #3665 +* Fix table in DNS Report by @madelondohmen in #3650 +* Pass bytes instead of string to BytesClient.upload_raw() by @Donnype in #3670 +* make some things look better by @Rieven in #3661 +* Fix/yielded objects by @Donnype in #3669 +* Add rocky worker service to debian packages by @Donnype in #3619 +* Update upload_raw.py by @underdarknl in #3645 +* Translations update from Hosted Weblate by @weblate in #3673 +* Add plugins to findings report by @Rieven in #3657 +* Fix jsonb 'contained by' query by @jpbruinsslot in #3643 +* Fix empty vulnerability reports by @madelondohmen in #3662 +* Silence KATFindingType not found error in JobHandler by @originalsouth in #3686 +* Github action should trigger if workflow definition changes by @dekkers in #3680 +* Do not run dh_strip_nondeterminism in Debian packaging by @dekkers in #3674 +* Fix first order dangling affirmation delete by @originalsouth in #3682 +* Fix javascript and component template in prod environments by @dekkers in #3672 +* Add delete schedule 
functionality for schedules in the scheduler by @jpbruinsslot in #3678 +* Fix/report naming by @Donnype in #3666 +* Add search endpoint for schedules for scheduler by @jpbruinsslot in #3695 +* feat: ✨ add Shodan InternetDB boefje by @zcrt in #2615 +* Add sterr to output list by @noamblitz in #3649 +* Rework workflow for variable python version, add python 3.11 by @sigio in #3721 +* Fixes in Report Overview by @madelondohmen in #3707 +* Add REST API to list report and download pdf report by @dekkers in #3689 +* Add start date to report schedule by @madelondohmen in #3701 +* Edit report recipe by @madelondohmen in #3690 +* Fix Multi Report recursion error by @Rieven in #3714 +* Fix report names for scheduled reports by @madelondohmen in #3726 +* Refactor Multi Report to comply to the new report flow by @Rieven in #3705 +* Add exception handling to the rest api by @jpbruinsslot in #3708 +* Add rocky REST API for report recipes by @dekkers in #3746 +* Fix auth token middleware with wrong format header by @dekkers in #3755 +* Fix vulnerability chapters in Aggregate table of content by @madelondohmen in #3780 +* Make systemctl call for kat-rocky-worker conditional by @dekkers in #3782 +* Fix scheduled Aggregate Report naming by @madelondohmen in #3748 +* Fixes for dropdowns by @Rieven in #3732 +* Exclude Report from ooi list by @Rieven in #3768 +* Fix reports with organization tags by @noamblitz in #3790 +* Silence staticfiles warning by @dekkers in #3795 +* Add configurable httpx request timeout and increase default by @dekkers in #3786 +* fix: Long links within tables by @HeleenSG in #3724 +* Translations update from Hosted Weblate by @weblate in #3762 +* Update web system report to make "certificate valid" check positive by @stephanie0x00 in #3798 +* Add live set (filter/query) to ReportRecipe by @madelondohmen in #3769 +* Add reports to scheduled table by @madelondohmen in #3787 +* fix: Adds code element styling by @HeleenSG in #3722 +* Fix filtered ooi types for 
reports by @Rieven in #3807 +* Replace finding description 'None' with the id by @madelondohmen in #3806 +* Button styling by @HeleenSG in #3772 +* Fix settings boefje settings via system env vars by @dekkers in #3766 +* Update normalizer texts in katalogus for some normalizers. by @stephanie0x00 in #3821 +* Add searching and sorting to Findings page by @madelondohmen in #3804 +* Fix typo in InternetDB boefje name by @dekkers in #3828 +* Refactor KATalogus client in Rocky by @Donnype in #3717 +* Check queue size before polling by @Donnype in #3829 +* Do not fail silently when deleting non-existing objects in octopoes by @Donnype in #3813 +* Add bulk actions on report overview by @TwistMeister in #3777 +* Upgrade script notes and fix for 1.16 on Debian by @Donnype in #3824 +* Bug fix: When opening subreports it throws index error by @Rieven in #3775 +* Delete log.txt by @underdarknl in #3851 +* Support a Schedule without a schedule in scheduler by @jpbruinsslot in #3834 +* Report types listed in a modal @ report plugins by @Rieven in #3718 +* Skip empty queues in the Rocky worker by @Donnype in #3860 +* Let local plugins (files) take precedence over database entries by @Donnype in #3858 +* Limit requesting prior tasks for ranking in scheduler by @jpbruinsslot in #3836 +* Add configuration setting for number of octopoes workers by @dekkers in #3796 +* Add start time to scheduled reports by @madelondohmen in #3809 +* Sub reports for Aggregate Report by @Rieven in #3852 +* Fix cron for last day of the month by @madelondohmen in #3831 +* Fixes for empty tables by @madelondohmen in #3844 +* optimize locking in katalogus.py, reuse available data by @underdarknl in #3752 +* Enable/disable scheduled reports by @madelondohmen in #3871 +* Fix rocky katalogus tests and delete unused fixtures by @dekkers in #3884 +* Change plugins enabling in report flow to checkboxes by @noamblitz in #3747 +* Let mailserver inherit l1 by @noamblitz in #3704 +* Ignore specific url parameters 
when following location headers by @noamblitz in #3856 +* Add auto_calculate_deadline attribute to Scheduler by @jpbruinsslot in #3869 +* Fix for task id as valid UUID by @Rieven in #3744 +* Increase max number of PostgreSQL connections by @dekkers in #3889 +* Translations update from Hosted Weblate by @weblate in #3870 +* Update scheduler folder structure by @jpbruinsslot in #3883 +* Feature/improve rename bulk modal by @TwistMeister in #3885 +* fix: 🐛 allow boefje completion with 404 by @zcrt in #3893 +* Create separate finding for Microsoft RDP port by @stephanie0x00 in #3882 +* Add additional check if task already run for report scheduler by @jpbruinsslot in #3900 +* Adds loggers to report flow by @madelondohmen in #3872 +* Fix mula migrations Debian package by @dekkers in #3919 +* Bug fix: KAT-alogus parameter is now organization member instead of organization code by @Rieven in #3895 +* Fix call to get_katalogus by @dekkers in #3924 +* add support for detecting Lame dns delegations on ip ranges by @underdarknl in #3899 +* Add bgp.jsonl and bgp-meta.json to .gitignore by @dekkers in #3928 +* Improve the KATalogus /plugins endpoint performance by @Donnype in #3892 +* Create scheduled report with zero objects selectable by @madelondohmen in #3907 +* Fix layout issues on scheduled reports page by @TwistMeister in #3930 +* Add export http boefje by @noamblitz in #3901 +* Update website_discovery.py by @underdarknl in #3921 +* add unpkg.com to disallowed hostnames in CSP by @underdarknl in #3927 +* Dont check for Locations on local Ip's. 
by @underdarknl in #3894 +* fix: 🔨 do not store CDN findings by @zcrt in #3931 +* Boefje runonce functionality in scheduler by @jpbruinsslot in #3906 +* Fix report recipe API by @dekkers in #3942 +* Translations update from Hosted Weblate by @weblate in #3939 +* Report flaws by @madelondohmen in #3880 +* Fix typing in more places and configure mypy to follow imports by @dekkers in #3932 +* Do not let enabling plugins affect the global plugin cache by @Donnype in #3944 +* fix typos in description.md by @underdarknl in #3952 +* Add documentation for S3 Support by @Souf149 in #3953 +* fix/Makes expando row chevron buttons sticky in report history and scheduled reports tables by @TwistMeister in #3954 +* Move event codes logging to KATalogus client by @Donnype in #3956 +* Translations update from Hosted Weblate by @weblate in #3969 +* Add cron parser to make cron human readable. Add "next scan" to object table on boefje detail view by @TwistMeister in #3960 +* Upsert report recipe in REST API by @dekkers in #3968 +* Translations update from Hosted Weblate by @weblate in #3984 +* Fix test_report_runner.py by @originalsouth in #4003 +* minor changes to onboarding, remove header, make preferred route more visible. by @underdarknl in #3986 +* Move the NXDomain catch to look at the results now that we dont raise… by @underdarknl in #3997 +* Add SPF optional machnism qualifier to model and parser. 
fix Human readable formatting for various mechanisms by @underdarknl in #3999 +* Changes to schedule all reports, even for once by @Rieven in #3840 +* Documentation Export HTTP API boefje by @stephanie0x00 in #4030 +* catch the schema mismatch error and produce an error raw file by @underdarknl in #3995 +* Fix pagination in the history API by @Donnype in #4041 +* Fix/remove unneeded lookups for inference params by @underdarknl in #4031 +* Update dropdown.scss, add scrolling / max height by @underdarknl in #4040 +* Fix/remove unneeded tree lookups on ooi views by @underdarknl in #4032 +* Fix/ooi detail fixes by @underdarknl in #4024 +* Update organization_list and settings page, remove unused stuff, add tags + direct settings link by @underdarknl in #4039 +* Fix/reuse report ooi entities by @Donnype in #4047 +* make reference parsing more strict in init.py by @underdarknl in #4065 +* Add normalizer name to tasklist on object details page, observation table. by @underdarknl in #4034 +* Feat/plugin selection toggler by @underdarknl in #4063 +* Report Task List by @Rieven in #4059 +* Add one-off jobs for report scheduler by @jpbruinsslot in #4045 +* Remove the keiko report module by @dekkers in #4066 +* Translations update from Hosted Weblate by @weblate in #4046 +* Add run-on to Boefje Setup page by @madelondohmen in #4061 +Documentation +============= + +* Docs/update userguide objects tasks members settings by @stephanie0x00 in #3957 +* Add risk level severities to docs by @stephanie0x00 in #4037 +* Docs: adding Questions and Configs by @stephanie0x00 in #3975 +* Docs: adding Questions and Configs by @stephanie0x00 in #3975 +* Add Kubernetes and Ansible to docs by @stephanie0x00 in #3970 +* Fix docs target in Makefile by @ammar92 in #3987 +* Docs: adding Questions and Configs by @stephanie0x00 in #3975 +* Update intro.rst, fix security email address by @underdarknl in #3846 +* Update scheduler documentation by @jpbruinsslot in #3692 +* Update folder structure in 
scheduler architecture doc by @jpbruinsslot in #4002 +* Update docs for creating a new Boefje by @madelondohmen in #3540 +* update readme by @F3licity in #3648 +* Updates boefje clearances and descriptions by @stephanie0x00 in #3863 +* Update development tutorial documentation by @allan-firelay in #3611 +* Add docs for xtdb analyze bits. by @stephanie0x00 in #3688 +* Docs/add muted findings by @stephanie0x00 in #3699 +* Update helper text for report names by @madelondohmen in #3616 +* Update README.rst - Fix guidelines URLs by @Thijs0x57 in #3789 +* Add descriptions to katalogus by @stephanie0x00 in #3545 + +Dependency Updates +================== + +* Bump cryptography from 42.0.8 to 43.0.1 in /bytes by @dependabot in #3473 +* Bump django from 5.0.10 to 5.0.11 in /rocky by @dependabot in #4025 +* Bump django from 5.0.9 to 5.0.10 in /rocky by @dependabot in #3940* +* Bump SonarSource/sonarcloud-github-action from 3.1.0 to 4.0.0 by @dependabot in #4001 +* Bump python-multipart from 0.0.9 to 0.0.18 in /bytes by @dependabot in #3925 +* Remove sigrid workflows by @dekkers in #3920 +* Update Sphinx and documentation by @ammar92 in #3710 +* Fix/pin pydicom dependency and revert irrelevant version bumps by @Donnype in #3553 +* Bump django from 5.0.8 to 5.0.9 in /rocky by @dependabot in #3653 +* Bump sphinx-rtd-theme from 2.0.0 to 3.0.0 by @dependabot in #3625 +* Bump waitress from 3.0.0 to 3.0.1 in /octopoes by @dependabot in #3760 +* Update Wappalyzer by @ammar92 in #3800 +* Update packages by @ammar92 in #3990 +* Updates CWE archive to 4.16 by @ammar92 in #3943 +* Update croniter by @ammar92 in #3767 +* Updated packages by @ammar92 in #3694 +* Update Packages by @ammar92 in #3563 +* Updated packages by @ammar92 in #3898 +* Update pre-commit and all hooks by @dekkers in #3923 Upgrading ========= +Keiko has been removed. You should uninstall / remove the Keiko package or container. 
+ +Containers +---------- + +When using docker with docker compose, you need to remove keiko from the +docker-compose.yml file. You can then use the ``--remove-orphans`` option to tell +docker compose to remove containers that are no longer in the compose file: + +.. code-block:: sh + + docker compose up -d --remove-orphans + Debian packages --------------- If you are using the :doc:`/installation-and-deployment/scripts` we provide to install/upgrade OpenKAT you need to get the latest version that includes the kat-rocky-worker service. +You should also remove the kat-keiko package: + +.. code-block:: sh + + apt purge kat-keiko + +Note that if you use the openkat-update.sh script to update to a newer 1.18 +version (for example from 1.18.0rc1 to 1.18.0), then the kat-keiko package will be +installed again because the script will update or install all the packages. In 1.19 +this won't happen because the kat-keiko package will not exist anymore. + + Full Changelog ============== The full changelog can be found on `Github -`_. +`_. diff --git a/mula/docs/architecture.md b/mula/docs/architecture.md index 69a2dbf3c6c..b3d24418acb 100644 --- a/mula/docs/architecture.md +++ b/mula/docs/architecture.md @@ -3,19 +3,14 @@ ## Purpose The _scheduler_ is tasked with populating and maintaining a priority queues of -ranked tasks, and can be popped off through HTTP API calls. The scheduler is +tasks, and can be popped off through HTTP API calls. The scheduler is designed to be extensible, such that you're able to create your own rules for the population, scheduling, and prioritization of tasks. -In the implementation of the scheduler within OpenKAT is tasked with -scheduling and populating the priority queues of 'boefje', 'normalizer' and +In the implementation of the scheduler within OpenKAT the scheduler is tasked +with scheduling and populating the priority queues of `boefje`, `normalizer` and `report` tasks.
-Because of the use of a priority queue we can differentiate between tasks that -are to be executed first, e.g. tasks created by the user get precedence over -tasks that are created by the internal rescheduling processes within the -scheduler. - In this document we will outline how the scheduler operates within KAT, how internal systems function and how external services use it. @@ -34,43 +29,75 @@ combines data from the `Octopoes`, `Katalogus`, `Bytes` and `RabbitMQ` systems. External services used and for what purpose: -- Octopoes; retrieval of ooi information - - RabbitMQ; messaging queues to notify the scheduler of scan level changes and the creation of raw files from bytes +- Rocky; interfaces with the scheduler through its rest api + +- Octopoes; retrieval of ooi information + - Katalogus; retrieval of plugin and organization information - Bytes; retrieval of raw file information -- Rocky; interfaces with the scheduler through its rest api +```mermaid +flowchart TB + subgraph "External informational services" + Octopoes["Octopoes<br/>[system]"] + Katalogus["Katalogus<br/>[system]"] + Bytes["Bytes<br/>[system]"] + end + subgraph "Task creation services" + Rocky["Rocky<br/>[webapp]"] + RabbitMQ["RabbitMQ<br/>[message broker]"] + end + + Scheduler["Scheduler<br/>[system]"] + + subgraph "Task handling services" + TaskRunner["Task Runner<br/>[software system]"] + end + + Rocky-->Scheduler + RabbitMQ-->Scheduler -![scheduler_system.svg](./img/scheduler_system.svg) + Octopoes-->Scheduler + Katalogus-->Scheduler + Bytes-->Scheduler + + + Scheduler--"Pop task off queue"-->TaskRunner +``` ### C3 Component level When we take a closer look at the `scheduler` system itself we can identify -several components. The `SchedulerApp` directs the creation and maintenance -of a multitude of schedulers. - -| Scheduler | Schedulers | -| :-------------------------------- | --------------------------------------: | -| ![scheduler](./img/scheduler.svg) | ![schedulers.svg](./img/schedulers.svg) | +several components. The `App` directs the creation and maintenance +of several schedulers, and the `API` is responsible for interfacing with +the `Scheduler` system. + +```mermaid +flowchart TB + subgraph "**Scheduler**<br/>[system]" + direction TB + subgraph Server["**API**<br/>[component]<br/>REST API"] + end + subgraph App["**App**<br/>[component]<br/>Main python application"] + end + Server-->App + end +``` -Typically in a OpenKAT installation 3 scheduler will be created per organisation: +Typically in an OpenKAT installation 3 schedulers will be created: 1. _boefje scheduler_ 2. _normalizer scheduler_ 3. _report scheduler_ Each scheduler type implements it's own priority queue, and can implement it's -own processes of populating, and prioritization of its tasks. - -![queue.svg](./img/queue.svg) - -Interaction with the scheduler and access to the internals of the -`SchedulerApp` can be accessed by the `Server` which implements a HTTP REST API -interface. +own processes of populating, and prioritization of its tasks. Interaction with +the scheduler and access to the internals of the `App` can be achieved by +interfacing with the `Server`, which implements an HTTP REST API interface. ## Dataflows @@ -92,7 +119,22 @@ responsible for maintaining a queue of tasks for `Task Runners` to pick up and process. A `Scheduler` is responsible for creating `Task` objects and pushing them onto the queue. -![tasks.svg](./img/tasks.svg) +```mermaid +flowchart LR + subgraph "**Scheduler**
[system]" + direction LR + subgraph Scheduler["**Scheduler**
[component]
"] + direction LR + Process["Task creation process"] + subgraph PriorityQueue["PriorityQueue"] + Task0 + Task1[...] + TaskN + end + end + Process-->PriorityQueue + end +``` The `PriorityQueue` derives its state from the state of the `Task` objects that are persisted in the database. In other words, the current state of the @@ -102,13 +144,16 @@ are persisted in the database. In other words, the current state of the A `Task` object contains the following fields: -- `scheduler_id` - The id of the scheduler for which this task is created -- `schedule_id` - Optional, the id of the `Schedule` that created the task -- `priority` - The priority of the task -- `status` - The status of the task -- `type` - The type of the task -- `data` - A JSON object containing the task data -- `hash` - A unique hash generated by specific fields from the task data +| Field | Description | +| -------------- | ------------------------------------------------------------- | +| `scheduler_id` | The id of the scheduler for which this task is created | +| `schedule_id` | Optional, the id of the `Schedule` that created the task | +| `priority` | The priority of the task | +| `organisation` | The organisation for which the task is created | +| `status` | The status of the task | +| `type` | The type of the task | +| `data` | A JSON object containing the task data | +| `hash` | A unique hash generated by specific fields from the task data | Important to note is the `data` field contains the object that a `Task Runner` will use to execute the task. This field is a JSON field that allows any object @@ -120,6 +165,35 @@ By doing this, it allows the scheduler to wrap whatever object within a `Task`, and as a result we're able to create and extend more types of schedulers that are not specifically bound to a type. 
+A json representation of a `Task` object, for example a `BoefjeTask` object +as the `data` field: + +```json +{ + "scheduler_id": "1", + "schedule_id": "1", + "priority": 1, + "organisation": "openkat-corp", + "status": "PENDING", + "type": "boefje", + "data": { + "ooi": "internet", + "boefje": { + "id": "dns-zone", + "scan_level": 1 + } + }, + "hash": "a1b2c3d4e5f6g7h8i9j0" +} +``` + +A `Task` is a one-time execution of a task and is a unique instance of task that +is present in the `data` object. This means that you will encounter several +instances of the same task. We generate a unique hash for each task by hashing +specific fields from the `data` object. This hash is used to identify the task +within the `PriorityQueue` and is used to check if the same task is already on +the queue. + This approach ensures that the historical record of each task's execution is distinct, providing a clear and isolated view of each instance of the task's lifecycle. This strategy enables maintaining accurate and unambiguous @@ -153,29 +227,28 @@ that `Scheduler` can create `Schedule` objects for its `Task` objects. A `Schedule` object is a way to define when a `Task` should be executed automatically on a recurring schedule by the `Scheduler`. -A `Schedule` will use the 'blueprint' that is defined in its `data` field (this +A `Schedule` will use the _'blueprint'_ that is defined in its `data` field (this is the same as the `data` field of a `Task`) to generate a `Task` object to be pushed on the queue of a `Scheduler`. 
-![schedules.svg](./img/schedules.svg) - A `Schedule` object contains the following fields: -- `scheduler_id` - The id of the scheduler that created the schedule -- `schedule` - A cron expression that defines when the task should be - executed, this is used to update the value of `deadline_at` -- `deadline_at` - A timestamp that defines when the task should be executed -- `data` - A JSON object containing data for the schedule (this is the same as - the `data` field in the `Task` object) -- `hash` - A unique hash generated by specific fields from the schedule data +| Field | Description | +| -------------- | ------------------------------------------------------------------------------------------------------------------ | +| `scheduler_id` | The id of the scheduler that created the schedule | +| `schedule` | A cron expression that defines when the task should be executed, this is used to update the value of `deadline_at` | +| `deadline_at` | A timestamp that defines when the task should be executed | +| `data` | A JSON object containing data for the schedule (this is the same as the `data` field in the `Task` object) | +| `hash` | A unique hash generated by specific fields from the schedule data | A `Scheduler` can be extended by a process that checks if the `deadline_at` of a `Schedule` has passed, and if so, creates a `Task` object for the `Scheduler` to push onto the queue. -When the `Task` object is pushed onto the queue, the new `deadline_at` value -of the `Schedule` is calculated using the cron expression defined in the -`schedule` field. +Typically when the `Task` object is pushed onto the queue, the new +`deadline_at` value of the `Schedule` is calculated using the cron expression +defined in the `schedule` field. Refer to the specific `Scheduler` for more +information on how this is implemented. 
### `BoefjeScheduler` @@ -221,21 +294,46 @@ Before a `BoefjeTask` and pushed on the queue we will check the following: #### Processes -![boefje_scheduler.svg](./img/boefje_scheduler.svg) +```mermaid +flowchart LR + subgraph "**Scheduler**
[system]" + direction LR + subgraph BoefjeScheduler["**BoefjeScheduler**
[component]
"] + direction LR + ProcessManual["Manual"] + ProcessMutations["Mutations"] + ProcessNewBoefjes["NewBoefjes"] + ProcessRescheduling["Rescheduling"] + subgraph PriorityQueue["PriorityQueue"] + Task0 + Task1[...] + TaskN + end + ProcessManual-->PriorityQueue + ProcessMutations-->PriorityQueue + ProcessNewBoefjes-->PriorityQueue + ProcessRescheduling-->PriorityQueue + end + end +``` In order to create a `BoefjeTask` and trigger the dataflow we described above -we have 4 different processes running in threads within a `BoefjeScheduler` +we have 3 different processes running in threads within a `BoefjeScheduler` that can create boefje tasks. Namely: -1. scan profile mutations -2. enabling of boefjes -3. rescheduling of prior tasks -4. manual scan job +| Process | Description | +| ----------------------- | -------------------------------------------------------------------------------------------------- | +| `process_mutations` | scan profile mutations received from RabbitMQ indicating that the scan level of an OOI has changed | +| `process_new_boefjes` | enabling of boefjes will result in gathering of OOI's on which the boefje can be used | +| `process_rescheduling ` | rescheduling of prior tasks | + +Additionally, a boefje task creation can be triggered by a manual scan job that +is created by the user in Rocky. ##### 1. Scan profile mutations When a scan level is increased on an OOI -(`schedulers.boefje.push_tasks_for_scan_profile_mutations`) a message is pushed +(`schedulers.boefje.process_mutations`) a message is pushed on the RabbitMQ `{organization_id}__scan_profile_mutations` queue. The scheduler continuously checks if new messages are posted on the queue. The resulting tasks from this process will get the second highest priority of 2 on the queue. @@ -336,7 +434,22 @@ queue we will check the following: #### Processes -![normalizer_scheduler.svg](./img/normalizer_scheduler.svg) +```mermaid +flowchart LR + subgraph "**Scheduler**
[system]" + direction LR + subgraph NormalizerScheduler["**NormalizerScheduler**
[component]
"] + direction LR + ProcessRawData["RawData"] + subgraph PriorityQueue["PriorityQueue"] + Task0 + Task1[...] + TaskN + end + ProcessRawData-->PriorityQueue + end + end +``` The following processes within a `NormalizerScheduler` will create a `NormalizerTask` tasks: @@ -345,7 +458,7 @@ The following processes within a `NormalizerScheduler` will create a ##### 1. Raw file creation in Bytes -When a raw file is created (`schedulers.normalizer.create_tasks_for_raw_data`) +When a raw file is created (`schedulers.normalizer.process_raw_data`) - The `NormalizerScheduler` retrieves raw files that have been created in Bytes from a message queue. @@ -365,7 +478,22 @@ picked up and processed by the report task runner. #### Processes -![report_scheduler.svg](./img/report_scheduler.svg) +```mermaid +flowchart LR + subgraph "**Scheduler**
[system]" + direction LR + subgraph ReportScheduler["**ReportScheduler**
[component]
"] + direction LR + ProcessRescheduling["Rescheduling"] + subgraph PriorityQueue["PriorityQueue"] + Task0 + Task1[...] + TaskN + end + ProcessRescheduling-->PriorityQueue + end + end +``` The `ReportScheduler` will create a `ReportTask` for the `Task` that is associated with a `Schedule` object. diff --git a/mula/docs/img/boefje_scheduler.svg b/mula/docs/img/boefje_scheduler.svg deleted file mode 100644 index 9f854ad21bf..00000000000 --- a/mula/docs/img/boefje_scheduler.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
BoefjeScheduler
queue
mutations
new boefjes
rescheduling
manual
diff --git a/mula/docs/img/normalizer_scheduler.svg b/mula/docs/img/normalizer_scheduler.svg deleted file mode 100644 index 18b53d70fe8..00000000000 --- a/mula/docs/img/normalizer_scheduler.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
queue
raw data
received
NormalizerScheduler
diff --git a/mula/docs/img/queue.svg b/mula/docs/img/queue.svg deleted file mode 100644 index 1f7fdbfcdee..00000000000 --- a/mula/docs/img/queue.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Scheduler
API
queue
(process)
diff --git a/mula/docs/img/report_scheduler.svg b/mula/docs/img/report_scheduler.svg deleted file mode 100644 index c6a78c79e97..00000000000 --- a/mula/docs/img/report_scheduler.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
queue
rescheduling
ReportScheduler
diff --git a/mula/docs/img/scheduler.svg b/mula/docs/img/scheduler.svg deleted file mode 100644 index 87fc74ee30a..00000000000 --- a/mula/docs/img/scheduler.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Scheduler
API
SchedulerApp
diff --git a/mula/docs/img/scheduler_system.svg b/mula/docs/img/scheduler_system.svg deleted file mode 100644 index ac511569ad2..00000000000 --- a/mula/docs/img/scheduler_system.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Scheduler System
API
SchedulerApp
Rocky
RabbitMQ
KAT-alogus
Octopoes
Bytes
Task Runners
diff --git a/mula/docs/img/schedulers.svg b/mula/docs/img/schedulers.svg deleted file mode 100644 index d804fc43df1..00000000000 --- a/mula/docs/img/schedulers.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
Scheduler
API
boefje-org1
boefje-org2
diff --git a/mula/docs/img/schedules.svg b/mula/docs/img/schedules.svg deleted file mode 100644 index 2d4cf387854..00000000000 --- a/mula/docs/img/schedules.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
(process)
task
task
task
task
Schedule
Task
diff --git a/mula/docs/img/tasks.svg b/mula/docs/img/tasks.svg deleted file mode 100644 index 0b686533366..00000000000 --- a/mula/docs/img/tasks.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
(process)
task
task
task
task
diff --git a/mula/logging.json b/mula/logging.json index 2d7a02642d9..4e147b7e17c 100644 --- a/mula/logging.json +++ b/mula/logging.json @@ -16,65 +16,47 @@ }, "root": { "level": "INFO", - "handlers": [ - "console" - ] + "handlers": ["console"] }, "loggers": { "alembic.runtime.migration": { "level": "CRITICAL", - "handlers": [ - "console" - ], + "handlers": ["console"], "propagate": 0 }, "urllib3.connectionpool": { "level": "CRITICAL", - "handlers": [ - "console" - ], + "handlers": ["console"], "propagate": 0 }, "uvicorn.error": { "level": "CRITICAL", - "handlers": [ - "console" - ], + "handlers": ["console"], "propagate": 0 }, "uvicorn.access": { "level": "CRITICAL", - "handlers": [ - "console" - ], + "handlers": ["console"], "propagate": 0 }, "pika": { "level": "CRITICAL", - "handlers": [ - "console" - ], + "handlers": ["console"], "propagate": 0 }, "sqlalchemy.engine": { "level": "CRITICAL", - "handlers": [ - "console" - ], + "handlers": ["console"], "propagate": 0 }, "httpx": { "level": "CRITICAL", - "handlers": [ - "console" - ], + "handlers": ["console"], "propagate": 0 }, "httpcore": { "level": "CRITICAL", - "handlers": [ - "console" - ], + "handlers": ["console"], "propagate": 0 } } diff --git a/mula/scheduler/app.py b/mula/scheduler/app.py index d8770730762..5c8ca33a860 100644 --- a/mula/scheduler/app.py +++ b/mula/scheduler/app.py @@ -4,7 +4,7 @@ import structlog from opentelemetry import trace -from scheduler import clients, context, schedulers, server +from scheduler import context, schedulers, server from scheduler.utils import thread tracer = trace.get_tracer(__name__) @@ -26,34 +26,21 @@ class App: through a REST API. * Metrics: The collection of application specific metrics. - - Attributes: - logger: - The logger for the class. - ctx: - Application context of shared data (e.g. configuration, external - services connections). - stop_event: A threading.Event object used for communicating a stop - event across threads. 
- schedulers: - A dict of schedulers, keyed by scheduler id. - server: - The http rest api server instance. """ def __init__(self, ctx: context.AppContext) -> None: """Initialize the application. Args: - ctx: - Application context of shared data (e.g. configuration, - external services connections). + ctx (context.AppContext): Application context of shared data (e.g. + configuration, external services connections). """ self.logger: structlog.BoundLogger = structlog.getLogger(__name__) self.ctx: context.AppContext = ctx + self.server: server.Server | None = None - threading.excepthook = self.unhandled_exception + threading.excepthook = self._unhandled_exception self.stop_event: threading.Event = threading.Event() self.lock: threading.Lock = threading.Lock() @@ -64,147 +51,6 @@ def __init__(self, ctx: context.AppContext) -> None: | schedulers.NormalizerScheduler | schedulers.ReportScheduler, ] = {} - self.server: server.Server | None = None - - @tracer.start_as_current_span("monitor_organisations") - def monitor_organisations(self) -> None: - """Monitor the organisations from the Katalogus service, and add/remove - organisations from the schedulers. - """ - current_schedulers = self.schedulers.copy() - - # We make a difference between the organisation id's that are used - # by the schedulers, and the organisation id's that are in the - # Katalogus service. We will add/remove schedulers based on the - # difference between these two sets. 
- scheduler_orgs: set[str] = { - s.organisation.id for s in current_schedulers.values() if hasattr(s, "organisation") - } - try: - orgs = self.ctx.services.katalogus.get_organisations() - except clients.errors.ExternalServiceError: - self.logger.exception("Failed to get organisations from Katalogus") - return - - katalogus_orgs = {org.id for org in orgs} - - additions = katalogus_orgs.difference(scheduler_orgs) - self.logger.debug("Organisations to add: %s", len(additions), additions=sorted(additions)) - - removals = scheduler_orgs.difference(katalogus_orgs) - self.logger.debug("Organisations to remove: %s", len(removals), removals=sorted(removals)) - - # We need to get scheduler ids of the schedulers that are associated - # with the removed organisations - removal_scheduler_ids: set[str] = { - s.scheduler_id - for s in current_schedulers.values() - if hasattr(s, "organisation") and s.organisation.id in removals - } - - # Remove schedulers for removed organisations - for scheduler_id in removal_scheduler_ids: - if scheduler_id not in self.schedulers: - continue - - self.schedulers[scheduler_id].stop() - - if removals: - self.logger.debug("Removed %s organisations from scheduler", len(removals), removals=sorted(removals)) - - # Add schedulers for organisation - for org_id in additions: - try: - org = self.ctx.services.katalogus.get_organisation(org_id) - except clients.errors.ExternalServiceError as e: - self.logger.error("Failed to get organisation from Katalogus", error=e, org_id=org_id) - continue - - scheduler_boefje = schedulers.BoefjeScheduler( - ctx=self.ctx, scheduler_id=f"boefje-{org.id}", organisation=org, callback=self.remove_scheduler - ) - - scheduler_normalizer = schedulers.NormalizerScheduler( - ctx=self.ctx, scheduler_id=f"normalizer-{org.id}", organisation=org, callback=self.remove_scheduler - ) - - scheduler_report = schedulers.ReportScheduler( - ctx=self.ctx, scheduler_id=f"report-{org.id}", organisation=org, callback=self.remove_scheduler - ) - - 
with self.lock: - self.schedulers[scheduler_boefje.scheduler_id] = scheduler_boefje - self.schedulers[scheduler_normalizer.scheduler_id] = scheduler_normalizer - self.schedulers[scheduler_report.scheduler_id] = scheduler_report - - scheduler_normalizer.run() - scheduler_boefje.run() - scheduler_report.run() - - if additions: - # Flush katalogus caches when new organisations are added - self.ctx.services.katalogus.flush_caches() - - self.logger.debug("Added %s organisations to scheduler", len(additions), additions=sorted(additions)) - - @tracer.start_as_current_span("collect_metrics") - def collect_metrics(self) -> None: - """Collect application metrics - - This method that allows to collect metrics throughout the application. - """ - with self.lock: - for s in self.schedulers.copy().values(): - self.ctx.metrics_qsize.labels(scheduler_id=s.scheduler_id).set(s.queue.qsize()) - - status_counts = self.ctx.datastores.task_store.get_status_counts(s.scheduler_id) - for status, count in status_counts.items(): - self.ctx.metrics_task_status_counts.labels(scheduler_id=s.scheduler_id, status=status).set(count) - - def start_schedulers(self) -> None: - # Initialize the schedulers - try: - orgs = self.ctx.services.katalogus.get_organisations() - except clients.errors.ExternalServiceError as e: - self.logger.error("Failed to get organisations from Katalogus", error=e) - return - - for org in orgs: - boefje_scheduler = schedulers.BoefjeScheduler( - ctx=self.ctx, scheduler_id=f"boefje-{org.id}", organisation=org, callback=self.remove_scheduler - ) - self.schedulers[boefje_scheduler.scheduler_id] = boefje_scheduler - - normalizer_scheduler = schedulers.NormalizerScheduler( - ctx=self.ctx, scheduler_id=f"normalizer-{org.id}", organisation=org, callback=self.remove_scheduler - ) - self.schedulers[normalizer_scheduler.scheduler_id] = normalizer_scheduler - - report_scheduler = schedulers.ReportScheduler( - ctx=self.ctx, scheduler_id=f"report-{org.id}", organisation=org, 
callback=self.remove_scheduler - ) - self.schedulers[report_scheduler.scheduler_id] = report_scheduler - - # Start schedulers - for scheduler in self.schedulers.values(): - scheduler.run() - - def start_monitors(self) -> None: - thread.ThreadRunner( - name="App-monitor_organisations", - target=self.monitor_organisations, - stop_event=self.stop_event, - interval=self.ctx.config.monitor_organisations_interval, - ).start() - - def start_collectors(self) -> None: - thread.ThreadRunner( - name="App-metrics_collector", target=self.collect_metrics, stop_event=self.stop_event, interval=10 - ).start() - - def start_server(self) -> None: - self.server = server.Server(self.ctx, self.schedulers) - thread.ThreadRunner(name="App-server", target=self.server.run, stop_event=self.stop_event, loop=False).start() def run(self) -> None: """Start the main scheduler application, and run in threads the @@ -215,19 +61,12 @@ def run(self) -> None: * metrics collecting * api server """ - # Start schedulers self.start_schedulers() + self.start_server(self.schedulers) - # Start monitors - self.start_monitors() - - # Start metrics collecting if self.ctx.config.collect_metrics: self.start_collectors() - # API Server - self.start_server() - # Main thread while not self.stop_event.is_set(): self.stop_event.wait() @@ -241,24 +80,55 @@ def run(self) -> None: # Source: https://stackoverflow.com/a/1489838/1346257 os._exit(1) + def start_schedulers(self) -> None: + boefje = schedulers.BoefjeScheduler(ctx=self.ctx) + self.schedulers[boefje.scheduler_id] = boefje + + normalizer = schedulers.NormalizerScheduler(ctx=self.ctx) + self.schedulers[normalizer.scheduler_id] = normalizer + + report = schedulers.ReportScheduler(ctx=self.ctx) + self.schedulers[report.scheduler_id] = report + + for s in self.schedulers.values(): + s.run() + + def start_server( + self, + schedulers: dict[ + str, + schedulers.Scheduler + | schedulers.BoefjeScheduler + | schedulers.NormalizerScheduler + | schedulers.ReportScheduler, + 
], + ) -> None: + self.server = server.Server(self.ctx, schedulers) + thread.ThreadRunner(name="App-server", target=self.server.run, stop_event=self.stop_event, loop=False).start() + + def start_collectors(self) -> None: + thread.ThreadRunner( + name="App-metrics_collector", target=self._collect_metrics, stop_event=self.stop_event, interval=10 + ).start() + def shutdown(self) -> None: """Shutdown the scheduler application, and all threads.""" self.logger.info("Shutdown initiated") self.stop_event.set() - # First stop schedulers - for s in self.schedulers.copy().values(): + # Stop all schedulers + for s in self.schedulers.values(): s.stop() # Stop all threads that are still running, except the main thread. # These threads likely have a blocking call and as such are not able # to leverage a stop event. - self.stop_threads() + self._stop_threads() self.logger.info("Shutdown complete") - def stop_threads(self) -> None: + def _stop_threads(self) -> None: """Stop all threads, except the main thread.""" for t in threading.enumerate(): if t is threading.current_thread(): @@ -272,23 +142,19 @@ def stop_threads(self) -> None: t.join(5) - def unhandled_exception(self, args: threading.ExceptHookArgs) -> None: + def _unhandled_exception(self, args: threading.ExceptHookArgs) -> None: """Gracefully shutdown the scheduler application, and all threads when a unhandled exception occurs. """ self.logger.error("Unhandled exception occurred: %s", args.exc_value) self.stop_event.set() - def remove_scheduler(self, scheduler_id: str) -> None: - """Remove a scheduler from the application. This method is passed - as a callback to the scheduler, so that the scheduler can remove - itself from the application. - - Args: - scheduler_id: The id of the scheduler to remove. 
- """ - with self.lock: - if scheduler_id not in self.schedulers: - return + def _collect_metrics(self) -> None: + """Collect application metrics throughout the application.""" + for s in self.schedulers.values(): + qsize = self.ctx.datastores.pq_store.qsize(s.scheduler_id) + self.ctx.metrics_qsize.labels(scheduler_id=s.scheduler_id).set(qsize) - self.schedulers.pop(scheduler_id) + status_counts = self.ctx.datastores.task_store.get_status_counts(s.scheduler_id) + for status, count in status_counts.items(): + self.ctx.metrics_task_status_counts.labels(scheduler_id=s.scheduler_id, status=status).set(count) diff --git a/mula/scheduler/clients/amqp/listeners.py b/mula/scheduler/clients/amqp/listeners.py index 6a8955a9107..2f93c2f1e80 100644 --- a/mula/scheduler/clients/amqp/listeners.py +++ b/mula/scheduler/clients/amqp/listeners.py @@ -95,7 +95,7 @@ def __init__(self, dsn: str, queue: str, func: Callable, durable: bool = True, p self.func: Callable = func self.executor: futures.ThreadPoolExecutor = futures.ThreadPoolExecutor( - max_workers=10, thread_name_prefix=f"Listener-TPE-{self.__class__.__name__}" + max_workers=10, thread_name_prefix=f"TPE-Listener-{self.__class__.__name__}" ) self.connection: pika.BlockingConnection | None = None diff --git a/mula/scheduler/clients/http/external/katalogus.py b/mula/scheduler/clients/http/external/katalogus.py index ba174259de5..e63e9a68860 100644 --- a/mula/scheduler/clients/http/external/katalogus.py +++ b/mula/scheduler/clients/http/external/katalogus.py @@ -63,7 +63,7 @@ def flush_plugin_cache(self) -> None: self.plugin_cache.expiration_enabled = True - self.logger.debug("Flushed the katalogus plugin cache for organisations") + self.logger.debug("Flushed the katalogus plugin cache for organisations", plugin_cache=self.plugin_cache.cache) def flush_boefje_cache(self) -> None: """boefje.consumes -> plugin type boefje""" @@ -99,7 +99,9 @@ def flush_boefje_cache(self) -> None: self.boefje_cache.expiration_enabled = True - 
self.logger.debug("Flushed the katalogus boefje type cache for organisations") + self.logger.debug( + "Flushed the katalogus boefje type cache for organisations", boefje_cache=self.boefje_cache.cache + ) def flush_normalizer_cache(self) -> None: """normalizer.consumes -> plugin type normalizer""" @@ -129,7 +131,10 @@ def flush_normalizer_cache(self) -> None: self.normalizer_cache.expiration_enabled = True - self.logger.debug("Flushed the katalogus normalizer type cache for organisations") + self.logger.debug( + "Flushed the katalogus normalizer type cache for organisations", + normalizer_cache=self.normalizer_cache.cache, + ) @exception_handler def get_boefjes(self) -> list[Boefje]: @@ -203,6 +208,8 @@ def _get_from_cache() -> Plugin: return dict_utils.deep_get(self.plugin_cache, [organisation_id, plugin_id]) try: + if self.plugin_cache.is_empty(): + self.flush_plugin_cache() return _get_from_cache() except dict_utils.ExpiredError: self.flush_plugin_cache() @@ -214,6 +221,8 @@ def _get_from_cache() -> list[Plugin]: return dict_utils.deep_get(self.boefje_cache, [organisation_id, boefje_type]) try: + if self.boefje_cache.is_empty(): + self.flush_boefje_cache() return _get_from_cache() except dict_utils.ExpiredError: self.flush_boefje_cache() @@ -225,6 +234,8 @@ def _get_from_cache() -> list[Plugin]: return dict_utils.deep_get(self.normalizer_cache, [organisation_id, normalizer_type]) try: + if self.normalizer_cache.is_empty(): + self.flush_normalizer_cache() return _get_from_cache() except dict_utils.ExpiredError: self.flush_normalizer_cache() diff --git a/mula/scheduler/config/settings.py b/mula/scheduler/config/settings.py index f095350b8bb..9db1d567e1f 100644 --- a/mula/scheduler/config/settings.py +++ b/mula/scheduler/config/settings.py @@ -130,7 +130,7 @@ class Settings(BaseSettings): ) # Queue settings - pq_maxsize: int = Field(1000, description="How many items a priority queue can hold (0 is infinite)") + pq_maxsize: int = Field(0, description="How many items 
a priority queue can hold (0 is infinite)") pq_interval: int = Field( 60, description="Interval in seconds of the execution of the `` method of the `scheduler.Scheduler` class" diff --git a/mula/scheduler/models/__init__.py b/mula/scheduler/models/__init__.py index a5390ad6ede..9dd3cf8ffcc 100644 --- a/mula/scheduler/models/__init__.py +++ b/mula/scheduler/models/__init__.py @@ -3,10 +3,10 @@ from .events import RawData, RawDataReceivedEvent from .health import ServiceHealth from .normalizer import Normalizer -from .ooi import OOI, MutationOperationType, ScanProfile, ScanProfileMutation +from .ooi import OOI, MutationOperationType, RunOn, ScanProfile, ScanProfileMutation from .organisation import Organisation from .plugin import Plugin from .queue import Queue from .schedule import Schedule, ScheduleDB -from .scheduler import Scheduler +from .scheduler import Scheduler, SchedulerType from .task import BoefjeTask, NormalizerTask, ReportTask, Task, TaskDB, TaskStatus diff --git a/mula/scheduler/models/ooi.py b/mula/scheduler/models/ooi.py index 94edb1570ce..4f7feed63ce 100644 --- a/mula/scheduler/models/ooi.py +++ b/mula/scheduler/models/ooi.py @@ -32,3 +32,4 @@ class ScanProfileMutation(BaseModel): operation: MutationOperationType primary_key: str value: OOI | None + client_id: str diff --git a/mula/scheduler/models/organisation.py b/mula/scheduler/models/organisation.py index 58032cb5f30..dc819d297f6 100644 --- a/mula/scheduler/models/organisation.py +++ b/mula/scheduler/models/organisation.py @@ -3,4 +3,4 @@ class Organisation(BaseModel): id: str - name: str + name: str | None = None diff --git a/mula/scheduler/models/schedule.py b/mula/scheduler/models/schedule.py index 4ba0c54bc71..941150da3ec 100644 --- a/mula/scheduler/models/schedule.py +++ b/mula/scheduler/models/schedule.py @@ -10,26 +10,19 @@ from scheduler.utils import GUID, cron from .base import Base -from .task import Task class Schedule(BaseModel): model_config = ConfigDict(from_attributes=True, 
validate_assignment=True) id: uuid.UUID = Field(default_factory=uuid.uuid4) - scheduler_id: str - + organisation: str hash: str | None = Field(None, max_length=32) - data: dict | None = None - enabled: bool = True - schedule: str | None = None - tasks: list[Task] = [] - deadline_at: datetime | None = None created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) modified_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) @@ -57,21 +50,14 @@ class ScheduleDB(Base): __tablename__ = "schedules" id = Column(GUID, primary_key=True) - scheduler_id = Column(String, nullable=False) - + organisation = Column(String, nullable=False) hash = Column(String(32), nullable=True, unique=True) - data = Column(JSONB, nullable=False) - enabled = Column(Boolean, nullable=False, default=True) - schedule = Column(String, nullable=True) - tasks = relationship("TaskDB", back_populates="schedule") deadline_at = Column(DateTime(timezone=True), nullable=True) - created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) - modified_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()) diff --git a/mula/scheduler/models/scheduler.py b/mula/scheduler/models/scheduler.py index 9c75c923743..e1d0f7c7b77 100644 --- a/mula/scheduler/models/scheduler.py +++ b/mula/scheduler/models/scheduler.py @@ -1,14 +1,24 @@ +import enum from datetime import datetime from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict + + +class SchedulerType(str, enum.Enum): + """Enum for scheduler types.""" + + UNKNOWN = "unknown" + BOEFJE = "boefje" + NORMALIZER = "normalizer" + REPORT = "report" class Scheduler(BaseModel): - """Representation of a schedulers.Scheduler instance. 
Used for - unmarshalling of schedulers to a JSON representation.""" + model_config = ConfigDict(from_attributes=True, use_enum_values=True) - id: str | None = None - enabled: bool | None = None - priority_queue: dict[str, Any] | None = None + id: str + type: SchedulerType + item_type: Any + qsize: int = 0 last_activity: datetime | None = None diff --git a/mula/scheduler/models/task.py b/mula/scheduler/models/task.py index dee0014e86c..c438dc87760 100644 --- a/mula/scheduler/models/task.py +++ b/mula/scheduler/models/task.py @@ -46,19 +46,13 @@ class Task(BaseModel): model_config = ConfigDict(from_attributes=True, use_enum_values=True) id: uuid.UUID = Field(default_factory=uuid.uuid4) - - scheduler_id: str | None = None - + scheduler_id: str schedule_id: uuid.UUID | None = None - + organisation: str priority: int | None = 0 - status: TaskStatus = TaskStatus.PENDING - type: str | None = None - hash: str | None = Field(None, max_length=32) - data: dict = Field(default_factory=dict) created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) @@ -69,24 +63,18 @@ class TaskDB(Base): __tablename__ = "tasks" id = Column(GUID, primary_key=True) - scheduler_id = Column(String, nullable=False) - schedule_id = Column(GUID, ForeignKey("schedules.id", ondelete="SET NULL"), nullable=True) - schedule = relationship("ScheduleDB", back_populates="tasks") - + organisation = Column(String, nullable=False) type = Column(String, nullable=False) - hash = Column(String(32), index=True) - priority = Column(Integer) - data = Column(JSONB, nullable=False) - status = Column(Enum(TaskStatus), nullable=False, default=TaskStatus.PENDING) - created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) + schedule = relationship("ScheduleDB", back_populates="tasks") + created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now()) modified_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now(), 
onupdate=func.now()) diff --git a/mula/scheduler/schedulers/errors.py b/mula/scheduler/schedulers/errors.py new file mode 100644 index 00000000000..d20f03018e0 --- /dev/null +++ b/mula/scheduler/schedulers/errors.py @@ -0,0 +1,21 @@ +import functools + +from scheduler.clients.errors import ExternalServiceError +from scheduler.schedulers.queue.errors import QueueFullError + + +def exception_handler(func): + @functools.wraps(func) + def inner_function(self, *args, **kwargs): + try: + return func(self, *args, **kwargs) + except ExternalServiceError as exc: + self.logger.exception("An exception occurred", exc=exc) + return None + except QueueFullError as exc: + self.logger.exception("Queue is full", exc=exc) + return None + except Exception as exc: + raise exc + + return inner_function diff --git a/mula/scheduler/schedulers/queue/pq.py b/mula/scheduler/schedulers/queue/pq.py index 4a1914451a6..1e7c0b77d29 100644 --- a/mula/scheduler/schedulers/queue/pq.py +++ b/mula/scheduler/schedulers/queue/pq.py @@ -97,7 +97,7 @@ def __init__( self.pq_store: storage.stores.PriorityQueueStore = pq_store self.lock: threading.Lock = threading.Lock() - def pop(self, filters: storage.filters.FilterRequest | None = None) -> models.Task | None: + def pop(self, filters: storage.filters.FilterRequest | None = None) -> tuple[list[models.Task], int]: """Remove and return the highest priority item from the queue. Optionally apply filters to the queue. 
@@ -113,14 +113,13 @@ def pop(self, filters: storage.filters.FilterRequest | None = None) -> models.Ta if self.empty(): raise QueueEmptyError(f"Queue {self.pq_id} is empty.") - item = self.pq_store.pop(self.pq_id, filters) - if item is None: - return None + items, count = self.pq_store.pop(self.pq_id, filters) + if items is None: + return ([], 0) - item.status = models.TaskStatus.DISPATCHED - self.pq_store.update(self.pq_id, item) + self.pq_store.bulk_update_status(self.pq_id, [item.id for item in items], models.TaskStatus.DISPATCHED) - return item + return items, count def push(self, task: models.Task) -> models.Task: """Push an item onto the queue. @@ -202,7 +201,17 @@ def push(self, task: models.Task) -> models.Task: task.status = models.TaskStatus.QUEUED item_db = self.pq_store.push(task) else: - self.pq_store.update(self.pq_id, task) + # Get the item from the queue and update it + stored_item_data = self.get_item_by_identifier(task) + if stored_item_data is None: + raise ItemNotFoundError(f"Item {task} not found in datastore {self.pq_id}") + + # Update the item with the new data + patch_data = task.dict(exclude_unset=True) + updated_task = stored_item_data.model_copy(update=patch_data) + + # Update the item in the queue + self.pq_store.update(self.pq_id, updated_task) item_db = self.get_item_by_identifier(task) if not item_db: diff --git a/mula/scheduler/schedulers/rankers/boefje.py b/mula/scheduler/schedulers/rankers/boefje.py index f951aea4f84..06b65e98cc1 100644 --- a/mula/scheduler/schedulers/rankers/boefje.py +++ b/mula/scheduler/schedulers/rankers/boefje.py @@ -1,4 +1,3 @@ -import random from datetime import datetime, timedelta, timezone from typing import Any @@ -53,6 +52,4 @@ class BoefjeRankerTimeBased(Ranker): """ def rank(self, obj: Any) -> int: - minimum = datetime.today() + timedelta(days=1) - maximum = minimum + timedelta(days=7) - return random.randint(int(minimum.timestamp()), int(maximum.timestamp())) # noqa: S311 + return 
int(obj.created_at.timestamp()) diff --git a/mula/scheduler/schedulers/scheduler.py b/mula/scheduler/schedulers/scheduler.py index a9115abbc9d..251360c23e2 100644 --- a/mula/scheduler/schedulers/scheduler.py +++ b/mula/scheduler/schedulers/scheduler.py @@ -18,41 +18,38 @@ class Scheduler(abc.ABC): - """The Scheduler class combines the priority queue. - The scheduler is responsible for populating the queue, and ranking tasks. + """The scheduler base class that all schedulers should inherit from. Attributes: logger: - The logger for the class + The logger instance. ctx: Application context of shared data (e.g. configuration, external services connections). - queue: - A queue.PriorityQueue instance - callback: - A callback function to call when the scheduler is stopped. scheduler_id: - The id of the scheduler. + The id of the scheduler. max_tries: The maximum number of retries for an item to be pushed to the queue. - enabled: - Whether the scheduler is enabled or not. - _last_activity: + create_schedule: + Whether to create a schedule for a task. + last_activity: The last activity of the scheduler. + queue: + A queues.PriorityQueue instance listeners: - A dict of connector.Listener instances, used for listening to - external events. + A dictionary of listeners, typically AMQP listeners on which + event messages are received. + threads: + A list of threads that are running, typically long running + processes. lock: - A threading.Lock instance used for locking + A threading lock stop_event_threads: - A threading.Event object used for communicating a stop - event across threads. - threads: - A dict of ThreadRunner instances, used for running processes - concurrently. + A threading event to stop the running threads. 
""" + TYPE: models.SchedulerType = models.SchedulerType.UNKNOWN ITEM_TYPE: Any = None def __init__( @@ -60,36 +57,16 @@ def __init__( ctx: context.AppContext, scheduler_id: str, queue: PriorityQueue | None = None, - callback: Callable[..., None] | None = None, max_tries: int = -1, create_schedule: bool = False, auto_calculate_deadline: bool = True, ): - """Initialize the Scheduler. - - Args: - ctx: - Application context of shared data (e.g. configuration, external - services connections). - scheduler_id: - The id of the scheduler. - queue: - A queue.PriorityQueue instance - callback: - A callback function to call when the scheduler is stopped. - max_tries: - The maximum number of retries for an item to be pushed to - the queue. - """ - self.logger: structlog.BoundLogger = structlog.getLogger(__name__) self.ctx: context.AppContext = ctx - self.callback: Callable[[], Any] | None = callback # Properties self.scheduler_id: str = scheduler_id self.max_tries: int = max_tries - self.enabled: bool = True self.create_schedule: bool = create_schedule self.auto_calculate_deadline: bool = auto_calculate_deadline self._last_activity: datetime | None = None @@ -106,9 +83,9 @@ def __init__( self.listeners: dict[str, clients.amqp.Listener] = {} # Threads + self.threads: list[thread.ThreadRunner] = [] self.lock: threading.Lock = threading.Lock() self.stop_event_threads: threading.Event = threading.Event() - self.threads: list[thread.ThreadRunner] = [] @abc.abstractmethod def run(self) -> None: @@ -184,6 +161,7 @@ def push_item_to_queue_with_timeout( while not self.is_space_on_queue() and (tries < max_tries or max_tries == -1): self.logger.debug( "Queue %s is full, waiting for space", + self.queue.pq_id, queue_id=self.queue.pq_id, queue_qsize=self.queue.qsize(), scheduler_id=self.scheduler_id, @@ -207,16 +185,6 @@ def push_item_to_queue(self, item: models.Task, create_schedule: bool = True) -> QueueFullError: When the queue is full. InvalidItemError: When the item is invalid. 
""" - if not self.is_enabled(): - self.logger.warning( - "Scheduler is disabled, not pushing item to queue %s", - self.queue.pq_id, - item_id=item.id, - queue_id=self.queue.pq_id, - scheduler_id=self.scheduler_id, - ) - raise NotAllowedError("Scheduler is disabled") - try: if item.type is None: item.type = self.ITEM_TYPE.type @@ -316,7 +284,9 @@ def post_push(self, item: models.Task, create_schedule: bool = True) -> models.T schedule_db = self.ctx.datastores.schedule_store.get_schedule_by_hash(item.hash) if schedule_db is None: - schedule = models.Schedule(scheduler_id=self.scheduler_id, hash=item.hash, data=item.data) + schedule = models.Schedule( + scheduler_id=self.scheduler_id, hash=item.hash, data=item.data, organisation=item.organisation + ) schedule_db = self.ctx.datastores.schedule_store.create_schedule(schedule) if schedule_db is None: @@ -364,9 +334,10 @@ def post_push(self, item: models.Task, create_schedule: bool = True) -> models.T return item - def pop_item_from_queue(self, filters: storage.filters.FilterRequest | None = None) -> models.Task | None: + def pop_item_from_queue( + self, filters: storage.filters.FilterRequest | None = None + ) -> tuple[list[models.Task], int]: """Pop an item from the queue. - Args: filters: Optional filters to apply when popping an item. @@ -377,38 +348,26 @@ def pop_item_from_queue(self, filters: storage.filters.FilterRequest | None = No NotAllowedError: When the scheduler is disabled. QueueEmptyError: When the queue is empty. 
""" - if not self.is_enabled(): - self.logger.warning( - "Scheduler is disabled, not popping item from queue", - queue_id=self.queue.pq_id, - queue_qsize=self.queue.qsize(), - scheduler_id=self.scheduler_id, - ) - raise NotAllowedError("Scheduler is disabled") - try: - item = self.queue.pop(filters) + items, count = self.queue.pop(filters) except QueueEmptyError as exc: raise exc - if item is not None: + if items is not None: self.logger.debug( - "Popped item %s from queue %s with priority %s", - item.id, + "Popped %s item(s) from queue %s", + count, self.queue.pq_id, - item.priority, - item_id=item.id, queue_id=self.queue.pq_id, scheduler_id=self.scheduler_id, ) - self.post_pop(item) + self.post_pop(items) - return item + return items, count - def post_pop(self, item: models.Task) -> None: + def post_pop(self, items: list[models.Task]) -> None: """After an item is popped from the queue, we execute this function - Args: item: An item from the queue """ @@ -435,54 +394,7 @@ def calculate_deadline(self, task: models.Task) -> datetime: return adjusted_time - def enable(self) -> None: - """Enable the scheduler. - - This will start the scheduler, and start all listeners and threads. - """ - if self.is_enabled(): - self.logger.debug("Scheduler is already enabled") - return - - self.logger.info("Enabling scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id) - self.enabled = True - self.stop_event_threads.clear() - self.run() - - self.logger.info("Enabled scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id) - - def disable(self) -> None: - """Disable the scheduler. - - This will stop all listeners and threads, and clear the queue, and any - tasks that were on the queue will be set to CANCELLED. 
- """ - if not self.is_enabled(): - self.logger.warning("Scheduler already disabled: %s", self.scheduler_id, scheduler_id=self.scheduler_id) - return - - self.logger.info("Disabling scheduler: %s", self.scheduler_id) - self.enabled = False - - self.stop_listeners() - self.stop_threads() - self.queue.clear() - - # Get all tasks that were on the queue and set them to CANCELLED - tasks, _ = self.ctx.datastores.task_store.get_tasks( - scheduler_id=self.scheduler_id, status=models.TaskStatus.QUEUED - ) - task_ids = [task.id for task in tasks] - self.ctx.datastores.task_store.cancel_tasks(scheduler_id=self.scheduler_id, task_ids=task_ids) - - self.logger.info("Disabled scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id) - - def stop(self, callback: bool = True) -> None: - """Stop the scheduler. - - Args: - callback: Whether to call the callback function. - """ + def stop(self) -> None: self.logger.info("Stopping scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id) # First, stop the listeners, when those are running in a thread and @@ -491,9 +403,6 @@ def stop(self, callback: bool = True) -> None: self.stop_listeners() self.stop_threads() - if self.callback and callback: - self.callback(self.scheduler_id) # type: ignore [call-arg] - self.logger.info("Stopped scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id) def stop_listeners(self) -> None: @@ -510,14 +419,6 @@ def stop_threads(self) -> None: self.threads = [] - def is_enabled(self) -> bool: - """Check if the scheduler is enabled. - - Returns: - True if the scheduler is enabled, False otherwise. - """ - return self.enabled - def is_space_on_queue(self) -> bool: """Check if there is space on the queue. 
@@ -550,15 +451,8 @@ def dict(self) -> dict[str, Any]: """Get a dict representation of the scheduler.""" return { "id": self.scheduler_id, - "enabled": self.enabled, - "priority_queue": { - "id": self.queue.pq_id, - "item_type": self.queue.item_type.type, - "maxsize": self.queue.maxsize, - "qsize": self.queue.qsize(), - "allow_replace": self.queue.allow_replace, - "allow_updates": self.queue.allow_updates, - "allow_priority_updates": self.queue.allow_priority_updates, - }, + "type": self.TYPE.value, + "item_type": self.ITEM_TYPE.__name__, + "qsize": self.queue.qsize(), "last_activity": self.last_activity, } diff --git a/mula/scheduler/schedulers/schedulers/boefje.py b/mula/scheduler/schedulers/schedulers/boefje.py index 7310853f26a..3b344f93711 100644 --- a/mula/scheduler/schedulers/schedulers/boefje.py +++ b/mula/scheduler/schedulers/schedulers/boefje.py @@ -1,85 +1,43 @@ import uuid -from collections.abc import Callable from concurrent import futures from datetime import datetime, timedelta, timezone -from types import SimpleNamespace -from typing import Any +from typing import Any, Literal -import structlog from opentelemetry import trace +from pydantic import ValidationError -from scheduler import clients, context, storage, utils +from scheduler import clients, context, models, utils from scheduler.clients.errors import ExternalServiceError -from scheduler.models import ( - OOI, - Boefje, - BoefjeTask, - MutationOperationType, - Organisation, - Plugin, - ScanProfileMutation, - Task, - TaskStatus, -) +from scheduler.models import MutationOperationType from scheduler.models.ooi import RunOn -from scheduler.schedulers import Scheduler -from scheduler.schedulers.queue import PriorityQueue, QueueFullError -from scheduler.schedulers.rankers import BoefjeRanker +from scheduler.schedulers import Scheduler, rankers +from scheduler.schedulers.errors import exception_handler from scheduler.storage import filters +from scheduler.storage.errors import StorageError tracer = 
trace.get_tracer(__name__) class BoefjeScheduler(Scheduler): - """A KAT specific implementation of a Boefje scheduler. It extends - the `Scheduler` class by adding an `organisation` attribute. + """Scheduler implementation for the creation of BoefjeTask models. Attributes: - logger: A logger instance. - organisation: The organisation that this scheduler is for. + ranker: The ranker to calculate the priority of a task. """ - ITEM_TYPE: Any = BoefjeTask + ID: Literal["boefje"] = "boefje" + TYPE: models.SchedulerType = models.SchedulerType.BOEFJE + ITEM_TYPE: Any = models.BoefjeTask - def __init__( - self, - ctx: context.AppContext, - scheduler_id: str, - organisation: Organisation, - queue: PriorityQueue | None = None, - callback: Callable[..., None] | None = None, - ): + def __init__(self, ctx: context.AppContext): """Initializes the BoefjeScheduler. Args: - ctx: The application context. - scheduler_id: The id of the scheduler. - organisation: The organisation that this scheduler is for. - queue: The queue to use for this scheduler. - callback: The callback function to call when a task is completed. + ctx (context.AppContext): Application context of shared data (e.g. + configuration, external services connections). 
""" - self.logger: structlog.BoundLogger = structlog.getLogger(__name__) - self.organisation: Organisation = organisation - - self.queue = queue or PriorityQueue( - pq_id=scheduler_id, - maxsize=ctx.config.pq_maxsize, - item_type=self.ITEM_TYPE, - allow_priority_updates=True, - pq_store=ctx.datastores.pq_store, - ) - - super().__init__( - ctx=ctx, - queue=self.queue, - scheduler_id=scheduler_id, - callback=callback, - create_schedule=True, - auto_calculate_deadline=True, - ) - - # Priority ranker - self.priority_ranker = BoefjeRanker(self.ctx) + super().__init__(ctx=ctx, scheduler_id=self.ID, create_schedule=True, auto_calculate_deadline=True) + self.ranker = rankers.BoefjeRankerTimeBased(self.ctx) def run(self) -> None: """The run method is called when the scheduler is started. It will @@ -97,242 +55,174 @@ def run(self) -> None: - Rescheduling; when a task has passed its deadline, we need to reschedule it. """ - # Scan profile mutations - self.listeners["scan_profile_mutations"] = clients.ScanProfileMutation( + self.listeners["mutations"] = clients.ScanProfileMutation( dsn=str(self.ctx.config.host_raw_data), - queue=f"{self.organisation.id}__scan_profile_mutations", - func=self.push_tasks_for_scan_profile_mutations, + queue="scan_profile_mutations", + func=self.process_mutations, prefetch_count=self.ctx.config.rabbitmq_prefetch_count, ) - self.run_in_thread( - name=f"BoefjeScheduler-{self.scheduler_id}-mutations", - target=self.listeners["scan_profile_mutations"].listen, - loop=False, - ) - - # New Boefjes - self.run_in_thread( - name=f"BoefjeScheduler-{self.scheduler_id}-new_boefjes", - target=self.push_tasks_for_new_boefjes, - interval=60.0, - ) - - # Rescheduling - self.run_in_thread( - name=f"scheduler-{self.scheduler_id}-reschedule", target=self.push_tasks_for_rescheduling, interval=60.0 - ) + self.run_in_thread(name="BoefjeScheduler-mutations", target=self.listeners["mutations"].listen, loop=False) + self.run_in_thread(name="BoefjeScheduler-new_boefjes", 
target=self.process_new_boefjes, interval=60.0) + self.run_in_thread(name="BoefjeScheduler-rescheduling", target=self.process_rescheduling, interval=60.0) self.logger.info( - "Boefje scheduler started for %s", - self.organisation.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - item_type=self.queue.item_type.__name__, + "Boefje scheduler started", scheduler_id=self.scheduler_id, item_type=self.queue.item_type.__name__ ) - @tracer.start_as_current_span("boefje_push_tasks_for_scan_profile_mutations") - def push_tasks_for_scan_profile_mutations(self, body: bytes) -> None: + @tracer.start_as_current_span("BoefjeScheduler.process_mutations") + def process_mutations(self, body: bytes) -> None: """Create tasks for oois that have a scan level change. Args: mutation: The mutation that was received. """ - # Convert body into a ScanProfileMutation - mutation = ScanProfileMutation.model_validate_json(body) - - self.logger.debug( - "Received scan level mutation %s for: %s", - mutation.operation, - mutation.primary_key, - ooi_primary_key=mutation.primary_key, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - - # There should be an OOI in value - ooi = mutation.value - if ooi is None: + try: + # Convert body into a ScanProfileMutation + mutation = models.ScanProfileMutation.model_validate_json(body) self.logger.debug( - "Mutation value is None, skipping", organisation_id=self.organisation.id, scheduler_id=self.scheduler_id - ) - return - - if mutation.operation == MutationOperationType.DELETE: - # When there are tasks of the ooi are on the queue, we need to - # remove them from the queue. 
- items, _ = self.ctx.datastores.pq_store.get_items( + "Received scan level mutation %s for: %s", + mutation.operation, + mutation.primary_key, + ooi_primary_key=mutation.primary_key, scheduler_id=self.scheduler_id, - filters=filters.FilterRequest( - filters=[filters.Filter(column="data", field="input_ooi", operator="eq", value=ooi.primary_key)] - ), ) - # Delete all items for this ooi, update all tasks for this ooi - # to cancelled. - for item in items: - task = self.ctx.datastores.task_store.get_task(item.id) - if task is None: - continue + # There should be an OOI in value + ooi = mutation.value + if ooi is None: + self.logger.debug("Mutation value is None, skipping", scheduler_id=self.scheduler_id) + return + + # When the mutation is a delete operation, we need to remove all tasks + if mutation.operation == models.MutationOperationType.DELETE: + items, _ = self.ctx.datastores.pq_store.get_items( + scheduler_id=self.scheduler_id, + filters=filters.FilterRequest( + filters=[filters.Filter(column="data", field="input_ooi", operator="eq", value=ooi.primary_key)] + ), + ) - task.status = TaskStatus.CANCELLED - self.ctx.datastores.task_store.update_task(task) + # Delete all items for this ooi, update all tasks for this ooi + # to cancelled. + for item in items: + task = self.ctx.datastores.task_store.get_task(item.id) + if task is None: + continue - return + task.status = models.TaskStatus.CANCELLED + self.ctx.datastores.task_store.update_task(task) - # What available boefjes do we have for this ooi? - boefjes = self.get_boefjes_for_ooi(ooi) - if not boefjes: - self.logger.debug( - "No boefjes available for %s", - ooi.primary_key, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) + return + + # What available boefjes do we have for this ooi? 
+ boefjes = self.get_boefjes_for_ooi(ooi, mutation.client_id) + if not boefjes: + self.logger.debug("No boefjes available for %s", ooi.primary_key, scheduler_id=self.scheduler_id) + return + except (StorageError, ValidationError): + self.logger.exception("Error occurred while processing mutation", scheduler_id=self.scheduler_id) return - with futures.ThreadPoolExecutor( - thread_name_prefix=f"BoefjeScheduler-TPE-{self.scheduler_id}-mutations" - ) as executor: - for boefje in boefjes: - # Is the boefje allowed to run on the ooi? - if not self.has_boefje_permission_to_run(boefje, ooi): - self.logger.debug( - "Boefje not allowed to run on ooi", - boefje_id=boefje.id, - boefje_name=boefje.name, - ooi_primary_key=ooi.primary_key, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - continue + # Create tasks for the boefjes + boefje_tasks = [] + for boefje in boefjes: + if not self.has_boefje_permission_to_run(boefje, ooi): + self.logger.debug( + "Boefje not allowed to run on ooi", + boefje_id=boefje.id, + ooi_primary_key=ooi.primary_key, + scheduler_id=self.scheduler_id, + ) + continue - create_schedule = True - run_task = True - - # What type of run boefje is it? - if boefje.run_on: - create_schedule = False - run_task = False - if mutation.operation == MutationOperationType.CREATE: - run_task = RunOn.CREATE in boefje.run_on - elif mutation.operation == MutationOperationType.UPDATE: - run_task = RunOn.UPDATE in boefje.run_on - - if not run_task: - self.logger.debug( - "Based on boefje run on type, skipping", - boefje_id=boefje.id, - ooi_primary_key=ooi.primary_key, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - continue + create_schedule, run_task = True, True - boefje_task = BoefjeTask( - boefje=Boefje.model_validate(boefje.model_dump()), + # What type of run boefje is it? 
+ if boefje.run_on: + create_schedule = False + run_task = False + if mutation.operation == MutationOperationType.CREATE: + run_task = RunOn.CREATE in boefje.run_on + elif mutation.operation == MutationOperationType.UPDATE: + run_task = RunOn.UPDATE in boefje.run_on + + if not run_task: + self.logger.debug( + "Based on boefje run on type, skipping", + boefje_id=boefje.id, + ooi_primary_key=ooi.primary_key, + organisation_id=mutation.client_id, + scheduler_id=self.scheduler_id, + ) + continue + + boefje_tasks.append( + models.BoefjeTask( + boefje=models.Boefje.model_validate(boefje.model_dump()), input_ooi=ooi.primary_key if ooi else None, - organization=self.organisation.id, + organization=mutation.client_id, ) + ) + with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-mutations") as executor: + for boefje_task in boefje_tasks: executor.submit( self.push_boefje_task, boefje_task, + mutation.client_id, create_schedule, - self.push_tasks_for_scan_profile_mutations.__name__, + self.process_mutations.__name__, ) - @tracer.start_as_current_span("boefje_push_tasks_for_new_boefjes") - def push_tasks_for_new_boefjes(self) -> None: + @tracer.start_as_current_span("BoefjeScheduler.process_new_boefjes") + def process_new_boefjes(self) -> None: """When new boefjes are added or enabled we find the ooi's that boefjes can run on, and create tasks for it.""" - new_boefjes = None + boefje_tasks = [] + + # TODO: this should be optimized see #3357 try: - new_boefjes = self.ctx.services.katalogus.get_new_boefjes_by_org_id(self.organisation.id) + orgs = self.ctx.services.katalogus.get_organisations() except ExternalServiceError: - self.logger.error( - "Failed to get new boefjes for organisation: %s from katalogus", - self.organisation.name, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) + self.logger.exception("Error occurred while processing new boefjes", scheduler_id=self.scheduler_id) return - if new_boefjes is None or not 
new_boefjes: - self.logger.debug( - "No new boefjes for organisation: %s", - self.organisation.name, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - return + for org in orgs: + try: + # Get new boefjes for organisation + new_boefjes = self.ctx.services.katalogus.get_new_boefjes_by_org_id(org.id) + if not new_boefjes: + self.logger.debug("No new boefjes found for organisation", organisation_id=org.id) + continue - self.logger.debug( - "Received new boefjes", - boefjes=[boefje.name for boefje in new_boefjes], - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) + # Get all oois for the new boefjes + for boefje in new_boefjes: + oois = self.get_oois_for_boefje(boefje, org.id) + for ooi in oois: + boefje_task = models.BoefjeTask( + boefje=models.Boefje.model_validate(boefje.dict()), + input_ooi=ooi.primary_key, + organization=org.id, + ) - for boefje in new_boefjes: - if not boefje.consumes: - self.logger.debug( - "No consumes found for boefje: %s", - boefje.name, - boefje_id=boefje.id, - organisation_id=self.organisation.id, + boefje_tasks.append((boefje_task, org.id)) + except ExternalServiceError: + self.logger.warning( + "Error occurred while processing new boefjes", + organisation_id=org.id, scheduler_id=self.scheduler_id, ) continue - oois_by_object_type: list[OOI] = [] - try: - oois_by_object_type = self.ctx.services.octopoes.get_objects_by_object_types( - self.organisation.id, boefje.consumes, list(range(boefje.scan_level, 5)) - ) - except ExternalServiceError as exc: - self.logger.error( - "Could not get oois for organisation: %s from octopoes", - self.organisation.name, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc, + with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-new_boefjes") as executor: + for boefje_task, org_id in boefje_tasks: + executor.submit( + self.push_boefje_task, boefje_task, org_id, self.create_schedule, 
self.process_new_boefjes.__name__ ) - continue - - with futures.ThreadPoolExecutor( - thread_name_prefix=f"BoefjeScheduler-TPE-{self.scheduler_id}-new_boefjes" - ) as executor: - for ooi in oois_by_object_type: - if not self.has_boefje_permission_to_run(boefje, ooi): - self.logger.debug( - "Boefje not allowed to run on ooi", - boefje_id=boefje.id, - ooi_primary_key=ooi.primary_key, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - continue - - boefje_task = BoefjeTask( - boefje=Boefje.model_validate(boefje.dict()), - input_ooi=ooi.primary_key, - organization=self.organisation.id, - ) - - executor.submit(self.push_boefje_task, boefje_task, self.push_tasks_for_new_boefjes.__name__) - - @tracer.start_as_current_span("boefje_push_tasks_for_rescheduling") - def push_tasks_for_rescheduling(self): - if self.queue.full(): - self.logger.warning( - "Boefjes queue is full, not populating with new tasks", - queue_qsize=self.queue.qsize(), - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - return + @tracer.start_as_current_span("BoefjeScheduler.process_rescheduling") + def process_rescheduling(self): try: schedules, _ = self.ctx.datastores.schedule_store.get_schedules( filters=filters.FilterRequest( @@ -343,247 +233,168 @@ def push_tasks_for_rescheduling(self): ] ) ) - except storage.errors.StorageError as exc_db: - self.logger.error( - "Could not get schedules for rescheduling %s", - self.scheduler_id, - scheduler_id=self.scheduler_id, - organisation_id=self.organisation.id, - exc_info=exc_db, - ) - raise exc_db - - if not schedules: - self.logger.debug( - "No schedules tasks found for scheduler: %s", - self.scheduler_id, - scheduler_id=self.scheduler_id, - organisation_id=self.organisation.id, - ) + if not schedules: + self.logger.debug( + "No schedules tasks found for scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id + ) + return + except StorageError: + self.logger.exception("Error occurred while 
processing rescheduling", scheduler_id=self.scheduler_id) return - with futures.ThreadPoolExecutor( - thread_name_prefix=f"BoefjeScheduler-TPE-{self.scheduler_id}-rescheduling" - ) as executor: + with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-rescheduling") as executor: for schedule in schedules: - boefje_task = BoefjeTask.model_validate(schedule.data) - - # Plugin still exists? try: + boefje_task = models.BoefjeTask.model_validate(schedule.data) + + # Plugin still exists? plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id( - boefje_task.boefje.id, self.organisation.id + boefje_task.boefje.id, schedule.organisation ) if not plugin: self.logger.info( "Boefje does not exist anymore, skipping and disabling schedule", boefje_id=boefje_task.boefje.id, schedule_id=schedule.id, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) schedule.enabled = False self.ctx.datastores.schedule_store.update_schedule(schedule) continue - except ExternalServiceError as exc_plugin: - self.logger.error( - "Could not get plugin %s from katalogus", - boefje_task.boefje.id, - boefje_id=boefje_task.boefje.id, - schedule_id=schedule.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc_plugin, - ) - continue - # Plugin still enabled? - if not plugin.enabled: - self.logger.debug( - "Boefje is disabled, skipping", - boefje_id=boefje_task.boefje.id, - schedule_id=schedule.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - schedule.enabled = False - self.ctx.datastores.schedule_store.update_schedule(schedule) - continue - - # Plugin a boefje? - if plugin.type != "boefje": - # We don't disable the schedule, since we should've gotten - # schedules for boefjes only. 
- self.logger.warning( - "Plugin is not a boefje, skipping", - plugin_id=plugin.id, - schedule_id=schedule.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - continue - - # When the boefje task has an ooi, we need to do some additional - # checks. - ooi = None - if boefje_task.input_ooi: - # OOI still exists? - try: - ooi = self.ctx.services.octopoes.get_object(boefje_task.organization, boefje_task.input_ooi) - if not ooi: - self.logger.info( - "OOI does not exist anymore, skipping and disabling schedule", - ooi_primary_key=boefje_task.input_ooi, - schedule_id=schedule.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - schedule.enabled = False - self.ctx.datastores.schedule_store.update_schedule(schedule) - continue - except ExternalServiceError as exc_ooi: - self.logger.error( - "Could not get ooi %s from octopoes", - boefje_task.input_ooi, - ooi_primary_key=boefje_task.input_ooi, - schedule_id=schedule.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc_ooi, - ) - continue - - # Boefje still consuming ooi type? - if ooi.object_type not in plugin.consumes: + # Plugin still enabled? + if not plugin.enabled: self.logger.debug( - "Boefje does not consume ooi anymore, skipping", + "Boefje is disabled, skipping", boefje_id=boefje_task.boefje.id, - ooi_primary_key=ooi.primary_key, - organisation_id=self.organisation.id, + schedule_id=schedule.id, scheduler_id=self.scheduler_id, ) schedule.enabled = False self.ctx.datastores.schedule_store.update_schedule(schedule) continue - # TODO: do we want to disable the schedule when a - # boefje is not allowed to scan an ooi? - - # Boefje allowed to scan ooi? - if not self.has_boefje_permission_to_run(plugin, ooi): - self.logger.info( - "Boefje not allowed to scan ooi, skipping and disabling schedule", - boefje_id=boefje_task.boefje.id, - ooi_primary_key=ooi.primary_key, + # Plugin a boefje? 
+ if plugin.type != "boefje": + # We don't disable the schedule, since we should've gotten + # schedules for boefjes only. + self.logger.warning( + "Plugin is not a boefje, skipping", + plugin_id=plugin.id, schedule_id=schedule.id, - organisation_id=self.organisation.id, + organisation_id=schedule.organisation, scheduler_id=self.scheduler_id, ) - schedule.enabled = False - self.ctx.datastores.schedule_store.update_schedule(schedule) continue - new_boefje_task = BoefjeTask( - boefje=Boefje.model_validate(plugin.dict()), - input_ooi=ooi.primary_key if ooi else None, - organization=self.organisation.id, - ) + # When the boefje task has an ooi, we need to do some additional + # checks. + ooi = None + if boefje_task.input_ooi: + # OOI still exists? + ooi = self.ctx.services.octopoes.get_object(boefje_task.organization, boefje_task.input_ooi) + if not ooi: + self.logger.info( + "OOI does not exist anymore, skipping and disabling schedule", + ooi_primary_key=boefje_task.input_ooi, + schedule_id=schedule.id, + organisation_id=schedule.organisation, + scheduler_id=self.scheduler_id, + ) + schedule.enabled = False + self.ctx.datastores.schedule_store.update_schedule(schedule) + continue - executor.submit(self.push_boefje_task, new_boefje_task, self.push_tasks_for_rescheduling.__name__) + # Boefje still consuming ooi type? + if ooi.object_type not in plugin.consumes: + self.logger.debug( + "Boefje does not consume ooi anymore, skipping", + boefje_id=boefje_task.boefje.id, + ooi_primary_key=ooi.primary_key, + organisation_id=schedule.organisation, + scheduler_id=self.scheduler_id, + ) + schedule.enabled = False + self.ctx.datastores.schedule_store.update_schedule(schedule) + continue - @tracer.start_as_current_span("boefje_push_task") - def push_boefje_task(self, boefje_task: BoefjeTask, create_schedule: bool = True, caller: str = "") -> None: - """Given a Boefje and OOI create a BoefjeTask and push it onto - the queue. 
+ # TODO: do we want to disable the schedule when a + # boefje is not allowed to scan an ooi? - Args: - boefje: Boefje to run. - ooi: OOI to run Boefje on. - caller: The name of the function that called this function, used for logging. + # Boefje allowed to scan ooi? + if not self.has_boefje_permission_to_run(plugin, ooi): + self.logger.info( + "Boefje not allowed to scan ooi, skipping and disabling schedule", + boefje_id=boefje_task.boefje.id, + ooi_primary_key=ooi.primary_key, + schedule_id=schedule.id, + organisation_id=schedule.organisation, + scheduler_id=self.scheduler_id, + ) + schedule.enabled = False + self.ctx.datastores.schedule_store.update_schedule(schedule) + continue - """ - self.logger.debug( - "Pushing boefje task", - task_hash=boefje_task.hash, - boefje_id=boefje_task.boefje.id, - ooi_primary_key=boefje_task.input_ooi, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, - ) + new_boefje_task = models.BoefjeTask( + boefje=models.Boefje.model_validate(plugin.dict()), + input_ooi=ooi.primary_key if ooi else None, + organization=schedule.organisation, + ) + except (StorageError, ValidationError, ExternalServiceError): + self.logger.exception( + "Error occurred while processing rescheduling", + schedule_id=schedule.id, + scheduler_id=self.scheduler_id, + ) + continue - try: - grace_period_passed = self.has_boefje_task_grace_period_passed(boefje_task) - if not grace_period_passed: - self.logger.debug( - "Task has not passed grace period: %s", - boefje_task.hash, - task_hash=boefje_task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, + executor.submit( + self.push_boefje_task, + new_boefje_task, + schedule.organisation, + self.create_schedule, + self.process_rescheduling.__name__, ) - return - except Exception as exc_grace_period: - self.logger.warning( - "Could not check if grace period has passed: %s", + + @exception_handler + 
@tracer.start_as_current_span("BoefjeScheduler.push_boefje_task") + def push_boefje_task( + self, boefje_task: models.BoefjeTask, organisation_id: str, create_schedule: bool = True, caller: str = "" + ) -> None: + grace_period_passed = self.has_boefje_task_grace_period_passed(boefje_task) + if not grace_period_passed: + self.logger.debug( + "Task has not passed grace period: %s", boefje_task.hash, task_hash=boefje_task.hash, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, caller=caller, - exc_info=exc_grace_period, ) return - try: - is_stalled = self.has_boefje_task_stalled(boefje_task) - if is_stalled: - self.logger.debug( - "Task is stalled: %s", - boefje_task.hash, - task_hash=boefje_task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, - ) - - # Update task in datastore to be failed - task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(boefje_task.hash) - task_db.status = TaskStatus.FAILED - self.ctx.datastores.task_store.update_task(task_db) - except Exception as exc_stalled: - self.logger.warning( - "Could not check if task is stalled: %s", + is_stalled = self.has_boefje_task_stalled(boefje_task) + if is_stalled: + self.logger.debug( + "Task is stalled: %s", boefje_task.hash, - boefje_task_hash=boefje_task.hash, - organisation_id=self.organisation.id, + task_hash=boefje_task.hash, scheduler_id=self.scheduler_id, caller=caller, - exc_info=exc_stalled, ) - return - try: - is_running = self.has_boefje_task_started_running(boefje_task) - if is_running: - self.logger.debug( - "Task is still running: %s", - boefje_task.hash, - task_hash=boefje_task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, - ) - return - except Exception as exc_running: - self.logger.warning( - "Could not check if task is running: %s", + # Update task in datastore to be failed + task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(boefje_task.hash) 
+ task_db.status = models.TaskStatus.FAILED + self.ctx.datastores.task_store.update_task(task_db) + + is_running = self.has_boefje_task_started_running(boefje_task) + if is_running: + self.logger.debug( + "Task is still running: %s", boefje_task.hash, task_hash=boefje_task.hash, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, caller=caller, - exc_info=exc_running, ) return @@ -592,39 +403,24 @@ def push_boefje_task(self, boefje_task: BoefjeTask, create_schedule: bool = True "Task is already on queue: %s", boefje_task.hash, task_hash=boefje_task.hash, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, caller=caller, exc_info=True, ) return - latest_task = self.ctx.datastores.task_store.get_latest_task_by_hash(boefje_task.hash) - score = self.priority_ranker.rank(SimpleNamespace(latest_task=latest_task, task=boefje_task)) - - task = Task( + task = models.Task( id=boefje_task.id, scheduler_id=self.scheduler_id, + organisation=organisation_id, type=self.ITEM_TYPE.type, - priority=score, hash=boefje_task.hash, data=boefje_task.model_dump(), ) - try: - self.push_item_to_queue_with_timeout(item=task, max_tries=self.max_tries, create_schedule=create_schedule) - except QueueFullError: - self.logger.warning( - "Could not add task to queue, queue was full: %s", - boefje_task.hash, - task_hash=boefje_task.hash, - queue_qsize=self.queue.qsize(), - queue_maxsize=self.queue.maxsize, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, - ) - return + task.priority = self.ranker.rank(task) + + self.push_item_to_queue_with_timeout(item=task, max_tries=self.max_tries, create_schedule=create_schedule) self.logger.info( "Created boefje task", @@ -632,15 +428,15 @@ def push_boefje_task(self, boefje_task: BoefjeTask, create_schedule: bool = True task_hash=task.hash, boefje_id=boefje_task.boefje.id, ooi_primary_key=boefje_task.input_ooi, - organisation_id=self.organisation.id, 
scheduler_id=self.scheduler_id, + organisation_id=organisation_id, caller=caller, ) - def push_item_to_queue(self, item: Task, create_schedule: bool = True) -> Task: + def push_item_to_queue(self, item: models.Task, create_schedule: bool = True) -> models.Task: """Some boefje scheduler specific logic before pushing the item to the queue.""" - boefje_task = BoefjeTask.model_validate(item.data) + boefje_task = models.BoefjeTask.model_validate(item.data) # Check if id's are unique and correctly set. Same id's are necessary # for the task runner. @@ -652,8 +448,7 @@ def push_item_to_queue(self, item: Task, create_schedule: bool = True) -> Task: return super().push_item_to_queue(item=item, create_schedule=create_schedule) - @tracer.start_as_current_span("boefje_has_boefje_permission_to_run") - def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool: + def has_boefje_permission_to_run(self, boefje: models.Plugin, ooi: models.OOI) -> bool: """Checks whether a boefje is allowed to run on an ooi. 
Args: @@ -665,22 +460,14 @@ def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool: """ if boefje.enabled is False: self.logger.debug( - "Boefje: %s is disabled", - boefje.name, - boefje_id=boefje.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, + "Boefje: %s is disabled", boefje.name, boefje_id=boefje.id, scheduler_id=self.scheduler_id ) return False boefje_scan_level = boefje.scan_level if boefje_scan_level is None: self.logger.warning( - "No scan level found for boefje: %s", - boefje.id, - boefje_id=boefje.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, + "No scan level found for boefje: %s", boefje.id, boefje_id=boefje.id, scheduler_id=self.scheduler_id ) return False @@ -693,7 +480,6 @@ def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool: "No scan_profile found for ooi: %s", ooi.primary_key, ooi_primary_key=ooi.primary_key, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return False @@ -704,7 +490,6 @@ def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool: "No scan level found for ooi: %s", ooi.primary_key, ooi_primary_key=ooi.primary_key, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return False @@ -722,15 +507,13 @@ def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool: ooi_scan_level, boefje_id=boefje.id, ooi_primary_key=ooi.primary_key, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return False return True - @tracer.start_as_current_span("boefje_has_boefje_task_started_running") - def has_boefje_task_started_running(self, task: BoefjeTask) -> bool: + def has_boefje_task_started_running(self, task: models.BoefjeTask) -> bool: """Check if the same task is already running. Args: @@ -740,44 +523,17 @@ def has_boefje_task_started_running(self, task: BoefjeTask) -> bool: True if the task is still running, False otherwise. 
""" # Is task still running according to the datastore? - task_db = None - try: - task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) - except Exception as exc_db: - self.logger.error( - "Could not get latest task by hash: %s", - task.hash, - task_id=task.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc_db, - ) - raise exc_db - - if task_db is not None and task_db.status not in [TaskStatus.FAILED, TaskStatus.COMPLETED]: + task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) + if task_db is not None and task_db.status not in [models.TaskStatus.FAILED, models.TaskStatus.COMPLETED]: self.logger.debug( - "Task is still running, according to the datastore", - task_id=task_db.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, + "Task is still running, according to the datastore", task_id=task_db.id, scheduler_id=self.scheduler_id ) return True # Is task running according to bytes? 
- try: - task_bytes = self.ctx.services.bytes.get_last_run_boefje( - boefje_id=task.boefje.id, input_ooi=task.input_ooi, organization_id=task.organization - ) - except ExternalServiceError as exc: - self.logger.error( - "Failed to get last run boefje from bytes", - boefje_id=task.boefje.id, - input_ooi_primary_key=task.input_ooi, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc, - ) - raise exc + task_bytes = self.ctx.services.bytes.get_last_run_boefje( + boefje_id=task.boefje.id, input_ooi=task.input_ooi, organization_id=task.organization + ) # Task has been finished (failed, or succeeded) according to # the datastore, but we have no results of it in bytes, meaning @@ -786,7 +542,7 @@ def has_boefje_task_started_running(self, task: BoefjeTask) -> bool: if ( task_bytes is None and task_db is not None - and task_db.status in [TaskStatus.COMPLETED, TaskStatus.FAILED] + and task_db.status in [models.TaskStatus.COMPLETED, models.TaskStatus.FAILED] and ( task_db.modified_at is not None and task_db.modified_at @@ -798,24 +554,19 @@ def has_boefje_task_started_running(self, task: BoefjeTask) -> bool: "please review the bytes logs for more information regarding " "this error.", task_id=task_db.id, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) raise RuntimeError("Task has been finished, but no results found in bytes") if task_bytes is not None and task_bytes.ended_at is None and task_bytes.started_at is not None: self.logger.debug( - "Task is still running, according to bytes", - task_id=task_bytes.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, + "Task is still running, according to bytes", task_id=task_bytes.id, scheduler_id=self.scheduler_id ) return True return False - @tracer.start_as_current_span("boefje_is_task_stalled") - def has_boefje_task_stalled(self, task: BoefjeTask) -> bool: + def has_boefje_task_stalled(self, task: models.BoefjeTask) -> bool: """Check if the 
same task is stalled. Args: @@ -824,23 +575,10 @@ def has_boefje_task_stalled(self, task: BoefjeTask) -> bool: Returns: True if the task is stalled, False otherwise. """ - task_db = None - try: - task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) - except Exception as exc_db: - self.logger.warning( - "Could not get latest task by hash: %s", - task.hash, - task_hash=task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc_db, - ) - raise exc_db - + task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) if ( task_db is not None - and task_db.status == TaskStatus.DISPATCHED + and task_db.status == models.TaskStatus.DISPATCHED and ( task_db.modified_at is not None and datetime.now(timezone.utc) @@ -851,8 +589,7 @@ def has_boefje_task_stalled(self, task: BoefjeTask) -> bool: return False - @tracer.start_as_current_span("boefje_has_boefje_task_grace_period_passed") - def has_boefje_task_grace_period_passed(self, task: BoefjeTask) -> bool: + def has_boefje_task_grace_period_passed(self, task: models.BoefjeTask) -> bool: """Check if the grace period has passed for a task in both the datastore and bytes. @@ -866,24 +603,13 @@ def has_boefje_task_grace_period_passed(self, task: BoefjeTask) -> bool: True if the grace period has passed, False otherwise. """ # Does boefje have an interval specified? 
- plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id(task.boefje.id, self.organisation.id) + plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id(task.boefje.id, task.organization) if plugin is not None and plugin.interval is not None and plugin.interval > 0: timeout = timedelta(minutes=plugin.interval) else: timeout = timedelta(seconds=self.ctx.config.pq_grace_period) - try: - task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) - except Exception as exc_db: - self.logger.warning( - "Could not get latest task by hash: %s", - task.hash, - task_hash=task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc_db, - ) - raise exc_db + task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) # Has grace period passed according to datastore? if task_db is not None and datetime.now(timezone.utc) - task_db.modified_at < timeout: @@ -891,24 +617,13 @@ def has_boefje_task_grace_period_passed(self, task: BoefjeTask) -> bool: "Task has not passed grace period, according to the datastore", task_id=task_db.id, task_hash=task.hash, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return False - try: - task_bytes = self.ctx.services.bytes.get_last_run_boefje( - boefje_id=task.boefje.id, input_ooi=task.input_ooi, organization_id=task.organization - ) - except ExternalServiceError as exc_bytes: - self.logger.error( - "Failed to get last run boefje from bytes", - boefje_id=task.boefje.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc_bytes, - ) - raise exc_bytes + task_bytes = self.ctx.services.bytes.get_last_run_boefje( + boefje_id=task.boefje.id, input_ooi=task.input_ooi, organization_id=task.organization + ) # Did the grace period pass, according to bytes? 
if ( @@ -920,14 +635,13 @@ def has_boefje_task_grace_period_passed(self, task: BoefjeTask) -> bool: "Task has not passed grace period, according to bytes", task_id=task_bytes.id, task_hash=task.hash, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return False return True - def get_boefjes_for_ooi(self, ooi: OOI) -> list[Plugin]: + def get_boefjes_for_ooi(self, ooi: models.OOI, organisation: str) -> list[models.Plugin]: """Get available all boefjes (enabled and disabled) for an ooi. Args: @@ -936,24 +650,13 @@ def get_boefjes_for_ooi(self, ooi: OOI) -> list[Plugin]: Returns: A list of Plugin of type Boefje that can be run on the ooi. """ - try: - boefjes = self.ctx.services.katalogus.get_boefjes_by_type_and_org_id(ooi.object_type, self.organisation.id) - except ExternalServiceError: - self.logger.error( - "Could not get boefjes for object_type: %s", - ooi.object_type, - object_type=ooi.object_type, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - return [] + boefjes = self.ctx.services.katalogus.get_boefjes_by_type_and_org_id(ooi.object_type, organisation) if boefjes is None: self.logger.debug( "No boefjes found for type: %s", ooi.object_type, input_ooi_primary_key=ooi.primary_key, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return [] @@ -964,30 +667,52 @@ def get_boefjes_for_ooi(self, ooi: OOI) -> list[Plugin]: ooi, input_ooi_primary_key=ooi.primary_key, boefjes=[boefje.id for boefje in boefjes], - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return boefjes - def set_cron(self, item: Task) -> str | None: + def get_oois_for_boefje(self, boefje: models.Plugin, organisation: str) -> list[models.OOI]: + oois = [] + + oois_by_object_type = self.ctx.services.octopoes.get_objects_by_object_types( + organisation, + boefje.consumes, + list(range(boefje.scan_level, 5)), # type: ignore + ) + + # Filter OOIs based on permission + for ooi in oois_by_object_type: + 
if not self.has_boefje_permission_to_run(boefje, ooi): + self.logger.debug( + "Boefje not allowed to run on ooi", + boefje_id=boefje.id, + ooi_primary_key=ooi.primary_key, + scheduler_id=self.scheduler_id, + ) + continue + oois.append(ooi) + + return oois + + def set_cron(self, item: models.Task) -> str | None: """Override Schedule.set_cron() when a boefje specifies a schedule for execution (cron expression) we schedule for its execution""" # Does a boefje have a schedule defined? plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id( - utils.deep_get(item.data, ["boefje", "id"]), self.organisation.id + utils.deep_get(item.data, ["boefje", "id"]), item.organisation ) if plugin is None or plugin.cron is None: return super().set_cron(item) return plugin.cron - def calculate_deadline(self, task: Task) -> datetime: + def calculate_deadline(self, task: models.Task) -> datetime: """Override Scheduler.calculate_deadline() to calculate the deadline for a task and based on the boefje interval.""" # Does the boefje have an interval defined? 
plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id( - utils.deep_get(task.data, ["boefje", "id"]), self.organisation.id + utils.deep_get(task.data, ["boefje", "id"]), task.organisation ) if plugin is not None and plugin.interval is not None and plugin.interval > 0: return datetime.now(timezone.utc) + timedelta(minutes=plugin.interval) diff --git a/mula/scheduler/schedulers/schedulers/normalizer.py b/mula/scheduler/schedulers/schedulers/normalizer.py index d1dff6e7c56..ff0918ef991 100644 --- a/mula/scheduler/schedulers/schedulers/normalizer.py +++ b/mula/scheduler/schedulers/schedulers/normalizer.py @@ -1,62 +1,39 @@ import uuid -from collections.abc import Callable from concurrent import futures from types import SimpleNamespace -from typing import Any +from typing import Any, Literal -import structlog from opentelemetry import trace +from pydantic import ValidationError from scheduler import clients, context, models from scheduler.clients.errors import ExternalServiceError -from scheduler.models import Normalizer, NormalizerTask, Organisation, Plugin, RawDataReceivedEvent, Task, TaskStatus -from scheduler.schedulers import Scheduler -from scheduler.schedulers.queue import PriorityQueue, QueueFullError -from scheduler.schedulers.rankers import NormalizerRanker +from scheduler.schedulers import Scheduler, rankers +from scheduler.schedulers.errors import exception_handler tracer = trace.get_tracer(__name__) class NormalizerScheduler(Scheduler): - """A KAT specific implementation of a Normalizer scheduler. It extends - the `Scheduler` class by adding a `organisation` attribute. + """Scheduler implementation for the creation of NormalizerTask models. Attributes: - logger: A logger instance. - organisation: The organisation that this scheduler is for. + ranker: The ranker to calculate the priority of a task. 
""" - ITEM_TYPE: Any = NormalizerTask - - def __init__( - self, - ctx: context.AppContext, - scheduler_id: str, - organisation: Organisation, - queue: PriorityQueue | None = None, - callback: Callable[..., None] | None = None, - ): - self.logger: structlog.BoundLogger = structlog.getLogger(__name__) - self.organisation: Organisation = organisation - - self.queue = queue or PriorityQueue( - pq_id=scheduler_id, - maxsize=ctx.config.pq_maxsize, - item_type=self.ITEM_TYPE, - allow_priority_updates=True, - pq_store=ctx.datastores.pq_store, - ) + ID: Literal["normalizer"] = "normalizer" + TYPE: models.SchedulerType = models.SchedulerType.NORMALIZER + ITEM_TYPE: Any = models.NormalizerTask - super().__init__( - ctx=ctx, - queue=self.queue, - scheduler_id=scheduler_id, - callback=callback, - create_schedule=False, - auto_calculate_deadline=False, - ) + def __init__(self, ctx: context.AppContext): + """Initializes the NormalizerScheduler. - self.ranker = NormalizerRanker(ctx=self.ctx) + Args: + ctx (context.AppContext): Application context of shared data (e.g. + configuration, external services connections). + """ + super().__init__(ctx=ctx, scheduler_id=self.ID, create_schedule=False, auto_calculate_deadline=False) + self.ranker = rankers.NormalizerRanker(ctx=self.ctx) def run(self) -> None: """The run method is called when the scheduler is started. It will @@ -68,166 +45,113 @@ def run(self) -> None: for each normalizer that is registered for the mime type of the raw file. 
""" - listener = clients.RawData( + self.listeners["raw_data"] = clients.RawData( dsn=str(self.ctx.config.host_raw_data), - queue=f"{self.organisation.id}__raw_file_received", - func=self.push_tasks_for_received_raw_data, + queue="raw_file_received", + func=self.process_raw_data, prefetch_count=self.ctx.config.rabbitmq_prefetch_count, ) - self.listeners["raw_data"] = listener - - self.run_in_thread( - name=f"NormalizerScheduler-{self.scheduler_id}-raw_file", - target=self.listeners["raw_data"].listen, - loop=False, - ) + self.run_in_thread(name="NormalizerScheduler-raw_file", target=self.listeners["raw_data"].listen, loop=False) self.logger.info( - "Normalizer scheduler started for %s", - self.organisation.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - item_type=self.queue.item_type.__name__, + "Normalizer scheduler started", scheduler_id=self.scheduler_id, item_type=self.queue.item_type.__name__ ) - @tracer.start_as_current_span("normalizer_push_task_for_received_raw_data") - def push_tasks_for_received_raw_data(self, body: bytes) -> None: + @tracer.start_as_current_span("NormalizerScheduler.process_raw_data") + def process_raw_data(self, body: bytes) -> None: """Create tasks for the received raw data. Args: latest_raw_data: A `RawData` object that was received from the message queue. 
""" - # Convert body into a RawDataReceivedEvent - latest_raw_data = RawDataReceivedEvent.model_validate_json(body) - - self.logger.debug( - "Received raw data %s", - latest_raw_data.raw_data.id, - raw_data_id=latest_raw_data.raw_data.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) + try: + # Convert body into a RawDataReceivedEvent + latest_raw_data = models.RawDataReceivedEvent.model_validate_json(body) + self.logger.debug( + "Received raw data %s", + latest_raw_data.raw_data.id, + raw_data_id=latest_raw_data.raw_data.id, + scheduler_id=self.scheduler_id, + ) + except ValidationError: + self.logger.exception("Failed to validate raw data", scheduler_id=self.scheduler_id) + return # Check if the raw data doesn't contain an error mime-type, # we don't need to create normalizers when the raw data returned # an error. - for mime_type in latest_raw_data.raw_data.mime_types: - if mime_type.get("value", "").startswith("error/"): - self.logger.debug( - "Skipping raw data %s with error mime type", - latest_raw_data.raw_data.id, - mime_type=mime_type.get("value"), - raw_data_id=latest_raw_data.raw_data.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - return - - # Get all normalizers for the mime types of the raw data - normalizers: dict[str, Plugin] = {} - for mime_type in latest_raw_data.raw_data.mime_types: - normalizers_by_mime_type: list[Plugin] = self.get_normalizers_for_mime_type(mime_type.get("value")) - - for normalizer in normalizers_by_mime_type: - normalizers[normalizer.id] = normalizer - - if not normalizers: + if self.has_raw_data_errors(latest_raw_data.raw_data): self.logger.debug( - "No normalizers found for raw data %s", + "Skipping raw data %s with error mime type", latest_raw_data.raw_data.id, raw_data_id=latest_raw_data.raw_data.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, ) + return - with futures.ThreadPoolExecutor( - 
thread_name_prefix=f"NormalizerScheduler-TPE-{self.scheduler_id}-raw_data" - ) as executor: - for normalizer in normalizers.values(): - if not self.has_normalizer_permission_to_run(normalizer): - self.logger.debug( - "Normalizer is not allowed to run: %s", - normalizer.id, - normalizer_id=normalizer.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - continue - - normalizer_task = NormalizerTask( - normalizer=Normalizer.model_validate(normalizer.model_dump()), raw_data=latest_raw_data.raw_data - ) + # Get all unique normalizers for the mime types of the raw data + normalizers: dict[str, models.Plugin] = {} + for mime_type in latest_raw_data.raw_data.mime_types: + normalizers_by_mime_type = self.get_normalizers_for_mime_type( + mime_type.get("value"), latest_raw_data.organization + ) - executor.submit( - self.push_normalizer_task, normalizer_task, self.push_tasks_for_received_raw_data.__name__ - ) + self.logger.debug( + "Found normalizers for mime type", + mime_type=mime_type.get("value"), + normalizers=normalizers_by_mime_type, + ) - @tracer.start_as_current_span("normalizer_push_task") - def push_normalizer_task(self, normalizer_task: models.NormalizerTask, caller: str = "") -> None: - """Given a normalizer and raw data, create a task and push it to the - queue. + for normalizer in normalizers_by_mime_type: + normalizers[normalizer.id] = normalizer + + unique_normalizers = list(normalizers.values()) - Args: - normalizer: The normalizer to create a task for. - raw_data: The raw data to create a task for. - caller: The name of the function that called this function, used for logging. 
- """ self.logger.debug( - "Pushing normalizer task", - task_id=normalizer_task.id, - normalizer_id=normalizer_task.normalizer.id, - organisation_id=self.organisation.id, + "Found normalizers for raw data", + raw_data_id=latest_raw_data.raw_data.id, + mime_types=[mime_type.get("value") for mime_type in latest_raw_data.raw_data.mime_types], + normalizers=[normalizer.id for normalizer in unique_normalizers], scheduler_id=self.scheduler_id, - caller=caller, ) - try: - plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id( - normalizer_task.normalizer.id, self.organisation.id - ) - if not self.has_normalizer_permission_to_run(plugin): + # Create tasks for the normalizers + normalizer_tasks = [] + for normalizer in unique_normalizers: + if not self.has_normalizer_permission_to_run(normalizer): self.logger.debug( - "Task is not allowed to run: %s", - normalizer_task.id, - task_id=normalizer_task.id, - organisation_id=self.organisation.id, + "Normalizer is not allowed to run: %s", + normalizer.id, + normalizer_id=normalizer.id, scheduler_id=self.scheduler_id, - caller=caller, ) - return - except ExternalServiceError: - self.logger.warning( - "Could not get plugin by id: %s", - normalizer_task.normalizer.id, - task_id=normalizer_task.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, + continue + + normalizer_task = models.NormalizerTask( + normalizer=models.Normalizer.model_validate(normalizer.model_dump()), raw_data=latest_raw_data.raw_data ) - return - try: - if self.has_normalizer_task_started_running(normalizer_task): - self.logger.debug( - "Task is still running: %s", - normalizer_task.id, - task_id=normalizer_task.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, + normalizer_tasks.append(normalizer_task) + + with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-raw_data") as executor: + for normalizer_task in normalizer_tasks: + 
executor.submit( + self.push_normalizer_task, normalizer_task, latest_raw_data.organization, self.create_schedule ) - return - except Exception: - self.logger.warning( - "Could not check if task is running: %s", + + @exception_handler + @tracer.start_as_current_span("NormalizerScheduler.push_normalizer_task") + def push_normalizer_task( + self, normalizer_task: models.NormalizerTask, organisation_id: str, create_schedule: bool, caller: str = "" + ) -> None: + if self.has_normalizer_task_started_running(normalizer_task): + self.logger.debug( + "Task is still running: %s", normalizer_task.id, task_id=normalizer_task.id, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, caller=caller, - exc_info=True, ) return @@ -236,37 +160,23 @@ def push_normalizer_task(self, normalizer_task: models.NormalizerTask, caller: s "Task is already on queue: %s", normalizer_task.id, task_id=normalizer_task.id, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, caller=caller, ) return - score = self.ranker.rank(SimpleNamespace(raw_data=normalizer_task.raw_data, task=normalizer_task)) - - task = Task( + task = models.Task( id=normalizer_task.id, scheduler_id=self.scheduler_id, - type=self.ITEM_TYPE.type, - priority=score, + organisation=organisation_id, + type=normalizer_task.type, hash=normalizer_task.hash, data=normalizer_task.model_dump(), ) - try: - self.push_item_to_queue_with_timeout(item=task, max_tries=self.max_tries) - except QueueFullError: - self.logger.warning( - "Could not add task to queue, queue was full: %s", - task.id, - task_id=task.id, - queue_qsize=self.queue.qsize(), - queue_maxsize=self.queue.maxsize, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, - ) - return + task.priority = self.ranker.rank(SimpleNamespace(raw_data=normalizer_task.raw_data, task=normalizer_task)) + + self.push_item_to_queue_with_timeout(task, self.max_tries, create_schedule=create_schedule) self.logger.info( 
"Created normalizer task", @@ -274,15 +184,15 @@ def push_normalizer_task(self, normalizer_task: models.NormalizerTask, caller: s task_hash=task.hash, normalizer_id=normalizer_task.normalizer.id, raw_data_id=normalizer_task.raw_data.id, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, + organisation_id=organisation_id, caller=caller, ) - def push_item_to_queue(self, item: Task, create_schedule: bool = True) -> Task: + def push_item_to_queue(self, item: models.Task, create_schedule: bool = True) -> models.Task: """Some normalizer scheduler specific logic before pushing the item to the queue.""" - normalizer_task = NormalizerTask.model_validate(item.data) + normalizer_task = models.NormalizerTask.model_validate(item.data) # Check if id's are unique and correctly set. Same id's are necessary # for the task runner. @@ -294,8 +204,7 @@ def push_item_to_queue(self, item: Task, create_schedule: bool = True) -> Task: return super().push_item_to_queue(item=item, create_schedule=create_schedule) - @tracer.start_as_current_span("normalizer_has_normalizer_permission_to_run") - def has_normalizer_permission_to_run(self, normalizer: Plugin) -> bool: + def has_normalizer_permission_to_run(self, normalizer: models.Plugin) -> bool: """Check if the task is allowed to run. 
Args: @@ -306,18 +215,13 @@ def has_normalizer_permission_to_run(self, normalizer: Plugin) -> bool: """ if not normalizer.enabled: self.logger.debug( - "Normalizer: %s is disabled", - normalizer.id, - normalizer_id=normalizer.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, + "Normalizer: %s is disabled", normalizer.id, normalizer_id=normalizer.id, scheduler_id=self.scheduler_id ) return False return True - @tracer.start_as_current_span("normalizer_has_normalizer_task_started_running") - def has_normalizer_task_started_running(self, task: NormalizerTask) -> bool: + def has_normalizer_task_started_running(self, task: models.NormalizerTask) -> bool: """Check if the same task is already running. Args: @@ -328,33 +232,32 @@ def has_normalizer_task_started_running(self, task: NormalizerTask) -> bool: """ # Get the last tasks that have run or are running for the hash # of this particular NormalizerTask. - try: - task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) - except Exception as exc_db: - self.logger.error( - "Could not get latest task by hash: %s", - task.hash, - task_id=task.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc_db, - ) - raise exc_db + task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) # Is task still running according to the datastore? - if task_db is not None and task_db.status not in [TaskStatus.COMPLETED, TaskStatus.FAILED]: + if task_db is not None and task_db.status not in [models.TaskStatus.COMPLETED, models.TaskStatus.FAILED]: self.logger.debug( "Task is still running, according to the datastore", task_id=task_db.id, task_hash=task.hash, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return True return False - def get_normalizers_for_mime_type(self, mime_type: str) -> list[Plugin]: + def has_raw_data_errors(self, raw_data: models.RawData) -> bool: + """Check if the raw data contains errors. 
+ + Args: + raw_data: The raw data to check. + + Returns: + True if the raw data contains errors, False otherwise. + """ + return any(mime_type.get("value", "").startswith("error/") for mime_type in raw_data.mime_types) + + def get_normalizers_for_mime_type(self, mime_type: str, organisation: str) -> list[models.Plugin]: """Get available normalizers for a given mime type. Args: @@ -364,37 +267,17 @@ def get_normalizers_for_mime_type(self, mime_type: str) -> list[Plugin]: A list of Plugins of type normalizer for the given mime type. """ try: - normalizers = self.ctx.services.katalogus.get_normalizers_by_org_id_and_type( - self.organisation.id, mime_type - ) + normalizers = self.ctx.services.katalogus.get_normalizers_by_org_id_and_type(organisation, mime_type) except ExternalServiceError: - self.logger.warning( - "Could not get normalizers for mime_type: %s [mime_type=%s, organisation_id=%s, scheduler_id=%s]", - mime_type, - mime_type, - self.organisation.id, - self.scheduler_id, - ) - return [] - - if normalizers is None: - self.logger.debug( - "No normalizer found for mime_type: %s", + self.logger.error( + "Failed to get normalizers for mime type %s", mime_type, mime_type=mime_type, - organisation_id=self.organisation.id, scheduler_id=self.scheduler_id, ) return [] - self.logger.debug( - "Found %d normalizers for mime_type: %s", - len(normalizers), - mime_type, - mime_type=mime_type, - normalizers=[normalizer.id for normalizer in normalizers], - organisation_=self.organisation.id, - scheduler_id=self.scheduler_id, - ) + if normalizers is None: + return [] return normalizers diff --git a/mula/scheduler/schedulers/schedulers/report.py b/mula/scheduler/schedulers/schedulers/report.py index 05f4d5d3e43..3c58c1518e6 100644 --- a/mula/scheduler/schedulers/schedulers/report.py +++ b/mula/scheduler/schedulers/schedulers/report.py @@ -1,176 +1,97 @@ -from collections.abc import Callable from concurrent import futures from datetime import datetime, timezone -from typing 
import Any +from typing import Any, Literal -import structlog from opentelemetry import trace -from scheduler import context, storage -from scheduler.models import Organisation, ReportTask, Task, TaskStatus +from scheduler import context, models from scheduler.schedulers import Scheduler -from scheduler.schedulers.queue import PriorityQueue, QueueFullError +from scheduler.schedulers.errors import exception_handler from scheduler.storage import filters tracer = trace.get_tracer(__name__) class ReportScheduler(Scheduler): - ITEM_TYPE: Any = ReportTask - - def __init__( - self, - ctx: context.AppContext, - scheduler_id: str, - organisation: Organisation, - queue: PriorityQueue | None = None, - callback: Callable[..., None] | None = None, - ): - self.logger: structlog.BoundLogger = structlog.get_logger(__name__) - self.organisation = organisation - self.queue = queue or PriorityQueue( - pq_id=scheduler_id, - maxsize=ctx.config.pq_maxsize, - item_type=self.ITEM_TYPE, - allow_priority_updates=True, - pq_store=ctx.datastores.pq_store, - ) + """Scheduler implementation for the creation of ReportTask models.""" - super().__init__( - ctx=ctx, - queue=self.queue, - scheduler_id=scheduler_id, - callback=callback, - create_schedule=True, - auto_calculate_deadline=False, - ) + ID: Literal["report"] = "report" + TYPE: models.SchedulerType = models.SchedulerType.REPORT + ITEM_TYPE: Any = models.ReportTask + + def __init__(self, ctx: context.AppContext): + """Initializes the NormalizerScheduler. + + Args: + ctx (context.AppContext): Application context of shared data (e.g. + configuration, external services connections). + """ + super().__init__(ctx=ctx, scheduler_id=self.ID, create_schedule=True, auto_calculate_deadline=False) def run(self) -> None: + """The run method is called when the schedulers is started. It will + start the rescheduling process for the ReportTask models that are + scheduled. 
+ """ # Rescheduling - self.run_in_thread( - name=f"scheduler-{self.scheduler_id}-reschedule", target=self.push_tasks_for_rescheduling, interval=60.0 + self.run_in_thread(name="ReportScheduler-rescheduling", target=self.process_rescheduling, interval=60.0) + self.logger.info( + "Report scheduler started", scheduler_id=self.scheduler_id, item_type=self.queue.item_type.__name__ ) - @tracer.start_as_current_span(name="report_push_tasks_for_rescheduling") - def push_tasks_for_rescheduling(self): - if self.queue.full(): - self.logger.warning( - "Report queue is full, not populating with new tasks", - queue_qsize=self.queue.qsize(), - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, + @tracer.start_as_current_span(name="ReportScheduler.process_rescheduling") + def process_rescheduling(self): + schedules, _ = self.ctx.datastores.schedule_store.get_schedules( + filters=filters.FilterRequest( + filters=[ + filters.Filter(column="scheduler_id", operator="eq", value=self.scheduler_id), + filters.Filter(column="deadline_at", operator="lt", value=datetime.now(timezone.utc)), + filters.Filter(column="enabled", operator="eq", value=True), + ] ) - return - - try: - schedules, _ = self.ctx.datastores.schedule_store.get_schedules( - filters=filters.FilterRequest( - filters=[ - filters.Filter(column="scheduler_id", operator="eq", value=self.scheduler_id), - filters.Filter(column="deadline_at", operator="lt", value=datetime.now(timezone.utc)), - filters.Filter(column="enabled", operator="eq", value=True), - ] - ) - ) - except storage.errors.StorageError as exc_db: - self.logger.error( - "Could not get schedules for rescheduling %s", - self.scheduler_id, - scheduler_id=self.scheduler_id, - organisation_id=self.organisation.id, - exc_info=exc_db, - ) - raise exc_db - - with futures.ThreadPoolExecutor( - thread_name_prefix=f"ReportScheduler-TPE-{self.scheduler_id}-rescheduling" - ) as executor: - for schedule in schedules: - report_task = 
ReportTask.model_validate(schedule.data) - executor.submit(self.push_report_task, report_task, self.push_tasks_for_rescheduling.__name__) - - def push_report_task(self, report_task: ReportTask, caller: str = "") -> None: - self.logger.debug( - "Pushing report task", - task_hash=report_task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, ) - if self.has_report_task_started_running(report_task): - self.logger.debug( - "Report task already running", - task_hash=report_task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, - ) - return + # Create report tasks for the schedules + report_tasks = [] + for schedule in schedules: + report_task = models.ReportTask.model_validate(schedule.data) + report_tasks.append(report_task) + + with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-rescheduling") as executor: + for report_task in report_tasks: + executor.submit( + self.push_report_task, + report_task, + report_task.organisation_id, + self.create_schedule, + self.process_rescheduling.__name__, + ) + @exception_handler + @tracer.start_as_current_span("ReportScheduler.push_report_task") + def push_report_task( + self, report_task: models.ReportTask, organisation_id: str, create_schedule: bool, caller: str = "" + ) -> None: if self.is_item_on_queue_by_hash(report_task.hash): - self.logger.debug( - "Report task already on queue", - task_hash=report_task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, - ) + self.logger.debug("Report task already on queue", scheduler_id=self.scheduler_id, caller=caller) return - task = Task( + task = models.Task( scheduler_id=self.scheduler_id, + organisation=organisation_id, priority=int(datetime.now().timestamp()), type=self.ITEM_TYPE.type, hash=report_task.hash, data=report_task.model_dump(), ) - try: - self.push_item_to_queue_with_timeout(task, self.max_tries) - except 
QueueFullError: - self.logger.warning( - "Could not add task %s to queue, queue was full", - report_task.hash, - task_hash=report_task.hash, - queue_qsize=self.queue.qsize(), - queue_maxsize=self.queue.maxsize, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - caller=caller, - ) - return + self.push_item_to_queue_with_timeout(task, self.max_tries) self.logger.info( - "Report task pushed to queue", + "Created report task", task_id=task.id, - task_hash=report_task.hash, - organisation_id=self.organisation.id, + task_hash=task.hash, scheduler_id=self.scheduler_id, + organisation_id=organisation_id, caller=caller, ) - - def has_report_task_started_running(self, task: ReportTask) -> bool: - task_db = None - try: - task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash) - except storage.errors.StorageError as exc_db: - self.logger.error( - "Could not get latest task by hash %s", - task.hash, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - exc_info=exc_db, - ) - raise exc_db - - if task_db is not None and task_db.status not in [TaskStatus.FAILED, TaskStatus.COMPLETED]: - self.logger.debug( - "Task is still running, according to the datastore", - task_id=task_db.id, - organisation_id=self.organisation.id, - scheduler_id=self.scheduler_id, - ) - return True - - return False diff --git a/mula/scheduler/server/handlers/__init__.py b/mula/scheduler/server/handlers/__init__.py index 302806efaa3..2aea97fa01f 100644 --- a/mula/scheduler/server/handlers/__init__.py +++ b/mula/scheduler/server/handlers/__init__.py @@ -1,6 +1,5 @@ from .health import HealthAPI from .metrics import MetricsAPI -from .queues import QueueAPI from .root import RootAPI from .schedulers import SchedulerAPI from .schedules import ScheduleAPI diff --git a/mula/scheduler/server/handlers/queues.py b/mula/scheduler/server/handlers/queues.py deleted file mode 100644 index 461c897c5e9..00000000000 --- 
a/mula/scheduler/server/handlers/queues.py +++ /dev/null @@ -1,103 +0,0 @@ -from typing import Any - -import fastapi -import structlog -from fastapi import status - -from scheduler import context, models, schedulers, storage -from scheduler.schedulers.queue import NotAllowedError, QueueEmptyError, QueueFullError -from scheduler.server import serializers -from scheduler.server.errors import BadRequestError, ConflictError, NotFoundError, TooManyRequestsError - - -class QueueAPI: - def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]) -> None: - self.logger: structlog.BoundLogger = structlog.getLogger(__name__) - self.api: fastapi.FastAPI = api - self.ctx: context.AppContext = ctx - self.schedulers: dict[str, schedulers.Scheduler] = s - - self.api.add_api_route( - path="/queues", - endpoint=self.list, - methods=["GET"], - response_model=list[models.Queue], - response_model_exclude_unset=True, - status_code=status.HTTP_200_OK, - description="List all queues", - ) - - self.api.add_api_route( - path="/queues/{queue_id}", - endpoint=self.get, - methods=["GET"], - response_model=models.Queue, - status_code=status.HTTP_200_OK, - description="Get a queue", - ) - - self.api.add_api_route( - path="/queues/{queue_id}/pop", - endpoint=self.pop, - methods=["POST"], - response_model=models.Task | None, - status_code=status.HTTP_200_OK, - description="Pop an item from a queue", - ) - - self.api.add_api_route( - path="/queues/{queue_id}/push", - endpoint=self.push, - methods=["POST"], - response_model=models.Task | None, - status_code=status.HTTP_201_CREATED, - description="Push an item to a queue", - ) - - def list(self) -> Any: - return [models.Queue(**s.queue.dict(include_pq=False)) for s in self.schedulers.copy().values()] - - def get(self, queue_id: str) -> Any: - s = self.schedulers.get(queue_id) - if s is None: - raise NotFoundError(f"queue not found, by queue_id: {queue_id}") - - return models.Queue(**s.queue.dict()) - - def 
pop(self, queue_id: str, filters: storage.filters.FilterRequest | None = None) -> Any: - s = self.schedulers.get(queue_id) - if s is None: - raise NotFoundError(f"queue not found, by queue_id: {queue_id}") - - try: - item = s.pop_item_from_queue(filters) - except QueueEmptyError: - return None - - if item is None: - raise NotFoundError("could not pop item from queue, check your filters") - - return models.Task(**item.model_dump()) - - def push(self, queue_id: str, item_in: serializers.Task) -> Any: - s = self.schedulers.get(queue_id) - if s is None: - raise NotFoundError(f"queue not found, by queue_id: {queue_id}") - - # Load default values - new_item = models.Task(**item_in.model_dump(exclude_unset=True)) - - # Set values - if new_item.scheduler_id is None: - new_item.scheduler_id = s.scheduler_id - - try: - pushed_item = s.push_item_to_queue(new_item) - except ValueError: - raise BadRequestError("malformed item") - except QueueFullError: - raise TooManyRequestsError("queue is full") - except NotAllowedError: - raise ConflictError("queue is not allowed to push items") - - return pushed_item diff --git a/mula/scheduler/server/handlers/schedulers.py b/mula/scheduler/server/handlers/schedulers.py index 9358dcec45a..65ca2ac1c9a 100644 --- a/mula/scheduler/server/handlers/schedulers.py +++ b/mula/scheduler/server/handlers/schedulers.py @@ -4,12 +4,14 @@ import structlog from fastapi import status -from scheduler import context, models, schedulers -from scheduler.server.errors import BadRequestError, NotFoundError +from scheduler import context, models, schedulers, storage +from scheduler.schedulers.queue import NotAllowedError, QueueFullError +from scheduler.server import serializers, utils +from scheduler.server.errors import BadRequestError, ConflictError, NotFoundError, TooManyRequestsError class SchedulerAPI: - def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]) -> None: + def __init__(self, api: fastapi.FastAPI, 
ctx: context.AppContext, s: dict[str, schedulers.Scheduler]): self.logger: structlog.BoundLogger = structlog.getLogger(__name__) self.api: fastapi.FastAPI = api self.ctx: context.AppContext = ctx @@ -19,7 +21,7 @@ def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, s path="/schedulers", endpoint=self.list, methods=["GET"], - response_model=list[models.Scheduler], + response_model=list[serializers.Scheduler], status_code=status.HTTP_200_OK, description="List all schedulers", ) @@ -28,51 +30,80 @@ def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, s path="/schedulers/{scheduler_id}", endpoint=self.get, methods=["GET"], - response_model=models.Scheduler, + response_model=serializers.Scheduler, status_code=status.HTTP_200_OK, description="Get a scheduler", ) self.api.add_api_route( - path="/schedulers/{scheduler_id}", - endpoint=self.patch, - methods=["PATCH"], - response_model=models.Scheduler, + path="/schedulers/{scheduler_id}/push", + endpoint=self.push, + methods=["POST"], + response_model=models.Task, + status_code=status.HTTP_201_CREATED, + description="Push a task to a scheduler", + ) + + self.api.add_api_route( + path="/schedulers/{scheduler_id}/pop", + endpoint=self.pop, + methods=["POST"], + response_model=utils.PaginatedResponse, status_code=status.HTTP_200_OK, - description="Update a scheduler", + description="Pop a task from a scheduler", ) - def list(self) -> Any: - return [models.Scheduler(**s.dict()) for s in self.schedulers.values()] + def list(self) -> list[serializers.Scheduler]: + return [serializers.Scheduler(**s.dict()) for s in self.schedulers.values()] def get(self, scheduler_id: str) -> Any: s = self.schedulers.get(scheduler_id) if s is None: raise NotFoundError(f"Scheduler {scheduler_id} not found") - return models.Scheduler(**s.dict()) + return serializers.Scheduler(**s.dict()) + + def pop( + self, + request: fastapi.Request, + scheduler_id: str, + offset: int = 0, + limit: int = 100, + 
filters: storage.filters.FilterRequest | None = None, + ) -> utils.PaginatedResponse: + results, count = self.ctx.datastores.pq_store.pop( + scheduler_id=scheduler_id, offset=offset, limit=limit, filters=filters + ) + + # Update status for popped items + self.ctx.datastores.pq_store.bulk_update_status( + scheduler_id, [item.id for item in results], models.TaskStatus.DISPATCHED + ) + + return utils.paginate(request, results, count, offset, limit) - def patch(self, scheduler_id: str, item: models.Scheduler) -> Any: + def push(self, scheduler_id: str, item: serializers.TaskPush) -> Any: s = self.schedulers.get(scheduler_id) if s is None: raise NotFoundError(f"Scheduler {scheduler_id} not found") - stored_scheduler_model = models.Scheduler(**s.dict()) - patch_data = item.model_dump(exclude_unset=True) - if len(patch_data) == 0: - raise BadRequestError("no data to patch") + if item.scheduler_id is not None and item.scheduler_id != scheduler_id: + raise BadRequestError("scheduler_id in item does not match the scheduler_id in the path") - updated_scheduler = stored_scheduler_model.model_copy(update=patch_data) + # Set scheduler_id if not set + if item.scheduler_id is None: + item.scheduler_id = scheduler_id - # We update the patched attributes, since the schedulers are kept - # in memory. - for attr, value in patch_data.items(): - setattr(s, attr, value) + # Load default values + new_item = models.Task(**item.model_dump(exclude_unset=True)) - # Enable or disable the scheduler if needed. 
- if updated_scheduler.enabled: - s.enable() - elif not updated_scheduler.enabled: - s.disable() + try: + pushed_item = s.push_item_to_queue(new_item) + except ValueError: + raise BadRequestError("malformed item") + except QueueFullError: + raise TooManyRequestsError("queue is full") + except NotAllowedError: + raise ConflictError("queue is not allowed to push items") - return updated_scheduler + return pushed_item diff --git a/mula/scheduler/server/handlers/schedules.py b/mula/scheduler/server/handlers/schedules.py index 895a50c9b24..e67fa0f9bc6 100644 --- a/mula/scheduler/server/handlers/schedules.py +++ b/mula/scheduler/server/handlers/schedules.py @@ -12,13 +12,11 @@ class ScheduleAPI: - def __init__( - self, api: fastapi.FastAPI, ctx: context.AppContext, schedulers: dict[str, schedulers.Scheduler] - ) -> None: - self.logger: structlog.BoundLogger = structlog.get_logger(__name__) - self.api = api - self.ctx = ctx - self.schedulers = schedulers + def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]): + self.logger: structlog.BoundLogger = structlog.getLogger(__name__) + self.api: fastapi.FastAPI = api + self.ctx: context.AppContext = ctx + self.schedulers: dict[str, schedulers.Scheduler] = s self.api.add_api_route( path="/schedules", @@ -113,8 +111,8 @@ def create(self, schedule: serializers.ScheduleCreate) -> Any: try: new_schedule = models.Schedule(**schedule.model_dump()) - except ValueError: - raise ValidationError("validation error") + except ValueError as exc: + raise ValidationError(exc) s = self.schedulers.get(new_schedule.scheduler_id) if s is None: @@ -123,8 +121,8 @@ def create(self, schedule: serializers.ScheduleCreate) -> Any: # Validate data with task type of the scheduler try: instance = s.ITEM_TYPE.model_validate(new_schedule.data) - except ValueError: - raise BadRequestError("validation error") + except ValueError as exc: + raise BadRequestError(exc) # Create hash for schedule with task type 
new_schedule.hash = instance.hash diff --git a/mula/scheduler/server/handlers/tasks.py b/mula/scheduler/server/handlers/tasks.py index 46b6cc7469a..ac933085b8c 100644 --- a/mula/scheduler/server/handlers/tasks.py +++ b/mula/scheduler/server/handlers/tasks.py @@ -34,14 +34,6 @@ def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext) -> None: description="Get task status counts for all schedulers in last 24 hours", ) - self.api.add_api_route( - path="/tasks/stats/{scheduler_id}", - endpoint=self.stats, - methods=["GET"], - status_code=status.HTTP_200_OK, - description="Get task status counts for a scheduler in last 24 hours", - ) - self.api.add_api_route( path="/tasks/{task_id}", endpoint=self.get, @@ -163,5 +155,7 @@ def patch(self, task_id: uuid.UUID, item: serializers.Task) -> Any: return updated_task - def stats(self, scheduler_id: str | None = None) -> dict[str, dict[str, int]] | None: - return self.ctx.datastores.task_store.get_status_count_per_hour(scheduler_id) + def stats( + self, scheduler_id: str | None = None, organisation_id: str | None = None + ) -> dict[str, dict[str, int]] | None: + return self.ctx.datastores.task_store.get_status_count_per_hour(scheduler_id, organisation_id) diff --git a/mula/scheduler/server/serializers/__init__.py b/mula/scheduler/server/serializers/__init__.py index a4d3c0b20c4..ac706a15163 100644 --- a/mula/scheduler/server/serializers/__init__.py +++ b/mula/scheduler/server/serializers/__init__.py @@ -1,2 +1,3 @@ from .schedule import ScheduleCreate, SchedulePatch -from .task import Task, TaskStatus +from .scheduler import Scheduler +from .task import Task, TaskPush, TaskStatus diff --git a/mula/scheduler/server/serializers/schedule.py b/mula/scheduler/server/serializers/schedule.py index 5e3c0a0bbb9..e614b623f50 100644 --- a/mula/scheduler/server/serializers/schedule.py +++ b/mula/scheduler/server/serializers/schedule.py @@ -7,11 +7,9 @@ class ScheduleCreate(BaseModel): model_config = ConfigDict(from_attributes=True) 
scheduler_id: str - + organisation: str data: dict - schedule: str | None = None - deadline_at: datetime | None = None @@ -20,11 +18,7 @@ class SchedulePatch(BaseModel): model_config = ConfigDict(from_attributes=True) hash: str | None = Field(None, max_length=32) - data: dict | None = None - enabled: bool | None = None - schedule: str | None = None - deadline_at: datetime | None = None diff --git a/mula/scheduler/server/serializers/scheduler.py b/mula/scheduler/server/serializers/scheduler.py new file mode 100644 index 00000000000..f267e98909d --- /dev/null +++ b/mula/scheduler/server/serializers/scheduler.py @@ -0,0 +1,11 @@ +from datetime import datetime + +from pydantic import BaseModel + + +class Scheduler(BaseModel): + id: str + type: str + item_type: str + qsize: int = 0 + last_activity: datetime | None = None diff --git a/mula/scheduler/server/serializers/task.py b/mula/scheduler/server/serializers/task.py index 3a4e6fc3846..cc2aafbfdac 100644 --- a/mula/scheduler/server/serializers/task.py +++ b/mula/scheduler/server/serializers/task.py @@ -34,21 +34,20 @@ class Task(BaseModel): model_config = ConfigDict(from_attributes=True, use_enum_values=True) id: uuid.UUID | None = None - scheduler_id: str | None = None - schedule_id: uuid.UUID | None = None - + organisation: str | None = None priority: int | None = None - status: TaskStatus | None = None - type: str | None = None - hash: str | None = None - data: dict | None = None - created_at: datetime | None = None - modified_at: datetime | None = None + + +class TaskPush(BaseModel): + scheduler_id: str | None = None + organisation: str + priority: int | None = None + data: dict diff --git a/mula/scheduler/server/server.py b/mula/scheduler/server/server.py index b39cf1fca5c..2c08ebcc156 100644 --- a/mula/scheduler/server/server.py +++ b/mula/scheduler/server/server.py @@ -19,7 +19,7 @@ class Server: api: A fastapi.FastAPI object used for exposing API endpoints. 
""" - def __init__(self, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]): + def __init__(self, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]) -> None: """Initializer of the Server class. Args: @@ -45,7 +45,6 @@ def __init__(self, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]): # Set up API endpoints handlers.SchedulerAPI(self.api, self.ctx, s) - handlers.QueueAPI(self.api, self.ctx, s) handlers.ScheduleAPI(self.api, self.ctx, s) handlers.TaskAPI(self.api, self.ctx) handlers.MetricsAPI(self.api, self.ctx) diff --git a/mula/scheduler/storage/connection.py b/mula/scheduler/storage/connection.py index dc381191528..4787afe44bf 100644 --- a/mula/scheduler/storage/connection.py +++ b/mula/scheduler/storage/connection.py @@ -10,7 +10,7 @@ class DBConn: def __init__(self, dsn: str, pool_size: int = 25): - self.logger: structlog.BoundLogger = structlog.get_logger(__name__) + self.logger: structlog.BoundLogger = structlog.getLogger(__name__) self.dsn = dsn self.pool_size = pool_size diff --git a/mula/scheduler/storage/migrations/versions/0009_add_organisation.py b/mula/scheduler/storage/migrations/versions/0009_add_organisation.py new file mode 100644 index 00000000000..5136c2a4a15 --- /dev/null +++ b/mula/scheduler/storage/migrations/versions/0009_add_organisation.py @@ -0,0 +1,48 @@ +"""Add organisation column to schedules and tasks + +Revision ID: 0009 +Revises: 0008 +Create Date: 2024-12-10 15:21:27.445743 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "0009" +down_revision = "0008" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column("schedules", sa.Column("organisation", sa.String(), nullable=True)) + op.add_column("tasks", sa.Column("organisation", sa.String(), nullable=True)) + + conn = op.get_bind() + conn.execute( + sa.text( + """ +UPDATE schedules SET organisation = data->>'organization' WHERE data->>'organization' IS NOT NULL; +UPDATE schedules SET organisation = data->'raw_data'->'boefje_meta'->>'organization' WHERE data->'raw_data'->'boefje_meta'->>'organization' IS NOT NULL; +UPDATE schedules SET organisation = data->>'organisation_id' WHERE data->>'organisation_id' IS NOT NULL; + +UPDATE tasks SET organisation = data->>'organization' WHERE type = 'boefje'; +UPDATE tasks SET organisation = data->'raw_data'->'boefje_meta'->>'organization' WHERE type = 'normalizer'; +UPDATE tasks SET organisation = data->>'organisation_id' WHERE type = 'report'; +""" # noqa: E501 + ) + ) + + op.alter_column("schedules", "organisation", nullable=False) + op.alter_column("tasks", "organisation", nullable=False) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("tasks", "organisation") + op.drop_column("schedules", "organisation") + # ### end Alembic commands ### diff --git a/mula/scheduler/storage/storage.py b/mula/scheduler/storage/storage.py deleted file mode 100644 index 7fe2f8d1438..00000000000 --- a/mula/scheduler/storage/storage.py +++ /dev/null @@ -1,52 +0,0 @@ -import json -from functools import partial - -import sqlalchemy -import structlog - -from scheduler.config import settings - -from .errors import StorageError - - -class DBConn: - def __init__(self, dsn: str, pool_size: int = 25): - self.logger: structlog.BoundLogger = structlog.getLogger(__name__) - - self.dsn = dsn - self.pool_size = pool_size - - def connect(self) -> None: - db_uri_redacted = sqlalchemy.engine.make_url(name_or_url=self.dsn).render_as_string(hide_password=True) - - pool_size = settings.Settings().db_connection_pool_size - - self.logger.debug( - "Connecting to database %s with pool size %s...", - self.dsn, - pool_size, - dsn=db_uri_redacted, - pool_size=pool_size, - ) - - try: - serializer = partial(json.dumps, default=str) - self.engine = sqlalchemy.create_engine( - self.dsn, - pool_pre_ping=True, - pool_size=pool_size, - pool_recycle=300, - json_serializer=serializer, - connect_args={"options": "-c timezone=utc"}, - ) - except sqlalchemy.exc.SQLAlchemyError as e: - self.logger.error("Failed to connect to database %s: %s", self.dsn, e, dsn=db_uri_redacted) - raise StorageError("Failed to connect to database.") - - self.logger.debug("Connected to database %s.", db_uri_redacted, dsn=db_uri_redacted) - - try: - self.session = sqlalchemy.orm.sessionmaker(bind=self.engine) - except sqlalchemy.exc.SQLAlchemyError as e: - self.logger.error("Failed to create session: %s", e) - raise StorageError("Failed to create session.") diff --git a/mula/scheduler/storage/stores/pq.py b/mula/scheduler/storage/stores/pq.py index feb62bd01c7..b7c8951225c 100644 --- a/mula/scheduler/storage/stores/pq.py +++ 
b/mula/scheduler/storage/stores/pq.py @@ -1,8 +1,10 @@ from uuid import UUID +from sqlalchemy import exc + from scheduler import models from scheduler.storage import DBConn -from scheduler.storage.errors import exception_handler +from scheduler.storage.errors import StorageError, exception_handler from scheduler.storage.filters import FilterRequest, apply_filter from scheduler.storage.utils import retry @@ -15,25 +17,33 @@ def __init__(self, dbconn: DBConn) -> None: @retry() @exception_handler - def pop(self, scheduler_id: str, filters: FilterRequest | None = None) -> models.Task | None: + def pop( + self, scheduler_id: str | None = None, offset: int = 0, limit: int = 100, filters: FilterRequest | None = None + ) -> tuple[list[models.Task], int]: with self.dbconn.session.begin() as session: - query = ( - session.query(models.TaskDB) - .filter(models.TaskDB.status == models.TaskStatus.QUEUED) - .order_by(models.TaskDB.priority.asc()) - .order_by(models.TaskDB.created_at.asc()) - .filter(models.TaskDB.scheduler_id == scheduler_id) - ) + query = session.query(models.TaskDB).filter(models.TaskDB.status == models.TaskStatus.QUEUED) + + if scheduler_id is not None: + query = query.filter(models.TaskDB.scheduler_id == scheduler_id) if filters is not None: query = apply_filter(models.TaskDB, query, filters) - item_orm = query.first() + try: + count = query.count() + item_orm = ( + query.order_by(models.TaskDB.priority.asc()) + .order_by(models.TaskDB.created_at.asc()) + .offset(offset) + .limit(limit) + .all() + ) + except exc.ProgrammingError as e: + raise StorageError(f"Invalid filter: {e}") from e - if item_orm is None: - return None + items = [models.Task.model_validate(item_orm) for item_orm in item_orm] - return models.Task.model_validate(item_orm) + return items, count @retry() @exception_handler @@ -188,3 +198,14 @@ def clear(self, scheduler_id: str) -> None: .filter(models.TaskDB.scheduler_id == scheduler_id) .delete(), ) + + @retry() + @exception_handler + def 
bulk_update_status(self, scheduler_id: str, item_ids: list[UUID], status: models.TaskStatus) -> None: + with self.dbconn.session.begin() as session: + ( + session.query(models.TaskDB) + .filter(models.TaskDB.scheduler_id == scheduler_id) + .filter(models.TaskDB.id.in_([str(item_id) for item_id in item_ids])) + .update({"status": status.name}, synchronize_session=False), + ) diff --git a/mula/scheduler/storage/stores/task.py b/mula/scheduler/storage/stores/task.py index 437e10ca538..d2a83adcfe8 100644 --- a/mula/scheduler/storage/stores/task.py +++ b/mula/scheduler/storage/stores/task.py @@ -136,7 +136,9 @@ def cancel_tasks(self, scheduler_id: str, task_ids: list[str]) -> None: @retry() @exception_handler - def get_status_count_per_hour(self, scheduler_id: str | None = None) -> dict[str, dict[str, int]] | None: + def get_status_count_per_hour( + self, scheduler_id: str | None = None, organisation_id: str | None = None + ) -> dict[str, dict[str, int]] | None: with self.dbconn.session.begin() as session: query = ( session.query( @@ -152,6 +154,9 @@ def get_status_count_per_hour(self, scheduler_id: str | None = None) -> dict[str if scheduler_id is not None: query = query.filter(models.TaskDB.scheduler_id == scheduler_id) + if organisation_id is not None: + query = query.filter(models.TaskDB.organisation == organisation_id) + results = query.all() response: dict[str, dict[str, int]] = {} @@ -166,7 +171,9 @@ def get_status_count_per_hour(self, scheduler_id: str | None = None) -> dict[str @retry() @exception_handler - def get_status_counts(self, scheduler_id: str | None = None) -> dict[str, int] | None: + def get_status_counts( + self, scheduler_id: str | None = None, organisation_id: str | None = None + ) -> dict[str, int] | None: with self.dbconn.session.begin() as session: query = ( session.query(models.TaskDB.status, func.count(models.TaskDB.id).label("count")) @@ -177,6 +184,9 @@ def get_status_counts(self, scheduler_id: str | None = None) -> dict[str, int] | if 
scheduler_id is not None: query = query.filter(models.TaskDB.scheduler_id == scheduler_id) + if organisation_id is not None: + query = query.filter(models.TaskDB.organisation == organisation_id) + results = query.all() response = {k.value: 0 for k in models.TaskStatus} diff --git a/mula/scheduler/utils/dict_utils.py b/mula/scheduler/utils/dict_utils.py index af5bda651cf..6084ec058c9 100644 --- a/mula/scheduler/utils/dict_utils.py +++ b/mula/scheduler/utils/dict_utils.py @@ -34,6 +34,10 @@ def get(self, key: str, default: Any | None = None) -> Any: except KeyError: return default + def is_empty(self) -> bool: + with self.lock: + return len(self.cache) == 0 + def reset(self) -> None: with self.lock: self.cache.clear() diff --git a/mula/tests/integration/test_api.py b/mula/tests/integration/test_api.py index 6eaa82086c2..0c927d6a3dd 100644 --- a/mula/tests/integration/test_api.py +++ b/mula/tests/integration/test_api.py @@ -64,7 +64,7 @@ def tearDown(self): self.dbconn.engine.dispose() -class APITestCase(APITemplateTestCase): +class APISchedulerEndpointTestCase(APITemplateTestCase): def test_get_schedulers(self): response = self.client.get("/schedulers") self.assertEqual(response.status_code, 200) @@ -78,78 +78,12 @@ def test_get_scheduler_malformed_id(self): response = self.client.get("/schedulers/123.123") self.assertEqual(response.status_code, 404) - def test_patch_scheduler(self): - self.assertTrue(self.scheduler.is_enabled()) - response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"enabled": False}) - self.assertEqual(200, response.status_code) - self.assertFalse(response.json().get("enabled")) - self.assertFalse(self.scheduler.is_enabled()) - - def test_patch_scheduler_attr_not_found(self): - response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"not_found": "not found"}) - self.assertEqual(response.status_code, 400) - self.assertEqual(response.json(), {"detail": "Bad request error occurred: no data to 
patch"}) - - def test_patch_scheduler_not_found(self): - mock_id = uuid.uuid4() - response = self.client.patch(f"/schedulers/{mock_id}", json={"enabled": False}) - self.assertEqual(response.status_code, 404) - self.assertEqual(response.json(), {"detail": f"Resource not found: Scheduler {mock_id} not found"}) - - def test_patch_scheduler_disable(self): - self.assertTrue(self.scheduler.is_enabled()) - response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"enabled": False}) - self.assertEqual(200, response.status_code) - self.assertFalse(response.json().get("enabled")) - self.assertFalse(self.scheduler.is_enabled()) - - # Try to push to queue - item = create_task_in(0) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item) - self.assertNotEqual(response.status_code, 201) - self.assertEqual(0, self.scheduler.queue.qsize()) - - def test_patch_scheduler_enable(self): - # Disable queue first - self.assertTrue(self.scheduler.is_enabled()) - response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"enabled": False}) - self.assertEqual(200, response.status_code) - self.assertFalse(response.json().get("enabled")) - self.assertFalse(self.scheduler.is_enabled()) - - # Enable again - response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"enabled": True}) - self.assertEqual(200, response.status_code) - self.assertTrue(response.json().get("enabled")) - self.assertTrue(self.scheduler.is_enabled()) - - # Try to push to queue - self.assertEqual(0, self.scheduler.queue.qsize()) - item = create_task_in(1) - - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item) - self.assertEqual(response.status_code, 201) - self.assertEqual(1, self.scheduler.queue.qsize()) - - def test_get_queues(self): - response = self.client.get("/queues") - self.assertEqual(response.status_code, 200) - - def test_get_queue(self): - response = 
self.client.get(f"/queues/{self.scheduler.scheduler_id}") - self.assertEqual(response.status_code, 200) - self.assertEqual(response.json().get("id"), self.scheduler.scheduler_id) - - def test_get_queue_malformed_id(self): - response = self.client.get("/queues/123.123") - self.assertEqual(response.status_code, 404) - def test_push_queue(self): self.assertEqual(0, self.scheduler.queue.qsize()) - item = create_task_in(1) + item = create_task_in(1, self.organisation.id) - response_post = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item) + response_post = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=item) self.assertEqual(201, response_post.status_code) self.assertEqual(1, self.scheduler.queue.qsize()) self.assertIsNotNone(response_post.json().get("id")) @@ -166,23 +100,24 @@ def test_push_queue(self): def test_push_incorrect_item_type(self): response = self.client.post( - f"/queues/{self.scheduler.scheduler_id}/push", json={"priority": 0, "item": "not a task"} + f"/schedulers/{self.scheduler.scheduler_id}/push", json={"organisation": self.organisation.id, "data": {}} ) self.assertEqual(response.status_code, 400) + self.assertEqual(response.json(), {"detail": "Bad request error occurred: malformed item"}) def test_push_queue_full(self): # Set maxsize of the queue to 1 self.scheduler.queue.maxsize = 1 # Add one task to the queue - first_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item) + first_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item) self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) # Try to add another task to the queue through the api - second_item = create_task_in(2) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item) + second_item = create_task_in(2, 
self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item) self.assertEqual(response.status_code, 429) self.assertEqual(1, self.scheduler.queue.qsize()) @@ -191,14 +126,14 @@ def test_push_queue_full_high_priority(self): self.scheduler.queue.maxsize = 1 # Add one task to the queue - first_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item) + first_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item) self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) # Try to add another task to the queue through the api - second_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item) + second_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item) self.assertEqual(response.status_code, 201) self.assertEqual(2, self.scheduler.queue.qsize()) @@ -212,13 +147,13 @@ def test_push_replace_not_allowed(self): self.scheduler.queue.allow_priority_updates = False # Add one task to the queue - initial_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + initial_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) # Add the same item again through the api - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) # The queue should still have one item self.assertEqual(response.status_code, 409) @@ 
-230,13 +165,13 @@ def test_push_replace_allowed(self): self.scheduler.queue.allow_replace = True # Add one task to the queue - initial_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + initial_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) # Add the same item again through the api - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", json=response.json()) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", json=response.json()) # The queue should have one item self.assertEqual(response.status_code, 201) @@ -252,8 +187,8 @@ def test_push_updates_not_allowed(self): self.scheduler.queue.allow_priority_updates = False # Add one task to the queue - initial_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + initial_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) @@ -262,7 +197,9 @@ def test_push_updates_not_allowed(self): updated_item.data["name"] = "updated-name" # Try to update the item through the api - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()) + response = self.client.post( + f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json() + ) # The queue should still have one item self.assertEqual(response.status_code, 409) @@ -274,8 +211,8 @@ def test_push_updates_allowed(self): self.scheduler.queue.allow_updates = True # Add one task to the queue - initial_item = create_task_in(1) - response = 
self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + initial_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) @@ -284,7 +221,9 @@ def test_push_updates_allowed(self): updated_item.data["name"] = "updated-name" # Try to update the item through the api - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()) + response = self.client.post( + f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json() + ) self.assertEqual(response.status_code, 201) # The queue should have one item @@ -301,8 +240,8 @@ def test_push_priority_updates_not_allowed(self): self.scheduler.queue.allow_priority_updates = False # Add one task to the queue - initial_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + initial_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) @@ -311,7 +250,9 @@ def test_push_priority_updates_not_allowed(self): updated_item.priority = 2 # Try to update the item through the api - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()) + response = self.client.post( + f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json() + ) # The queue should still have one item self.assertEqual(response.status_code, 409) @@ -328,8 +269,8 @@ def test_update_priority_higher(self): self.scheduler.queue.allow_priority_updates = True # Add one task to the queue - initial_item = create_task_in(2) - response = 
self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + initial_item = create_task_in(2, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) self.assertEqual(response.status_code, 201) # Update priority of the item @@ -337,7 +278,9 @@ def test_update_priority_higher(self): updated_item.priority = 1 # Try to update the item through the api - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()) + response = self.client.post( + f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json() + ) self.assertEqual(response.status_code, 201) # The queue should have one item @@ -356,8 +299,8 @@ def test_update_priority_lower(self): self.scheduler.queue.allow_priority_updates = True # Add one task to the queue - initial_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + initial_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) self.assertEqual(response.status_code, 201) # Update priority of the item @@ -365,7 +308,9 @@ def test_update_priority_lower(self): updated_item.priority = 2 # Try to update the item through the api - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()) + response = self.client.post( + f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json() + ) self.assertEqual(response.status_code, 201) # The queue should have one item @@ -376,135 +321,227 @@ def test_update_priority_lower(self): def test_pop_queue(self): # Add one task to the queue - initial_item = create_task_in(1) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item) + initial_item = create_task_in(1, self.organisation.id) + 
response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item) initial_item_id = response.json().get("id") self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/pop") + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop") self.assertEqual(200, response.status_code) - self.assertEqual(initial_item_id, response.json().get("id")) + self.assertEqual(1, response.json().get("count")) + self.assertEqual(initial_item_id, response.json().get("results")[0].get("id")) self.assertEqual(0, self.scheduler.queue.qsize()) + # Status of the item should be DISPATCHED + get_item = self.client.get(f"/tasks/{initial_item_id}") + self.assertEqual(get_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower()) + + def test_pop_queue_multiple(self): + # Add one task to the queue + first_item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item) + first_item_id = response.json().get("id") + self.assertEqual(response.status_code, 201) + self.assertEqual(1, self.scheduler.queue.qsize()) + + # Add second item to the queue + second_item = create_task_in(2, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item) + second_item_id = response.json().get("id") + self.assertEqual(response.status_code, 201) + self.assertEqual(2, self.scheduler.queue.qsize()) + + # Should get two items, and queue should be empty + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop") + self.assertEqual(200, response.status_code) + self.assertEqual(2, response.json().get("count")) + self.assertEqual(first_item_id, response.json().get("results")[0].get("id")) + self.assertEqual(second_item_id, response.json().get("results")[1].get("id")) + self.assertEqual(0, 
self.scheduler.queue.qsize()) + + # Status of the items should be DISPATCHED + get_first_item = self.client.get(f"/tasks/{first_item_id}") + get_second_item = self.client.get(f"/tasks/{second_item_id}") + self.assertEqual(get_first_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower()) + self.assertEqual(get_second_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower()) + + def test_pop_queue_multiple_pagination(self): + # Add 10 tasks to the queue + for i in range(10): + item = create_task_in(1, self.organisation.id) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=item) + self.assertEqual(response.status_code, 201) + + # Should get 5 items, and queue should have 5 items + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop?limit=5") + self.assertEqual(200, response.status_code) + self.assertEqual(10, response.json().get("count")) + self.assertEqual(5, self.scheduler.queue.qsize()) + self.assertEqual(5, len(response.json().get("results"))) + + # Status of the items should be DISPATCHED + for item in response.json().get("results"): + get_item = self.client.get(f"/tasks/{item.get('id')}") + self.assertEqual(get_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower()) + + # Should get 5 items, and queue should be empty + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop?limit=5") + self.assertEqual(200, response.status_code) + self.assertEqual(5, response.json().get("count")) + self.assertEqual(0, self.scheduler.queue.qsize()) + + # Status of the items should be DISPATCHED + for item in response.json().get("results"): + get_item = self.client.get(f"/tasks/{item.get('id')}") + self.assertEqual(get_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower()) + def test_pop_queue_not_found(self): mock_id = uuid.uuid4() - response = self.client.post(f"/queues/{mock_id}/pop") - self.assertEqual(404, response.status_code) - 
self.assertEqual({"detail": f"Resource not found: queue not found, by queue_id: {mock_id}"}, response.json()) + response = self.client.post(f"/schedulers/{mock_id}/pop") + self.assertEqual(200, response.status_code) + self.assertEqual(0, response.json().get("count")) - def test_pop_queue_filters(self): + def test_pop_queue_filters_two_items(self): # Add one task to the queue - first_item = create_task_in(1, data=functions.TestModel(id="123", name="test")) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item) + first_item = create_task_in(1, self.organisation.id, data=functions.TestModel(id="123", name="test")) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item) first_item_id = response.json().get("id") self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) # Add second item to the queue - second_item = create_task_in(2, data=functions.TestModel(id="456", name="test")) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item) + second_item = create_task_in(2, self.organisation.id, data=functions.TestModel(id="456", name="test")) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item) second_item_id = response.json().get("id") self.assertEqual(response.status_code, 201) self.assertEqual(2, self.scheduler.queue.qsize()) - # Should get the first item + # Should get two items, and queue should be empty response = self.client.post( - f"/queues/{self.scheduler.scheduler_id}/pop", + f"/schedulers/{self.scheduler.scheduler_id}/pop", json={"filters": [{"column": "data", "field": "name", "operator": "eq", "value": "test"}]}, ) self.assertEqual(200, response.status_code) - self.assertEqual(first_item_id, response.json().get("id")) + self.assertEqual(2, response.json().get("count")) + self.assertEqual(first_item_id, response.json().get("results")[0].get("id")) + 
self.assertEqual(second_item_id, response.json().get("results")[1].get("id")) + self.assertEqual(0, self.scheduler.queue.qsize()) + + # Status of the items should be DISPATCHED + get_first_item = self.client.get(f"/tasks/{first_item_id}") + get_second_item = self.client.get(f"/tasks/{second_item_id}") + self.assertEqual(get_first_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower()) + self.assertEqual(get_second_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower()) + + def test_pop_queue_filters_one_item(self): + # Add one task to the queue + first_item = create_task_in(1, self.organisation.id, data=functions.TestModel(id="123", name="test")) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item) + first_item_id = response.json().get("id") + self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) - # Should not return any items + # Add second item to the queue + second_item = create_task_in(2, self.organisation.id, data=functions.TestModel(id="456", name="test")) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item) + second_item_id = response.json().get("id") + self.assertEqual(response.status_code, 201) + self.assertEqual(2, self.scheduler.queue.qsize()) + + # Should get the first item, and should still be an item on the queue response = self.client.post( - f"/queues/{self.scheduler.scheduler_id}/pop", + f"/schedulers/{self.scheduler.scheduler_id}/pop", json={"filters": [{"column": "data", "field": "id", "operator": "eq", "value": "123"}]}, ) - self.assertEqual(404, response.status_code) - self.assertEqual( - response.json(), {"detail": "Resource not found: could not pop item from queue, check your filters"} - ) + self.assertEqual(200, response.status_code) + self.assertEqual(1, response.json().get("count")) + self.assertEqual(first_item_id, response.json().get("results")[0].get("id")) self.assertEqual(1, 
self.scheduler.queue.qsize()) - # Should get the second item + # Should get the second item, and should be no items on the queue response = self.client.post( - f"/queues/{self.scheduler.scheduler_id}/pop", - json={"filters": [{"column": "data", "field": "name", "operator": "eq", "value": "test"}]}, + f"/schedulers/{self.scheduler.scheduler_id}/pop", + json={"filters": [{"column": "data", "field": "id", "operator": "eq", "value": "456"}]}, ) self.assertEqual(200, response.status_code) - self.assertEqual(second_item_id, response.json().get("id")) + self.assertEqual(1, response.json().get("count")) + self.assertEqual(second_item_id, response.json().get("results")[0].get("id")) self.assertEqual(0, self.scheduler.queue.qsize()) def test_pop_queue_filters_nested(self): # Add one task to the queue - first_item = create_task_in(1, data=functions.TestModel(id="123", name="test", categories=["foo", "bar"])) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item) + first_item = create_task_in( + 1, self.organisation.id, data=functions.TestModel(id="123", name="test", categories=["foo", "bar"]) + ) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item) first_item_id = response.json().get("id") self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) # Add second item to the queue - second_item = create_task_in(2, data=functions.TestModel(id="456", name="test", categories=["baz", "bat"])) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item) + second_item = create_task_in( + 2, self.organisation.id, data=functions.TestModel(id="456", name="test", categories=["baz", "bat"]) + ) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item) second_item_id = response.json().get("id") self.assertEqual(response.status_code, 201) self.assertEqual(2, self.scheduler.queue.qsize()) # Should get 
the first item response = self.client.post( - f"/queues/{self.scheduler.scheduler_id}/pop", + f"/schedulers/{self.scheduler.scheduler_id}/pop", json={ "filters": [{"column": "data", "operator": "@>", "value": json.dumps({"categories": ["foo", "bar"]})}] }, ) self.assertEqual(200, response.status_code) - self.assertEqual(first_item_id, response.json().get("id")) + self.assertEqual(first_item_id, response.json().get("results")[0].get("id")) self.assertEqual(1, self.scheduler.queue.qsize()) # Should not return any items response = self.client.post( - f"/queues/{self.scheduler.scheduler_id}/pop", + f"/schedulers/{self.scheduler.scheduler_id}/pop", json={ "filters": [{"column": "data", "operator": "@>", "value": json.dumps({"categories": ["foo", "bar"]})}] }, ) - - self.assertEqual(404, response.status_code) - self.assertEqual( - response.json(), {"detail": "Resource not found: could not pop item from queue, check your filters"} - ) + self.assertEqual(200, response.status_code) + self.assertEqual(0, response.json().get("count")) self.assertEqual(1, self.scheduler.queue.qsize()) # Should get the second item response = self.client.post( - f"/queues/{self.scheduler.scheduler_id}/pop", + f"/schedulers/{self.scheduler.scheduler_id}/pop", json={ "filters": [{"column": "data", "operator": "@>", "value": json.dumps({"categories": ["baz", "bat"]})}] }, ) self.assertEqual(200, response.status_code) - self.assertEqual(second_item_id, response.json().get("id")) + self.assertEqual(second_item_id, response.json().get("results")[0].get("id")) self.assertEqual(0, self.scheduler.queue.qsize()) def test_pop_queue_filters_nested_contained_by(self): # Add one task to the queue - first_item = create_task_in(1, data=functions.TestModel(id="123", name="test", categories=["foo", "bar"])) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item) + first_item = create_task_in( + 1, self.organisation.id, data=functions.TestModel(id="123", name="test", 
categories=["foo", "bar"]) + ) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item) self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) # Add second item to the queue - second_item = create_task_in(2, data=functions.TestModel(id="456", name="test", categories=["baz", "bat"])) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item) + second_item = create_task_in( + 2, self.organisation.id, data=functions.TestModel(id="456", name="test", categories=["baz", "bat"]) + ) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item) second_item_id = response.json().get("id") self.assertEqual(response.status_code, 201) self.assertEqual(2, self.scheduler.queue.qsize()) # Test contained by response = self.client.post( - f"/queues/{self.scheduler.scheduler_id}/pop", + f"/schedulers/{self.scheduler.scheduler_id}/pop", json={ "filters": [ {"column": "data", "operator": "<@", "field": "categories", "value": json.dumps(["baz", "bat"])} @@ -513,13 +550,14 @@ def test_pop_queue_filters_nested_contained_by(self): ) self.assertEqual(200, response.status_code) - self.assertEqual(second_item_id, response.json().get("id")) + self.assertEqual(second_item_id, response.json().get("results")[0].get("id")) self.assertEqual(1, self.scheduler.queue.qsize()) def test_pop_empty(self): """When queue is empty it should return an empty response""" - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/pop") + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop") self.assertEqual(200, response.status_code) + self.assertEqual(0, response.json().get("count")) class APITasksEndpointTestCase(APITemplateTestCase): @@ -529,9 +567,10 @@ def setUp(self): # Add one task to the queue first_item = create_task_in( 1, + self.organisation.id, data=functions.TestModel(id="123", name="test", 
child=functions.TestModel(id="123.123", name="test.child")), ) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item) initial_item_id = response.json().get("id") self.assertEqual(response.status_code, 201) self.assertEqual(1, self.scheduler.queue.qsize()) @@ -539,8 +578,8 @@ def setUp(self): self.first_item_api = self.client.get(f"/tasks/{initial_item_id}").json() # Add second item to the queue - second_item = create_task_in(1, data=functions.TestModel(id="456", name="test")) - response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item) + second_item = create_task_in(1, self.organisation.id, data=functions.TestModel(id="456", name="test")) + response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item) second_item_id = response.json().get("id") self.assertEqual(response.status_code, 201) self.assertEqual(2, self.scheduler.queue.qsize()) @@ -548,8 +587,8 @@ def setUp(self): self.second_item_api = self.client.get(f"/tasks/{second_item_id}").json() def test_create_task(self): - item = create_task_in(1) - response_post = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item) + item = create_task_in(1, self.organisation.id) + response_post = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=item) self.assertEqual(201, response_post.status_code) initial_item_id = response_post.json().get("id") @@ -574,9 +613,9 @@ def test_get_tasks(self): def test_get_task(self): # First add a task - item = create_task_in(1) + item = create_task_in(1, self.organisation.id) - response_post = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item) + response_post = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=item) self.assertEqual(201, response_post.status_code) initial_item_id = 
response_post.json().get("id") @@ -732,7 +771,10 @@ def test_get_tasks_stats(self): response = self.client.get("/tasks/stats") self.assertEqual(200, response.status_code) - response = self.client.get(f"/tasks/stats/{self.first_item_api.get('scheduler_id')}") + response = self.client.get(f"/tasks/stats?scheduler_id={self.first_item_api.get('scheduler_id')}") + self.assertEqual(200, response.status_code) + + response = self.client.get(f"/tasks/stats?organisation_id={self.first_item_api.get('organisation_id')}") self.assertEqual(200, response.status_code) @@ -740,20 +782,22 @@ class APIScheduleEndpointTestCase(APITemplateTestCase): def setUp(self): super().setUp() - self.first_item = functions.create_item(self.scheduler.scheduler_id, 1) + self.first_item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) self.first_schedule = self.mock_ctx.datastores.schedule_store.create_schedule( models.Schedule( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, hash=self.first_item.hash, data=self.first_item.data, deadline_at=datetime.now(timezone.utc) + timedelta(days=1), ) ) - self.second_item = functions.create_item(self.scheduler.scheduler_id, 1) + self.second_item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) self.second_schedule = self.mock_ctx.datastores.schedule_store.create_schedule( models.Schedule( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, hash=self.second_item.hash, data=self.second_item.data, deadline_at=datetime.now(timezone.utc) + timedelta(days=2), @@ -886,9 +930,15 @@ def test_list_schedules_min_and_max_created_at(self): self.assertEqual(str(self.first_schedule.id), response.json()["results"][0]["id"]) def test_post_schedule(self): - item = functions.create_item(self.scheduler.scheduler_id, 1) + item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) response = self.client.post( - "/schedules", json={"scheduler_id": 
item.scheduler_id, "schedule": "*/5 * * * *", "data": item.data} + "/schedules", + json={ + "scheduler_id": item.scheduler_id, + "organisation": self.organisation.id, + "schedule": "*/5 * * * *", + "data": item.data, + }, ) self.assertEqual(201, response.status_code) self.assertEqual(item.hash, response.json().get("hash")) @@ -904,10 +954,16 @@ def test_post_schedule(self): def test_post_schedule_explicit_deadline_at(self): """When a schedule is created, the deadline_at should be set if it is provided.""" - item = functions.create_item(self.scheduler.scheduler_id, 1) + item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) now = datetime.now(timezone.utc) response = self.client.post( - "/schedules", json={"scheduler_id": item.scheduler_id, "data": item.data, "deadline_at": now.isoformat()} + "/schedules", + json={ + "scheduler_id": item.scheduler_id, + "organisation": self.organisation.id, + "data": item.data, + "deadline_at": now.isoformat(), + }, ) self.assertEqual(201, response.status_code) self.assertIsNone(response.json().get("schedule")) @@ -920,54 +976,92 @@ def test_post_schedule_explicit_deadline_at(self): def test_post_schedule_schedule_and_deadline_at_none(self): """When a schedule is created, both schedule and deadline_at should not be None.""" - item = functions.create_item(self.scheduler.scheduler_id, 1) - response = self.client.post("/schedules", json={"scheduler_id": item.scheduler_id, "data": item.data}) + item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) + response = self.client.post( + "/schedules", + json={"scheduler_id": item.scheduler_id, "organisation": self.organisation.id, "data": item.data}, + ) self.assertEqual(400, response.status_code) self.assertEqual( {"detail": "Bad request error occurred: Either deadline_at or schedule must be provided"}, response.json() ) def test_post_schedule_invalid_schedule(self): - item = functions.create_item(self.scheduler.scheduler_id, 1) + item = 
functions.create_task(self.scheduler.scheduler_id, self.organisation.id) response = self.client.post( - "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "invalid", "data": item.data} + "/schedules", + json={ + "scheduler_id": item.scheduler_id, + "organisation": self.organisation.id, + "schedule": "invalid", + "data": item.data, + }, ) self.assertEqual(400, response.status_code) self.assertIn("validation error", response.json().get("detail")) def test_post_schedule_invalid_scheduler_id(self): - item = functions.create_item(self.scheduler.scheduler_id, 1) + item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) response = self.client.post( - "/schedules", json={"scheduler_id": "invalid", "schedule": "*/5 * * * *", "data": item.data} + "/schedules", + json={ + "scheduler_id": "invalid", + "organisation": self.organisation.id, + "schedule": "*/5 * * * *", + "data": item.data, + }, ) self.assertEqual(400, response.status_code) self.assertEqual({"detail": "Bad request error occurred: Scheduler invalid not found"}, response.json()) def test_post_schedule_invalid_data(self): - item = functions.create_item(self.scheduler.scheduler_id, 1) + item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) response = self.client.post( - "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": "invalid"} + "/schedules", + json={ + "scheduler_id": item.scheduler_id, + "organisation": self.organisation.id, + "schedule": "*/5 * * * *", + "data": "invalid", + }, ) self.assertEqual(422, response.status_code) def test_post_schedule_invalid_data_type(self): - item = functions.create_item(self.scheduler.scheduler_id, 1) + item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) response = self.client.post( "/schedules", - json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": {"invalid": "invalid"}}, + json={ + "scheduler_id": item.scheduler_id, + 
"organisation": self.organisation.id, + "schedule": "*/5 * * * *", + "data": {"invalid": "invalid"}, + }, ) self.assertEqual(400, response.status_code) self.assertIn("validation error", response.json().get("detail")) def test_post_schedule_hash_already_exists(self): - item = functions.create_item(self.scheduler.scheduler_id, 1) + item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id) response = self.client.post( - "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": item.data} + "/schedules", + json={ + "scheduler_id": item.scheduler_id, + "organisation": self.organisation.id, + "schedule": "*/5 * * * *", + "data": item.data, + }, ) self.assertEqual(201, response.status_code) response = self.client.post( - "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": item.data} + "/schedules", + json={ + "scheduler_id": item.scheduler_id, + "organisation": self.organisation.id, + "schedule": "*/5 * * * *", + "data": item.data, + }, ) self.assertEqual(409, response.status_code) self.assertIn("schedule with the same hash already exists", response.json().get("detail")) diff --git a/mula/tests/integration/test_app.py b/mula/tests/integration/test_app.py index b75f0576883..aa8add3bb5a 100644 --- a/mula/tests/integration/test_app.py +++ b/mula/tests/integration/test_app.py @@ -40,105 +40,15 @@ def tearDown(self): models.Base.metadata.drop_all(self.dbconn.engine) self.dbconn.engine.dispose() - def test_monitor_orgs_add(self): - """Test that when a new organisation is added, a new scheduler is created""" - # Arrange - self.mock_ctx.services.katalogus.organisations = { - "org-1": OrganisationFactory(id="org-1"), - "org-2": OrganisationFactory(id="org-2"), - } - - # Act - self.app.monitor_organisations() - - # Assert: six schedulers should have been created for two organisations - self.assertEqual(6, len(self.app.schedulers.keys())) - self.assertEqual(6, 
len(self.app.server.schedulers.keys())) - - scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()} - self.assertEqual({"org-1", "org-2"}, scheduler_org_ids) - - def test_monitor_orgs_remove(self): - """Test that when an organisation is removed, the scheduler is removed""" - # Arrange - self.mock_ctx.services.katalogus.organisations = { - "org-1": OrganisationFactory(id="org-1"), - "org-2": OrganisationFactory(id="org-2"), - } - - # Act - self.app.monitor_organisations() - - # Assert: six schedulers should have been created for two organisations - self.assertEqual(6, len(self.app.schedulers.keys())) - self.assertEqual(6, len(self.app.server.schedulers.keys())) - - scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()} - self.assertEqual({"org-1", "org-2"}, scheduler_org_ids) - - # Arrange - self.mock_ctx.services.katalogus.organisations = {} - - # Act - self.app.monitor_organisations() - - # Assert - self.assertEqual(0, len(self.app.schedulers.keys())) - self.assertEqual(0, len(self.app.server.schedulers.keys())) - - scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()} - self.assertEqual(set(), scheduler_org_ids) - - def test_monitor_orgs_add_and_remove(self): - """Test that when an organisation is added and removed, the scheduler - is removed""" - # Arrange - self.mock_ctx.services.katalogus.organisations = { - "org-1": OrganisationFactory(id="org-1"), - "org-2": OrganisationFactory(id="org-2"), - } - - # Act - self.app.monitor_organisations() - - # Assert: six schedulers should have been created for two organisations - self.assertEqual(6, len(self.app.schedulers.keys())) - self.assertEqual(6, len(self.app.server.schedulers.keys())) - - scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()} - self.assertEqual({"org-1", "org-2"}, scheduler_org_ids) - - # Arrange - self.mock_ctx.services.katalogus.organisations = { - "org-1": OrganisationFactory(id="org-1"), - "org-3": 
OrganisationFactory(id="org-3"), - } - - # Act - self.app.monitor_organisations() - - # Assert - self.assertEqual(6, len(self.app.schedulers.keys())) - self.assertEqual(6, len(self.app.server.schedulers.keys())) - - scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()} - self.assertEqual({"org-1", "org-3"}, scheduler_org_ids) - def test_shutdown(self): """Test that the app shuts down gracefully""" # Arrange self.mock_ctx.services.katalogus.organisations = {"org-1": OrganisationFactory(id="org-1")} - self.app.start_schedulers() - self.app.start_monitors() # Shutdown the app self.app.shutdown() - # Assert that the schedulers have been stopped - for s in self.app.schedulers.copy().values(): - self.assertFalse(s.is_alive()) - # Assert that all threads have been stopped # for thread in self.app.threads: for t in threading.enumerate(): diff --git a/mula/tests/integration/test_boefje_scheduler.py b/mula/tests/integration/test_boefje_scheduler.py index 9fcc9585ce6..b6411a3eea4 100644 --- a/mula/tests/integration/test_boefje_scheduler.py +++ b/mula/tests/integration/test_boefje_scheduler.py @@ -56,10 +56,10 @@ def setUp(self): ) # Scheduler + self.scheduler = schedulers.BoefjeScheduler(self.mock_ctx) + + # Organisation self.organisation = OrganisationFactory() - self.scheduler = schedulers.BoefjeScheduler( - ctx=self.mock_ctx, scheduler_id=self.organisation.id, organisation=self.organisation - ) def tearDown(self): self.scheduler.stop() @@ -88,6 +88,21 @@ def setUp(self): def tearDown(self): mock.patch.stopall() + def test_run(self): + """When the scheduler is started, the run method should be called. + And the scheduler should start the threads. 
+ """ + # Act + self.scheduler.run() + + # Assert: threads started + thread_ids = ["BoefjeScheduler-mutations", "BoefjeScheduler-new_boefjes", "BoefjeScheduler-rescheduling"] + for thread in self.scheduler.threads: + self.assertIn(thread.name, thread_ids) + self.assertTrue(thread.is_alive()) + + self.scheduler.stop() + def test_is_allowed_to_run(self): # Arrange scan_profile = ScanProfileFactory(level=0) @@ -156,7 +171,9 @@ def test_has_boefje_task_started_running_datastore_running(self): boefje = BoefjeFactory() boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id) - task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task) + task = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id + ) # Mock self.mock_get_latest_task_by_hash.return_value = task @@ -180,6 +197,7 @@ def test_has_boefje_task_started_running_datastore_not_running(self): task_db_first = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, status=models.TaskStatus.COMPLETED, type=models.BoefjeTask.type, @@ -191,6 +209,7 @@ def test_has_boefje_task_started_running_datastore_not_running(self): task_db_second = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, type=models.BoefjeTask.type, hash=boefje_task.hash, @@ -294,11 +313,12 @@ def test_has_boefje_task_started_running_stalled_before_grace_period(self): task_db = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, + status=models.TaskStatus.DISPATCHED, type=models.BoefjeTask.type, hash=boefje_task.hash, data=boefje_task.model_dump(), - status=models.TaskStatus.DISPATCHED, created_at=datetime.now(timezone.utc), modified_at=datetime.now(timezone.utc), ) @@ -321,6 +341,7 @@ def test_has_boefje_task_started_running_stalled_after_grace_period(self): 
task_db = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, status=models.TaskStatus.DISPATCHED, type=models.BoefjeTask.type, @@ -351,6 +372,7 @@ def test_has_boefje_task_started_running_mismatch_before_grace_period(self): task_db = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, status=models.TaskStatus.COMPLETED, type=models.BoefjeTask.type, @@ -384,6 +406,7 @@ def test_has_boefje_task_started_running_mismatch_after_grace_period(self): task_db = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, status=models.TaskStatus.COMPLETED, type=models.BoefjeTask.type, @@ -412,6 +435,7 @@ def test_has_boefje_task_grace_period_passed_datastore_passed(self): task_db = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, status=models.TaskStatus.COMPLETED, type=models.BoefjeTask.type, @@ -443,6 +467,7 @@ def test_has_boefje_task_grace_period_passed_datastore_not_passed(self): task_db = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, status=models.TaskStatus.COMPLETED, type=models.BoefjeTask.type, @@ -472,6 +497,7 @@ def test_has_boefje_task_grace_period_passed_bytes_passed(self): task_db = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, status=models.TaskStatus.COMPLETED, type=models.BoefjeTask.type, @@ -507,6 +533,7 @@ def test_has_boefje_task_grace_period_passed_bytes_not_passed(self): task_db = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, status=models.TaskStatus.COMPLETED, type=models.BoefjeTask.type, @@ -531,7 +558,7 @@ def test_has_boefje_task_grace_period_passed_bytes_not_passed(self): # Assert self.assertFalse(has_passed) - def test_push_task(self): + def test_push_boefje_task(self): # 
Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) @@ -549,12 +576,12 @@ def test_push_task(self): self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type]) # Act - self.scheduler.push_boefje_task(boefje_task) + self.scheduler.push_boefje_task(boefje_task, self.organisation.id) # Assert self.assertEqual(1, self.scheduler.queue.qsize()) - def test_push_task_no_ooi(self): + def test_push_boefje_task_no_ooi(self): # Arrange boefje = BoefjeFactory() @@ -568,7 +595,7 @@ def test_push_task_no_ooi(self): self.mock_get_plugin.return_value = PluginFactory(scan_level=0) # Act - self.scheduler.push_boefje_task(boefje_task) + self.scheduler.push_boefje_task(boefje_task, self.organisation.id) # Assert self.assertEqual(1, self.scheduler.queue.qsize()) @@ -578,7 +605,7 @@ def test_push_task_no_ooi(self): @mock.patch("scheduler.schedulers.BoefjeScheduler.has_boefje_task_grace_period_passed") @mock.patch("scheduler.schedulers.BoefjeScheduler.is_item_on_queue_by_hash") @mock.patch("scheduler.context.AppContext.datastores.task_store.get_latest_task_by_hash") - def test_push_task_queue_full( + def test_push_boefje_task_queue_full( self, mock_get_latest_task_by_hash, mock_is_item_on_queue_by_hash, @@ -610,15 +637,15 @@ def test_push_task_queue_full( self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type]) # Act - self.scheduler.push_boefje_task(boefje_task) + self.scheduler.push_boefje_task(boefje_task, self.organisation.id) # Assert self.assertEqual(1, self.scheduler.queue.qsize()) with capture_logs() as cm: - self.scheduler.push_boefje_task(boefje_task) + self.scheduler.push_boefje_task(boefje_task, self.organisation.id) - self.assertIn("Could not add task to queue, queue was full", cm[-1].get("event")) + self.assertIn("Queue is full", cm[-1].get("event")) self.assertEqual(1, self.scheduler.queue.qsize()) 
@mock.patch("scheduler.schedulers.BoefjeScheduler.has_boefje_task_stalled") @@ -627,7 +654,7 @@ def test_push_task_queue_full( @mock.patch("scheduler.schedulers.BoefjeScheduler.has_boefje_task_grace_period_passed") @mock.patch("scheduler.schedulers.BoefjeScheduler.is_item_on_queue_by_hash") @mock.patch("scheduler.context.AppContext.datastores.task_store.get_tasks_by_hash") - def test_push_task_stalled( + def test_push_boefje_task_stalled( self, mock_get_tasks_by_hash, mock_is_item_on_queue_by_hash, @@ -646,6 +673,7 @@ def test_push_task_stalled( task = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, type=models.BoefjeTask.type, hash=boefje_task.hash, @@ -654,13 +682,11 @@ def test_push_task_stalled( modified_at=datetime.now(timezone.utc), ) - item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task) - # Mocks self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type]) # Act - self.scheduler.push_item_to_queue(item) + self.scheduler.push_item_to_queue(task) # Assert: task should be on priority queue task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data) @@ -669,16 +695,16 @@ def test_push_task_stalled( self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id) # Assert: task should be in datastore, and queued - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) + task_db = self.mock_ctx.datastores.task_store.get_task(task.id) + self.assertEqual(task_db.id, task.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) # Act self.scheduler.pop_item_from_queue() # Assert: task should be in datastore, and dispatched - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) + task_db = self.mock_ctx.datastores.task_store.get_task(task.id) + self.assertEqual(task_db.id, task.id) self.assertEqual(task_db.status, models.TaskStatus.DISPATCHED) # 
Mocks @@ -691,11 +717,11 @@ def test_push_task_stalled( mock_get_tasks_by_hash.return_value = None # Act - self.scheduler.push_boefje_task(boefje_task) + self.scheduler.push_boefje_task(boefje_task, self.organisation.id) # Assert: task should be in datastore, and failed - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) + task_db = self.mock_ctx.datastores.task_store.get_task(task.id) + self.assertEqual(task_db.id, task.id) self.assertEqual(task_db.status, models.TaskStatus.FAILED) # Assert: new task should be queued @@ -715,6 +741,7 @@ def test_post_push(self): task = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, type=models.BoefjeTask.type, hash=boefje_task.hash, @@ -723,12 +750,10 @@ def test_post_push(self): modified_at=datetime.now(timezone.utc), ) - item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task) - self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type]) # Act - self.scheduler.push_item_to_queue(item) + self.scheduler.push_item_to_queue(task) # Task should be on priority queue task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data) @@ -737,8 +762,8 @@ def test_post_push(self): self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id) # Task should be in datastore, and queued - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) + task_db = self.mock_ctx.datastores.task_store.get_task(task.id) + self.assertEqual(task_db.id, task.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) # Schedule should be in datastore @@ -764,6 +789,7 @@ def test_post_push_boefje_cron(self): task = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, type=models.BoefjeTask.type, hash=boefje_task.hash, @@ -772,12 +798,10 @@ def test_post_push_boefje_cron(self): 
modified_at=datetime.now(timezone.utc), ) - item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task) - self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type], cron=cron) # Act - self.scheduler.push_item_to_queue(item) + self.scheduler.push_item_to_queue(task) # Task should be on priority queue task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data) @@ -786,8 +810,8 @@ def test_post_push_boefje_cron(self): self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id) # Task should be in datastore, and queued - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) + task_db = self.mock_ctx.datastores.task_store.get_task(task.id) + self.assertEqual(task_db.id, task.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) # Schedule should be in datastore @@ -819,6 +843,7 @@ def test_post_push_boefje_interval(self): task = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, type=models.BoefjeTask.type, hash=boefje_task.hash, @@ -827,12 +852,10 @@ def test_post_push_boefje_interval(self): modified_at=datetime.now(timezone.utc), ) - item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task) - self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type], interval=1500) # Act - self.scheduler.push_item_to_queue(item) + self.scheduler.push_item_to_queue(task) # Task should be on priority queue task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data) @@ -841,8 +864,8 @@ def test_post_push_boefje_interval(self): self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id) # Task should be in datastore, and queued - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) + task_db = self.mock_ctx.datastores.task_store.get_task(task.id) + self.assertEqual(task_db.id, task.id) 
self.assertEqual(task_db.status, models.TaskStatus.QUEUED) # Schedule should be in datastore @@ -871,6 +894,7 @@ def test_post_pop(self): task = models.Task( scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, priority=1, type=models.BoefjeTask.type, hash=boefje_task.hash, @@ -879,13 +903,11 @@ def test_post_pop(self): modified_at=datetime.now(timezone.utc), ) - item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task) - # Mocks self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type]) # Act - self.scheduler.push_item_to_queue(item) + self.scheduler.push_item_to_queue(task) # Assert: task should be on priority queue task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data) @@ -894,109 +916,18 @@ def test_post_pop(self): self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id) # Assert: task should be in datastore, and queued - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) + task_db = self.mock_ctx.datastores.task_store.get_task(task.id) + self.assertEqual(task_db.id, task.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) # Act self.scheduler.pop_item_from_queue() # Assert: task should be in datastore, and queued - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) + task_db = self.mock_ctx.datastores.task_store.get_task(task.id) + self.assertEqual(task_db.id, task.id) self.assertEqual(task_db.status, models.TaskStatus.DISPATCHED) - def test_disable_scheduler(self): - # Arrange: start scheduler - self.scheduler.run() - - # Arrange: add tasks - scan_profile = ScanProfileFactory(level=0) - ooi = OOIFactory(scan_profile=scan_profile) - boefje_task = models.BoefjeTask( - boefje=BoefjeFactory(), input_ooi=ooi.primary_key, organization=self.organisation.id - ) - - # Mocks - self.mock_get_plugin.return_value = PluginFactory(scan_level=0, 
consumes=[ooi.object_type]) - - # Act - task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task) - - item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task) - self.scheduler.push_item_to_queue(item) - - # Assert: task should be on priority queue - pq_item = self.scheduler.queue.peek(0) - self.assertEqual(1, self.scheduler.queue.qsize()) - self.assertEqual(pq_item.id, item.id) - - # Assert: task should be in datastore, and queued - task_db = self.mock_ctx.datastores.task_store.get_task(item.id) - self.assertEqual(task_db.id, item.id) - self.assertEqual(task_db.status, models.TaskStatus.QUEUED) - - # Assert: listeners should be running - self.assertGreater(len(self.scheduler.listeners), 0) - - # Assert: threads should be running - self.assertGreater(len(self.scheduler.threads), 0) - - # Act - self.scheduler.disable() - - # Listeners should be stopped - self.assertEqual(0, len(self.scheduler.listeners)) - - # Threads should be stopped - self.assertEqual(0, len(self.scheduler.threads)) - - # Queue should be empty - self.assertEqual(0, self.scheduler.queue.qsize()) - - # All tasks on queue should be set to CANCELLED - tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id) - for task in tasks: - self.assertEqual(task.status, models.TaskStatus.CANCELLED) - - # Scheduler should be disabled - self.assertFalse(self.scheduler.is_enabled()) - - self.scheduler.stop() - - def test_enable_scheduler(self): - self.scheduler.run() - - # Assert: listeners should be running - self.assertGreater(len(self.scheduler.listeners), 0) - - # Assert: threads should be running - self.assertGreater(len(self.scheduler.threads), 0) - - # Disable scheduler first - self.scheduler.disable() - - # Listeners should be stopped - self.assertEqual(0, len(self.scheduler.listeners)) - - # Threads should be stopped - self.assertEqual(0, len(self.scheduler.threads)) - - # Queue should be empty - self.assertEqual(0, 
self.scheduler.queue.qsize()) - - # Re-enable scheduler - self.scheduler.enable() - - # Threads should be started - self.assertGreater(len(self.scheduler.threads), 0) - - # Scheduler should be enabled - self.assertTrue(self.scheduler.is_enabled()) - - # Stop the scheduler - self.scheduler.stop() - def test_has_boefje_permission_to_run(self): # Arrange scan_profile = ScanProfileFactory(level=0) @@ -1083,21 +1014,20 @@ def setUp(self): def tearDown(self): mock.patch.stopall() - def test_push_tasks_for_scan_profile_mutations(self): + def test_process_mutations(self): """Scan level change""" # Arrange - scan_profile = ScanProfileFactory(level=0) - ooi = OOIFactory(scan_profile=scan_profile) + ooi = OOIFactory(scan_profile=ScanProfileFactory(level=0)) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type]) mutation = models.ScanProfileMutation( - operation="create", primary_key=ooi.primary_key, value=ooi + operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Task should be on priority queue item = self.scheduler.queue.peek(0) @@ -1111,43 +1041,45 @@ def test_push_tasks_for_scan_profile_mutations(self): self.assertEqual(task_db.id, item.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) - def test_push_tasks_for_scan_profile_mutations_value_empty(self): + def test_process_mutations_value_empty(self): """When the value of a mutation is empty it should not push any tasks""" # Arrange - mutation = models.ScanProfileMutation(operation="create", primary_key="123", value=None).model_dump_json() + mutation = models.ScanProfileMutation( + operation="create", primary_key="123", value=None, client_id=self.organisation.id + ).model_dump_json() # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + 
self.scheduler.process_mutations(mutation) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_scan_profile_mutations_no_boefjes_found(self): + def test_process_mutations_no_boefjes_found(self): """When no plugins are found for boefjes, it should return no boefje tasks""" # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) mutation = models.ScanProfileMutation( - operation="create", primary_key=ooi.primary_key, value=ooi + operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_scan_profile_mutations_not_allowed_to_run(self): + def test_process_mutations_not_allowed_to_run(self): """When a boefje is not allowed to run, it should not be added to the queue""" # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type]) mutation = models.ScanProfileMutation( - operation="create", primary_key=ooi.primary_key, value=ooi + operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id ).model_dump_json() # Mocks @@ -1155,19 +1087,19 @@ def test_push_tasks_for_scan_profile_mutations_not_allowed_to_run(self): self.mock_has_boefje_permission_to_run.return_value = False # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_scan_profile_mutations_still_running(self): + def test_process_mutations_still_running(self): """When a boefje 
is still running, it should not be added to the queue""" # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type]) mutation = models.ScanProfileMutation( - operation="create", primary_key=ooi.primary_key, value=ooi + operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id ).model_dump_json() # Mocks @@ -1175,30 +1107,31 @@ def test_push_tasks_for_scan_profile_mutations_still_running(self): self.mock_has_boefje_task_started_running.return_value = True # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_scan_profile_mutations_item_on_queue(self): + def test_process_mutations_item_on_queue(self): """When a boefje is already on the queue, it should not be added to the queue""" # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type]) + mutation1 = models.ScanProfileMutation( - operation="create", primary_key=ooi.primary_key, value=ooi + operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id ).model_dump_json() mutation2 = models.ScanProfileMutation( - operation="create", primary_key=ooi.primary_key, value=ooi + operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation1) - self.scheduler.push_tasks_for_scan_profile_mutations(mutation2) + self.scheduler.process_mutations(mutation1) + self.scheduler.process_mutations(mutation2) # Task should be on priority queue (only one) task_pq = self.scheduler.queue.peek(0) @@ -1211,7 +1144,7 @@ def 
test_push_tasks_for_scan_profile_mutations_item_on_queue(self): task_db = self.mock_ctx.datastores.task_store.get_task(task_pq.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) - def test_push_tasks_for_scan_profile_mutations_delete(self): + def test_process_mutations_delete(self): """When an OOI is deleted it should not create tasks""" # Arrange scan_profile = ScanProfileFactory(level=0) @@ -1219,19 +1152,22 @@ def test_push_tasks_for_scan_profile_mutations_delete(self): boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type]) mutation1 = models.ScanProfileMutation( - operation=models.MutationOperationType.DELETE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.DELETE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation1) + self.scheduler.process_mutations(mutation1) # Assert self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_scan_profile_mutations_delete_on_queue(self): + def test_process_mutations_delete_on_queue(self): """When an OOI is deleted, and tasks associated with that ooi should be removed from the queue """ @@ -1241,14 +1177,17 @@ def test_push_tasks_for_scan_profile_mutations_delete_on_queue(self): boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type]) mutation1 = models.ScanProfileMutation( - operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.CREATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation1) + self.scheduler.process_mutations(mutation1) # Assert: task should be on priority queue item = self.scheduler.queue.peek(0) @@ -1259,11 
+1198,14 @@ def test_push_tasks_for_scan_profile_mutations_delete_on_queue(self): # Arrange mutation2 = models.ScanProfileMutation( - operation=models.MutationOperationType.DELETE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.DELETE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation2) + self.scheduler.process_mutations(mutation2) # Assert self.assertIsNone(self.scheduler.queue.peek(0)) @@ -1274,7 +1216,7 @@ def test_push_tasks_for_scan_profile_mutations_delete_on_queue(self): task_db = self.mock_ctx.datastores.task_store.get_task(item.id) self.assertEqual(task_db.status, models.TaskStatus.CANCELLED) - def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create(self): + def test_process_mutations_op_create_run_on_create(self): """When a boefje has the run_on contains the setting create, and we receive a create mutation, it should: @@ -1286,14 +1228,17 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create(self): ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.CREATE]) mutation = models.ScanProfileMutation( - operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.CREATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Assert: task should be on priority queue item = self.scheduler.queue.peek(0) @@ -1311,7 +1256,7 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create(self): schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule_by_hash(task_db.hash) self.assertIsNone(schedule_db) - def 
test_push_tasks_for_scan_profile_mutations_op_create_run_on_create_update(self): + def test_process_mutations_op_create_run_on_create_update(self): """When a boefje has the run_on contains the setting create,update, and we receive a create mutation, it should: @@ -1323,14 +1268,17 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create_update(se ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.CREATE, RunOn.UPDATE]) mutation = models.ScanProfileMutation( - operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.CREATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Assert: task should be on priority queue item = self.scheduler.queue.peek(0) @@ -1348,7 +1296,7 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create_update(se schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule_by_hash(task_db.hash) self.assertIsNone(schedule_db) - def test_push_tasks_for_scan_profile_mutations_op_create_run_on_update(self): + def test_process_mutations_op_create_run_on_update(self): """When a boefje has the run_on contains the setting update, and we receive a create mutation, it should: @@ -1360,19 +1308,22 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_update(self): ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.UPDATE]) mutation = models.ScanProfileMutation( - operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.CREATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, 
).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Assert: task should NOT be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_scan_profile_mutations_op_create_run_on_none(self): + def test_process_mutations_op_create_run_on_none(self): """When a boefje has the run_on is empty, and we receive a create mutation, it should: @@ -1384,7 +1335,10 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_none(self): ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=None) mutation = models.ScanProfileMutation( - operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.CREATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks @@ -1392,7 +1346,7 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_none(self): self.mock_set_cron.return_value = "0 0 * * *" # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Assert: task should be on priority queue item = self.scheduler.queue.peek(0) @@ -1410,7 +1364,7 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_none(self): schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule(task_db.schedule_id) self.assertIsNotNone(schedule_db) - def test_push_tasks_for_scan_profile_mutations_op_update_run_on_create(self): + def test_process_mutations_op_update_run_on_create(self): """When a boefje has the run_on contains the setting create, and we receive an update mutation, it should: @@ -1422,19 +1376,22 @@ def test_push_tasks_for_scan_profile_mutations_op_update_run_on_create(self): ooi = OOIFactory(scan_profile=scan_profile) boefje = 
PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.CREATE]) mutation = models.ScanProfileMutation( - operation=models.MutationOperationType.UPDATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.UPDATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Assert: task should NOT be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_scan_profile_mutations_op_update_run_on_create_update(self): + def test_process_mutations_op_update_run_on_create_update(self): """When a boefje has the run_on contains the setting create,update, and we receive an update mutation, it should: @@ -1446,14 +1403,17 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_create_update(self): ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.CREATE, RunOn.UPDATE]) mutation = models.ScanProfileMutation( - operation=models.MutationOperationType.UPDATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.UPDATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Assert: task should be on priority queue item = self.scheduler.queue.peek(0) @@ -1471,7 +1431,7 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_create_update(self): schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule_by_hash(task_db.hash) self.assertIsNone(schedule_db) - def test_push_tasks_scan_profile_mutations_op_update_run_on_update(self): + def 
test_process_mutations_op_update_run_on_update(self): """When a boefje has the run_on contains the setting update, and we receive an update mutation, it should: @@ -1483,14 +1443,17 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_update(self): ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.UPDATE]) mutation = models.ScanProfileMutation( - operation=models.MutationOperationType.UPDATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.UPDATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks self.mock_get_boefjes_for_ooi.return_value = [boefje] # Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Assert: task should be on priority queue item = self.scheduler.queue.peek(0) @@ -1508,7 +1471,7 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_update(self): schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule_by_hash(task_db.hash) self.assertIsNone(schedule_db) - def test_push_tasks_scan_profile_mutations_op_update_run_on_none(self): + def test_process_mutations_op_update_run_on_none(self): """When a boefje has the run_on is empty, and we receive an update mutation, it should: @@ -1520,7 +1483,10 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_none(self): ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=None) mutation = models.ScanProfileMutation( - operation=models.MutationOperationType.UPDATE, primary_key=ooi.primary_key, value=ooi + operation=models.MutationOperationType.UPDATE, + primary_key=ooi.primary_key, + value=ooi, + client_id=self.organisation.id, ).model_dump_json() # Mocks @@ -1528,7 +1494,7 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_none(self): self.mock_set_cron.return_value = "0 0 * * *" # 
Act - self.scheduler.push_tasks_for_scan_profile_mutations(mutation) + self.scheduler.process_mutations(mutation) # Assert: task should be on priority queue item = self.scheduler.queue.peek(0) @@ -1571,21 +1537,26 @@ def setUp(self): "scheduler.context.AppContext.services.octopoes.get_objects_by_object_types" ).start() + self.mock_get_organisations = mock.patch( + "scheduler.context.AppContext.services.katalogus.get_organisations" + ).start() + def tearDown(self): mock.patch.stopall() - def test_push_tasks_for_new_boefjes(self): + def test_process_new_boefjes(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type]) # Mocks + self.mock_get_organisations.return_value = [self.organisation] self.mock_get_objects_by_object_types.return_value = [ooi] self.mock_get_new_boefjes_by_org_id.return_value = [boefje] # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Task should be on priority queue task_pq = self.scheduler.queue.peek(0) @@ -1599,7 +1570,7 @@ def test_push_tasks_for_new_boefjes(self): self.assertEqual(task_db.id, task_pq.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) - def test_push_tasks_for_new_boefjes_request_exception(self): + def test_process_new_boefjes_request_exception(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) @@ -1613,13 +1584,13 @@ def test_push_tasks_for_new_boefjes_request_exception(self): self.mock_get_new_boefjes_by_org_id.return_value = [boefje] # Act - self.scheduler.push_tasks_for_new_boefjes() - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() + self.scheduler.process_new_boefjes() # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_new_boefjes_no_new_boefjes(self): + def test_process_new_boefjes_no_new_boefjes(self): # 
Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) @@ -1629,12 +1600,12 @@ def test_push_tasks_for_new_boefjes_no_new_boefjes(self): self.mock_get_new_boefjes_by_org_id.return_value = [] # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_new_boefjes_empty_consumes(self): + def test_process_new_boefjes_empty_consumes(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) @@ -1645,12 +1616,12 @@ def test_push_tasks_for_new_boefjes_empty_consumes(self): self.mock_get_new_boefjes_by_org_id.return_value = [boefje] # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_new_boefjes_empty_consumes_no_ooi(self): + def test_process_new_boefjes_empty_consumes_no_ooi(self): # Arrange boefje = PluginFactory(scan_level=0, consumes=[]) @@ -1659,12 +1630,12 @@ def test_push_tasks_for_new_boefjes_empty_consumes_no_ooi(self): self.mock_get_new_boefjes_by_org_id.return_value = [boefje] # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_new_boefjes_no_oois_found(self): + def test_process_new_boefjes_no_oois_found(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) @@ -1675,12 +1646,12 @@ def test_push_tasks_for_new_boefjes_no_oois_found(self): self.mock_get_new_boefjes_by_org_id.return_value = [boefje] # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def 
test_push_tasks_for_new_boefjes_get_objects_request_exception(self): + def test_process_new_boefjes_get_objects_request_exception(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) @@ -1694,13 +1665,13 @@ def test_push_tasks_for_new_boefjes_get_objects_request_exception(self): self.mock_get_new_boefjes_by_org_id.return_value = [boefje] # Act - self.scheduler.push_tasks_for_new_boefjes() - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() + self.scheduler.process_new_boefjes() # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_new_boefjes_not_allowed_to_run(self): + def test_process_new_boefjes_not_allowed_to_run(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) @@ -1712,12 +1683,12 @@ def test_push_tasks_for_new_boefjes_not_allowed_to_run(self): self.mock_has_boefje_permission_to_run.return_value = False # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_new_boefjes_still_running(self): + def test_process_new_boefjes_still_running(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) @@ -1729,23 +1700,24 @@ def test_push_tasks_for_new_boefjes_still_running(self): self.mock_has_boefje_task_started_running.return_value = True # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_new_boefjes_item_on_queue(self): + def test_process_new_boefjes_item_on_queue(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = PluginFactory(scan_level=0, 
consumes=[ooi.object_type]) # Mocks + self.mock_get_organisations.return_value = [self.organisation] self.mock_get_objects_by_object_types.return_value = [ooi] self.mock_get_new_boefjes_by_org_id.return_value = [boefje] # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Task should be on priority queue task_pq = self.scheduler.queue.peek(0) @@ -1760,7 +1732,7 @@ def test_push_tasks_for_new_boefjes_item_on_queue(self): self.assertEqual(task_db.status, models.TaskStatus.QUEUED) # Act - self.scheduler.push_tasks_for_new_boefjes() + self.scheduler.process_new_boefjes() # Should only be one task on queue task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data) @@ -1792,10 +1764,10 @@ def setUp(self): def tearDown(self): mock.patch.stopall() - def test_push_tasks_for_rescheduling_scheduler_id(self): + def test_process_rescheduling_scheduler_id(self): pass - def test_push_tasks_for_rescheduling(self): + def test_process_rescheduling(self): """When the deadline of schedules have passed, the resulting task should be added to the queue""" # Arrange scan_profile = ScanProfileFactory(level=0) @@ -1809,7 +1781,10 @@ def test_push_tasks_for_rescheduling(self): ) schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + hash=boefje_task.hash, + data=boefje_task.model_dump(), ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -1820,7 +1795,7 @@ def test_push_tasks_for_rescheduling(self): self.mock_get_plugin.return_value = plugin # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: new item should be on queue self.assertEqual(1, self.scheduler.queue.qsize()) @@ -1834,7 +1809,7 @@ def test_push_tasks_for_rescheduling(self): self.assertIsNotNone(task_db) self.assertEqual(peek.id, task_db.id) - def 
test_push_tasks_for_rescheduling_no_ooi(self): + def test_process_rescheduling_no_ooi(self): """When the deadline has passed, and when the resulting tasks doesn't have an OOI, it should create a task. """ @@ -1850,7 +1825,10 @@ def test_push_tasks_for_rescheduling_no_ooi(self): ) schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + hash=boefje_task.hash, + data=boefje_task.model_dump(), ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -1861,7 +1839,7 @@ def test_push_tasks_for_rescheduling_no_ooi(self): self.mock_get_plugin.return_value = plugin # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: new item should be on queue self.assertEqual(1, self.scheduler.queue.qsize()) @@ -1875,7 +1853,7 @@ def test_push_tasks_for_rescheduling_no_ooi(self): self.assertIsNotNone(task_db) self.assertEqual(peek.id, task_db.id) - def test_push_tasks_for_rescheduling_ooi_not_found(self): + def test_process_rescheduling_ooi_not_found(self): """When ooi isn't found anymore for the schedule, we disable the schedule""" # Arrange scan_profile = ScanProfileFactory(level=0) @@ -1889,7 +1867,10 @@ def test_push_tasks_for_rescheduling_ooi_not_found(self): ) schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + hash=boefje_task.hash, + data=boefje_task.model_dump(), ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -1900,7 +1881,7 @@ def test_push_tasks_for_rescheduling_ooi_not_found(self): self.mock_get_plugin.return_value = plugin # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: item should not be on queue 
self.assertEqual(0, self.scheduler.queue.qsize()) @@ -1909,7 +1890,7 @@ def test_push_tasks_for_rescheduling_ooi_not_found(self): schedule_db_disabled = self.mock_ctx.datastores.schedule_store.get_schedule(schedule.id) self.assertFalse(schedule_db_disabled.enabled) - def test_push_tasks_for_rescheduling_boefje_not_found(self): + def test_process_rescheduling_boefje_not_found(self): """When boefje isn't found anymore for the schedule, we disable the schedule""" # Arrange scan_profile = ScanProfileFactory(level=0) @@ -1923,7 +1904,10 @@ def test_push_tasks_for_rescheduling_boefje_not_found(self): ) schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + hash=boefje_task.hash, + data=boefje_task.model_dump(), ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -1934,7 +1918,7 @@ def test_push_tasks_for_rescheduling_boefje_not_found(self): self.mock_get_plugin.return_value = None # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: item should not be on queue self.assertEqual(0, self.scheduler.queue.qsize()) @@ -1943,7 +1927,7 @@ def test_push_tasks_for_rescheduling_boefje_not_found(self): schedule_db_disabled = self.mock_ctx.datastores.schedule_store.get_schedule(schedule.id) self.assertFalse(schedule_db_disabled.enabled) - def test_push_tasks_for_rescheduling_boefje_disabled(self): + def test_process_rescheduling_boefje_disabled(self): """When boefje disabled for the schedule, we disable the schedule""" # Arrange scan_profile = ScanProfileFactory(level=0) @@ -1957,7 +1941,10 @@ def test_push_tasks_for_rescheduling_boefje_disabled(self): ) schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + 
organisation=self.organisation.id, + hash=boefje_task.hash, + data=boefje_task.model_dump(), ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -1968,7 +1955,7 @@ def test_push_tasks_for_rescheduling_boefje_disabled(self): self.mock_get_plugin.return_value = plugin # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: item should not be on queue self.assertEqual(0, self.scheduler.queue.qsize()) @@ -1977,7 +1964,7 @@ def test_push_tasks_for_rescheduling_boefje_disabled(self): schedule_db_disabled = self.mock_ctx.datastores.schedule_store.get_schedule(schedule.id) self.assertFalse(schedule_db_disabled.enabled) - def test_push_tasks_for_rescheduling_boefje_doesnt_consume_ooi(self): + def test_process_rescheduling_boefje_doesnt_consume_ooi(self): """When boefje doesn't consume the ooi, we disable the schedule""" # Arrange scan_profile = ScanProfileFactory(level=0) @@ -1991,7 +1978,10 @@ def test_push_tasks_for_rescheduling_boefje_doesnt_consume_ooi(self): ) schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + hash=boefje_task.hash, + data=boefje_task.model_dump(), ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -2002,7 +1992,7 @@ def test_push_tasks_for_rescheduling_boefje_doesnt_consume_ooi(self): self.mock_get_plugin.return_value = plugin # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: item should not be on queue self.assertEqual(0, self.scheduler.queue.qsize()) @@ -2011,7 +2001,7 @@ def test_push_tasks_for_rescheduling_boefje_doesnt_consume_ooi(self): schedule_db_disabled = self.mock_ctx.datastores.schedule_store.get_schedule(schedule.id) self.assertFalse(schedule_db_disabled.enabled) - def 
test_push_tasks_for_rescheduling_boefje_cannot_scan_ooi(self): + def test_process_rescheduling_boefje_cannot_scan_ooi(self): """When boefje cannot scan the ooi, we disable the schedule""" # Arrange scan_profile = ScanProfileFactory(level=0) @@ -2025,7 +2015,10 @@ def test_push_tasks_for_rescheduling_boefje_cannot_scan_ooi(self): ) schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + hash=boefje_task.hash, + data=boefje_task.model_dump(), ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -2036,7 +2029,7 @@ def test_push_tasks_for_rescheduling_boefje_cannot_scan_ooi(self): self.mock_get_plugin.return_value = plugin # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: item should not be on queue self.assertEqual(0, self.scheduler.queue.qsize()) diff --git a/mula/tests/integration/test_normalizer_scheduler.py b/mula/tests/integration/test_normalizer_scheduler.py index 493b4bd3f54..ad3d72e9ea8 100644 --- a/mula/tests/integration/test_normalizer_scheduler.py +++ b/mula/tests/integration/test_normalizer_scheduler.py @@ -41,10 +41,10 @@ def setUp(self): ) # Scheduler + self.scheduler = schedulers.NormalizerScheduler(self.mock_ctx) + + # Organisation self.organisation = OrganisationFactory() - self.scheduler = schedulers.NormalizerScheduler( - ctx=self.mock_ctx, scheduler_id=self.organisation.id, organisation=self.organisation - ) def tearDown(self): self.scheduler.stop() @@ -64,57 +64,6 @@ def setUp(self): "scheduler.context.AppContext.services.katalogus.get_plugin_by_id_and_org_id" ).start() - def test_disable_scheduler(self): - # Act - self.scheduler.disable() - - # Listeners should be stopped - self.assertEqual(0, len(self.scheduler.listeners)) - - # Threads should be stopped - self.assertEqual(0, len(self.scheduler.threads)) - - # 
Queue should be empty - self.assertEqual(0, self.scheduler.queue.qsize()) - - # All tasks on queue should be set to CANCELLED - tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id) - for task in tasks: - self.assertEqual(task.status, models.TaskStatus.CANCELLED) - - # Scheduler should be disabled - self.assertFalse(self.scheduler.is_enabled()) - - def test_enable_scheduler(self): - # Disable scheduler first - self.scheduler.disable() - - # Listeners should be stopped - self.assertEqual(0, len(self.scheduler.listeners)) - - # Threads should be stopped - self.assertEqual(0, len(self.scheduler.threads)) - - # Queue should be empty - self.assertEqual(0, self.scheduler.queue.qsize()) - - # All tasks on queue should be set to CANCELLED - tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id) - for task in tasks: - self.assertEqual(task.status, models.TaskStatus.CANCELLED) - - # Re-enable scheduler - self.scheduler.enable() - - # Threads should be started - self.assertGreater(len(self.scheduler.threads), 0) - - # Scheduler should be enabled - self.assertTrue(self.scheduler.is_enabled()) - - # Stop the scheduler - self.scheduler.stop() - def test_is_allowed_to_run(self): # Arrange plugin = PluginFactory(type="normalizer", consumes=["text/plain"]) @@ -151,7 +100,7 @@ def test_get_normalizers_for_mime_type(self, mock_get_normalizers_by_org_id_and_ mock_get_normalizers_by_org_id_and_type.return_value = [normalizer] # Act - result = self.scheduler.get_normalizers_for_mime_type("text/plain") + result = self.scheduler.get_normalizers_for_mime_type("text/plain", self.organisation.id) # Assert self.assertEqual(len(result), 1) @@ -166,7 +115,7 @@ def test_get_normalizers_for_mime_type_request_exception(self, mock_get_normaliz ] # Act - result = self.scheduler.get_normalizers_for_mime_type("text/plain") + result = self.scheduler.get_normalizers_for_mime_type("text/plain", self.organisation.id) # Assert 
self.assertEqual(len(result), 0) @@ -177,7 +126,7 @@ def test_get_normalizers_for_mime_type_response_is_none(self, mock_get_normalize mock_get_normalizers_by_org_id_and_type.return_value = None # Act - result = self.scheduler.get_normalizers_for_mime_type("text/plain") + result = self.scheduler.get_normalizers_for_mime_type("text/plain", self.organisation.id) # Assert self.assertEqual(len(result), 0) @@ -199,7 +148,11 @@ def setUp(self): "scheduler.schedulers.NormalizerScheduler.get_normalizers_for_mime_type" ).start() - def test_push_tasks_for_received_raw_file(self): + self.mock_get_plugin = mock.patch( + "scheduler.context.AppContext.services.katalogus.get_plugin_by_id_and_org_id" + ).start() + + def test_process_raw_data(self): # Arrange ooi = OOIFactory(scan_profile=ScanProfileFactory(level=0)) boefje = BoefjeFactory() @@ -208,7 +161,7 @@ def test_push_tasks_for_received_raw_file(self): # Arrange: create the RawDataReceivedEvent raw_data_event = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() @@ -217,7 +170,7 @@ def test_push_tasks_for_received_raw_file(self): self.mock_get_normalizers_for_mime_type.return_value = [plugin] # Act - self.scheduler.push_tasks_for_received_raw_data(raw_data_event) + self.scheduler.process_raw_data(raw_data_event) # Task should be on priority queue task_pq = self.scheduler.queue.peek(0) @@ -228,7 +181,7 @@ def test_push_tasks_for_received_raw_file(self): self.assertEqual(task_db.id, task_pq.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) - def test_push_tasks_for_received_raw_file_no_normalizers_found(self): + def test_process_raw_data_no_normalizers_found(self): # Arrange ooi = OOIFactory(scan_profile=ScanProfileFactory(level=0)) boefje = BoefjeFactory() @@ -236,7 +189,7 @@ def 
test_push_tasks_for_received_raw_file_no_normalizers_found(self): raw_data_event = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() @@ -244,19 +197,21 @@ def test_push_tasks_for_received_raw_file_no_normalizers_found(self): self.mock_get_normalizers_for_mime_type.return_value = [] # Act - self.scheduler.push_tasks_for_received_raw_data(raw_data_event) + self.scheduler.process_raw_data(raw_data_event) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_received_raw_file_not_allowed_to_run(self): + def test_process_raw_data_not_allowed_to_run(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = BoefjeFactory() boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id) - task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task) + task = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id + ) self.mock_ctx.datastores.task_store.create_task(task) boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key) @@ -264,7 +219,7 @@ def test_push_tasks_for_received_raw_file_not_allowed_to_run(self): # Mocks raw_data_event = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() @@ -272,19 +227,21 @@ def test_push_tasks_for_received_raw_file_not_allowed_to_run(self): self.mock_has_normalizer_permission_to_run.return_value = False # Act - self.scheduler.push_tasks_for_received_raw_data(raw_data_event) + 
self.scheduler.process_raw_data(raw_data_event) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_received_raw_file_still_running(self): + def test_process_raw_data_still_running(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = BoefjeFactory() boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id) - task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task) + task = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id + ) self.mock_ctx.datastores.task_store.create_task(task) boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key) @@ -292,7 +249,7 @@ def test_push_tasks_for_received_raw_file_still_running(self): # Mocks raw_data_event = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() @@ -301,19 +258,21 @@ def test_push_tasks_for_received_raw_file_still_running(self): self.mock_has_normalizer_task_started_running.return_value = True # Act - self.scheduler.push_tasks_for_received_raw_data(raw_data_event) + self.scheduler.process_raw_data(raw_data_event) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_received_raw_file_still_running_exception(self): + def test_process_raw_data_still_running_exception(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = BoefjeFactory() boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id) - task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, 
data=boefje_task) + task = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id + ) self.mock_ctx.datastores.task_store.create_task(task) boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key) @@ -321,7 +280,7 @@ def test_push_tasks_for_received_raw_file_still_running_exception(self): # Mocks raw_data_event = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() @@ -330,12 +289,12 @@ def test_push_tasks_for_received_raw_file_still_running_exception(self): self.mock_has_normalizer_task_started_running.side_effect = Exception("Something went wrong") # Act - self.scheduler.push_tasks_for_received_raw_data(raw_data_event) + self.scheduler.process_raw_data(raw_data_event) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_received_raw_file_item_on_queue(self): + def test_process_raw_data_item_on_queue(self): # Arrange ooi = OOIFactory(scan_profile=ScanProfileFactory(level=0)) boefje = BoefjeFactory() @@ -343,13 +302,13 @@ def test_push_tasks_for_received_raw_file_item_on_queue(self): raw_data_event1 = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() raw_data_event2 = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() @@ -357,8 +316,8 @@ def test_push_tasks_for_received_raw_file_item_on_queue(self): 
self.mock_get_normalizers_for_mime_type.return_value = [NormalizerFactory()] # Act - self.scheduler.push_tasks_for_received_raw_data(raw_data_event1) - self.scheduler.push_tasks_for_received_raw_data(raw_data_event2) + self.scheduler.process_raw_data(raw_data_event1) + self.scheduler.process_raw_data(raw_data_event2) # Task should be on priority queue (only one) task_pq = self.scheduler.queue.peek(0) @@ -369,31 +328,33 @@ def test_push_tasks_for_received_raw_file_item_on_queue(self): self.assertEqual(task_db.id, task_pq.id) self.assertEqual(task_db.status, models.TaskStatus.QUEUED) - def test_push_tasks_for_received_raw_file_error_mimetype(self): + def test_process_raw_data_error_mimetype(self): # Arrange scan_profile = ScanProfileFactory(level=0) ooi = OOIFactory(scan_profile=scan_profile) boefje = BoefjeFactory() boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id) - task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task) + task = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id + ) self.mock_ctx.datastores.task_store.create_task(task) boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key) raw_data_event = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "error/unknown"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() # Act - self.scheduler.push_tasks_for_received_raw_data(raw_data_event) + self.scheduler.process_raw_data(raw_data_event) # Task should not be on priority queue self.assertEqual(0, self.scheduler.queue.qsize()) - def test_push_tasks_for_received_raw_file_queue_full(self): + def test_process_raw_data_queue_full(self): events = [] for _ in range(0, 2): # Arrange @@ -401,14 +362,16 @@ def 
test_push_tasks_for_received_raw_file_queue_full(self): ooi = OOIFactory(scan_profile=scan_profile) boefje = BoefjeFactory() boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id) - task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task) + task = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id + ) self.mock_ctx.datastores.task_store.create_task(task) boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key) raw_data_event = models.RawDataReceivedEvent( raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]), - organization=self.organisation.name, + organization=self.organisation.id, created_at=datetime.datetime.now(), ).model_dump_json() @@ -421,13 +384,13 @@ def test_push_tasks_for_received_raw_file_queue_full(self): self.mock_get_normalizers_for_mime_type.return_value = [NormalizerFactory()] # Act - self.scheduler.push_tasks_for_received_raw_data(events[0]) + self.scheduler.process_raw_data(events[0]) # Assert self.assertEqual(1, self.scheduler.queue.qsize()) with capture_logs() as cm: - self.scheduler.push_tasks_for_received_raw_data(events[1]) + self.scheduler.process_raw_data(events[1]) - self.assertIn("Could not add task to queue, queue was full", cm[-1].get("event")) + self.assertIn("Queue is full", cm[-1].get("event")) self.assertEqual(1, self.scheduler.queue.qsize()) diff --git a/mula/tests/integration/test_pq_store.py b/mula/tests/integration/test_pq_store.py index 0ace0867758..8fd9b6e6d91 100644 --- a/mula/tests/integration/test_pq_store.py +++ b/mula/tests/integration/test_pq_store.py @@ -38,7 +38,7 @@ def tearDown(self): def test_push(self): # Arrange - item = functions.create_item(scheduler_id=uuid.uuid4().hex, priority=1) + item = functions.create_task(scheduler_id=uuid.uuid4().hex, organisation=self.organisation.id, priority=1) item.status = 
models.TaskStatus.QUEUED created_item = self.mock_ctx.datastores.pq_store.push(item) @@ -50,7 +50,7 @@ def test_push(self): self.assertEqual(item_db.id, created_item.id) def test_push_status_not_queued(self): - item = functions.create_item(scheduler_id=uuid.uuid4().hex, priority=1) + item = functions.create_task(scheduler_id=uuid.uuid4().hex, organisation=self.organisation.id, priority=1) item.status = models.TaskStatus.PENDING created_item = self.mock_ctx.datastores.pq_store.push(item) @@ -62,24 +62,26 @@ def test_push_status_not_queued(self): def test_pop(self): # Arrange - item = functions.create_item(scheduler_id=uuid.uuid4().hex, priority=1) + item = functions.create_task(scheduler_id=uuid.uuid4().hex, organisation=self.organisation.id, priority=1) item.status = models.TaskStatus.QUEUED created_item = self.mock_ctx.datastores.pq_store.push(item) - popped_item = self.mock_ctx.datastores.pq_store.pop(item.scheduler_id) + popped_items, count = self.mock_ctx.datastores.pq_store.pop(item.scheduler_id) # Assert - self.assertIsNotNone(popped_item) - self.assertEqual(popped_item.id, created_item.id) + self.assertIsNotNone(popped_items) + self.assertEqual(count, 1) + self.assertEqual(popped_items[0].id, created_item.id) def test_pop_status_not_queued(self): # Arrange - item = functions.create_item(scheduler_id=uuid.uuid4().hex, priority=1) + item = functions.create_task(scheduler_id=uuid.uuid4().hex, organisation=self.organisation.id, priority=1) item.status = models.TaskStatus.PENDING created_item = self.mock_ctx.datastores.pq_store.push(item) - popped_item = self.mock_ctx.datastores.pq_store.pop(item.scheduler_id) + popped_items, count = self.mock_ctx.datastores.pq_store.pop(item.scheduler_id) # Assert self.assertIsNotNone(created_item) - self.assertIsNone(popped_item) + self.assertEqual(count, 0) + self.assertEqual(len(popped_items), 0) diff --git a/mula/tests/integration/test_report_scheduler.py b/mula/tests/integration/test_report_scheduler.py index 
ee35f7ab25a..269d0dd9759 100644 --- a/mula/tests/integration/test_report_scheduler.py +++ b/mula/tests/integration/test_report_scheduler.py @@ -29,10 +29,10 @@ def setUp(self): ) # Scheduler + self.scheduler = schedulers.ReportScheduler(ctx=self.mock_ctx) + + # Organisation self.organisation = OrganisationFactory() - self.scheduler = schedulers.ReportScheduler( - ctx=self.mock_ctx, scheduler_id=self.organisation.id, organisation=self.organisation - ) def tearDown(self): self.scheduler.stop() @@ -51,48 +51,16 @@ def setUp(self): def tearDown(self): mock.patch.stopall() - def test_enable_scheduler(self): - # Disable scheduler first - self.scheduler.disable() - - # Threads should be stopped - self.assertEqual(0, len(self.scheduler.threads)) - - # Queue should be empty - self.assertEqual(0, self.scheduler.queue.qsize()) - - # Re-enable scheduler - self.scheduler.enable() - - # Threads should be started - self.assertGreater(len(self.scheduler.threads), 0) - - # Scheduler should be enabled - self.assertTrue(self.scheduler.is_enabled()) - - # Stop the scheduler - self.scheduler.stop() - - def test_disable_scheduler(self): - # Disable scheduler - self.scheduler.disable() - - # Threads should be stopped - self.assertEqual(0, len(self.scheduler.threads)) - - # Queue should be empty - self.assertEqual(0, self.scheduler.queue.qsize()) - - # Scheduler should be disabled - self.assertFalse(self.scheduler.is_enabled()) - - def test_push_tasks_for_rescheduling(self): + def test_process_rescheduling(self): """When the deadline of schedules have passed, the resulting task should be added to the queue""" # Arrange report_task = models.ReportTask(organisation_id=self.organisation.id, report_recipe_id="123") schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=report_task.hash, data=report_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + hash=report_task.hash, + data=report_task.model_dump(), + organisation=self.organisation.id, ) schedule_db = 
self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -101,7 +69,7 @@ def test_push_tasks_for_rescheduling(self): self.mock_get_schedules.return_value = ([schedule_db], 1) # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: new item should be on queue self.assertEqual(1, self.scheduler.queue.qsize()) @@ -115,13 +83,16 @@ def test_push_tasks_for_rescheduling(self): self.assertIsNotNone(task_db) self.assertEqual(peek.id, task_db.id) - def test_push_tasks_for_rescheduling_item_on_queue(self): + def test_process_rescheduling_item_on_queue(self): """When the deadline of schedules have passed, the resulting task should be added to the queue""" # Arrange report_task = models.ReportTask(organisation_id=self.organisation.id, report_recipe_id="123") schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, hash=report_task.hash, data=report_task.model_dump() + scheduler_id=self.scheduler.scheduler_id, + hash=report_task.hash, + data=report_task.model_dump(), + organisation=self.organisation.id, ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -130,7 +101,7 @@ def test_push_tasks_for_rescheduling_item_on_queue(self): self.mock_get_schedules.return_value = ([schedule_db], 1) # Act - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Assert: new item should be on queue self.assertEqual(1, self.scheduler.queue.qsize()) @@ -145,7 +116,7 @@ def test_push_tasks_for_rescheduling_item_on_queue(self): self.assertEqual(peek.id, task_db.id) # Act: push again - self.scheduler.push_tasks_for_rescheduling() + self.scheduler.process_rescheduling() # Should only be one task on queue self.assertEqual(1, self.scheduler.queue.qsize()) diff --git a/mula/tests/integration/test_schedule_store.py b/mula/tests/integration/test_schedule_store.py index df957b82171..e6bf2e894a1 100644 --- a/mula/tests/integration/test_schedule_store.py +++ 
b/mula/tests/integration/test_schedule_store.py @@ -6,6 +6,7 @@ from scheduler import config, models, storage from scheduler.storage import filters, stores +from tests.factories.organisation import OrganisationFactory from tests.utils import functions @@ -28,27 +29,40 @@ def setUp(self): } ) + # Organisation + self.organisation = OrganisationFactory() + def tearDown(self): models.Base.metadata.drop_all(self.dbconn.engine) self.dbconn.engine.dispose() def test_create_schedule_calculate_deadline_at(self): """When a schedule is created, the deadline_at should be calculated.""" - schedule = models.Schedule(scheduler_id="test_scheduler_id", schedule="* * * * *", data={}) + schedule = models.Schedule( + scheduler_id="test_scheduler_id", organisation=self.organisation.id, schedule="* * * * *", data={} + ) self.assertIsNotNone(schedule.deadline_at) def test_create_schedule_explicit_deadline_at(self): """When a schedule is created, the deadline_at should be set if it is provided.""" now = datetime.now(timezone.utc) - schedule = models.Schedule(scheduler_id="test_scheduler_id", data={}, deadline_at=now) + schedule = models.Schedule( + scheduler_id="test_scheduler_id", organisation=self.organisation.id, data={}, deadline_at=now + ) self.assertEqual(schedule.deadline_at, now) def test_create_schedule_deadline_at_takes_precedence(self): """When a schedule is created, the deadline_at should be set if it is provided.""" now = datetime.now(timezone.utc) - schedule = models.Schedule(scheduler_id="test_scheduler_id", schedule="* * * * *", data={}, deadline_at=now) + schedule = models.Schedule( + scheduler_id="test_scheduler_id", + schedule="* * * * *", + organisation=self.organisation.id, + data={}, + deadline_at=now, + ) self.assertEqual(schedule.deadline_at, now) @@ -56,8 +70,10 @@ def test_create_schedule(self): # Arrange scheduler_id = "test_scheduler_id" - task = functions.create_item(scheduler_id, 1) - schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, 
data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1) + schedule = models.Schedule( + scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) # Act schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -69,14 +85,18 @@ def test_get_schedules(self): # Arrange scheduler_one = "test_scheduler_one" for i in range(5): - task = functions.create_item(scheduler_one, 1) - schedule = models.Schedule(scheduler_id=scheduler_one, hash=task.hash, data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_one, organisation=self.organisation.id, priority=1) + schedule = models.Schedule( + scheduler_id=scheduler_one, organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) self.mock_ctx.datastores.schedule_store.create_schedule(schedule) scheduler_two = "test_scheduler_two" for i in range(5): - task = functions.create_item(scheduler_two, 1) - schedule = models.Schedule(scheduler_id=scheduler_two, hash=task.hash, data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_two, organisation=self.organisation.id, priority=1) + schedule = models.Schedule( + scheduler_id=scheduler_two, organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) self.mock_ctx.datastores.schedule_store.create_schedule(schedule) schedules_scheduler_one, schedules_scheduler_one_count = self.mock_ctx.datastores.schedule_store.get_schedules( @@ -99,8 +119,10 @@ def test_get_schedules(self): def test_get_schedule(self): # Arrange scheduler_id = "test_scheduler_id" - task = functions.create_item(scheduler_id, 1) - schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1) + schedule = models.Schedule( + scheduler_id=scheduler_id, 
organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) # Act @@ -113,7 +135,9 @@ def test_get_schedule_by_hash(self): # Arrange scheduler_id = "test_scheduler_id" data = functions.create_test_model() - schedule = models.Schedule(scheduler_id=scheduler_id, hash=data.hash, data=data.model_dump()) + schedule = models.Schedule( + scheduler_id=scheduler_id, organisation=self.organisation.id, hash=data.hash, data=data.model_dump() + ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) # Act @@ -127,8 +151,10 @@ def test_get_schedule_by_hash(self): def test_update_schedule(self): # Arrange scheduler_id = "test_scheduler_id" - task = functions.create_item(scheduler_id, 1) - schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1) + schedule = models.Schedule( + scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) # Assert @@ -145,8 +171,10 @@ def test_update_schedule(self): def test_delete_schedule(self): # Arrange scheduler_id = "test_scheduler_id" - task = functions.create_item(scheduler_id, 1) - schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1) + schedule = models.Schedule( + scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) # Act @@ -160,8 +188,10 @@ def test_delete_schedule_ondelete(self): """When a schedule is deleted, its tasks should NOT be deleted.""" # Arrange scheduler_id = 
"test_scheduler_id" - task = functions.create_item(scheduler_id, 1) - schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1) + schedule = models.Schedule( + scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) task.schedule_id = schedule_db.id @@ -178,11 +208,16 @@ def test_delete_schedule_ondelete(self): self.assertIsNotNone(is_task_deleted) self.assertIsNone(is_task_deleted.schedule_id) + # NOTE: skipping this test until task relationship is re-enabled, disabled + # it for now when we use the model relationship + @unittest.skip("Disabled until task relationship is re-enabled") def test_relationship_schedule_tasks(self): # Arrange scheduler_id = "test_scheduler_id" - task = functions.create_task(scheduler_id) - schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id) + schedule = models.Schedule( + scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) task.schedule_id = schedule_db.id @@ -198,8 +233,10 @@ def test_relationship_schedule_tasks(self): def test_get_tasks_filter_related(self): # Arrange scheduler_id = "test_scheduler_id" - task = functions.create_task(scheduler_id) - schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump()) + task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id) + schedule = models.Schedule( + scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump() + ) schedule_db = 
self.mock_ctx.datastores.schedule_store.create_schedule(schedule) task.schedule_id = schedule_db.id diff --git a/mula/tests/integration/test_scheduler.py b/mula/tests/integration/test_scheduler.py index aecda637a09..bec6e2346bc 100644 --- a/mula/tests/integration/test_scheduler.py +++ b/mula/tests/integration/test_scheduler.py @@ -5,10 +5,10 @@ from unittest import mock from scheduler import config, models, storage -from scheduler.schedulers.queue import InvalidItemError, NotAllowedError, QueueEmptyError, QueueFullError +from scheduler.schedulers.queue import InvalidItemError, QueueEmptyError, QueueFullError from scheduler.storage import stores -from structlog.testing import capture_logs +from tests.factories import OrganisationFactory from tests.mocks import item as mock_item from tests.mocks import queue as mock_queue from tests.mocks import scheduler as mock_scheduler @@ -49,6 +49,9 @@ def setUp(self): ctx=self.mock_ctx, scheduler_id=identifier, queue=queue, create_schedule=True ) + # Organisation + self.organisation = OrganisationFactory() + def tearDown(self): self.scheduler.stop() models.Base.metadata.drop_all(self.dbconn.engine) @@ -58,7 +61,9 @@ def test_push_items_to_queue(self): # Arrange items = [] for i in range(10): - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=i + 1) + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=i + 1 + ) items.append(item) # Act @@ -84,7 +89,9 @@ def test_push_items_to_queue(self): def test_push_item_to_queue(self): # Arrange - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) # Act self.scheduler.push_item_to_queue(item) @@ -108,7 +115,9 @@ def test_push_item_to_queue_create_schedule_false(self): # Arrange self.scheduler.create_schedule = False - item = 
functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) # Act self.scheduler.push_item_to_queue(item) @@ -130,7 +139,9 @@ def test_push_item_to_queue_create_schedule_false(self): def test_push_item_to_queue_full(self): # Arrange - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) self.scheduler.queue.maxsize = 1 @@ -147,7 +158,9 @@ def test_push_item_to_queue_full(self): def test_push_item_to_queue_invalid(self): # Arrange - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) item.data = {"invalid": "data"} # Assert @@ -156,16 +169,24 @@ def test_push_item_to_queue_invalid(self): def test_pop_item_from_queue(self): # Arrange - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) self.scheduler.push_item_to_queue(item) # Act - popped_item = self.scheduler.pop_item_from_queue() + popped_items, count = self.scheduler.pop_item_from_queue() # Assert self.assertEqual(0, self.scheduler.queue.qsize()) - self.assertEqual(item.id, popped_item.id) + self.assertEqual(1, count) + self.assertEqual(1, len(popped_items)) + self.assertEqual(popped_items[0].id, item.id) + + # Status should be dispatched + task_db = self.mock_ctx.datastores.task_store.get_task(str(item.id)) + self.assertEqual(task_db.status, models.TaskStatus.DISPATCHED) def test_pop_item_from_queue_empty(self): self.assertEqual(0, self.scheduler.queue.qsize()) @@ -175,7 +196,9 @@ def 
test_pop_item_from_queue_empty(self): def test_post_push(self): """When a task is added to the queue, it should be added to the database""" # Arrange - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) # Act self.scheduler.push_item_to_queue(item) @@ -207,7 +230,9 @@ def test_post_push(self): def test_post_push_schedule_enabled(self): # Arrange - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) # Act self.scheduler.push_item_to_queue(item) @@ -237,35 +262,11 @@ def test_post_push_schedule_enabled(self): # grace period self.assertGreater(schedule_db.deadline_at, datetime.now(timezone.utc)) - def test_post_push_schedule_disabled(self): - # Arrange - first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) - - # Act - first_item_db = self.scheduler.push_item_to_queue(first_item) - - initial_schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule(first_item_db.schedule_id) - - # Pop - self.scheduler.pop_item_from_queue() - - # Disable this schedule - initial_schedule_db.enabled = False - self.mock_ctx.datastores.schedule_store.update_schedule(initial_schedule_db) - - # Act - second_item = first_item_db.model_copy() - second_item.id = uuid.uuid4() - second_item_db = self.scheduler.push_item_to_queue(second_item) - - with capture_logs() as cm: - self.scheduler.post_push(second_item_db) - - self.assertIn("is disabled, not updating deadline", cm[-1].get("event")) - def test_post_push_schedule_update_schedule(self): # Arrange - first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + first_item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, 
organisation=self.organisation.id, priority=1 + ) # Act first_item_db = self.scheduler.push_item_to_queue(first_item) @@ -294,10 +295,16 @@ def test_post_push_schedule_update_schedule(self): def test_post_push_schedule_is_not_none(self): """When a schedule is provided, it should be used to set the deadline""" # Arrange - first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + first_item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) schedule = models.Schedule( - scheduler_id=self.scheduler.scheduler_id, schedule="0 0 * * *", hash=first_item.hash, data=first_item.data + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + schedule="0 0 * * *", + hash=first_item.hash, + data=first_item.data, ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) @@ -317,9 +324,16 @@ def test_post_push_schedule_is_not_none(self): def test_post_push_schedule_is_none(self): """When a schedule is not provided, the deadline should be set to None""" # Arrange - first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + first_item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) - schedule = models.Schedule(scheduler_id=self.scheduler.scheduler_id, hash=first_item.hash, data=first_item.data) + schedule = models.Schedule( + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + hash=first_item.hash, + data=first_item.data, + ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) first_item.schedule_id = schedule_db.id @@ -336,9 +350,16 @@ def test_post_push_schedule_auto_calculate_deadline(self): # Arrange self.scheduler.auto_calculate_deadline = True - first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) + first_item = functions.create_task( + 
scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 + ) - schedule = models.Schedule(scheduler_id=self.scheduler.scheduler_id, hash=first_item.hash, data=first_item.data) + schedule = models.Schedule( + scheduler_id=self.scheduler.scheduler_id, + organisation=self.organisation.id, + hash=first_item.hash, + data=first_item.data, + ) schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule) first_item.schedule_id = schedule_db.id @@ -354,10 +375,8 @@ def test_post_push_schedule_auto_calculate_deadline(self): def test_post_pop(self): """When a task is popped from the queue, it should be removed from the database""" # Arrange - item = functions.create_item( - scheduler_id=self.scheduler.scheduler_id, - priority=1, - task=functions.create_task(self.scheduler.scheduler_id), + item = functions.create_task( + scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1 ) # Act @@ -381,101 +400,3 @@ def test_post_pop(self): task_db = self.mock_ctx.datastores.task_store.get_task(str(item.id)) self.assertEqual(task_db.id, item.id) self.assertEqual(task_db.status, models.TaskStatus.DISPATCHED) - - def test_disable_scheduler(self): - # Arrange: start scheduler - self.scheduler.run() - - # Arrange: add tasks - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) - self.scheduler.push_item_to_queue(item) - - # Assert: task should be on priority queue - pq_item = self.scheduler.queue.peek(0) - self.assertEqual(1, self.scheduler.queue.qsize()) - self.assertEqual(pq_item.id, item.id) - - # Assert: task should be in datastore, and queued - task_db = self.mock_ctx.datastores.task_store.get_task(str(item.id)) - self.assertEqual(task_db.id, item.id) - self.assertEqual(task_db.status, models.TaskStatus.QUEUED) - - # Assert: listeners should be running - self.assertGreater(len(self.scheduler.listeners), 0) - - # Assert: threads should be running - 
self.assertGreater(len(self.scheduler.threads), 0) - - # Act - self.scheduler.disable() - - # Listeners should be stopped - self.assertEqual(0, len(self.scheduler.listeners)) - - # Threads should be stopped - self.assertEqual(0, len(self.scheduler.threads)) - - # Queue should be empty - self.assertEqual(0, self.scheduler.queue.qsize()) - - # All tasks on queue should be set to CANCELLED - tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id) - for task in tasks: - self.assertEqual(task.status, models.TaskStatus.CANCELLED) - - # Scheduler should be disabled - self.assertFalse(self.scheduler.is_enabled()) - - with self.assertRaises(NotAllowedError): - self.scheduler.push_item_to_queue(item) - - def test_enable_scheduler(self): - # Arrange: start scheduler - self.scheduler.run() - - # Arrange: add tasks - item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1) - self.scheduler.push_item_to_queue(item) - - # Assert: listeners should be running - self.assertGreater(len(self.scheduler.listeners), 0) - - # Assert: threads should be running - self.assertGreater(len(self.scheduler.threads), 0) - - # Disable scheduler first - self.scheduler.disable() - - # Listeners should be stopped - self.assertEqual(0, len(self.scheduler.listeners)) - - # Threads should be stopped - self.assertEqual(0, len(self.scheduler.threads)) - - # Queue should be empty - self.assertEqual(0, self.scheduler.queue.qsize()) - - # All tasks on queue should be set to CANCELLED - tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id) - for task in tasks: - self.assertEqual(task.status, models.TaskStatus.CANCELLED) - - # Re-enable scheduler - self.scheduler.enable() - - # Threads should be started - self.assertGreater(len(self.scheduler.threads), 0) - - # Scheduler should be enabled - self.assertTrue(self.scheduler.is_enabled()) - - # Push item to the queue - self.scheduler.push_item_to_queue(item) - - # Assert: task 
should be on priority queue - pq_item = self.scheduler.queue.peek(0) - self.assertEqual(1, self.scheduler.queue.qsize()) - self.assertEqual(pq_item.id, item.id) - - # Stop the scheduler - self.scheduler.stop() diff --git a/mula/tests/integration/test_task_store.py b/mula/tests/integration/test_task_store.py index c672fc78557..30d7cc81857 100644 --- a/mula/tests/integration/test_task_store.py +++ b/mula/tests/integration/test_task_store.py @@ -37,14 +37,14 @@ def tearDown(self): self.dbconn.engine.dispose() def test_create_task(self): - task = functions.create_task(scheduler_id=self.organisation.id) + task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id) created_task = self.mock_ctx.datastores.task_store.create_task(task) self.assertIsNotNone(created_task) def test_get_tasks(self): # Arrange for i in range(5): - task = functions.create_task(scheduler_id=self.organisation.id) + task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id) self.mock_ctx.datastores.task_store.create_task(task) # Act @@ -57,7 +57,7 @@ def test_get_tasks(self): def get_tasks_by_type(self): # Arrange for i in range(5): - task = functions.create_task(scheduler_id=self.organisation.id) + task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id) self.mock_ctx.datastores.task_store.create_task(task) # Act @@ -74,7 +74,9 @@ def test_get_tasks_by_hash(self): hashes = [] data = functions.create_test_model() for i in range(5): - task = functions.create_task(scheduler_id=self.organisation.id, data=data) + task = functions.create_task( + scheduler_id=self.organisation.id, organisation=self.organisation.id, data=data + ) self.mock_ctx.datastores.task_store.create_task(task) hashes.append(task.hash) @@ -89,7 +91,7 @@ def test_get_tasks_by_hash(self): def test_get_task(self): # Arrange - task = functions.create_task(scheduler_id=self.organisation.id) + task = 
functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id) created_task = self.mock_ctx.datastores.task_store.create_task(task) # Act @@ -103,7 +105,9 @@ def test_get_latest_task_by_hash(self): hashes = [] data = functions.create_test_model() for i in range(5): - task = functions.create_task(scheduler_id=self.organisation.id, data=data) + task = functions.create_task( + scheduler_id=self.organisation.id, organisation=self.organisation.id, data=data + ) self.mock_ctx.datastores.task_store.create_task(task) hashes.append(task.hash) @@ -118,7 +122,7 @@ def test_get_latest_task_by_hash(self): def test_update_task(self): # Arrange - task = functions.create_task(scheduler_id=self.organisation.id) + task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id) created_task = self.mock_ctx.datastores.task_store.create_task(task) # Act @@ -131,7 +135,7 @@ def test_update_task(self): def test_cancel_task(self): # Arrange - task = functions.create_task(scheduler_id=self.organisation.id) + task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id) created_task = self.mock_ctx.datastores.task_store.create_task(task) # Act @@ -163,6 +167,7 @@ def test_get_status_counts(self): data = functions.create_test_model() task = models.Task( scheduler_id=self.organisation.id, + organisation=self.organisation.id, priority=1, status=status, type=functions.TestModel.type, @@ -203,6 +208,7 @@ def test_get_status_count_per_hour(self): data = functions.create_test_model() task = models.Task( scheduler_id=self.organisation.id, + organisation=self.organisation.id, priority=1, status=status, type=functions.TestModel.type, diff --git a/mula/tests/unit/test_queue.py b/mula/tests/unit/test_queue.py index 2861d442257..c55e6a7947b 100644 --- a/mula/tests/unit/test_queue.py +++ b/mula/tests/unit/test_queue.py @@ -43,7 +43,7 @@ def _check_queue_empty(self): def test_push(self): """When adding an 
item to the priority queue, the item should be added""" - item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(item) item_db = self.pq_store.get(self.pq.pq_id, item.id) @@ -57,7 +57,7 @@ def test_push_item_not_found_in_db(self, mock_push): """When adding an item to the priority queue, but the item is not found in the database, the item shouldn't be added. """ - item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) mock_push.return_value = None @@ -84,7 +84,7 @@ def test_push_invalid_item(self): """When pushing an item that can not be validated, the item shouldn't be pushed. """ - item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) item.data = {"invalid": "data"} with self.assertRaises(InvalidItemError): @@ -100,7 +100,7 @@ def test_push_replace_not_allowed(self): self.pq.allow_replace = False # Add an item to the queue - initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(initial_item) self.assertEqual(1, self.pq.qsize()) @@ -119,7 +119,7 @@ def test_push_replace_allowed(self): self.pq.allow_replace = True # Add an item to the queue - initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(initial_item) self.assertEqual(1, self.pq.qsize()) @@ -139,7 +139,7 @@ def test_push_updates_not_allowed(self): self.pq.allow_updates = False # Add an item to the queue - initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) 
+ initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(initial_item) self.assertEqual(1, self.pq.qsize()) @@ -164,7 +164,7 @@ def test_push_updates_allowed(self): self.pq.allow_updates = True # Add an item to the queue - initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(initial_item) self.assertEqual(1, self.pq.qsize()) @@ -189,7 +189,7 @@ def test_push_priority_updates_not_allowed(self): self.pq.allow_priority_updates = False # Add an item to the queue - initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(initial_item) self.assertEqual(1, self.pq.qsize()) @@ -215,7 +215,7 @@ def test_push_priority_updates_allowed(self): self.pq.allow_priority_updates = True # Add an item to the queue - initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(initial_item) self.assertEqual(1, self.pq.qsize()) @@ -237,7 +237,7 @@ def test_remove_item(self): removed, and the item should be removed from the entry_finder. 
""" # Add an item to the queue - item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(item) self.assertEqual(1, self.pq.qsize()) @@ -255,11 +255,11 @@ def test_push_maxsize_not_allowed(self): self.pq.maxsize = 1 # Add an item to the queue - first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(first_item) # Add another item to the queue - second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=2) + second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=2) with self.assertRaises(_queue.Full): self.pq.push(second_item) @@ -280,11 +280,11 @@ def test_push_maxsize_allowed(self): self.pq.maxsize = 0 # Add an item to the queue - first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(first_item) # Add another item to the queue - second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=2) + second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=2) self.pq.push(second_item) # The queue should now have 2 items @@ -310,11 +310,11 @@ def test_push_maxsize_allowed_high_priority(self): self.pq.maxsize = 1 # Add an item to the queue - first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(first_item) # Add another item to the queue - second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) 
self.pq.push(second_item) # The queue should now have 2 items @@ -340,11 +340,11 @@ def test_push_maxsize_not_allowed_low_priority(self): self.pq.maxsize = 1 # Add an item to the queue - first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(first_item) # Add another item to the queue - second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=2) + second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=2) with self.assertRaises(_queue.Full): self.pq.push(second_item) @@ -362,15 +362,15 @@ def test_pop(self): it from the queue. """ # Add an item to the queue - first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(first_item) # The queue should now have 1 item self.assertEqual(1, self.pq.qsize()) # Pop the item - popped_item = self.pq.pop() - self.assertEqual(first_item.data, popped_item.data) + popped_items, _ = self.pq.pop() + self.assertEqual(first_item.data, popped_items[0].data) # The queue should now be empty self.assertEqual(0, self.pq.qsize()) @@ -380,8 +380,8 @@ def test_pop_with_lock(self): thread to pop an item. """ # Arrange - first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) - second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) + second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(first_item) self.pq.push(second_item) @@ -392,21 +392,21 @@ def test_pop_with_lock(self): # it will set a timeout so we can test the lock. 
def first_pop(event): with self.pq.lock: - item = self.pq_store.pop(self.pq.pq_id, None) + items, _ = self.pq_store.pop(self.pq.pq_id, None) event.set() time.sleep(5) - self.pq_store.remove(self.pq.pq_id, item.id) + self.pq_store.remove(self.pq.pq_id, items[0].id) - queue.put(item) + queue.put(items[0]) def second_pop(event): # Wait for thread 1 to set the event before continuing event.wait() - item = self.pq.pop() - queue.put(item) + items, _ = self.pq.pop() + queue.put(items[0]) # Act; with thread 1 we will create a lock on the queue, and then with # thread 2 we try to pop an item while the lock is active. @@ -430,8 +430,8 @@ def test_pop_without_lock(self): NOTE: Here we test the procedure when a lock isn't set. """ # Arrange - first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) - second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) + second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(first_item) self.pq.push(second_item) @@ -441,21 +441,21 @@ def test_pop_without_lock(self): # This function is similar to the pop() function of the queue, but # it will set a timeout. We have omitted the lock here. def first_pop(event): - item = self.pq_store.pop(self.pq.pq_id, None) + items, _ = self.pq_store.pop(self.pq.pq_id, None) event.set() time.sleep(5) - self.pq_store.remove(self.pq.pq_id, item.id) + self.pq_store.remove(self.pq.pq_id, items[0].id) - queue.put(item) + queue.put(items[0]) def second_pop(event): # Wait for thread 1 to set the event before continuing event.wait() - item = self.pq.pop() - queue.put(item) + items, _ = self.pq.pop() + queue.put(items[0]) # Act; with thread 1 we won't create a lock, and then with thread 2 we # try to pop an item while the timeout is active. 
@@ -484,26 +484,26 @@ def test_pop_highest_priority(self): priority """ # Add an item to the queue - first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(first_item) # Add another item to the queue - second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=2) + second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=2) self.pq.push(second_item) # The queue should now have 2 items self.assertEqual(2, self.pq.qsize()) # Pop the item - popped_item = self.pq.pop() - self.assertEqual(first_item.priority, popped_item.priority) + popped_items, _ = self.pq.pop() + self.assertEqual(first_item.priority, popped_items[0].priority) def test_is_item_on_queue(self): """When checking if an item is on the queue, it should return True if the item is on the queue, and False if it isn't. """ # Add an item to the queue - item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) self.pq.push(item) # Check if the item is on the queue @@ -514,7 +514,7 @@ def test_is_item_not_on_queue(self): the item is on the queue, and False if it isn't. 
""" # Add an item to the queue - item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1) + item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1) # Check if the item is on the queue self.assertFalse(self.pq.is_item_on_queue(item)) diff --git a/mula/tests/utils/functions.py b/mula/tests/utils/functions.py index 8eeeb875d2d..506cbf0bf52 100644 --- a/mula/tests/utils/functions.py +++ b/mula/tests/utils/functions.py @@ -34,23 +34,11 @@ def create_test_model() -> TestModel: return TestModel(id=uuid.uuid4().hex, name=uuid.uuid4().hex) -def create_task_in(priority: int, data: TestModel | None = None) -> str: +def create_task_in(priority: int, organisation: str, data: TestModel | None = None) -> str: if data is None: data = TestModel(id=uuid.uuid4().hex, name=uuid.uuid4().hex) - return json.dumps({"priority": priority, "data": data.model_dump()}) - - -def create_item(scheduler_id: str, priority: int, task: models.Task | None = None) -> models.Task: - if task is None: - task = create_task(scheduler_id) - - item = models.Task(**task.model_dump()) - - if priority is not None: - item.priority = priority - - return item + return json.dumps({"priority": priority, "organisation": organisation, "data": data.model_dump()}) def create_schedule(scheduler_id: str, data: Any | None = None) -> models.Schedule: @@ -58,11 +46,18 @@ def create_schedule(scheduler_id: str, data: Any | None = None) -> models.Schedu return models.Schedule(scheduler_id=scheduler_id, hash=item.hash, data=item.model_dump()) -def create_task(scheduler_id: str, data: Any | None = None) -> models.Task: +def create_task(scheduler_id: str, organisation: str, priority: int = 0, data: Any | None = None) -> models.Task: if data is None: data = TestModel(id=uuid.uuid4().hex, name=uuid.uuid4().hex) - return models.Task(scheduler_id=scheduler_id, type=TestModel.type, hash=data.hash, data=data.model_dump()) + return models.Task( + scheduler_id=scheduler_id, + 
organisation=organisation, + priority=priority, + type=TestModel.type, + hash=data.hash, + data=data.model_dump(), + ) def create_boefje() -> models.Boefje: diff --git a/octopoes/octopoes/core/service.py b/octopoes/octopoes/core/service.py index ce019172c25..1793bfd0269 100644 --- a/octopoes/octopoes/core/service.py +++ b/octopoes/octopoes/core/service.py @@ -264,21 +264,19 @@ def recalculate_scan_profiles(self, valid_time: datetime) -> None: # fetch all scan profiles all_scan_profiles = self.scan_profile_repository.list_scan_profiles(None, valid_time=valid_time) - # cache all declared - all_declared_scan_profiles = { - scan_profile for scan_profile in all_scan_profiles if isinstance(scan_profile, DeclaredScanProfile) - } - # cache all inherited - inherited_scan_profiles = { - scan_profile.reference: scan_profile - for scan_profile in all_scan_profiles - if isinstance(scan_profile, InheritedScanProfile) - } - - # track all scan level assignments - assigned_scan_levels: dict[Reference, ScanLevel] = { - scan_profile.reference: scan_profile.level for scan_profile in all_declared_scan_profiles - } + all_declared_scan_profiles: set[DeclaredScanProfile] = set() + inherited_scan_profiles: dict[Reference, InheritedScanProfile] = {} + assigned_scan_levels: dict[Reference, ScanLevel] = {} + source_scan_profile_references: set[Reference] = set() + + # fill profile caches + for scan_profile in all_scan_profiles: + if isinstance(scan_profile, DeclaredScanProfile): + all_declared_scan_profiles.add(scan_profile) + assigned_scan_levels[scan_profile.reference] = scan_profile.level + source_scan_profile_references.add(scan_profile.reference) + elif isinstance(scan_profile, InheritedScanProfile): + inherited_scan_profiles[scan_profile.reference] = scan_profile for current_level in range(4, 0, -1): # start point: all scan profiles with current level + all higher scan levels @@ -331,7 +329,6 @@ def recalculate_scan_profiles(self, valid_time: datetime) -> None: # Save all assigned scan 
levels update_count = 0 - source_scan_profile_references = {sp.reference for sp in all_declared_scan_profiles} for reference, scan_level in assigned_scan_levels.items(): # Skip source scan profiles if reference in source_scan_profile_references: diff --git a/octopoes/octopoes/events/manager.py b/octopoes/octopoes/events/manager.py index dc935b4c8a4..b5600da071e 100644 --- a/octopoes/octopoes/events/manager.py +++ b/octopoes/octopoes/events/manager.py @@ -26,6 +26,7 @@ class ScanProfileMutation(BaseModel): operation: OperationType primary_key: str value: AbstractOOI | None = None + client_id: str thread_local = threading.local() @@ -126,7 +127,9 @@ def _publish(self, event: DBEvent) -> None: ) # publish mutations - mutation = ScanProfileMutation(operation=event.operation_type, primary_key=event.primary_key) + mutation = ScanProfileMutation( + operation=event.operation_type, primary_key=event.primary_key, client_id=event.client + ) if event.operation_type != OperationType.DELETE: mutation.value = AbstractOOI( @@ -137,7 +140,7 @@ def _publish(self, event: DBEvent) -> None: self.channel.basic_publish( "", - f"{event.client}__scan_profile_mutations", + "scan_profile_mutations", mutation.model_dump_json().encode(), properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) @@ -165,4 +168,4 @@ def _try_connect(self): def _connect(self) -> None: self.channel = self.channel_factory(self.queue_uri) self.channel.queue_declare(queue=f"{self.client}__scan_profile_increments", durable=True) - self.channel.queue_declare(queue=f"{self.client}__scan_profile_mutations", durable=True) + self.channel.queue_declare(queue="scan_profile_mutations", durable=True) diff --git a/octopoes/octopoes/repositories/scan_profile_repository.py b/octopoes/octopoes/repositories/scan_profile_repository.py index c27954e0009..b8af824bc15 100644 --- a/octopoes/octopoes/repositories/scan_profile_repository.py +++ b/octopoes/octopoes/repositories/scan_profile_repository.py @@ -15,6 +15,8 
@@ from octopoes.xtdb.client import XTDBSession from octopoes.xtdb.query_builder import generate_pull_query +scan_profile_adapter = TypeAdapter(ScanProfile) + class ScanProfileRepository(Repository): def __init__(self, event_manager: EventManager): @@ -62,7 +64,7 @@ def serialize(cls, scan_profile: ScanProfile) -> dict[str, Any]: @classmethod def deserialize(cls, data: dict[str, Any]) -> ScanProfileBase: - return TypeAdapter(ScanProfile).validate_python(data) + return scan_profile_adapter.validate_python(data) def list_scan_profiles(self, scan_profile_type: str | None, valid_time: datetime) -> list[ScanProfileBase]: where = {"type": self.object_type} diff --git a/octopoes/tests/robot/robot.resource b/octopoes/tests/robot/robot.resource index 44c41a9fe18..3109ffc5c53 100644 --- a/octopoes/tests/robot/robot.resource +++ b/octopoes/tests/robot/robot.resource @@ -18,7 +18,7 @@ ${RABBIT_MQ_API_URI} http://ci_user:ci_pass@localhost:29003/api ${VALID_TIME} 2022-01-01T00:00:00+00:00 ${SCAN_PROFILE_INCREMENT_QUEUE} _dev__scan_profile_increments -${SCAN_PROFILE_MUTATION_QUEUE} _dev__scan_profile_mutations +${SCAN_PROFILE_MUTATION_QUEUE} scan_profile_mutations *** Keywords *** @@ -73,7 +73,7 @@ Wait For XTDB Synced Get All Document Ids ${query} Set Variable {:query {:find [?e] :where [[?e :xt/id]]}} - ${headers} Create Dictionary Content-Type=application/edn Accept=application/json + ${headers} Create Dictionary Content-Type=application/end Accept=application/json ${response} Post ${XTDB_URI}/query data=${query} headers=${headers} ${rows} Set Variable ${response.json()} ${ids} Create List diff --git a/octopoes/tests/test_event_manager.py b/octopoes/tests/test_event_manager.py index 3d99c4f62c7..b3c5f938eff 100644 --- a/octopoes/tests/test_event_manager.py +++ b/octopoes/tests/test_event_manager.py @@ -77,10 +77,10 @@ def test_event_manager_create_empty_scan_profile(mocker, empty_scan_profile): channel_mock.basic_publish.assert_called_once_with( "", - 
"test__scan_profile_mutations", + "scan_profile_mutations", b'{"operation":"create","primary_key":"test|reference","value":{"primary_key":"test|reference",' b'"object_type":"test","scan_profile":{"scan_profile_type":"empty","reference":"test|reference",' - b'"level":0,"user_id":null}}}', + b'"level":0,"user_id":null}},"client_id":"test"}', properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) @@ -134,12 +134,12 @@ def test_event_manager_create_declared_scan_profile(mocker, declared_scan_profil ), mocker.call( "", - "test__scan_profile_mutations", + "scan_profile_mutations", b'{"operation": "create", "primary_key": "test|reference", ' b'"value": {"primary_key": "test|reference", ' b'"object_type": "test", ' b'"scan_profile": {"scan_profile_type": "declared", "reference": "test|reference",\ - "level": 2, "user_id": None}}}', + "level": 2, "user_id": None}}, "client_id": "test"}', properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ), ) @@ -179,7 +179,7 @@ def test_event_manager_delete_empty_scan_profile(mocker, empty_scan_profile): channel_mock.basic_publish.assert_called_once_with( "", - "test__scan_profile_mutations", - b'{"operation":"delete","primary_key":"test|reference","value":null}', + "scan_profile_mutations", + b'{"operation":"delete","primary_key":"test|reference","value":null,"client_id":"test"}', properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) diff --git a/rocky/assets/css/themes/soft/manon/collapsing-element.scss b/rocky/assets/css/themes/soft/manon/collapsing-element.scss index ecf2afda0d2..4a11e20bd62 100644 --- a/rocky/assets/css/themes/soft/manon/collapsing-element.scss +++ b/rocky/assets/css/themes/soft/manon/collapsing-element.scss @@ -9,6 +9,29 @@ body header nav.collapsible { .collapsing-element { position: relative; + form { + &.inline { + width: 100%; + } + + button { + background: transparent; + border-radius: 0; + border: 0; + font-weight: normal; + width: 100%; 
+ max-width: 100%; + height: var(--header-navigation-button-min-height); + padding-top: var(--collapsing-element-list-item-link-padding-top); + padding-right: var(--collapsing-element-list-item-link-padding-right); + padding-bottom: var(--collapsing-element-list-item-link-padding-bottom); + padding-left: var(--collapsing-element-list-item-link-padding-left); + color: var(--collapsing-element-list-item-link-text-color); + justify-content: flex-start; + line-height: var(--header-navigation-link-line-height); + } + } + .collapsible { position: static; } diff --git a/rocky/onboarding/views.py b/rocky/onboarding/views.py index cfc82434c2c..4aadb196b45 100644 --- a/rocky/onboarding/views.py +++ b/rocky/onboarding/views.py @@ -39,7 +39,6 @@ ) from rocky.exceptions import RockyError from rocky.messaging import clearance_level_warning_dns_report -from rocky.scheduler import scheduler_client from rocky.views.indemnification_add import IndemnificationAddView from rocky.views.ooi_view import SingleOOIMixin, SingleOOITreeMixin from rocky.views.scheduler import SchedulerView @@ -327,17 +326,12 @@ class OnboardingSetupScanOOIDetailView( permission_required = "tools.can_scan_organization" task_type = "report" - @staticmethod - def is_scheduler_enabled(organization: Organization) -> bool: - scheduler_id = f"report-{organization.code}" - return scheduler_client(organization.code).is_scheduler_ready(scheduler_id) - def post(self, request, *args, **kwargs): report_name_format = self.get_initial_report_name() parent_report_type = self.get_parent_report_type() report_recipe = self.create_report_recipe(report_name_format, parent_report_type, None) - if self.is_scheduler_enabled(self.organization): - self.create_report_schedule(report_recipe, datetime.now(timezone.utc) + timedelta(minutes=2)) + + self.create_report_schedule(report_recipe, datetime.now(timezone.utc) + timedelta(minutes=2)) return redirect( reverse("step_report", kwargs={"organization_code": self.organization.code}) diff 
--git a/rocky/reports/runner/worker.py b/rocky/reports/runner/worker.py index ecdff5d3261..ffcf880c369 100644 --- a/rocky/reports/runner/worker.py +++ b/rocky/reports/runner/worker.py @@ -8,7 +8,6 @@ import structlog from django.conf import settings from httpx import HTTPError -from pydantic import ValidationError from reports.runner.models import ReportRunner, WorkerManager from reports.runner.report_runner import LocalReportRunner @@ -76,57 +75,33 @@ def _fill_queue(self, task_queue: Queue): return try: - queues = self.scheduler.get_queues() + p_item = self.scheduler.pop_item("report") except HTTPError: - # Scheduler is having issues, so make note of it and try again - logger.exception("Getting the queues from the scheduler failed") - time.sleep(self.poll_interval) # But not immediately + logger.exception("Popping task from scheduler failed") + time.sleep(self.poll_interval) return - # We do not target a specific queue since we start one runtime for all organisations - # and queue ids contain the organisation_id - queues = [q for q in queues if q.id.startswith("report") and q.size > 0] - - logger.debug("Found queues: %s", [queue.id for queue in queues]) - - all_queues_empty = True - - for queue in queues: - logger.debug("Popping from queue %s", queue.id) - - try: - p_item = self.scheduler.pop_item(queue.id) - except (HTTPError, ValidationError): - logger.error("Popping task from scheduler failed") - time.sleep(self.poll_interval) - continue - - if not p_item: - logger.debug("Queue %s empty", queue.id) - continue + if not p_item: + logger.debug("Queue empty, sleeping %f seconds", self.poll_interval) + time.sleep(self.poll_interval) + return - all_queues_empty = False + logger.info("Handling task[%s]", p_item.id) - logger.info("Handling task[%s]", p_item.id) + try: + task_queue.put(p_item) + logger.info("Dispatched task[%s]", p_item.id) + except: # noqa + logger.error("Exiting worker...") + logger.info("Patching scheduler task[id=%s] to %s", p_item.id, 
TaskStatus.FAILED.value) try: - task_queue.put(p_item) - logger.info("Dispatched task[%s]", p_item.id) - except: # noqa - logger.error("Exiting worker...") - logger.info("Patching scheduler task[id=%s] to %s", p_item.id, TaskStatus.FAILED.value) - - try: - self.scheduler.patch_task(p_item.id, TaskStatus.FAILED) - logger.info("Set task status to %s in the scheduler for task[id=%s]", TaskStatus.FAILED, p_item.id) - except HTTPError: - logger.error("Could not patch scheduler task to %s", TaskStatus.FAILED.value) - - raise + self.scheduler.patch_task(p_item.id, TaskStatus.FAILED) + logger.info("Set task status to %s in the scheduler for task[id=%s]", TaskStatus.FAILED, p_item.id) + except HTTPError: + logger.error("Could not patch scheduler task to %s", TaskStatus.FAILED.value) - if all_queues_empty: - logger.debug("All queues empty, sleeping %f seconds", self.poll_interval) - time.sleep(self.poll_interval) + raise def _check_workers(self) -> None: new_workers = [] diff --git a/rocky/reports/templates/report_overview/report_history_table.html b/rocky/reports/templates/report_overview/report_history_table.html index c3d4e94f680..a8f6674ef54 100644 --- a/rocky/reports/templates/report_overview/report_history_table.html +++ b/rocky/reports/templates/report_overview/report_history_table.html @@ -87,24 +87,24 @@ {% for report in reports %} - {% if report.total_asset_reports >= 1 %} - - - - - - {{ report.report.name }} - - - + + + {% if report.total_objects == 1 %} + {{ report.report.input_oois.0.input_ooi|human_readable }} + {% else %} + {{ report.total_objects }} + {% endif %} + + {{ report.report.observed_at|date }} + {{ report.report.date_generated }} + + {% if report.total_asset_reports >= 1 %} - - - {% endif %} - - - - -
{% translate "Report types" %}
-

- {% blocktranslate count counter=report.total_asset_reports %} + {% endif %} + + + {% if report.total_asset_reports >= 1 %} +

+ - + + + {% for asset_report in report.asset_reports %} + + + + + + {% endfor %} + +
{% translate "Asset reports details:" %}
+ + +
{% translate "Report types" %}
+

+ {% blocktranslate count counter=report.total_asset_reports %} This report consists of {{counter}} asset report with the following report type and object: {% plural %} This report consists of {{counter}} asset reports with the following report types and objects: {% endblocktranslate %} -

- - - - - - - - {% for report_type, total_objects in report.report_type_summary.items %} +

+ - - + + - {% endfor %} - -
{% translate "Asset reports details:" %}
{% translate "Report type" %}{% translate "Objects" %}
-
    -
  • - {{ report_type|get_report_type_name }} -
  • -
-
{{ total_objects }}{% translate "Report type" %}{% translate "Objects" %}
- -
- {% translate "Asset reports" %} - ({{ report.asset_reports|length }}/{{ report.total_asset_reports }}) -
- - - - - - - - - {% for asset_report in report.asset_reports %} + + + {% for report_type, total_objects in report.report_type_summary.items %} + + + + + {% endfor %} + +
{% translate "Report type" %}{% translate "Object" %}{% translate "Report name" %}
+
    +
  • + {{ report_type|get_report_type_name }} +
  • +
+
{{ total_objects }}
+ +
+ {% translate "Asset reports" %} + ({{ report.asset_reports|length }}/{{ report.total_asset_reports }}) +
+ - - - + + + - {% endfor %} - -
-
    -
  • - {{ asset_report.report_type|get_report_type_name }} -
  • -
-
- {{ asset_report.input_ooi|human_readable }} - - {{ asset_report.name }} - {% translate "Report type" %}{% translate "Object" %}{% translate "Report name" %}
-
- {% if report.total_asset_reports > 5 %} - {% translate "View all asset reports" %} - {% endif %} -
-
+
    +
  • + {{ asset_report.report_type|get_report_type_name }} +
  • +
+
+ {{ asset_report.input_ooi|human_readable }} + + {{ asset_report.name }} +
+
+ {% if report.total_asset_reports > 5 %} + {% translate "View all asset reports" %} + {% endif %} +
+ + + {% endif %} {% endfor %} diff --git a/rocky/reports/viewsets.py b/rocky/reports/viewsets.py index 6d055cb9b43..e75fc302927 100644 --- a/rocky/reports/viewsets.py +++ b/rocky/reports/viewsets.py @@ -148,7 +148,8 @@ def perform_create(self, serializer: ReportRecipeSerializer) -> None: deadline_at = datetime.now(timezone.utc).date().isoformat() schedule_request = ScheduleRequest( - scheduler_id=f"report-{self.organization.code}", + scheduler_id="report", + organisation=self.organization.code, data=report_task, schedule=report_recipe.cron_expression, deadline_at=deadline_at, diff --git a/rocky/rocky/locale/de/LC_MESSAGES/django.po b/rocky/rocky/locale/de/LC_MESSAGES/django.po index 23d11cb032c..68ec54d186b 100644 --- a/rocky/rocky/locale/de/LC_MESSAGES/django.po +++ b/rocky/rocky/locale/de/LC_MESSAGES/django.po @@ -1283,8 +1283,12 @@ msgstr "" msgid "Age" msgstr "" -#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py -msgid "Scan frequency" +#: katalogus/templates/plugin_container_image.html +msgid "Scan interval" +msgstr "" + +#: katalogus/templates/plugin_container_image.html +msgid "Run on" msgstr "" #: katalogus/templates/plugin_container_image.html @@ -4766,10 +4770,32 @@ msgstr "" msgid "Output mime types" msgstr "" +#: tools/forms/boefje.py +msgid "Scan type" +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval amount" +msgstr "" + #: tools/forms/boefje.py msgid "" -"Specify the scanning frequency for this Boefje in minutes. The default is 24 " -"hours. For example: 5 minutes will let the boefje scan every 5 minutes." +"Specify the scanning interval for this Boefje. The default is 24 hours. For " +"example: 5 minutes will let the Boefje scan every 5 minutes." +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval frequency" +msgstr "" + +#: tools/forms/boefje.py +msgid "Object creation/change" +msgstr "" + +#: tools/forms/boefje.py +msgid "" +"Choose weather a the Boefje should run after creating and/or changing an " +"object. 
" msgstr "" #: tools/forms/finding_type.py @@ -5111,6 +5137,12 @@ msgid "" "

" msgstr "" +#: tools/forms/settings.py +msgid "" +"Choose when this Boefje will scan objects. It can run on a given interval or " +"it can run every time an object has been created or changed. " +msgstr "" + #: tools/forms/settings.py msgid "Depth of the tree." msgstr "" diff --git a/rocky/rocky/locale/en@pirate/LC_MESSAGES/django.po b/rocky/rocky/locale/en@pirate/LC_MESSAGES/django.po index 2a509743b8e..549eb22ee84 100644 --- a/rocky/rocky/locale/en@pirate/LC_MESSAGES/django.po +++ b/rocky/rocky/locale/en@pirate/LC_MESSAGES/django.po @@ -1231,8 +1231,12 @@ msgstr "" msgid "Age" msgstr "" -#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py -msgid "Scan frequency" +#: katalogus/templates/plugin_container_image.html +msgid "Scan interval" +msgstr "" + +#: katalogus/templates/plugin_container_image.html +msgid "Run on" msgstr "" #: katalogus/templates/plugin_container_image.html @@ -4584,10 +4588,32 @@ msgstr "" msgid "Output mime types" msgstr "" +#: tools/forms/boefje.py +msgid "Scan type" +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval amount" +msgstr "" + #: tools/forms/boefje.py msgid "" -"Specify the scanning frequency for this Boefje in minutes. The default is 24 " -"hours. For example: 5 minutes will let the boefje scan every 5 minutes." +"Specify the scanning interval for this Boefje. The default is 24 hours. For " +"example: 5 minutes will let the Boefje scan every 5 minutes." +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval frequency" +msgstr "" + +#: tools/forms/boefje.py +msgid "Object creation/change" +msgstr "" + +#: tools/forms/boefje.py +msgid "" +"Choose weather a the Boefje should run after creating and/or changing an " +"object. " msgstr "" #: tools/forms/finding_type.py @@ -4929,6 +4955,12 @@ msgid "" "

" msgstr "" +#: tools/forms/settings.py +msgid "" +"Choose when this Boefje will scan objects. It can run on a given interval or " +"it can run every time an object has been created or changed. " +msgstr "" + #: tools/forms/settings.py msgid "Depth of the tree." msgstr "" diff --git a/rocky/rocky/locale/fr/LC_MESSAGES/django.po b/rocky/rocky/locale/fr/LC_MESSAGES/django.po index 5e49db00fb3..39b39ae1cfd 100644 --- a/rocky/rocky/locale/fr/LC_MESSAGES/django.po +++ b/rocky/rocky/locale/fr/LC_MESSAGES/django.po @@ -1228,8 +1228,12 @@ msgstr "" msgid "Age" msgstr "" -#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py -msgid "Scan frequency" +#: katalogus/templates/plugin_container_image.html +msgid "Scan interval" +msgstr "" + +#: katalogus/templates/plugin_container_image.html +msgid "Run on" msgstr "" #: katalogus/templates/plugin_container_image.html @@ -4581,10 +4585,32 @@ msgstr "" msgid "Output mime types" msgstr "" +#: tools/forms/boefje.py +msgid "Scan type" +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval amount" +msgstr "" + #: tools/forms/boefje.py msgid "" -"Specify the scanning frequency for this Boefje in minutes. The default is 24 " -"hours. For example: 5 minutes will let the boefje scan every 5 minutes." +"Specify the scanning interval for this Boefje. The default is 24 hours. For " +"example: 5 minutes will let the Boefje scan every 5 minutes." +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval frequency" +msgstr "" + +#: tools/forms/boefje.py +msgid "Object creation/change" +msgstr "" + +#: tools/forms/boefje.py +msgid "" +"Choose weather a the Boefje should run after creating and/or changing an " +"object. " msgstr "" #: tools/forms/finding_type.py @@ -4926,6 +4952,12 @@ msgid "" "

" msgstr "" +#: tools/forms/settings.py +msgid "" +"Choose when this Boefje will scan objects. It can run on a given interval or " +"it can run every time an object has been created or changed. " +msgstr "" + #: tools/forms/settings.py msgid "Depth of the tree." msgstr "" diff --git a/rocky/rocky/locale/fy/LC_MESSAGES/django.po b/rocky/rocky/locale/fy/LC_MESSAGES/django.po index 2ff1f4e67c8..bbe171a9a0d 100644 --- a/rocky/rocky/locale/fy/LC_MESSAGES/django.po +++ b/rocky/rocky/locale/fy/LC_MESSAGES/django.po @@ -1345,9 +1345,13 @@ msgstr "Status" msgid "Age" msgstr "Leeftiid" -#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py -msgid "Scan frequency" -msgstr "Scanfrekwinsje" +#: katalogus/templates/plugin_container_image.html +msgid "Scan interval" +msgstr "" + +#: katalogus/templates/plugin_container_image.html +msgid "Run on" +msgstr "" #: katalogus/templates/plugin_container_image.html msgid "current" @@ -4921,10 +4925,32 @@ msgstr "" msgid "Output mime types" msgstr "" +#: tools/forms/boefje.py +msgid "Scan type" +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval amount" +msgstr "" + #: tools/forms/boefje.py msgid "" -"Specify the scanning frequency for this Boefje in minutes. The default is 24 " -"hours. For example: 5 minutes will let the boefje scan every 5 minutes." +"Specify the scanning interval for this Boefje. The default is 24 hours. For " +"example: 5 minutes will let the Boefje scan every 5 minutes." +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval frequency" +msgstr "" + +#: tools/forms/boefje.py +msgid "Object creation/change" +msgstr "" + +#: tools/forms/boefje.py +msgid "" +"Choose weather a the Boefje should run after creating and/or changing an " +"object. " msgstr "" #: tools/forms/finding_type.py @@ -5266,6 +5292,12 @@ msgid "" "

" msgstr "" +#: tools/forms/settings.py +msgid "" +"Choose when this Boefje will scan objects. It can run on a given interval or " +"it can run every time an object has been created or changed. " +msgstr "" + #: tools/forms/settings.py msgid "Depth of the tree." msgstr "" @@ -7741,6 +7773,9 @@ msgstr "Raw-bestân kin net nei Bytes oplaad wurde: %s" msgid "Raw file successfully added." msgstr "Raw-bestân mei sukses tafoege." +#~ msgid "Scan frequency" +#~ msgstr "Scanfrekwinsje" + #~ msgid "Concatenated Report" #~ msgstr "Gearfoege rapport" diff --git a/rocky/rocky/locale/it/LC_MESSAGES/django.po b/rocky/rocky/locale/it/LC_MESSAGES/django.po index 5a4422c91e8..02702d4a4ef 100644 --- a/rocky/rocky/locale/it/LC_MESSAGES/django.po +++ b/rocky/rocky/locale/it/LC_MESSAGES/django.po @@ -1331,8 +1331,12 @@ msgstr "Stato" msgid "Age" msgstr "" -#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py -msgid "Scan frequency" +#: katalogus/templates/plugin_container_image.html +msgid "Scan interval" +msgstr "" + +#: katalogus/templates/plugin_container_image.html +msgid "Run on" msgstr "" #: katalogus/templates/plugin_container_image.html @@ -4918,10 +4922,32 @@ msgstr "" msgid "Output mime types" msgstr "" +#: tools/forms/boefje.py +msgid "Scan type" +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval amount" +msgstr "" + #: tools/forms/boefje.py msgid "" -"Specify the scanning frequency for this Boefje in minutes. The default is 24 " -"hours. For example: 5 minutes will let the boefje scan every 5 minutes." +"Specify the scanning interval for this Boefje. The default is 24 hours. For " +"example: 5 minutes will let the Boefje scan every 5 minutes." +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval frequency" +msgstr "" + +#: tools/forms/boefje.py +msgid "Object creation/change" +msgstr "" + +#: tools/forms/boefje.py +msgid "" +"Choose weather a the Boefje should run after creating and/or changing an " +"object. 
" msgstr "" #: tools/forms/finding_type.py @@ -5270,6 +5296,12 @@ msgid "" "

" msgstr "" +#: tools/forms/settings.py +msgid "" +"Choose when this Boefje will scan objects. It can run on a given interval or " +"it can run every time an object has been created or changed. " +msgstr "" + #: tools/forms/settings.py msgid "Depth of the tree." msgstr "Profondità dell'albero." diff --git a/rocky/rocky/locale/nl/LC_MESSAGES/django.po b/rocky/rocky/locale/nl/LC_MESSAGES/django.po index 9a853090549..bdd92a0cd37 100644 --- a/rocky/rocky/locale/nl/LC_MESSAGES/django.po +++ b/rocky/rocky/locale/nl/LC_MESSAGES/django.po @@ -1,8 +1,8 @@ # Brenno de Winter , 2023. # Darwinkel , 2023. -# jan klopper , 2023, 2024. +# jan klopper , 2023, 2024, 2025. # LibreTranslate , 2023, 2024. -# Weblate Translation Memory , 2023, 2024. +# Weblate Translation Memory , 2023, 2024, 2025. # 跨性别 , 2023. # Madelon Dohmen , 2023, 2024, 2025. # PAUL MICHIEL VAN DER BLONK , 2024. @@ -15,8 +15,9 @@ msgstr "" "Project-Id-Version: \n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2025-02-06 13:51+0000\n" -"PO-Revision-Date: 2025-02-06 18:02+0000\n" -"Last-Translator: Madelon Dohmen \n" +"PO-Revision-Date: 2025-02-11 13:01+0000\n" +"Last-Translator: Weblate Translation Memory \n" "Language-Team: Dutch \n" "Language: nl\n" @@ -678,6 +679,7 @@ msgstr "" #: katalogus/client.py msgid "An HTTP error occurred. Check logs for more info." msgstr "" +"Er is een HTTP-fout opgetreden. Controleer de logboeken voor meer informatie." #: katalogus/client.py msgid "Boefje with this name already exists." 
@@ -1353,9 +1355,13 @@ msgstr "Status" msgid "Age" msgstr "Leeftijd" -#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py -msgid "Scan frequency" -msgstr "Scanfrequentie" +#: katalogus/templates/plugin_container_image.html +msgid "Scan interval" +msgstr "" + +#: katalogus/templates/plugin_container_image.html +msgid "Run on" +msgstr "" #: katalogus/templates/plugin_container_image.html msgid "current" @@ -1562,7 +1568,7 @@ msgstr "Instellingen configureren" #: katalogus/templates/plugin_settings_list.html msgid "Overview of settings" -msgstr "Instellingenoverzicht" +msgstr "Overzicht van instellingen" #: katalogus/templates/plugin_settings_list.html msgid "Variable" @@ -2817,7 +2823,7 @@ msgstr "Herhaling" #: reports/forms.py msgid "No recurrence, just once" -msgstr "" +msgstr "Geen herhaling, maar één keer" #: reports/forms.py msgid "Daily" @@ -3161,7 +3167,7 @@ msgstr "Kwetsbaarheden" #: reports/report_types/aggregate_organisation_report/report.html #: reports/report_types/aggregate_organisation_report/report_design.html msgid "Vulnerabilities found are grouped per system." -msgstr "Gevonden kwetsbaarheden zijn per systeem gegroepeerd." +msgstr "Gevonden kwetsbaarheden zijn gegroepeerd per systeem." 
#: reports/report_types/aggregate_organisation_report/report.html #: reports/report_types/multi_organization_report/vulnerabilities.html @@ -3191,7 +3197,7 @@ msgstr "kwetsbaarheden op dit systeem" #: reports/report_types/aggregate_organisation_report/summary.html msgid "Critical Vulnerabilities" -msgstr "Kritische kwetsbaarheden" +msgstr "Kritieke kwetsbaarheden" #: reports/report_types/aggregate_organisation_report/summary.html msgid "IPs scanned" @@ -3486,7 +3492,7 @@ msgstr "Nee" #: reports/report_types/dns_report/report.html #: reports/report_types/findings_report/report.html msgid "Other findings found" -msgstr "Andere bevindingen gevonden" +msgstr "Andere gevonden bevindingen" #: reports/report_types/dns_report/report.html #: rocky/templates/findings/finding_list.html @@ -3718,7 +3724,7 @@ msgstr "Rapport downloaden" #: reports/report_types/multi_organization_report/introduction.html msgid "This is the OpenKAT" -msgstr "Dit is de OpenKAT" +msgstr "Dit is het OpenKAT" #: reports/report_types/multi_organization_report/introduction.html msgid "Created with date from: " @@ -3811,7 +3817,7 @@ msgstr "Open poorten" #: reports/report_types/multi_organization_report/open_ports.html msgid "Occurrences (IP addresses)" -msgstr "Aantal (IP-adressen)" +msgstr "Voorkomingen (IP-adressen)" #: reports/report_types/multi_organization_report/open_ports.html #: reports/templates/summary/service_health.html @@ -3952,7 +3958,7 @@ msgstr "Open-poortenrapport" #: reports/report_types/open_ports_report/report.py msgid "Find open ports of IP addresses" -msgstr "Zoek open poorten van IP-adressen" +msgstr "Zoek open poorten op IP-adressen" #: reports/report_types/rpki_report/report.html msgid "" @@ -3985,7 +3991,7 @@ msgstr "RPKI-record bestaat niet." #: reports/report_types/rpki_report/report.html #: reports/report_types/safe_connections_report/report.html msgid "No IPs have been found on this system." -msgstr "Er zijn geen IP’s op dit systeem gevonden." 
+msgstr "Er zijn geen IP’s voor dit systeem gevonden." #: reports/report_types/rpki_report/report.py msgid "RPKI Report" @@ -4193,7 +4199,7 @@ msgstr "" #: reports/report_types/web_system_report/report.html msgid "Web system compliance" -msgstr "Compliance websysteem" +msgstr "Compliance webservers" #: reports/report_types/web_system_report/report.html msgid "CSP Present" @@ -4233,7 +4239,7 @@ msgstr "Certificaat verloopt niet binnenkort" #: reports/report_types/web_system_report/report.html msgid "No webservers have been found on this system." -msgstr "Er zijn geen webservers op dit systeem gevonden." +msgstr "Er zijn geen webservers gevonden op dit systeem." #: reports/report_types/web_system_report/report.py msgid "Web System Report" @@ -4246,7 +4252,7 @@ msgstr "" #: reports/templates/partials/export_report_settings.html msgid "Report schedule" -msgstr "Rapportschema" +msgstr "Rapport-agenda" #: reports/templates/partials/export_report_settings.html msgid "" @@ -4310,7 +4316,7 @@ msgstr "Overzicht" #: reports/templates/partials/plugin_overview_table.html msgid "Plugin overview table" -msgstr "Plug-inoverzichtstabel" +msgstr "Plugin-overzichtstabel" #: reports/templates/partials/plugin_overview_table.html #: reports/templates/partials/report_setup_scan.html @@ -4443,7 +4449,7 @@ msgstr "" #: reports/templates/partials/report_ooi_list.html msgid "Continue with live set" -msgstr "Doorgaan met liveset" +msgstr "Doorgaan met live set" #: reports/templates/partials/report_ooi_list.html #: reports/templates/summary/report_asset_overview.html @@ -4537,8 +4543,7 @@ msgid "" "that don't have their requirements met will be skipped." msgstr "" "Bepaalde plug-ins zijn verplicht, omdat ze cruciaal zijn voor een " -"rapporttype. Rapporten die de vereisten niet hebben, zullen worden " -"overgeslagen." +"rapporttype. Rapporten die niet voldoen, zullen worden overgeslagen." #: reports/templates/partials/report_setup_scan.html msgid "Warning! 
Before you proceed read the following points:" @@ -5167,13 +5172,33 @@ msgstr "Objecttype invoeren" msgid "Output mime types" msgstr "Uitvoer mimetypes" +#: tools/forms/boefje.py +msgid "Scan type" +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval amount" +msgstr "" + #: tools/forms/boefje.py msgid "" -"Specify the scanning frequency for this Boefje in minutes. The default is 24 " -"hours. For example: 5 minutes will let the boefje scan every 5 minutes." +"Specify the scanning interval for this Boefje. The default is 24 hours. For " +"example: 5 minutes will let the Boefje scan every 5 minutes." +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval frequency" +msgstr "" + +#: tools/forms/boefje.py +msgid "Object creation/change" +msgstr "" + +#: tools/forms/boefje.py +msgid "" +"Choose weather a the Boefje should run after creating and/or changing an " +"object. " msgstr "" -"Bepaal de scannerfrequentie voor dit Boefje in minuten. De standaard is 24 " -"uur. Bijvoorbeeld: 5 minuten laat het boefje om de 5 minuten scannen." #: tools/forms/finding_type.py msgid "KAT-ID" @@ -5544,6 +5569,12 @@ msgstr "" "manual/usermanual.html#scan-levels-clearance-indemnities'> documentatie." "

" +#: tools/forms/settings.py +msgid "" +"Choose when this Boefje will scan objects. It can run on a given interval or " +"it can run every time an object has been created or changed. " +msgstr "" + #: tools/forms/settings.py msgid "Depth of the tree." msgstr "Diepte van de boom." @@ -8231,6 +8262,16 @@ msgstr "Raw-bestand kan niet naar Bytes worden geüpload: %s" msgid "Raw file successfully added." msgstr "Raw-bestand met succes toegevoegd." +#~ msgid "Scan frequency" +#~ msgstr "Scanfrequentie" + +#~ msgid "" +#~ "Specify the scanning frequency for this Boefje in minutes. The default is " +#~ "24 hours. For example: 5 minutes will let the boefje scan every 5 minutes." +#~ msgstr "" +#~ "Bepaal de scannerfrequentie voor dit Boefje in minuten. De standaard is " +#~ "24 uur. Bijvoorbeeld: 5 minuten laat het boefje om de 5 minuten scannen." + #, python-format #~ msgid "" #~ "\n" diff --git a/rocky/rocky/locale/pap/LC_MESSAGES/django.po b/rocky/rocky/locale/pap/LC_MESSAGES/django.po index 1c8a306ea9f..3a0d6d66cc0 100644 --- a/rocky/rocky/locale/pap/LC_MESSAGES/django.po +++ b/rocky/rocky/locale/pap/LC_MESSAGES/django.po @@ -1304,8 +1304,12 @@ msgstr "Estádo" msgid "Age" msgstr "" -#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py -msgid "Scan frequency" +#: katalogus/templates/plugin_container_image.html +msgid "Scan interval" +msgstr "" + +#: katalogus/templates/plugin_container_image.html +msgid "Run on" msgstr "" #: katalogus/templates/plugin_container_image.html @@ -4860,10 +4864,32 @@ msgstr "" msgid "Output mime types" msgstr "" +#: tools/forms/boefje.py +msgid "Scan type" +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval amount" +msgstr "" + #: tools/forms/boefje.py msgid "" -"Specify the scanning frequency for this Boefje in minutes. The default is 24 " -"hours. For example: 5 minutes will let the boefje scan every 5 minutes." +"Specify the scanning interval for this Boefje. The default is 24 hours. 
For " +"example: 5 minutes will let the Boefje scan every 5 minutes." +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval frequency" +msgstr "" + +#: tools/forms/boefje.py +msgid "Object creation/change" +msgstr "" + +#: tools/forms/boefje.py +msgid "" +"Choose weather a the Boefje should run after creating and/or changing an " +"object. " msgstr "" #: tools/forms/finding_type.py @@ -5210,6 +5236,12 @@ msgid "" "

" msgstr "" +#: tools/forms/settings.py +msgid "" +"Choose when this Boefje will scan objects. It can run on a given interval or " +"it can run every time an object has been created or changed. " +msgstr "" + #: tools/forms/settings.py msgid "Depth of the tree." msgstr "Profundidát di mapa" diff --git a/rocky/rocky/locale/ta/LC_MESSAGES/django.po b/rocky/rocky/locale/ta/LC_MESSAGES/django.po index c8379094e31..1cde1276c18 100644 --- a/rocky/rocky/locale/ta/LC_MESSAGES/django.po +++ b/rocky/rocky/locale/ta/LC_MESSAGES/django.po @@ -1326,9 +1326,13 @@ msgstr "நிலை" msgid "Age" msgstr "அகவை" -#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py -msgid "Scan frequency" -msgstr "அதிர்வெண் ச்கேன்" +#: katalogus/templates/plugin_container_image.html +msgid "Scan interval" +msgstr "" + +#: katalogus/templates/plugin_container_image.html +msgid "Run on" +msgstr "" #: katalogus/templates/plugin_container_image.html msgid "current" @@ -5144,14 +5148,33 @@ msgstr "உள்ளீட்டு பொருள் வகை" msgid "Output mime types" msgstr "வெளியீட்டு மைம் வகைகள்" +#: tools/forms/boefje.py +msgid "Scan type" +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval amount" +msgstr "" + +#: tools/forms/boefje.py +msgid "" +"Specify the scanning interval for this Boefje. The default is 24 hours. For " +"example: 5 minutes will let the Boefje scan every 5 minutes." +msgstr "" + +#: tools/forms/boefje.py +msgid "Interval frequency" +msgstr "" + +#: tools/forms/boefje.py +msgid "Object creation/change" +msgstr "" + #: tools/forms/boefje.py msgid "" -"Specify the scanning frequency for this Boefje in minutes. The default is 24 " -"hours. For example: 5 minutes will let the boefje scan every 5 minutes." +"Choose weather a the Boefje should run after creating and/or changing an " +"object. " msgstr "" -"இந்த போஃப்சேவுக்கான ச்கேனிங் அதிர்வெண்ணை நிமிடங்களில் குறிப்பிடவும். இயல்புநிலை 24 " -"மணிநேரம். உதாரணமாக: ஒவ்வொரு 5 நிமிடங்களுக்கும் 5 நிமிடங்கள் போஃப்சே ச்கேன் செய்ய " -"அனுமதிக்கும்." 
#: tools/forms/finding_type.py msgid "KAT-ID" @@ -5520,6 +5543,12 @@ msgstr "" "பற்றிய கூடுதல் தகவலுக்கு, ஆவணங்களில் காணலாம்.

" +#: tools/forms/settings.py +msgid "" +"Choose when this Boefje will scan objects. It can run on a given interval or " +"it can run every time an object has been created or changed. " +msgstr "" + #: tools/forms/settings.py msgid "Depth of the tree." msgstr "மரத்தின் ஆழம்." @@ -8166,6 +8195,17 @@ msgstr "மூல கோப்பை பைட்டுகளில் பதி msgid "Raw file successfully added." msgstr "மூல கோப்பு வெற்றிகரமாக சேர்க்கப்பட்டது." +#~ msgid "Scan frequency" +#~ msgstr "அதிர்வெண் ச்கேன்" + +#~ msgid "" +#~ "Specify the scanning frequency for this Boefje in minutes. The default is " +#~ "24 hours. For example: 5 minutes will let the boefje scan every 5 minutes." +#~ msgstr "" +#~ "இந்த போஃப்சேவுக்கான ச்கேனிங் அதிர்வெண்ணை நிமிடங்களில் குறிப்பிடவும். இயல்புநிலை 24 " +#~ "மணிநேரம். உதாரணமாக: ஒவ்வொரு 5 நிமிடங்களுக்கும் 5 நிமிடங்கள் போஃப்சே ச்கேன் செய்ய " +#~ "அனுமதிக்கும்." + #, python-format #~ msgid "" #~ "\n" diff --git a/rocky/rocky/scheduler.py b/rocky/rocky/scheduler.py index b5b387b9ecc..3353551d2ea 100644 --- a/rocky/rocky/scheduler.py +++ b/rocky/rocky/scheduler.py @@ -3,7 +3,6 @@ import collections import datetime import logging -import time import uuid from enum import Enum from functools import cached_property @@ -125,6 +124,7 @@ class Task(BaseModel): id: uuid.UUID = Field(default_factory=uuid.uuid4) scheduler_id: str schedule_id: str | None = None + organisation: str priority: int status: TaskStatus | None = TaskStatus.PENDING type: str | None = None @@ -150,6 +150,7 @@ class ScheduleRequest(BaseModel): model_config = ConfigDict(from_attributes=True) scheduler_id: str + organisation: str data: dict schedule: str | None = None deadline_at: str @@ -159,11 +160,12 @@ class ScheduleResponse(BaseModel): model_config = ConfigDict(from_attributes=True) id: uuid.UUID + scheduler_id: str + organisation: str hash: str data: dict enabled: bool schedule: str | None - tasks: list[Task] deadline_at: datetime.datetime | None created_at: datetime.datetime modified_at: datetime.datetime 
@@ -188,13 +190,6 @@ class PaginatedSchedulesResponse(BaseModel): results: list[ScheduleResponse] -class SchedulerResponse(BaseModel): - id: str - enabled: bool - priority_queue: dict[str, Any] - last_activity: str | None - - class LazyTaskList: HARD_LIMIT = 500 @@ -303,23 +298,6 @@ def post_schedule_search(self, filters: dict[str, list[dict[str, str]]]) -> Pagi except ConnectError: raise SchedulerConnectError() - def is_scheduler_ready(self, scheduler_id: str) -> bool: - """Max trials is 100 seconds""" - trials = 0 - interval = 10 # in seconds - while trials < 10: - try: - res = self._client.get(f"/schedulers/{scheduler_id}") - res.raise_for_status() - break - except HTTPStatusError as http_error: - if http_error.response.status_code == codes.NOT_FOUND: - trials += 1 - time.sleep(interval) - continue - raise SchedulerHTTPError() - return SchedulerResponse.model_validate_json(res.content).enabled - def patch_schedule(self, schedule_id: str, params: dict[str, Any]) -> None: try: response = self._client.patch(f"/schedules/{schedule_id}", json=params) @@ -329,10 +307,13 @@ def patch_schedule(self, schedule_id: str, params: dict[str, Any]) -> None: raise SchedulerHTTPError() def post_schedule(self, schedule: ScheduleRequest) -> ScheduleResponse: + logger.info("Creating schedule", schedule=schedule) try: res = self._client.post("/schedules", json=schedule.model_dump(exclude_none=True)) + logger.info(res.content) res.raise_for_status() logger.info("Schedule created", event_code=800081, schedule=schedule) + return ScheduleResponse.model_validate_json(res.content) except (ValidationError, HTTPStatusError, ConnectError): raise SchedulerValidationError(extra_message="Report schedule failed: ") @@ -367,7 +348,7 @@ def get_task_details(self, task_id: str) -> Task: def push_task(self, item: Task) -> None: try: res = self._client.post( - f"/queues/{item.scheduler_id}/push", + f"/schedulers/{item.scheduler_id}/push", content=item.model_dump_json(exclude_none=True), 
headers={"Content-Type": "application/json"}, ) @@ -389,11 +370,20 @@ def get_queues(self) -> list[Queue]: return TypeAdapter(list[Queue]).validate_json(response.content) - def pop_item(self, queue: str) -> Task | None: - response = self._client.post(f"/queues/{queue}/pop") + def pop_item(self, scheduler_id: str) -> Task | None: + response = self._client.post(f"/schedulers/{scheduler_id}/pop?limit=1") response.raise_for_status() - return TypeAdapter(Task | None).validate_json(response.content) + page = TypeAdapter(PaginatedTasksResponse | None).validate_json(response.content) + if page is None or page.count == 0 or len(page.results) == 0: + return None + + return page.results[0] + + def pop_items(self, scheduler_id: str, filters: dict[str, Any]) -> PaginatedTasksResponse | None: + response = self._client.post(f"/schedulers/{scheduler_id}/pop", json=filters) + + return TypeAdapter(PaginatedTasksResponse | None).validate_json(response.content) def patch_task(self, task_id: uuid.UUID, status: TaskStatus) -> None: response = self._client.patch(f"/tasks/{task_id}", json={"status": status.value}) @@ -402,13 +392,16 @@ def patch_task(self, task_id: uuid.UUID, status: TaskStatus) -> None: def health(self) -> ServiceHealth: return ServiceHealth.model_validate_json(self._get("/health", return_type="content")) - def _get_task_stats(self, scheduler_id: str) -> dict: + def _get_task_stats(self, scheduler_id: str, organisation_id: str | None = None) -> dict: """Return task stats for specific scheduler.""" - return self._get(f"/tasks/stats/{scheduler_id}") # type: ignore + if organisation_id is None: + return self._get(f"/tasks/stats?scheduler_id={scheduler_id}") # type: ignore + + return self._get(f"/tasks/stats?scheduler_id={scheduler_id}&organisation_id={organisation_id}") # type: ignore def get_task_stats(self, task_type: str) -> dict: """Return task stats for specific task type.""" - return self._get_task_stats(scheduler_id=f"{task_type}-{self.organization_code}") + return 
self._get_task_stats(scheduler_id=task_type, organisation_id=self.organization_code) @staticmethod def _merge_stat_dicts(dicts: list[dict]) -> dict: @@ -419,10 +412,10 @@ def _merge_stat_dicts(dicts: list[dict]) -> dict: stat_sum[timeslot].update(counts) return dict(stat_sum) - def get_combined_schedulers_stats(self, scheduler_ids: list) -> dict: + def get_combined_schedulers_stats(self, scheduler_id: str, organization_codes: list[str]) -> dict: """Return merged stats for a set of scheduler ids.""" return SchedulerClient._merge_stat_dicts( - dicts=[self._get_task_stats(scheduler_id=scheduler_id) for scheduler_id in scheduler_ids] + dicts=[self._get_task_stats(scheduler_id, org_code) for org_code in organization_codes] ) def _get(self, path: str, return_type: str = "json") -> dict | bytes: diff --git a/rocky/rocky/templates/partials/secondary-menu.html b/rocky/rocky/templates/partials/secondary-menu.html index fb356b323f4..adf0a05929f 100644 --- a/rocky/rocky/templates/partials/secondary-menu.html +++ b/rocky/rocky/templates/partials/secondary-menu.html @@ -27,7 +27,10 @@ {% endif %}
  • - {% translate "Logout" %} +
    + {% csrf_token %} + +
  • diff --git a/rocky/rocky/templates/tasks/partials/stats.html b/rocky/rocky/templates/tasks/partials/stats.html index d7ea383d151..8a0fdff0c0c 100644 --- a/rocky/rocky/templates/tasks/partials/stats.html +++ b/rocky/rocky/templates/tasks/partials/stats.html @@ -4,8 +4,8 @@

    {% translate "Task statistics - Last 24 hours" %}

    {% if not stats_error %} -
    - +
    +
    @@ -23,25 +23,25 @@

    {% translate "Task statistics - Last 24 hours" %}

    {% for timestamp, values in stats.items %} - - - - - - - diff --git a/rocky/rocky/views/scheduler.py b/rocky/rocky/views/scheduler.py index 35fccd4b0ad..9633d2fe992 100644 --- a/rocky/rocky/views/scheduler.py +++ b/rocky/rocky/views/scheduler.py @@ -55,7 +55,7 @@ class SchedulerView(OctopoesView): def setup(self, request, *args, **kwargs): super().setup(request, *args, **kwargs) self.scheduler_client = scheduler_client(self.organization.code) - self.scheduler_id = f"{self.task_type}-{self.organization.code}" + self.scheduler_id = self.task_type def get_task_filters(self) -> dict[str, Any]: return { @@ -63,7 +63,14 @@ def get_task_filters(self) -> dict[str, Any]: "task_type": self.task_type, "plugin_id": None, # plugin_id present and set at plugin detail **self.get_task_filter_form_data(), - } + } | self.get_organization_specific_tasks() + + def get_organization_specific_tasks(self) -> dict[str, dict[str, list[dict[str, str]]]]: + if self.organization.code: + return { + "filters": {"filters": [{"column": "organisation", "operator": "==", "value": self.organization.code}]} + } + return {} def get_task_filter_form_data(self) -> dict[str, Any]: form_data = self.get_task_filter_form().data.dict() @@ -119,6 +126,7 @@ def create_report_schedule(self, report_recipe: ReportRecipe, deadline_at: datet schedule_request = ScheduleRequest( scheduler_id=self.scheduler_id, + organisation=self.organization.code, data=report_task, schedule=report_recipe.cron_expression, deadline_at=deadline_at.isoformat(), @@ -218,7 +226,13 @@ def reschedule_task(self, task_id: str) -> None: new_id = uuid.uuid4() task.data.id = new_id - new_task = Task(id=new_id, scheduler_id=task.scheduler_id, priority=1, data=task.data) + new_task = Task( + id=new_id, + scheduler_id=task.scheduler_id, + organisation=self.organization.code, + priority=1, + data=task.data, + ) self.schedule_task(new_task) else: @@ -232,7 +246,9 @@ def run_normalizer(self, katalogus_normalizer: Normalizer, raw_data: RawData) -> 
normalizer=SchedulerNormalizer.model_validate(katalogus_normalizer.model_dump()), raw_data=raw_data ) - new_task = Task(priority=1, data=normalizer_task, scheduler_id=f"normalizer-{self.organization.code}") + new_task = Task( + priority=1, data=normalizer_task, scheduler_id="normalizer", organisation=self.organization.code + ) self.schedule_task(new_task) except SchedulerError as error: @@ -246,7 +262,7 @@ def run_boefje(self, katalogus_boefje: Boefje, ooi: OOI | None) -> None: organization=self.organization.code, ) - new_task = Task(priority=1, data=boefje_task, scheduler_id=f"boefje-{self.organization.code}") + new_task = Task(priority=1, data=boefje_task, scheduler_id="boefje", organisation=self.organization.code) self.schedule_task(new_task) diff --git a/rocky/rocky/views/tasks.py b/rocky/rocky/views/tasks.py index be1df45ec56..2aced6c7556 100644 --- a/rocky/rocky/views/tasks.py +++ b/rocky/rocky/views/tasks.py @@ -122,19 +122,27 @@ class AllTaskListView(SchedulerListView, PageActionsView): client = scheduler_client(None) task_filter_form = TaskFilterForm + def get_user_organizations(self) -> list[str]: + return [org.code for org in self.request.user.organizations] + + def get_all_organizations_tasks(self) -> dict[str, dict[str, list[dict[str, str | list[str]]]]]: + if not self.request.user.is_anonymous: + return { + "filters": { + "filters": [{"column": "organisation", "operator": "in", "value": self.get_user_organizations()}] + } + } + return {} + + def get_task_type(self) -> str: + return self.request.GET.get("type", self.task_type) + def get_queryset(self): - task_type = self.request.GET.get("type", self.task_type) - self.schedulers = [f"{task_type}-{o.code}" for o in self.request.user.organizations] form_data = self.task_filter_form(self.request.GET).data.dict() - kwargs = {k: v for k, v in form_data.items() if v} + kwargs = {k: v for k, v in form_data.items() if v} | self.get_all_organizations_tasks() try: - return LazyTaskList( - self.client, - 
task_type=task_type, - filters={"filters": [{"column": "scheduler_id", "operator": "in", "value": self.schedulers}]}, - **kwargs, - ) + return LazyTaskList(self.client, task_type=self.get_task_type(), **kwargs) except HTTPError as error: error_message = _(f"Fetching tasks failed: no connection with scheduler: {error}") @@ -147,7 +155,9 @@ def get_queryset(self): def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) context["task_filter_form"] = self.task_filter_form(self.request.GET) - context["stats"] = self.client.get_combined_schedulers_stats(scheduler_ids=self.schedulers) + context["stats"] = self.client.get_combined_schedulers_stats( + self.get_task_type(), self.get_user_organizations() + ) context["breadcrumbs"] = [{"url": reverse("all_task_list", kwargs={}), "text": _("All Tasks")}] return context diff --git a/rocky/tests/conftest.py b/rocky/tests/conftest.py index 128df861d9b..c57e59007a6 100644 --- a/rocky/tests/conftest.py +++ b/rocky/tests/conftest.py @@ -327,6 +327,7 @@ def task() -> Task: "hash": "19ed51514b37d42f79c5e95469956b05", "scheduler_id": "boefje-test", "schedule_id": None, + "organisation": "test", "type": "boefje", "priority": 1, "data": { @@ -1843,6 +1844,7 @@ def reports_task_list(): id=UUID("7f9d5b00-dbab-45f3-93a6-dd44cc20c359"), scheduler_id="report-_rieven", schedule_id="86032b20-f7ae-4a48-9093-87ec5a56e939", + organisation="test", priority=1738747928, status=TaskStatus.FAILED, type="report", @@ -1857,6 +1859,7 @@ def reports_task_list(): id=UUID("9e23611d-36c2-4972-82f0-077bcb1a8941"), scheduler_id="report-_rieven", schedule_id="bd821e6e-6680-4215-8557-e049deeb0175", + organisation="test 2", priority=1738684879, status=TaskStatus.COMPLETED, type="report", diff --git a/rocky/tests/onboarding/test_onboarding_organization_steps.py b/rocky/tests/onboarding/test_onboarding_organization_steps.py index d46b8e18475..54f6c382ea5 100644 --- a/rocky/tests/onboarding/test_onboarding_organization_steps.py +++ 
b/rocky/tests/onboarding/test_onboarding_organization_steps.py @@ -305,12 +305,11 @@ def test_onboarding_ooi_detail_scan( @pytest.mark.parametrize("member", ["superuser_member", "admin_member", "redteam_member", "client_member"]) def test_onboarding_ooi_detail_scan_create_report_schedule( - request, mocker, member, mock_bytes_client, rf, mock_organization_view_octopoes, url + request, mocker, member, mock_scheduler, mock_bytes_client, rf, mock_organization_view_octopoes, url ): member = request.getfixturevalue(member) mocker.patch("account.mixins.OrganizationView.get_katalogus") - mocker.patch("onboarding.views.scheduler_client") mock_organization_view_octopoes().get.return_value = url mock_bytes_client().upload_raw.return_value = "raw_id"
    {% translate "All times in UTC, blocks of 1 hour." %}
    {{ timestamp }}: + {{ values.pending }} + {{ values.queued }} + {{ values.dispatched }} + {{ values.running }} + {{ values.completed }} + {{ values.failed }} + {{ values.cancelled }}