diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 0dc569df8be..913e464abb7 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,3 +1,2 @@
-# These owners will be the default owners for everything in
-# the repo. Unless a later match takes precedence,
* @minvws/kat-managers
+.github/CODEOWNERS @minvws/irealisatie-operations
diff --git a/.github/workflows/build-rdo-package.yml b/.github/workflows/build-rdo-package.yml
index 032e7709b5e..911642089f0 100644
--- a/.github/workflows/build-rdo-package.yml
+++ b/.github/workflows/build-rdo-package.yml
@@ -203,7 +203,7 @@ jobs:
- name: Octopoes Upload whl package
uses: actions/upload-artifact@v4
with:
- name: "octopoes-${{env.RELEASE_VERSION}}"
+ name: "octopoes-${{env.RELEASE_VERSION}}_python${{ matrix.python_version }}"
path: "${{ github.workspace }}/octopoes/dist/octopoes*.whl"
- name: Octopoes Upload venv tar
diff --git a/boefjes/boefjes/app.py b/boefjes/boefjes/app.py
index af30d6319da..6a12ca463cf 100644
--- a/boefjes/boefjes/app.py
+++ b/boefjes/boefjes/app.py
@@ -85,60 +85,33 @@ def _fill_queue(self, task_queue: Queue, queue_type: WorkerManager.Queue) -> Non
time.sleep(self.settings.worker_heartbeat)
return
+ logger.debug("Popping from queue %s", queue_type.value)
+
try:
- queues = self.scheduler_client.get_queues()
- except HTTPError:
- # Scheduler is having issues, so make note of it and try again
- logger.exception("Getting the queues from the scheduler failed")
- time.sleep(self.settings.poll_interval) # But not immediately
+ p_item = self.scheduler_client.pop_item(queue_type.value)
+ except (HTTPError, ValidationError):
+ logger.exception("Popping task from scheduler failed, sleeping 10 seconds")
+ time.sleep(self.settings.worker_heartbeat)
return
- # We do not target a specific queue since we start one runtime for all organisations
- # and queue ids contain the organisation_id
- queues = [q for q in queues if q.id.startswith(queue_type.value) and q.size > 0]
-
- logger.debug("Found queues: %s", [queue.id for queue in queues])
-
- all_queues_empty = True
-
- for queue in queues:
- logger.debug("Popping from queue %s", queue.id)
-
- try:
- p_item = self.scheduler_client.pop_item(queue.id)
- except (HTTPError, ValidationError):
- logger.exception("Popping task from scheduler failed, sleeping 10 seconds")
- time.sleep(10)
- continue
-
- if not p_item:
- logger.debug("Queue %s empty", queue.id)
- continue
+ if p_item is None:
+ time.sleep(self.settings.worker_heartbeat)
+ return
- all_queues_empty = False
+ logger.info("Handling task[%s]", p_item.data.id)
- logger.info("Handling task[%s]", p_item.data.id)
+ try:
+ task_queue.put(p_item)
+ logger.info("Dispatched task[%s]", p_item.data.id)
+ except: # noqa
+ logger.exception("Exiting worker...")
+ logger.info("Patching scheduler task[id=%s] to %s", p_item.data.id, TaskStatus.FAILED.value)
try:
- task_queue.put(p_item)
- logger.info("Dispatched task[%s]", p_item.data.id)
- except: # noqa
- logger.exception("Exiting worker...")
- logger.info("Patching scheduler task[id=%s] to %s", p_item.data.id, TaskStatus.FAILED.value)
-
- try:
- self.scheduler_client.patch_task(p_item.id, TaskStatus.FAILED)
- logger.info(
- "Set task status to %s in the scheduler for task[id=%s]", TaskStatus.FAILED, p_item.data.id
- )
- except HTTPError:
- logger.exception("Could not patch scheduler task to %s", TaskStatus.FAILED.value)
-
- raise
-
- if all_queues_empty:
- logger.debug("All queues empty, sleeping %f seconds", self.settings.poll_interval)
- time.sleep(self.settings.poll_interval)
+ self.scheduler_client.patch_task(p_item.id, TaskStatus.FAILED)
+ logger.info("Set task status to %s in the scheduler for task[id=%s]", TaskStatus.FAILED, p_item.data.id)
+ except HTTPError:
+ logger.exception("Could not patch scheduler task to %s", TaskStatus.FAILED.value)
def _check_workers(self) -> None:
new_workers = []
@@ -279,9 +252,4 @@ def get_runtime_manager(settings: Settings, queue: WorkerManager.Queue, log_leve
LocalNormalizerJobRunner(local_repository), bytes_api_client, settings.scan_profile_whitelist
)
- return SchedulerWorkerManager(
- item_handler,
- SchedulerAPIClient(str(settings.scheduler_api)), # Do not share a session between workers
- settings,
- log_level,
- )
+ return SchedulerWorkerManager(item_handler, SchedulerAPIClient(str(settings.scheduler_api)), settings, log_level)
diff --git a/boefjes/boefjes/clients/scheduler_client.py b/boefjes/boefjes/clients/scheduler_client.py
index 095804b6001..edb5c02ac5c 100644
--- a/boefjes/boefjes/clients/scheduler_client.py
+++ b/boefjes/boefjes/clients/scheduler_client.py
@@ -1,6 +1,7 @@
import datetime
import uuid
from enum import Enum
+from typing import Any
from httpx import Client, HTTPTransport, Response
from pydantic import BaseModel, TypeAdapter
@@ -29,7 +30,8 @@ class TaskStatus(Enum):
class Task(BaseModel):
id: uuid.UUID
scheduler_id: str
- schedule_id: str | None
+ schedule_id: uuid.UUID | None = None
+ organisation: str
priority: int
status: TaskStatus
type: str
@@ -39,11 +41,21 @@ class Task(BaseModel):
modified_at: datetime.datetime
+class PaginatedTasksResponse(BaseModel):
+ count: int
+ next: str | None = None
+ previous: str | None = None
+ results: list[Task]
+
+
class SchedulerClientInterface:
def get_queues(self) -> list[Queue]:
raise NotImplementedError()
- def pop_item(self, queue_id: str) -> Task | None:
+ def pop_item(self, scheduler_id: str) -> Task | None:
+ raise NotImplementedError()
+
+ def pop_items(self, scheduler_id: str, filters: dict[str, Any]) -> PaginatedTasksResponse | None:
raise NotImplementedError()
def patch_task(self, task_id: uuid.UUID, status: TaskStatus) -> None:
@@ -66,20 +78,24 @@ def __init__(self, base_url: str):
def _verify_response(response: Response) -> None:
response.raise_for_status()
- def get_queues(self) -> list[Queue]:
- response = self._session.get("/queues")
+ def pop_item(self, scheduler_id: str) -> Task | None:
+ response = self._session.post(f"/schedulers/{scheduler_id}/pop?limit=1")
self._verify_response(response)
- return TypeAdapter(list[Queue]).validate_json(response.content)
+ page = TypeAdapter(PaginatedTasksResponse | None).validate_json(response.content)
+ if page.count == 0:
+ return None
+
+ return page.results[0]
- def pop_item(self, queue_id: str) -> Task | None:
- response = self._session.post(f"/queues/{queue_id}/pop")
+ def pop_items(self, scheduler_id: str, filters: dict[str, Any]) -> PaginatedTasksResponse | None:
+ response = self._session.post(f"/schedulers/{scheduler_id}/pop", json=filters)
self._verify_response(response)
- return TypeAdapter(Task | None).validate_json(response.content)
+ return TypeAdapter(PaginatedTasksResponse | None).validate_json(response.content)
def push_item(self, p_item: Task) -> None:
- response = self._session.post(f"/queues/{p_item.scheduler_id}/push", content=p_item.model_dump_json())
+ response = self._session.post(f"/schedulers/{p_item.scheduler_id}/push", content=p_item.model_dump_json())
self._verify_response(response)
def patch_task(self, task_id: uuid.UUID, status: TaskStatus) -> None:
diff --git a/boefjes/boefjes/config.py b/boefjes/boefjes/config.py
index a3947ed399c..7ccf3f78e1f 100644
--- a/boefjes/boefjes/config.py
+++ b/boefjes/boefjes/config.py
@@ -3,7 +3,7 @@
from pathlib import Path
from typing import Any, Literal
-from pydantic import AmqpDsn, AnyHttpUrl, Field, FilePath, IPvAnyAddress, PostgresDsn, conint
+from pydantic import AnyHttpUrl, Field, FilePath, IPvAnyAddress, PostgresDsn, conint
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict
from pydantic_settings.sources import EnvSettingsSource
@@ -63,9 +63,6 @@ class Settings(BaseSettings):
examples=['{"kat_external_db_normalize": 3, "kat_dns_normalize": 1}'],
)
- # Queue configuration
- queue_uri: AmqpDsn = Field(..., description="KAT queue URI", examples=["amqp://"], validation_alias="QUEUE_URI")
-
katalogus_db_uri: PostgresDsn = Field(
...,
examples=["postgresql://xx:xx@host:5432/katalogus"],
diff --git a/boefjes/packaging/deb/data/etc/kat/boefjes.conf b/boefjes/packaging/deb/data/etc/kat/boefjes.conf
index 8baa0e64fd4..6d986354bfa 100644
--- a/boefjes/packaging/deb/data/etc/kat/boefjes.conf
+++ b/boefjes/packaging/deb/data/etc/kat/boefjes.conf
@@ -1,5 +1,3 @@
-QUEUE_URI=
-
# OCTOPOES_API=http://localhost:8001
# BYTES_API=http://localhost:8002
BYTES_USERNAME=bytes
diff --git a/boefjes/pyproject.toml b/boefjes/pyproject.toml
index 223bf59937f..1a5073aad75 100644
--- a/boefjes/pyproject.toml
+++ b/boefjes/pyproject.toml
@@ -97,7 +97,6 @@ markers = ["slow: marks tests as slow"]
addopts = "-m 'not slow'"
env = [
"D:KATALOGUS_DB_URI=postgresql://postgres:postgres@ci_katalogus-db:5432/ci_katalogus",
- "D:QUEUE_URI=amqp://placeholder",
"D:BOEFJES_API=http://placeholder:8006",
"D:KATALOGUS_API=http://placeholder:8000",
"D:OCTOPOES_API=http://placeholder:8001",
diff --git a/boefjes/tests/conftest.py b/boefjes/tests/conftest.py
index 202d3d0a778..5b598f36907 100644
--- a/boefjes/tests/conftest.py
+++ b/boefjes/tests/conftest.py
@@ -15,7 +15,7 @@
from boefjes.app import SchedulerWorkerManager
from boefjes.clients.bytes_client import BytesAPIClient
-from boefjes.clients.scheduler_client import Queue, SchedulerClientInterface, Task, TaskStatus
+from boefjes.clients.scheduler_client import PaginatedTasksResponse, SchedulerClientInterface, Task, TaskStatus
from boefjes.config import Settings, settings
from boefjes.dependencies.plugins import PluginService, get_plugin_service
from boefjes.job_handler import bytes_api_client
@@ -50,7 +50,6 @@
class MockSchedulerClient(SchedulerClientInterface):
def __init__(
self,
- queue_response: bytes,
boefje_responses: list[bytes],
normalizer_responses: list[bytes],
log_path: Path,
@@ -58,7 +57,6 @@ def __init__(
iterations_to_wait_for_exception: int = 0,
sleep_time: float = 0.1,
):
- self.queue_response = queue_response
self.boefje_responses = boefje_responses
self.normalizer_responses = normalizer_responses
@@ -73,26 +71,25 @@ def __init__(
self._popped_items: dict[str, Task] = multiprocessing.Manager().dict()
self._pushed_items: dict[str, Task] = multiprocessing.Manager().dict()
- def get_queues(self) -> list[Queue]:
- time.sleep(self.sleep_time)
- return TypeAdapter(list[Queue]).validate_json(self.queue_response)
-
def pop_item(self, queue: str) -> Task | None:
time.sleep(self.sleep_time)
try:
if WorkerManager.Queue.BOEFJES.value in queue:
- p_item = TypeAdapter(Task).validate_json(self.boefje_responses.pop(0))
+ response = TypeAdapter(PaginatedTasksResponse).validate_json(self.boefje_responses.pop(0))
+ p_item = response.results[0]
self._popped_items[str(p_item.id)] = p_item
self._tasks[str(p_item.id)] = self._task_from_id(p_item.id)
return p_item
if WorkerManager.Queue.NORMALIZERS.value in queue:
- p_item = TypeAdapter(Task).validate_json(self.normalizer_responses.pop(0))
+ response = TypeAdapter(PaginatedTasksResponse).validate_json(self.normalizer_responses.pop(0))
+ p_item = response.results[0]
self._popped_items[str(p_item.id)] = p_item
self._tasks[str(p_item.id)] = self._task_from_id(p_item.id)
return p_item
except IndexError:
+ time.sleep(3 * self.sleep_time)
raise self.raise_on_empty_queue
def patch_task(self, task_id: UUID, status: TaskStatus) -> None:
@@ -126,7 +123,8 @@ def __init__(self, exception=Exception):
def handle(self, item: BoefjeMeta | NormalizerMeta):
time.sleep(self.sleep_time)
- if str(item.id) == "9071c9fd-2b9f-440f-a524-ef1ca4824fd4":
+ if str(item.id) in ["9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "2071c9fd-2b9f-440f-a524-ef1ca4824fd4"]:
+ time.sleep(self.sleep_time)
raise self.exception()
self.queue.put(item)
@@ -151,7 +149,6 @@ def item_handler(tmp_path: Path):
@pytest.fixture
def manager(item_handler: MockHandler, tmp_path: Path) -> SchedulerWorkerManager:
scheduler_client = MockSchedulerClient(
- queue_response=get_dummy_data("scheduler/queues_response.json"),
boefje_responses=[
get_dummy_data("scheduler/pop_response_boefje.json"),
get_dummy_data("scheduler/pop_response_boefje_2.json"),
diff --git a/boefjes/tests/examples/scheduler/pop_response_boefje.json b/boefjes/tests/examples/scheduler/pop_response_boefje.json
index 29e7d5dfb72..4f668582726 100644
--- a/boefjes/tests/examples/scheduler/pop_response_boefje.json
+++ b/boefjes/tests/examples/scheduler/pop_response_boefje.json
@@ -1,25 +1,33 @@
{
- "id": "70da7d4f-f41f-4940-901b-d98a92e9014b",
- "priority": 1,
- "scheduler_id": "boefje-_dev",
- "schedule_id": null,
- "status": "dispatched",
- "type": "boefje",
- "hash": "70da7d4f-f41f-4940-901b-d98a92e9014b",
- "data": {
- "id": "70da7d4f-f41f-4940-901b-d98a92e9014b",
- "boefje": {
- "id": "dns-records",
- "version": null
- },
- "input_ooi": "Hostname|internet|test.test",
- "organization": "_dev",
- "arguments": {},
- "started_at": null,
- "runnable_hash": null,
- "environment": null,
- "ended_at": null
- },
- "created_at": "2021-06-29T14:00:00",
- "modified_at": "2021-06-29T14:00:00"
+ "count": 1,
+ "next": null,
+ "previous": null,
+ "results": [
+ {
+ "id": "70da7d4f-f41f-4940-901b-d98a92e9014b",
+ "priority": 1,
+ "scheduler_id": "boefje",
+ "organisation": "_dev",
+ "schedule_id": null,
+ "status": "dispatched",
+ "type": "boefje",
+ "hash": "70da7d4f-f41f-4940-901b-d98a92e9014b",
+ "data": {
+ "id": "70da7d4f-f41f-4940-901b-d98a92e9014b",
+ "boefje": {
+ "id": "dns-records",
+ "version": null
+ },
+ "input_ooi": "Hostname|internet|test.test",
+ "organization": "_dev",
+ "arguments": {},
+ "started_at": null,
+ "runnable_hash": null,
+ "environment": null,
+ "ended_at": null
+ },
+ "created_at": "2021-06-29T14:00:00",
+ "modified_at": "2021-06-29T14:00:00"
+ }
+ ]
}
diff --git a/boefjes/tests/examples/scheduler/pop_response_boefje_2.json b/boefjes/tests/examples/scheduler/pop_response_boefje_2.json
index 762be767cc9..8c40569c9c2 100644
--- a/boefjes/tests/examples/scheduler/pop_response_boefje_2.json
+++ b/boefjes/tests/examples/scheduler/pop_response_boefje_2.json
@@ -1,25 +1,33 @@
{
- "id": "70da7d4f-f41f-4940-901b-d98a92e9014c",
- "priority": 1,
- "scheduler_id": "boefje-_dev",
- "schedule_id": null,
- "status": "dispatched",
- "type": "boefje",
- "hash": "70da7d4f-f41f-4940-901b-d98a92e9014c",
- "data": {
- "id": "70da7d4f-f41f-4940-901b-d98a92e9014c",
- "boefje": {
- "id": "dns-records",
- "version": null
- },
- "input_ooi": "Hostname|internet|test.test",
- "organization": "_dev",
- "arguments": {},
- "started_at": null,
- "runnable_hash": null,
- "environment": null,
- "ended_at": null
- },
- "created_at": "2021-06-29T14:00:00",
- "modified_at": "2021-06-29T14:00:00"
+ "count": 1,
+ "next": null,
+ "previous": null,
+ "results": [
+ {
+ "id": "70da7d4f-f41f-4940-901b-d98a92e9014c",
+ "priority": 1,
+ "scheduler_id": "boefje",
+ "organisation": "_dev",
+ "schedule_id": null,
+ "status": "dispatched",
+ "type": "boefje",
+ "hash": "70da7d4f-f41f-4940-901b-d98a92e9014c",
+ "data": {
+ "id": "70da7d4f-f41f-4940-901b-d98a92e9014c",
+ "boefje": {
+ "id": "dns-records",
+ "version": null
+ },
+ "input_ooi": "Hostname|internet|test.test",
+ "organization": "_dev",
+ "arguments": {},
+ "started_at": null,
+ "runnable_hash": null,
+ "environment": null,
+ "ended_at": null
+ },
+ "created_at": "2021-06-29T14:00:00",
+ "modified_at": "2021-06-29T14:00:00"
+ }
+ ]
}
diff --git a/boefjes/tests/examples/scheduler/pop_response_boefje_no_ooi.json b/boefjes/tests/examples/scheduler/pop_response_boefje_no_ooi.json
index 5ce2ad45c9e..522737bbd27 100644
--- a/boefjes/tests/examples/scheduler/pop_response_boefje_no_ooi.json
+++ b/boefjes/tests/examples/scheduler/pop_response_boefje_no_ooi.json
@@ -1,25 +1,33 @@
{
- "id": "70da7d4f-f41f-4940-901b-d98a92e9014b",
- "priority": 1,
- "scheduler_id": "boefje-_dev",
- "schedule_id": null,
- "status": "dispatched",
- "type": "boefje",
- "hash": "70da7d4f-f41f-4940-901b-d98a92e9014b",
- "data": {
- "id": "70da7d4f-f41f-4940-901b-d98a92e9014b",
- "boefje": {
- "id": "dns-records",
- "version": null
- },
- "input_ooi": "",
- "organization": "_dev",
- "arguments": {},
- "started_at": null,
- "runnable_hash": null,
- "environment": null,
- "ended_at": null
- },
- "created_at": "2021-06-29T14:00:00",
- "modified_at": "2021-06-29T14:00:00"
+ "count": 1,
+ "next": null,
+ "previous": null,
+ "results": [
+ {
+ "id": "70da7d4f-f41f-4940-901b-d98a92e9014b",
+ "priority": 1,
+ "scheduler_id": "boefje",
+ "organisation": "_dev",
+ "schedule_id": null,
+ "status": "dispatched",
+ "type": "boefje",
+ "hash": "70da7d4f-f41f-4940-901b-d98a92e9014b",
+ "data": {
+ "id": "70da7d4f-f41f-4940-901b-d98a92e9014b",
+ "boefje": {
+ "id": "dns-records",
+ "version": null
+ },
+ "input_ooi": "",
+ "organization": "_dev",
+ "arguments": {},
+ "started_at": null,
+ "runnable_hash": null,
+ "environment": null,
+ "ended_at": null
+ },
+ "created_at": "2021-06-29T14:00:00",
+ "modified_at": "2021-06-29T14:00:00"
+ }
+ ]
}
diff --git a/boefjes/tests/examples/scheduler/pop_response_normalizer.json b/boefjes/tests/examples/scheduler/pop_response_normalizer.json
index 274a9798c28..f197961c532 100644
--- a/boefjes/tests/examples/scheduler/pop_response_normalizer.json
+++ b/boefjes/tests/examples/scheduler/pop_response_normalizer.json
@@ -1,57 +1,65 @@
{
- "id": "60da7d4ff41f4940901bd98a92e9014b",
- "priority": 1,
- "scheduler_id": "normalizer-_dev",
- "schedule_id": null,
- "status": "dispatched",
- "type": "normalizer",
- "hash": "7e698c377cfd85015c0d7086b76b76b4",
- "data": {
- "id": "60da7d4ff41f4940901bd98a92e9014b",
- "raw_data": {
- "id": "60da7d4ff41f4940901bd98a92e9014a",
- "boefje_meta": {
- "id": "70da7d4ff41f4940901bd98a92e9014b",
- "boefje": {
- "id": "dns-records",
- "name": "DnsRecords",
- "description": "Fetch the DNS record(s) of a hostname",
- "version": null,
- "scan_level": 1,
- "consumes": [
- "Hostname"
- ],
- "produces": [
- "DNSAAAARecord",
- "IPAddressV6",
- "NXDOMAIN",
- "Hostname",
- "Network",
- "DNSNSRecord",
- "DNSTXTRecord",
- "IPAddressV4",
- "DNSMXRecord",
- "DNSZone",
- "DNSARecord",
- "DNSSOARecord",
- "DNSCNAMERecord"
- ],
- "dispatches": null
+ "count": 1,
+ "next": null,
+ "previous": null,
+ "results": [
+ {
+ "id": "60da7d4ff41f4940901bd98a92e9014b",
+ "priority": 1,
+ "scheduler_id": "normalizer",
+ "organisation": "_dev",
+ "schedule_id": null,
+ "status": "dispatched",
+ "type": "normalizer",
+ "hash": "7e698c377cfd85015c0d7086b76b76b4",
+ "data": {
+ "id": "60da7d4ff41f4940901bd98a92e9014b",
+ "raw_data": {
+ "id": "60da7d4ff41f4940901bd98a92e9014a",
+ "boefje_meta": {
+ "id": "70da7d4ff41f4940901bd98a92e9014b",
+ "boefje": {
+ "id": "dns-records",
+ "name": "DnsRecords",
+ "description": "Fetch the DNS record(s) of a hostname",
+ "version": null,
+ "scan_level": 1,
+ "consumes": [
+ "Hostname"
+ ],
+ "produces": [
+ "DNSAAAARecord",
+ "IPAddressV6",
+ "NXDOMAIN",
+ "Hostname",
+ "Network",
+ "DNSNSRecord",
+ "DNSTXTRecord",
+ "IPAddressV4",
+ "DNSMXRecord",
+ "DNSZone",
+ "DNSARecord",
+ "DNSSOARecord",
+ "DNSCNAMERecord"
+ ],
+ "dispatches": null
+ },
+ "input_ooi": "Hostname|internet|test.test",
+ "organization": "_dev",
+ "dispatches": []
+ },
+ "mime_types": [
+ {
+ "value": "boefje/dns-records"
+ }
+ ]
},
- "input_ooi": "Hostname|internet|test.test",
- "organization": "_dev",
- "dispatches": []
- },
- "mime_types": [
- {
- "value": "boefje/dns-records"
+ "normalizer": {
+ "id": "kat_dns_normalize"
}
- ]
- },
- "normalizer": {
- "id": "kat_dns_normalize"
+ },
+ "created_at": "2021-06-29T14:00:00",
+ "modified_at": "2021-06-29T14:00:00"
}
- },
- "created_at": "2021-06-29T14:00:00",
- "modified_at": "2021-06-29T14:00:00"
+ ]
}
diff --git a/boefjes/tests/examples/scheduler/queues_response.json b/boefjes/tests/examples/scheduler/queues_response.json
deleted file mode 100644
index ae5f68f8ab0..00000000000
--- a/boefjes/tests/examples/scheduler/queues_response.json
+++ /dev/null
@@ -1,20 +0,0 @@
-[
- {
- "id": "boefje-_dev",
- "size": 1,
- "maxsize": 1000,
- "allow_replace": false,
- "allow_updates": false,
- "allow_priority_updates": true,
- "pq": []
- },
- {
- "id": "normalizer-_dev",
- "size": 1,
- "maxsize": 1000,
- "allow_replace": false,
- "allow_updates": false,
- "allow_priority_updates": true,
- "pq": []
- }
-]
diff --git a/boefjes/tests/examples/scheduler/should_crash.json b/boefjes/tests/examples/scheduler/should_crash.json
index f267bf5fc57..ca3ba0f35e8 100644
--- a/boefjes/tests/examples/scheduler/should_crash.json
+++ b/boefjes/tests/examples/scheduler/should_crash.json
@@ -1,43 +1,51 @@
{
- "id": "9071c9fd-2b9f-440f-a524-ef1ca4824fd4",
- "priority": 1,
- "scheduler_id": "boefje-_dev",
- "schedule_id": null,
- "status": "dispatched",
- "type": "boefje",
- "hash": "7e698c377cfd85015c0d7086b76b76b4",
- "data": {
- "id": "9071c9fd-2b9f-440f-a524-ef1ca4824fd4",
- "boefje": {
- "id": "dns-records",
- "name": "DnsRecords",
- "description": "Fetch the DNS record(s) of a hostname",
- "version": null,
- "scan_level": 1,
- "consumes": [
- "Hostname"
- ],
- "produces": [
- "DNSAAAARecord",
- "IPAddressV6",
- "NXDOMAIN",
- "Hostname",
- "Network",
- "DNSNSRecord",
- "DNSTXTRecord",
- "IPAddressV4",
- "DNSMXRecord",
- "DNSZone",
- "DNSARecord",
- "DNSSOARecord",
- "DNSCNAMERecord"
- ],
- "dispatches": null
- },
- "input_ooi": "Hostname|internet|test.test",
- "organization": "_dev",
- "dispatches": []
- },
- "created_at": "2021-06-29T14:00:00",
- "modified_at": "2021-06-29T14:00:00"
+ "count": 1,
+ "next": null,
+ "previous": null,
+ "results": [
+ {
+ "id": "9071c9fd-2b9f-440f-a524-ef1ca4824fd4",
+ "priority": 1,
+ "scheduler_id": "boefje",
+ "organisation": "_dev",
+ "schedule_id": null,
+ "status": "dispatched",
+ "type": "boefje",
+ "hash": "7e698c377cfd85015c0d7086b76b76b4",
+ "data": {
+ "id": "9071c9fd-2b9f-440f-a524-ef1ca4824fd4",
+ "boefje": {
+ "id": "dns-records",
+ "name": "DnsRecords",
+ "description": "Fetch the DNS record(s) of a hostname",
+ "version": null,
+ "scan_level": 1,
+ "consumes": [
+ "Hostname"
+ ],
+ "produces": [
+ "DNSAAAARecord",
+ "IPAddressV6",
+ "NXDOMAIN",
+ "Hostname",
+ "Network",
+ "DNSNSRecord",
+ "DNSTXTRecord",
+ "IPAddressV4",
+ "DNSMXRecord",
+ "DNSZone",
+ "DNSARecord",
+ "DNSSOARecord",
+ "DNSCNAMERecord"
+ ],
+ "dispatches": null
+ },
+ "input_ooi": "Hostname|internet|test.test",
+ "organization": "_dev",
+ "dispatches": []
+ },
+ "created_at": "2021-06-29T14:00:00",
+ "modified_at": "2021-06-29T14:00:00"
+ }
+ ]
}
diff --git a/boefjes/tests/examples/scheduler/should_crash_2.json b/boefjes/tests/examples/scheduler/should_crash_2.json
new file mode 100644
index 00000000000..913ea5c86a0
--- /dev/null
+++ b/boefjes/tests/examples/scheduler/should_crash_2.json
@@ -0,0 +1,51 @@
+{
+ "count": 1,
+ "next": null,
+ "previous": null,
+ "results": [
+ {
+ "id": "2071c9fd-2b9f-440f-a524-ef1ca4824fd4",
+ "priority": 1,
+ "scheduler_id": "boefje",
+ "organisation": "_dev",
+ "schedule_id": null,
+ "status": "dispatched",
+ "type": "boefje",
+ "hash": "7e698c377cfd85015c0d7086b76b76b4",
+ "data": {
+ "id": "2071c9fd-2b9f-440f-a524-ef1ca4824fd4",
+ "boefje": {
+ "id": "dns-records",
+ "name": "DnsRecords",
+ "description": "Fetch the DNS record(s) of a hostname",
+ "version": null,
+ "scan_level": 1,
+ "consumes": [
+ "Hostname"
+ ],
+ "produces": [
+ "DNSAAAARecord",
+ "IPAddressV6",
+ "NXDOMAIN",
+ "Hostname",
+ "Network",
+ "DNSNSRecord",
+ "DNSTXTRecord",
+ "IPAddressV4",
+ "DNSMXRecord",
+ "DNSZone",
+ "DNSARecord",
+ "DNSSOARecord",
+ "DNSCNAMERecord"
+ ],
+ "dispatches": null
+ },
+ "input_ooi": "Hostname|internet|test.test",
+ "organization": "_dev",
+ "dispatches": []
+ },
+ "created_at": "2021-06-29T14:00:00",
+ "modified_at": "2021-06-29T14:00:00"
+ }
+ ]
+}
diff --git a/boefjes/tests/test_api.py b/boefjes/tests/test_api.py
index 71287655f1d..4d6aedaf835 100644
--- a/boefjes/tests/test_api.py
+++ b/boefjes/tests/test_api.py
@@ -11,7 +11,6 @@
def _mocked_scheduler_client(tmp_path: Path):
return MockSchedulerClient(
- queue_response=get_dummy_data("scheduler/queues_response.json"),
boefje_responses=[get_dummy_data("scheduler/pop_response_boefje_no_ooi.json")],
normalizer_responses=[],
log_path=tmp_path / "patch_task_log",
diff --git a/boefjes/tests/test_app.py b/boefjes/tests/test_app.py
index 8cd124940f6..26bcd41cfb6 100644
--- a/boefjes/tests/test_app.py
+++ b/boefjes/tests/test_app.py
@@ -56,7 +56,6 @@ def test_two_processes(manager: SchedulerWorkerManager, item_handler: MockHandle
def test_two_processes_exception(manager: SchedulerWorkerManager, item_handler: MockHandler, tmp_path) -> None:
manager.scheduler_client = MockSchedulerClient(
- get_dummy_data("scheduler/queues_response.json"),
[get_dummy_data("scheduler/should_crash.json")],
[get_dummy_data("scheduler/pop_response_normalizer.json")],
tmp_path / "patch_task_log",
@@ -72,12 +71,16 @@ def test_two_processes_exception(manager: SchedulerWorkerManager, item_handler:
def test_two_processes_handler_exception(manager: SchedulerWorkerManager, item_handler: MockHandler, tmp_path) -> None:
manager.scheduler_client = MockSchedulerClient(
- get_dummy_data("scheduler/queues_response.json"),
- [get_dummy_data("scheduler/pop_response_boefje.json")] + 2 * [get_dummy_data("scheduler/should_crash.json")],
+ [
+ get_dummy_data("scheduler/pop_response_boefje.json"),
+ get_dummy_data("scheduler/should_crash.json"),
+ get_dummy_data("scheduler/should_crash_2.json"),
+ ],
[get_dummy_data("scheduler/pop_response_normalizer.json")],
tmp_path / "patch_task_log",
)
+ item_handler.sleep_time = 0.1
manager.settings.pool_size = 2
manager.task_queue = Manager().Queue()
with pytest.raises(KeyboardInterrupt):
@@ -96,20 +99,17 @@ def test_two_processes_handler_exception(manager: SchedulerWorkerManager, item_h
# We expect the first two patches to set the task status to running of both task and then process 1 to finish, as
# the exception has been set up with a small delay.
assert len(patched_tasks) == 6
- assert sorted(patched_tasks[:3]) == sorted(
- [
- ("70da7d4f-f41f-4940-901b-d98a92e9014b", "running"), # Process 1
- ("70da7d4f-f41f-4940-901b-d98a92e9014b", "completed"), # Process 1
- ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "running"), # Process 2
- ]
+ assert sorted(patched_tasks[:2]) == sorted(
+ [("70da7d4f-f41f-4940-901b-d98a92e9014b", "running"), ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "running")]
)
# The process completing status then to be set to completed/failed for both tasks.
- assert sorted(patched_tasks[3:]) == sorted(
+ assert sorted(patched_tasks[2:]) == sorted(
[
- ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "running"), # Process 1
- ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "failed"), # Process 2
- ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "failed"), # Process 1
+ ("9071c9fd-2b9f-440f-a524-ef1ca4824fd4", "failed"),
+ ("2071c9fd-2b9f-440f-a524-ef1ca4824fd4", "running"),
+ ("2071c9fd-2b9f-440f-a524-ef1ca4824fd4", "failed"),
+ ("70da7d4f-f41f-4940-901b-d98a92e9014b", "completed"),
]
)
@@ -126,10 +126,7 @@ def test_two_processes_cleanup_unfinished_tasks(
"""
manager.scheduler_client = MockSchedulerClient(
- get_dummy_data("scheduler/queues_response.json"),
- 3 * [get_dummy_data("scheduler/pop_response_boefje.json")],
- [],
- tmp_path / "patch_task_log",
+ 3 * [get_dummy_data("scheduler/pop_response_boefje.json")], [], tmp_path / "patch_task_log"
)
manager.settings.pool_size = 2
manager.task_queue = Manager().Queue()
@@ -153,10 +150,11 @@ def test_two_processes_cleanup_unfinished_tasks(
}
# Tasks (one with the same id) was still unhandled the queue and pushed back to the scheduler by the main process
- assert manager.scheduler_client._pushed_items["70da7d4f-f41f-4940-901b-d98a92e9014b"].scheduler_id == "boefje-_dev"
- assert json.loads(
- manager.scheduler_client._pushed_items["70da7d4f-f41f-4940-901b-d98a92e9014b"].json()
- ) == json.loads(get_dummy_data("scheduler/pop_response_boefje.json"))
+ assert manager.scheduler_client._pushed_items["70da7d4f-f41f-4940-901b-d98a92e9014b"].scheduler_id == "boefje"
+ assert (
+ json.loads(manager.scheduler_client._pushed_items["70da7d4f-f41f-4940-901b-d98a92e9014b"].json())
+ == json.loads(get_dummy_data("scheduler/pop_response_boefje.json")).get("results")[0]
+ )
def test_normalizer_queue(manager: SchedulerWorkerManager, item_handler: MockHandler) -> None:
@@ -170,7 +168,6 @@ def test_normalizer_queue(manager: SchedulerWorkerManager, item_handler: MockHan
def test_null(manager: SchedulerWorkerManager, tmp_path: Path, item_handler: MockHandler):
manager.scheduler_client = MockSchedulerClient(
- get_dummy_data("scheduler/queues_response.json"),
3 * [get_dummy_data("scheduler/pop_response_boefje.json")],
[get_dummy_data("scheduler/pop_response_normalizer.json")],
tmp_path / "patch_task_log",
diff --git a/bytes/bytes/rabbitmq.py b/bytes/bytes/rabbitmq.py
index 8ae5b83f446..f40a042ef1f 100644
--- a/bytes/bytes/rabbitmq.py
+++ b/bytes/bytes/rabbitmq.py
@@ -53,7 +53,7 @@ def _check_connection(self) -> None:
@staticmethod
def _queue_name(event: Event) -> str:
- return f"{event.organization}__{event.event_id}"
+ return event.event_id
class NullManager(EventManager):
diff --git a/bytes/tests/conftest.py b/bytes/tests/conftest.py
index f30b18e8544..049cb2c81c1 100644
--- a/bytes/tests/conftest.py
+++ b/bytes/tests/conftest.py
@@ -105,5 +105,8 @@ def raw_repository(tmp_path: Path) -> FileRawRepository:
@pytest.fixture
-def event_manager(settings: Settings) -> RabbitMQEventManager:
- return RabbitMQEventManager(str(settings.queue_uri))
+def event_manager(settings: Settings) -> Iterator[RabbitMQEventManager]:
+ manager = RabbitMQEventManager(str(settings.queue_uri))
+ manager.channel.queue_delete("raw_file_received")
+
+ yield manager
diff --git a/bytes/tests/integration/test_bytes_api.py b/bytes/tests/integration/test_bytes_api.py
index 046a0d74919..143592ba3ee 100644
--- a/bytes/tests/integration/test_bytes_api.py
+++ b/bytes/tests/integration/test_bytes_api.py
@@ -227,7 +227,7 @@ def test_raw(bytes_api_client: BytesAPIClient, event_manager: RabbitMQEventManag
assert retrieved_raw == raw
- method, properties, body = event_manager.connection.channel().basic_get("test__raw_file_received")
+ method, properties, body = event_manager.connection.channel().basic_get("raw_file_received")
event_manager.connection.channel().basic_ack(method.delivery_tag)
assert str(boefje_meta.id) in body.decode()
@@ -244,7 +244,7 @@ def test_raw_big(bytes_api_client: BytesAPIClient, event_manager: RabbitMQEventM
assert retrieved_raw == raw
- method, properties, body = event_manager.connection.channel().basic_get("test__raw_file_received")
+ method, properties, body = event_manager.connection.channel().basic_get("raw_file_received")
event_manager.connection.channel().basic_ack(method.delivery_tag)
assert str(boefje_meta.id) in body.decode()
diff --git a/bytes/tests/integration/test_event.py b/bytes/tests/integration/test_event.py
index 015f1f3f811..706c7f48edc 100644
--- a/bytes/tests/integration/test_event.py
+++ b/bytes/tests/integration/test_event.py
@@ -7,7 +7,7 @@
def test_event_published_successfully(event_manager: RabbitMQEventManager) -> None:
- test_organization = "event-test"
+ test_organization = "test"
raw_data_meta = get_raw_data_meta()
# We use an isolated queue this way to not conflict with other integration tests
@@ -23,5 +23,5 @@ def test_event_published_successfully(event_manager: RabbitMQEventManager) -> No
event_manager.connection.channel().basic_ack(method.delivery_tag)
assert response["organization"] == test_organization
- assert response["raw_data"] == json.loads(event.raw_data.json())
+ assert response["raw_data"] == json.loads(event.raw_data.model_dump_json())
assert response["created_at"] == "2000-10-10T10:00:00"
diff --git a/docs/source/release-notes/1.18.rst b/docs/source/release-notes/1.18.rst
index 735efb0334b..5036623fd72 100644
--- a/docs/source/release-notes/1.18.rst
+++ b/docs/source/release-notes/1.18.rst
@@ -1,26 +1,336 @@
============================================
-OpenKAT 1.18
+OpenKAT 1.18 - Sneeuwkat
============================================
-New Features
-============
+This release adds report scheduling, which implements periodic report generation: by adding an interval to a report, it will automatically update with the latest information. With our new Dashboarding feature, these reports can be added to custom dashboards.
+Dashboard and Report data also have historical versions available. Future versions of the user interface will include moving back and forth in time and comparing these historic versions, highlighting changes and trends.
+We also included a new HTTP export boefje that you can use to export all objects in the graph to an external API either on an interval (e.g. every hour), or when the OOI is either created or changed, using our new Run-On functionality. This can be used to alert that findings have been created or their score has been updated.
+There's also a new S3 backend for Bytes and various new boefjes, normalizers and fixes to bits from our growing community. Thanks! Docs on how to set up S3 for your (new) install can be found here: https://docs.openkat.nl/installation-and-deployment/s3-buckets.html
-Bug fixes
-=========
+The language Tamil has been added via the hard work of a community volunteer. Since we have not yet tested it ourselves, it's currently only available if you add it to the languages list manually.
+If you want to add a language to OpenKAT, or just help translate smaller parts, please take a look at our Weblate: https://hosted.weblate.org/projects/openkat/nl-kat-coordination/ Any help is much appreciated!
+
+The Keiko module (formerly used to generate reports via LaTeX) has been removed, as we are now using full HTML reports that can also be exported as PDF.
+
+In total, 30 contributors have made 267 commits to main, in which 1,332 files were changed.
+
+New Features and Bug fixes
+==========================
+
+* Feature: improve settings and environment logic and phase out redundant environment keys by @Donnype in #3384
+* feat: adds notification styling and icons by @HeleenSG in #3461
+* Make the "name" field for plugins mandatory by @Donnype in #3471
+* Feature/upload multiple files at once to bytes by @Donnype in #3476
+* Add report scheduler functionality to scheduler by @jpbruinsslot in #3352
+* Add json download to report export by @Rieven in #3460
+* feat: multi select dropdown by @HeleenSG in #3446
+* Add timezone to valid time by @noamblitz in #3429
+* Exclude OOIs creation from the OOI add form by OOI-types by @Rieven in #3490
+* Hotfix for normalizer API bug by @Donnype in #3475
+* fix: toggle styling by @HeleenSG in #3449
+* Dont yield all snyk findings when no version was found by @noamblitz in #3431
+* Handle empty normalizer results by @Donnype in #3482
+* Fix enabling normalizers from Rocky by @Donnype in #3481
+* Fix report types selection not being overridden by @Rieven in #3436
+* Add new Boefje by @madelondohmen in #3400
+* Fix hanging upload of large files by @noamblitz in #3489
+* Check if the task is still running according to the scheduler before changing the status by @Donnype in #3506
+* Use the right variable name in the template's if-statement by @Donnype in #3519
+* Add regex pattern check to PORTS setting of nmap-ports by @Donnype in #3516
+* Update xtdb-http-multinode to the latest version by @dekkers in #3523
+* Updated findings in the findings database by @stephanie0x00 in #3427
+* remove unneeded column from filtered plugin table view by @underdarknl in #3515
+* Also delete self-affirming or self-infered objects by @originalsouth in #3498
+* Support valid_time and the like for queries in xtdb tools by @originalsouth in #3430
+* Chore: use only Pytest in the boefjes by @Donnype in #3536
+* Invert findings, add source URLs. by @stephanie0x00 in #3538
+* Fix JSON line logging by @ammar92 in #3511
+* Fix xtdb-cli by @originalsouth in #3543
+* Create boefje variant by @madelondohmen in #3456
+* make session commit less chatty by @underdarknl in #3544
+* Fix duplicate OOI references in result in origin by @originalsouth in #3531
+* a bit more detailed erroring in the scheduler client. by @underdarknl in #3546
+* Show proper error message instead of stacktrace if boefje API is unreachable by @dekkers in #3550
+* Fix headings by @madelondohmen in #3528
+* Feat/bit domain ownership pending by @underdarknl in #3290
+* Improve boefje runner error messages on container failure by @dekkers in #3548
+* Translations update from Hosted Weblate by @weblate in #3567
+* Clean more stale origins by @originalsouth in #3561
+* Fix Pydantic warnings by @ammar92 in #3557
+* Prevent race conditions between Octopoes' event manager and the scheduler from recreating already deleted OOIs through affirmations by @originalsouth in #3564
+* burpsuite fix by @underdarknl in #3381
+* Fix boefje schema on Boefje Setup page by @madelondohmen in #3574
+* Set default findingtype risk in model instead of in bit by @noamblitz in #3562
+* Add permission that grants access to all organizations by @dekkers in #3532
+* Add unique constraint to database plugin names by @Donnype in #3556
+* Feature/add boefje scheduling fields by @Donnype in #3555
+* Refactor and fix faulty save_origin exception code by @originalsouth in #3577
+* Dont show manual tasks in normalizer list by @noamblitz in #3580
+* Update a Boefje by @madelondohmen in #3521
+* Explicitly use the fork context for multiprocessing to fix running boefjes on macOS by @Donnype in #3576
+* fix: button style by @HeleenSG in #3565
+* Use stdlib instead of dateutil to parse ISO datetime by @dekkers in #3590
+* Do not store the hypothetically produced mime-types always by @Donnype in #3583
+* Remove old Findings Report by @madelondohmen in #3560
+* Add 'set start date' functionality to scheduler by @jpbruinsslot in #3589
+* Make API usable by non-admin users and check specific permissions by @dekkers in #3571
+* fix: button styling by @HeleenSG in #3591
+* Add interval to Boefje by @madelondohmen in #3579
+* Add boefje interval and cron check for deadline in scheduler by @jpbruinsslot in #3529
+* Always redirect to katalogus when enabling plugins by @noamblitz in #3584
+* Fixes notification alignment by @HeleenSG in #3522
+* REST API to recalculate bits and clone katalogus settings by @dekkers in #3572
+* fix: form styling by @HeleenSG in #3588
+* Remove an erroneously generated request body from an object history GET call in Octopoes' router by @originalsouth in #3605
+* RFD 0002: Code of Conduct: Code Reviews by @jpbruinsslot in #3425
+* Fix grace period is being used instead of interval for boefjes that have interval specified in scheduler by @jpbruinsslot in #3602
+* Use identifiers on modal triggers and modal component instead of integral trigger by @TwistMeister in #3541
+* Refactoring for Report Recipe, Report Task Runner and Scheduling by @Rieven in #3597
+* Handle existing Boefje name by @madelondohmen in #3573
+* Update boefje interval texts to make functionality more clear by @stephanie0x00 in #3609
+* Translations update from Hosted Weblate by @weblate in #3610
+* Feature/sort ooi type clearance level by @HeleenSG in #3300
+* Feature/report runner integration by @Donnype in #3607
+* Report Schedules List by @Rieven in #3608
+* Add s3 functionality in Bytes by @Souf149 in #3505
+* Implement SonarCloud integrations by @ammar92 in #3001
+* Fixed references in SonarCloud workflow by @ammar92 in #3620
+* Update filter unit and integration tests by @jpbruinsslot in #3595
+* Enable ruff format skip-magic-trailing-comma by @dekkers in #2975
+* Fixes for xtdb-cli by @originalsouth in #3624
+* Give python-docker-boefjes the possibility to use modules that are not part of OpenKAT by @Souf149 in #3621
+* fix tagging list in scheduled_reports_table.html by @underdarknl in #3615
+* Revert 1b4aed6 by @originalsouth in #3647
+* Add audit trail logging to boefje crud actions in boefje by @madelondohmen in #3613
+* use correct error mimetype by @noamblitz in #3646
+* Update katalogus client, input sanitization / validation by @underdarknl in #3396
+* Bug fixes for the reports flow by @Rieven in #3630
+* Remove source link in Findings Report when source is none by @madelondohmen in #3642
+* add CA bundle env var to dadb boefje schema. by @underdarknl in #3618
+* Fix nmap-ports regex pattern not allowing 80 by @Donnype in #3651
+* Fix boefje container image url by @madelondohmen in #3622
+* Fix description on plugin page when all plugins are enabled by @madelondohmen in #3644
+* Fix for downloading PDF by @madelondohmen in #3664
+* Fix sorting plugins list by @Rieven in #3659
+* fix the boefje id check for uuid's. A cleaner match regex would probably be better. by @underdarknl in #3665
+* Fix table in DNS Report by @madelondohmen in #3650
+* Pass bytes instead of string to BytesClient.upload_raw() by @Donnype in #3670
+* make some things look better by @Rieven in #3661
+* Fix/yielded objects by @Donnype in #3669
+* Add rocky worker service to debian packages by @Donnype in #3619
+* Update upload_raw.py by @underdarknl in #3645
+* Translations update from Hosted Weblate by @weblate in #3673
+* Add plugins to findings report by @Rieven in #3657
+* Fix jsonb 'contained by' query by @jpbruinsslot in #3643
+* Fix empty vulnerability reports by @madelondohmen in #3662
+* Silence KATFindingType not found error in JobHandler by @originalsouth in #3686
+* Github action should trigger if workflow definition changes by @dekkers in #3680
+* Do not run dh_strip_nondeterminism in Debian packaging by @dekkers in #3674
+* Fix first order dangling affirmation delete by @originalsouth in #3682
+* Fix javascript and component template in prod environments by @dekkers in #3672
+* Add delete schedule functionality for schedules in the scheduler by @jpbruinsslot in #3678
+* Fix/report naming by @Donnype in #3666
+* Add search endpoint for schedules for scheduler by @jpbruinsslot in #3695
+* feat: ✨ add Shodan InternetDB boefje by @zcrt in #2615
+* Add sterr to output list by @noamblitz in #3649
+* Rework workflow for variable python version, add python 3.11 by @sigio in #3721
+* Fixes in Report Overview by @madelondohmen in #3707
+* Add REST API to list report and download pdf report by @dekkers in #3689
+* Add start date to report schedule by @madelondohmen in #3701
+* Edit report recipe by @madelondohmen in #3690
+* Fix Multi Report recursion error by @Rieven in #3714
+* Fix report names for scheduled reports by @madelondohmen in #3726
+* Refactor Multi Report to comply to the new report flow by @Rieven in #3705
+* Add exception handling to the rest api by @jpbruinsslot in #3708
+* Add rocky REST API for report recipes by @dekkers in #3746
+* Fix auth token middleware with wrong format header by @dekkers in #3755
+* Fix vulnerability chapters in Aggregate table of content by @madelondohmen in #3780
+* Make systemctl call for kat-rocky-worker conditional by @dekkers in #3782
+* Fix scheduled Aggregate Report naming by @madelondohmen in #3748
+* Fixes for dropdowns by @Rieven in #3732
+* Exclude Report from ooi list by @Rieven in #3768
+* Fix reports with organization tags by @noamblitz in #3790
+* Silence staticfiles warning by @dekkers in #3795
+* Add configurable httpx request timeout and increase default by @dekkers in #3786
+* fix: Long links within tables by @HeleenSG in #3724
+* Translations update from Hosted Weblate by @weblate in #3762
+* Update web system report to make "certificate valid" check positive by @stephanie0x00 in #3798
+* Add live set (filter/query) to ReportRecipe by @madelondohmen in #3769
+* Add reports to scheduled table by @madelondohmen in #3787
+* fix: Adds code element styling by @HeleenSG in #3722
+* Fix filtered ooi types for reports by @Rieven in #3807
+* Replace finding description 'None' with the id by @madelondohmen in #3806
+* Button styling by @HeleenSG in #3772
+* Fix settings boefje settings via system env vars by @dekkers in #3766
+* Update normalizer texts in katalogus for some normalizers. by @stephanie0x00 in #3821
+* Add searching and sorting to Findings page by @madelondohmen in #3804
+* Fix typo in InternetDB boefje name by @dekkers in #3828
+* Refactor KATalogus client in Rocky by @Donnype in #3717
+* Check queue size before polling by @Donnype in #3829
+* Do not fail silently when deleting non-existing objects in octopoes by @Donnype in #3813
+* Add bulk actions on report overview by @TwistMeister in #3777
+* Upgrade script notes and fix for 1.16 on Debian by @Donnype in #3824
+* Bug fix: When opening subreports it throws index error by @Rieven in #3775
+* Delete log.txt by @underdarknl in #3851
+* Support a Schedule without a schedule in scheduler by @jpbruinsslot in #3834
+* Report types listed in a modal @ report plugins by @Rieven in #3718
+* Skip empty queues in the Rocky worker by @Donnype in #3860
+* Let local plugins (files) take precedence over database entries by @Donnype in #3858
+* Limit requesting prior tasks for ranking in scheduler by @jpbruinsslot in #3836
+* Add configuration setting for number of octopoes workers by @dekkers in #3796
+* Add start time to scheduled reports by @madelondohmen in #3809
+* Sub reports for Aggregate Report by @Rieven in #3852
+* Fix cron for last day of the month by @madelondohmen in #3831
+* Fixes for empty tables by @madelondohmen in #3844
+* optimize locking in katalogus.py, reuse available data by @underdarknl in #3752
+* Enable/disable scheduled reports by @madelondohmen in #3871
+* Fix rocky katalogus tests and delete unused fixtures by @dekkers in #3884
+* Change plugins enabling in report flow to checkboxes by @noamblitz in #3747
+* Let mailserver inherit l1 by @noamblitz in #3704
+* Ignore specific url parameters when following location headers by @noamblitz in #3856
+* Add auto_calculate_deadline attribute to Scheduler by @jpbruinsslot in #3869
+* Fix for task id as valid UUID by @Rieven in #3744
+* Increase max number of PostgreSQL connections by @dekkers in #3889
+* Translations update from Hosted Weblate by @weblate in #3870
+* Update scheduler folder structure by @jpbruinsslot in #3883
+* Feature/improve rename bulk modal by @TwistMeister in #3885
+* fix: 🐛 allow boefje completion with 404 by @zcrt in #3893
+* Create separate finding for Microsoft RDP port by @stephanie0x00 in #3882
+* Add additional check if task already run for report scheduler by @jpbruinsslot in #3900
+* Adds loggers to report flow by @madelondohmen in #3872
+* Fix mula migrations Debian package by @dekkers in #3919
+* Bug fix: KAT-alogus parameter is now organization member instead of organization code by @Rieven in #3895
+* Fix call to get_katalogus by @dekkers in #3924
+* add support for detecting Lame dns delegations on ip ranges by @underdarknl in #3899
+* Add bgp.jsonl and bgp-meta.json to .gitignore by @dekkers in #3928
+* Improve the KATalogus /plugins endpoint performance by @Donnype in #3892
+* Create scheduled report with zero objects selectable by @madelondohmen in #3907
+* Fix layout issues on scheduled reports page by @TwistMeister in #3930
+* Add export http boefje by @noamblitz in #3901
+* Update website_discovery.py by @underdarknl in #3921
+* add unpkg.com to disallowed hostnames in CSP by @underdarknl in #3927
+* Dont check for Locations on local Ip's. by @underdarknl in #3894
+* fix: 🔨 do not store CDN findings by @zcrt in #3931
+* Boefje runonce functionality in scheduler by @jpbruinsslot in #3906
+* Fix report recipe API by @dekkers in #3942
+* Translations update from Hosted Weblate by @weblate in #3939
+* Report flaws by @madelondohmen in #3880
+* Fix typing in more places and configure mypy to follow imports by @dekkers in #3932
+* Do not let enabling plugins affect the global plugin cache by @Donnype in #3944
+* fix typos in description.md by @underdarknl in #3952
+* Add documentation for S3 Support by @Souf149 in #3953
+* fix/Makes expando row chevron buttons sticky in report history and scheduled reports tables by @TwistMeister in #3954
+* Move event codes logging to KATalogus client by @Donnype in #3956
+* Translations update from Hosted Weblate by @weblate in #3969
+* Add cron parser to make cron human readable. Add "next scan" to object table on boefje detail view by @TwistMeister in #3960
+* Upsert report recipe in REST API by @dekkers in #3968
+* Translations update from Hosted Weblate by @weblate in #3984
+* Fix test_report_runner.py by @originalsouth in #4003
+* minor changes to onboarding, remove header, make preferred route more visible. by @underdarknl in #3986
+* Move the NXDomain catch to look at the results now that we dont raise… by @underdarknl in #3997
+* Add SPF optional machnism qualifier to model and parser. fix Human readable formatting for various mechanisms by @underdarknl in #3999
+* Changes to schedule all reports, even for once by @Rieven in #3840
+* Documentation Export HTTP API boefje by @stephanie0x00 in #4030
+* catch the schema mismatch error and produce an error raw file by @underdarknl in #3995
+* Fix pagination in the history API by @Donnype in #4041
+* Fix/remove unneeded lookups for inference params by @underdarknl in #4031
+* Update dropdown.scss, add scrolling / max height by @underdarknl in #4040
+* Fix/remove unneeded tree lookups on ooi views by @underdarknl in #4032
+* Fix/ooi detail fixes by @underdarknl in #4024
+* Update organization_list and settings page, remove unused stuff, add tags + direct settings link by @underdarknl in #4039
+* Fix/reuse report ooi entities by @Donnype in #4047
+* make reference parsing more strict in init.py by @underdarknl in #4065
+* Add normalizer name to tasklist on object details page, observation table. by @underdarknl in #4034
+* Feat/plugin selection toggler by @underdarknl in #4063
+* Report Task List by @Rieven in #4059
+* Add one-off jobs for report scheduler by @jpbruinsslot in #4045
+* Remove the keiko report module by @dekkers in #4066
+* Translations update from Hosted Weblate by @weblate in #4046
+* Add run-on to Boefje Setup page by @madelondohmen in #4061
+Documentation
+=============
+
+* Docs/update userguide objects tasks members settings by @stephanie0x00 in #3957
+* Add risk level severities to docs by @stephanie0x00 in #4037
+* Docs: adding Questions and Configs by @stephanie0x00 in #3975
+* Docs: adding Questions and Configs by @stephanie0x00 in #3975
+* Add Kubernetes and Ansible to docs by @stephanie0x00 in #3970
+* Fix docs target in Makefile by @ammar92 in #3987
+* Docs: adding Questions and Configs by @stephanie0x00 in #3975
+* Update intro.rst, fix security email address by @underdarknl in #3846
+* Update scheduler documentation by @jpbruinsslot in #3692
+* Update folder structure in scheduler architecture doc by @jpbruinsslot in #4002
+* Update docs for creating a new Boefje by @madelondohmen in #3540
+* update readme by @F3licity in #3648
+* Updates boefje clearances and descriptions by @stephanie0x00 in #3863
+* Update development tutorial documentation by @allan-firelay in #3611
+* Add docs for xtdb analyze bits. by @stephanie0x00 in #3688
+* Docs/add muted findings by @stephanie0x00 in #3699
+* Update helper text for report names by @madelondohmen in #3616
+* Update README.rst - Fix guidelines URLs by @Thijs0x57 in #3789
+* Add descriptions to katalogus by @stephanie0x00 in #3545
+
+Dependency Updates
+==================
+
+* Bump cryptography from 42.0.8 to 43.0.1 in /bytes by @dependabot in #3473
+* Bump django from 5.0.10 to 5.0.11 in /rocky by @dependabot in #4025
+* Bump django from 5.0.9 to 5.0.10 in /rocky by @dependabot in #3940
+* Bump SonarSource/sonarcloud-github-action from 3.1.0 to 4.0.0 by @dependabot in #4001
+* Bump python-multipart from 0.0.9 to 0.0.18 in /bytes by @dependabot in #3925
+* Remove sigrid workflows by @dekkers in #3920
+* Update Sphinx and documentation by @ammar92 in #3710
+* Fix/pin pydicom dependency and revert irrelevant version bumps by @Donnype in #3553
+* Bump django from 5.0.8 to 5.0.9 in /rocky by @dependabot in #3653
+* Bump sphinx-rtd-theme from 2.0.0 to 3.0.0 by @dependabot in #3625
+* Bump waitress from 3.0.0 to 3.0.1 in /octopoes by @dependabot in #3760
+* Update Wappalyzer by @ammar92 in #3800
+* Update packages by @ammar92 in #3990
+* Updates CWE archive to 4.16 by @ammar92 in #3943
+* Update croniter by @ammar92 in #3767
+* Updated packages by @ammar92 in #3694
+* Update Packages by @ammar92 in #3563
+* Updated packages by @ammar92 in #3898
+* Update pre-commit and all hooks by @dekkers in #3923
Upgrading
=========
+Keiko has been removed. You should uninstall / remove the Keiko package or container.
+
+Containers
+----------
+
+When using docker with docker compose, you need to remove keiko from the
+docker-compose.yml file. You can then use the `--remove-orphans` option to tell
+docker compose to remove containers that are no longer in the compose file:
+
+.. code-block:: sh
+
+ docker compose up -d --remove-orphans
+
Debian packages
---------------
If you are using the :doc:`/installation-and-deployment/scripts` we provide to install/upgrade OpenKAT you
need to get the latest version that includes the kat-rocky-worker service.
+You should also remove the kat-keiko package:
+
+.. code-block:: sh
+
+ apt purge kat-keiko
+
+Note that if you use the openkat-update.sh script to update to a newer 1.18
+version (for example from 1.18.0rc1 to 1.18.0), then the kat-keiko package will be
+installed again because the script will update or install all the packages. In 1.19
+this won't happen because the kat-keiko package will not exist anymore.
+
+
Full Changelog
==============
The full changelog can be found on `Github
-
[system]"]
+ Katalogus["Katalogus<br/>[system]"]
+ Bytes["Bytes<br/>[system]"]
+ end
+ subgraph "Task creation services"
+ Rocky["Rocky<br/>[webapp]"]
+ RabbitMQ["RabbitMQ<br/>[message broker]"]
+ end
+
+ Scheduler["Scheduler<br/>[system]"]
+
+ subgraph "Task handling services"
+ TaskRunner["Task Runner<br/>[software system]"]
+ end
+
+ Rocky-->Scheduler
+ RabbitMQ-->Scheduler
-
+ Octopoes-->Scheduler
+ Katalogus-->Scheduler
+ Bytes-->Scheduler
+
+
+ Scheduler--"Pop task off the queue"-->TaskRunner
+```
### C3 Component level
When we take a closer look at the `scheduler` system itself we can identify
-several components. The `SchedulerApp` directs the creation and maintenance
-of a multitude of schedulers.
-
-| Scheduler | Schedulers |
-| :-------------------------------- | --------------------------------------: |
-|  |  |
+several components. The `App` directs the creation and maintenance
+of several schedulers, and the `API` is responsible for interfacing with
+the `Scheduler` system.
+
+```mermaid
+flowchart TB
+ subgraph "**Scheduler**<br/>[system]"
+ direction TB
+ subgraph Server["**API**<br/>[component]<br/>REST API"]
+ end
+ subgraph App["**App**<br/>[component]<br/>Main python application"]
+ end
+ Server-->App
+ end
+```
-Typically in a OpenKAT installation 3 scheduler will be created per organisation:
+Typically, in an OpenKAT installation 3 schedulers will be created:
1. _boefje scheduler_
2. _normalizer scheduler_
3. _report scheduler_
Each scheduler type implements it's own priority queue, and can implement it's
-own processes of populating, and prioritization of its tasks.
-
-
-
-Interaction with the scheduler and access to the internals of the
-`SchedulerApp` can be accessed by the `Server` which implements a HTTP REST API
-interface.
+own processes of populating, and prioritization of its tasks. Interaction with
+the scheduler and access to the internals of the `App` can be achieved by
+interfacing with the `Server`, which implements an HTTP REST API interface.
## Dataflows
@@ -92,7 +119,22 @@ responsible for maintaining a queue of tasks for `Task Runners` to pick up and
process. A `Scheduler` is responsible for creating `Task` objects and pushing
them onto the queue.
-
+```mermaid
+flowchart LR
+ subgraph "**Scheduler**<br/>[system]"
+ direction LR
+ subgraph Scheduler["**Scheduler**<br/>[component]<br/>"]
+ direction LR
+ Process["Task creation process"]
+ subgraph PriorityQueue["PriorityQueue"]
+ Task0
+ Task1[...]
+ TaskN
+ end
+ end
+ Process-->PriorityQueue
+ end
+```
The `PriorityQueue` derives its state from the state of the `Task` objects that
are persisted in the database. In other words, the current state of the
@@ -102,13 +144,16 @@ are persisted in the database. In other words, the current state of the
A `Task` object contains the following fields:
-- `scheduler_id` - The id of the scheduler for which this task is created
-- `schedule_id` - Optional, the id of the `Schedule` that created the task
-- `priority` - The priority of the task
-- `status` - The status of the task
-- `type` - The type of the task
-- `data` - A JSON object containing the task data
-- `hash` - A unique hash generated by specific fields from the task data
+| Field | Description |
+| -------------- | ------------------------------------------------------------- |
+| `scheduler_id` | The id of the scheduler for which this task is created |
+| `schedule_id` | Optional, the id of the `Schedule` that created the task |
+| `priority` | The priority of the task |
+| `organisation` | The organisation for which the task is created |
+| `status` | The status of the task |
+| `type` | The type of the task |
+| `data` | A JSON object containing the task data |
+| `hash` | A unique hash generated by specific fields from the task data |
Important to note is the `data` field contains the object that a `Task Runner`
will use to execute the task. This field is a JSON field that allows any object
@@ -120,6 +165,35 @@ By doing this, it allows the scheduler to wrap whatever object within a `Task`,
and as a result we're able to create and extend more types of schedulers that
are not specifically bound to a type.
+A JSON representation of a `Task` object, for example one with a `BoefjeTask`
+object as the `data` field:
+
+```json
+{
+ "scheduler_id": "1",
+ "schedule_id": "1",
+ "priority": 1,
+ "organisation": "openkat-corp",
+ "status": "PENDING",
+ "type": "boefje",
+ "data": {
+ "ooi": "internet",
+ "boefje": {
+ "id": "dns-zone",
+ "scan_level": 1
+ }
+ },
+ "hash": "a1b2c3d4e5f6g7h8i9j0"
+}
+```
+
+A `Task` is a one-time execution of a task and is a unique instance of the task that
+is present in the `data` object. This means that you will encounter several
+instances of the same task. We generate a unique hash for each task by hashing
+specific fields from the `data` object. This hash is used to identify the task
+within the `PriorityQueue` and is used to check if the same task is already on
+the queue.
+
This approach ensures that the historical record of each task's execution is
distinct, providing a clear and isolated view of each instance of the task's
lifecycle. This strategy enables maintaining accurate and unambiguous
@@ -153,29 +227,28 @@ that `Scheduler` can create `Schedule` objects for its `Task` objects. A
`Schedule` object is a way to define when a `Task` should be executed
automatically on a recurring schedule by the `Scheduler`.
-A `Schedule` will use the 'blueprint' that is defined in its `data` field (this
+A `Schedule` will use the _'blueprint'_ that is defined in its `data` field (this
is the same as the `data` field of a `Task`) to generate a `Task` object to be
pushed on the queue of a `Scheduler`.
-
-
A `Schedule` object contains the following fields:
-- `scheduler_id` - The id of the scheduler that created the schedule
-- `schedule` - A cron expression that defines when the task should be
- executed, this is used to update the value of `deadline_at`
-- `deadline_at` - A timestamp that defines when the task should be executed
-- `data` - A JSON object containing data for the schedule (this is the same as
- the `data` field in the `Task` object)
-- `hash` - A unique hash generated by specific fields from the schedule data
+| Field | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------ |
+| `scheduler_id` | The id of the scheduler that created the schedule |
+| `schedule` | A cron expression that defines when the task should be executed, this is used to update the value of `deadline_at` |
+| `deadline_at` | A timestamp that defines when the task should be executed |
+| `data` | A JSON object containing data for the schedule (this is the same as the `data` field in the `Task` object) |
+| `hash` | A unique hash generated by specific fields from the schedule data |
A `Scheduler` can be extended by a process that checks if the `deadline_at`
of a `Schedule` has passed, and if so, creates a `Task` object for the
`Scheduler` to push onto the queue.
-When the `Task` object is pushed onto the queue, the new `deadline_at` value
-of the `Schedule` is calculated using the cron expression defined in the
-`schedule` field.
+Typically when the `Task` object is pushed onto the queue, the new
+`deadline_at` value of the `Schedule` is calculated using the cron expression
+defined in the `schedule` field. Refer to the specific `Scheduler` for more
+information on how this is implemented.
### `BoefjeScheduler`
@@ -221,21 +294,46 @@ Before a `BoefjeTask` and pushed on the queue we will check the following:
#### Processes
-
+```mermaid
+flowchart LR
+ subgraph "**Scheduler**<br/>[system]"
+ direction LR
+ subgraph BoefjeScheduler["**BoefjeScheduler**<br/>[component]<br/>"]
+ direction LR
+ ProcessManual["Manual"]
+ ProcessMutations["Mutations"]
+ ProcessNewBoefjes["NewBoefjes"]
+ ProcessRescheduling["Rescheduling"]
+ subgraph PriorityQueue["PriorityQueue"]
+ Task0
+ Task1[...]
+ TaskN
+ end
+ ProcessManual-->PriorityQueue
+ ProcessMutations-->PriorityQueue
+ ProcessNewBoefjes-->PriorityQueue
+ ProcessRescheduling-->PriorityQueue
+ end
+ end
+```
In order to create a `BoefjeTask` and trigger the dataflow we described above
-we have 4 different processes running in threads within a `BoefjeScheduler`
+we have 3 different processes running in threads within a `BoefjeScheduler`
that can create boefje tasks. Namely:
-1. scan profile mutations
-2. enabling of boefjes
-3. rescheduling of prior tasks
-4. manual scan job
+| Process | Description |
+| ----------------------- | -------------------------------------------------------------------------------------------------- |
+| `process_mutations` | scan profile mutations received from RabbitMQ indicating that the scan level of an OOI has changed |
+| `process_new_boefjes` | enabling of boefjes will result in gathering of OOIs on which the boefje can be used |
+| `process_rescheduling` | rescheduling of prior tasks |
+
+Additionally, a boefje task creation can be triggered by a manual scan job that
+is created by the user in Rocky.
##### 1. Scan profile mutations
When a scan level is increased on an OOI
-(`schedulers.boefje.push_tasks_for_scan_profile_mutations`) a message is pushed
+(`schedulers.boefje.process_mutations`) a message is pushed
on the RabbitMQ `{organization_id}__scan_profile_mutations` queue. The scheduler
continuously checks if new messages are posted on the queue. The resulting tasks
from this process will get the second highest priority of 2 on the queue.
@@ -336,7 +434,22 @@ queue we will check the following:
#### Processes
-
+```mermaid
+flowchart LR
+ subgraph "**Scheduler**<br/>[system]"
+ direction LR
+ subgraph NormalizerScheduler["**NormalizerScheduler**<br/>[component]<br/>"]
+ direction LR
+ ProcessRawData["RawData"]
+ subgraph PriorityQueue["PriorityQueue"]
+ Task0
+ Task1[...]
+ TaskN
+ end
+ ProcessRawData-->PriorityQueue
+ end
+ end
+```
The following processes within a `NormalizerScheduler` will create a
`NormalizerTask` tasks:
@@ -345,7 +458,7 @@ The following processes within a `NormalizerScheduler` will create a
##### 1. Raw file creation in Bytes
-When a raw file is created (`schedulers.normalizer.create_tasks_for_raw_data`)
+When a raw file is created (`schedulers.normalizer.process_raw_data`)
- The `NormalizerScheduler` retrieves raw files that have been created in Bytes
from a message queue.
@@ -365,7 +478,22 @@ picked up and processed by the report task runner.
#### Processes
-
+```mermaid
+flowchart LR
+ subgraph "**Scheduler**<br/>[system]"
+ direction LR
+ subgraph ReportScheduler["**ReportScheduler**<br/>[component]<br/>"]
+ direction LR
+ ProcessRescheduling["Rescheduling"]
+ subgraph PriorityQueue["PriorityQueue"]
+ Task0
+ Task1[...]
+ TaskN
+ end
+ ProcessRescheduling-->PriorityQueue
+ end
+ end
+```
The `ReportScheduler` will create a `ReportTask` for the `Task` that is
associated with a `Schedule` object.
diff --git a/mula/docs/img/boefje_scheduler.svg b/mula/docs/img/boefje_scheduler.svg
deleted file mode 100644
index 9f854ad21bf..00000000000
--- a/mula/docs/img/boefje_scheduler.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/docs/img/normalizer_scheduler.svg b/mula/docs/img/normalizer_scheduler.svg
deleted file mode 100644
index 18b53d70fe8..00000000000
--- a/mula/docs/img/normalizer_scheduler.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/docs/img/queue.svg b/mula/docs/img/queue.svg
deleted file mode 100644
index 1f7fdbfcdee..00000000000
--- a/mula/docs/img/queue.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/docs/img/report_scheduler.svg b/mula/docs/img/report_scheduler.svg
deleted file mode 100644
index c6a78c79e97..00000000000
--- a/mula/docs/img/report_scheduler.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/docs/img/scheduler.svg b/mula/docs/img/scheduler.svg
deleted file mode 100644
index 87fc74ee30a..00000000000
--- a/mula/docs/img/scheduler.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/docs/img/scheduler_system.svg b/mula/docs/img/scheduler_system.svg
deleted file mode 100644
index ac511569ad2..00000000000
--- a/mula/docs/img/scheduler_system.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/docs/img/schedulers.svg b/mula/docs/img/schedulers.svg
deleted file mode 100644
index d804fc43df1..00000000000
--- a/mula/docs/img/schedulers.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/docs/img/schedules.svg b/mula/docs/img/schedules.svg
deleted file mode 100644
index 2d4cf387854..00000000000
--- a/mula/docs/img/schedules.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/docs/img/tasks.svg b/mula/docs/img/tasks.svg
deleted file mode 100644
index 0b686533366..00000000000
--- a/mula/docs/img/tasks.svg
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
diff --git a/mula/logging.json b/mula/logging.json
index 2d7a02642d9..4e147b7e17c 100644
--- a/mula/logging.json
+++ b/mula/logging.json
@@ -16,65 +16,47 @@
},
"root": {
"level": "INFO",
- "handlers": [
- "console"
- ]
+ "handlers": ["console"]
},
"loggers": {
"alembic.runtime.migration": {
"level": "CRITICAL",
- "handlers": [
- "console"
- ],
+ "handlers": ["console"],
"propagate": 0
},
"urllib3.connectionpool": {
"level": "CRITICAL",
- "handlers": [
- "console"
- ],
+ "handlers": ["console"],
"propagate": 0
},
"uvicorn.error": {
"level": "CRITICAL",
- "handlers": [
- "console"
- ],
+ "handlers": ["console"],
"propagate": 0
},
"uvicorn.access": {
"level": "CRITICAL",
- "handlers": [
- "console"
- ],
+ "handlers": ["console"],
"propagate": 0
},
"pika": {
"level": "CRITICAL",
- "handlers": [
- "console"
- ],
+ "handlers": ["console"],
"propagate": 0
},
"sqlalchemy.engine": {
"level": "CRITICAL",
- "handlers": [
- "console"
- ],
+ "handlers": ["console"],
"propagate": 0
},
"httpx": {
"level": "CRITICAL",
- "handlers": [
- "console"
- ],
+ "handlers": ["console"],
"propagate": 0
},
"httpcore": {
"level": "CRITICAL",
- "handlers": [
- "console"
- ],
+ "handlers": ["console"],
"propagate": 0
}
}
diff --git a/mula/scheduler/app.py b/mula/scheduler/app.py
index d8770730762..5c8ca33a860 100644
--- a/mula/scheduler/app.py
+++ b/mula/scheduler/app.py
@@ -4,7 +4,7 @@
import structlog
from opentelemetry import trace
-from scheduler import clients, context, schedulers, server
+from scheduler import context, schedulers, server
from scheduler.utils import thread
tracer = trace.get_tracer(__name__)
@@ -26,34 +26,21 @@ class App:
through a REST API.
* Metrics: The collection of application specific metrics.
-
- Attributes:
- logger:
- The logger for the class.
- ctx:
- Application context of shared data (e.g. configuration, external
- services connections).
- stop_event: A threading.Event object used for communicating a stop
- event across threads.
- schedulers:
- A dict of schedulers, keyed by scheduler id.
- server:
- The http rest api server instance.
"""
def __init__(self, ctx: context.AppContext) -> None:
"""Initialize the application.
Args:
- ctx:
- Application context of shared data (e.g. configuration,
- external services connections).
+ ctx (context.AppContext): Application context of shared data (e.g.
+ configuration, external services connections).
"""
self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
self.ctx: context.AppContext = ctx
+ self.server: server.Server | None = None
- threading.excepthook = self.unhandled_exception
+ threading.excepthook = self._unhandled_exception
self.stop_event: threading.Event = threading.Event()
self.lock: threading.Lock = threading.Lock()
@@ -64,147 +51,6 @@ def __init__(self, ctx: context.AppContext) -> None:
| schedulers.NormalizerScheduler
| schedulers.ReportScheduler,
] = {}
- self.server: server.Server | None = None
-
- @tracer.start_as_current_span("monitor_organisations")
- def monitor_organisations(self) -> None:
- """Monitor the organisations from the Katalogus service, and add/remove
- organisations from the schedulers.
- """
- current_schedulers = self.schedulers.copy()
-
- # We make a difference between the organisation id's that are used
- # by the schedulers, and the organisation id's that are in the
- # Katalogus service. We will add/remove schedulers based on the
- # difference between these two sets.
- scheduler_orgs: set[str] = {
- s.organisation.id for s in current_schedulers.values() if hasattr(s, "organisation")
- }
- try:
- orgs = self.ctx.services.katalogus.get_organisations()
- except clients.errors.ExternalServiceError:
- self.logger.exception("Failed to get organisations from Katalogus")
- return
-
- katalogus_orgs = {org.id for org in orgs}
-
- additions = katalogus_orgs.difference(scheduler_orgs)
- self.logger.debug("Organisations to add: %s", len(additions), additions=sorted(additions))
-
- removals = scheduler_orgs.difference(katalogus_orgs)
- self.logger.debug("Organisations to remove: %s", len(removals), removals=sorted(removals))
-
- # We need to get scheduler ids of the schedulers that are associated
- # with the removed organisations
- removal_scheduler_ids: set[str] = {
- s.scheduler_id
- for s in current_schedulers.values()
- if hasattr(s, "organisation") and s.organisation.id in removals
- }
-
- # Remove schedulers for removed organisations
- for scheduler_id in removal_scheduler_ids:
- if scheduler_id not in self.schedulers:
- continue
-
- self.schedulers[scheduler_id].stop()
-
- if removals:
- self.logger.debug("Removed %s organisations from scheduler", len(removals), removals=sorted(removals))
-
- # Add schedulers for organisation
- for org_id in additions:
- try:
- org = self.ctx.services.katalogus.get_organisation(org_id)
- except clients.errors.ExternalServiceError as e:
- self.logger.error("Failed to get organisation from Katalogus", error=e, org_id=org_id)
- continue
-
- scheduler_boefje = schedulers.BoefjeScheduler(
- ctx=self.ctx, scheduler_id=f"boefje-{org.id}", organisation=org, callback=self.remove_scheduler
- )
-
- scheduler_normalizer = schedulers.NormalizerScheduler(
- ctx=self.ctx, scheduler_id=f"normalizer-{org.id}", organisation=org, callback=self.remove_scheduler
- )
-
- scheduler_report = schedulers.ReportScheduler(
- ctx=self.ctx, scheduler_id=f"report-{org.id}", organisation=org, callback=self.remove_scheduler
- )
-
- with self.lock:
- self.schedulers[scheduler_boefje.scheduler_id] = scheduler_boefje
- self.schedulers[scheduler_normalizer.scheduler_id] = scheduler_normalizer
- self.schedulers[scheduler_report.scheduler_id] = scheduler_report
-
- scheduler_normalizer.run()
- scheduler_boefje.run()
- scheduler_report.run()
-
- if additions:
- # Flush katalogus caches when new organisations are added
- self.ctx.services.katalogus.flush_caches()
-
- self.logger.debug("Added %s organisations to scheduler", len(additions), additions=sorted(additions))
-
- @tracer.start_as_current_span("collect_metrics")
- def collect_metrics(self) -> None:
- """Collect application metrics
-
- This method that allows to collect metrics throughout the application.
- """
- with self.lock:
- for s in self.schedulers.copy().values():
- self.ctx.metrics_qsize.labels(scheduler_id=s.scheduler_id).set(s.queue.qsize())
-
- status_counts = self.ctx.datastores.task_store.get_status_counts(s.scheduler_id)
- for status, count in status_counts.items():
- self.ctx.metrics_task_status_counts.labels(scheduler_id=s.scheduler_id, status=status).set(count)
-
- def start_schedulers(self) -> None:
- # Initialize the schedulers
- try:
- orgs = self.ctx.services.katalogus.get_organisations()
- except clients.errors.ExternalServiceError as e:
- self.logger.error("Failed to get organisations from Katalogus", error=e)
- return
-
- for org in orgs:
- boefje_scheduler = schedulers.BoefjeScheduler(
- ctx=self.ctx, scheduler_id=f"boefje-{org.id}", organisation=org, callback=self.remove_scheduler
- )
- self.schedulers[boefje_scheduler.scheduler_id] = boefje_scheduler
-
- normalizer_scheduler = schedulers.NormalizerScheduler(
- ctx=self.ctx, scheduler_id=f"normalizer-{org.id}", organisation=org, callback=self.remove_scheduler
- )
- self.schedulers[normalizer_scheduler.scheduler_id] = normalizer_scheduler
-
- report_scheduler = schedulers.ReportScheduler(
- ctx=self.ctx, scheduler_id=f"report-{org.id}", organisation=org, callback=self.remove_scheduler
- )
- self.schedulers[report_scheduler.scheduler_id] = report_scheduler
-
- # Start schedulers
- for scheduler in self.schedulers.values():
- scheduler.run()
-
- def start_monitors(self) -> None:
- thread.ThreadRunner(
- name="App-monitor_organisations",
- target=self.monitor_organisations,
- stop_event=self.stop_event,
- interval=self.ctx.config.monitor_organisations_interval,
- ).start()
-
- def start_collectors(self) -> None:
- thread.ThreadRunner(
- name="App-metrics_collector", target=self.collect_metrics, stop_event=self.stop_event, interval=10
- ).start()
-
- def start_server(self) -> None:
- self.server = server.Server(self.ctx, self.schedulers)
- thread.ThreadRunner(name="App-server", target=self.server.run, stop_event=self.stop_event, loop=False).start()
def run(self) -> None:
"""Start the main scheduler application, and run in threads the
@@ -215,19 +61,12 @@ def run(self) -> None:
* metrics collecting
* api server
"""
- # Start schedulers
self.start_schedulers()
+ self.start_server(self.schedulers)
- # Start monitors
- self.start_monitors()
-
- # Start metrics collecting
if self.ctx.config.collect_metrics:
self.start_collectors()
- # API Server
- self.start_server()
-
# Main thread
while not self.stop_event.is_set():
self.stop_event.wait()
@@ -241,24 +80,55 @@ def run(self) -> None:
# Source: https://stackoverflow.com/a/1489838/1346257
os._exit(1)
+ def start_schedulers(self) -> None:
+ boefje = schedulers.BoefjeScheduler(ctx=self.ctx)
+ self.schedulers[boefje.scheduler_id] = boefje
+
+ normalizer = schedulers.NormalizerScheduler(ctx=self.ctx)
+ self.schedulers[normalizer.scheduler_id] = normalizer
+
+ report = schedulers.ReportScheduler(ctx=self.ctx)
+ self.schedulers[report.scheduler_id] = report
+
+ for s in self.schedulers.values():
+ s.run()
+
+ def start_server(
+ self,
+ schedulers: dict[
+ str,
+ schedulers.Scheduler
+ | schedulers.BoefjeScheduler
+ | schedulers.NormalizerScheduler
+ | schedulers.ReportScheduler,
+ ],
+ ) -> None:
+ self.server = server.Server(self.ctx, schedulers)
+ thread.ThreadRunner(name="App-server", target=self.server.run, stop_event=self.stop_event, loop=False).start()
+
+ def start_collectors(self) -> None:
+ thread.ThreadRunner(
+ name="App-metrics_collector", target=self._collect_metrics, stop_event=self.stop_event, interval=10
+ ).start()
+
def shutdown(self) -> None:
"""Shutdown the scheduler application, and all threads."""
self.logger.info("Shutdown initiated")
self.stop_event.set()
- # First stop schedulers
- for s in self.schedulers.copy().values():
+ # Stop all schedulers
+ for s in self.schedulers.values():
s.stop()
# Stop all threads that are still running, except the main thread.
# These threads likely have a blocking call and as such are not able
# to leverage a stop event.
- self.stop_threads()
+ self._stop_threads()
self.logger.info("Shutdown complete")
- def stop_threads(self) -> None:
+ def _stop_threads(self) -> None:
"""Stop all threads, except the main thread."""
for t in threading.enumerate():
if t is threading.current_thread():
@@ -272,23 +142,19 @@ def stop_threads(self) -> None:
t.join(5)
- def unhandled_exception(self, args: threading.ExceptHookArgs) -> None:
+ def _unhandled_exception(self, args: threading.ExceptHookArgs) -> None:
"""Gracefully shutdown the scheduler application, and all threads
when an unhandled exception occurs.
"""
self.logger.error("Unhandled exception occurred: %s", args.exc_value)
self.stop_event.set()
- def remove_scheduler(self, scheduler_id: str) -> None:
- """Remove a scheduler from the application. This method is passed
- as a callback to the scheduler, so that the scheduler can remove
- itself from the application.
-
- Args:
- scheduler_id: The id of the scheduler to remove.
- """
- with self.lock:
- if scheduler_id not in self.schedulers:
- return
+ def _collect_metrics(self) -> None:
+ """Collect application metrics throughout the application."""
+ for s in self.schedulers.values():
+ qsize = self.ctx.datastores.pq_store.qsize(s.scheduler_id)
+ self.ctx.metrics_qsize.labels(scheduler_id=s.scheduler_id).set(qsize)
- self.schedulers.pop(scheduler_id)
+ status_counts = self.ctx.datastores.task_store.get_status_counts(s.scheduler_id)
+ for status, count in status_counts.items():
+ self.ctx.metrics_task_status_counts.labels(scheduler_id=s.scheduler_id, status=status).set(count)
diff --git a/mula/scheduler/clients/amqp/listeners.py b/mula/scheduler/clients/amqp/listeners.py
index 6a8955a9107..2f93c2f1e80 100644
--- a/mula/scheduler/clients/amqp/listeners.py
+++ b/mula/scheduler/clients/amqp/listeners.py
@@ -95,7 +95,7 @@ def __init__(self, dsn: str, queue: str, func: Callable, durable: bool = True, p
self.func: Callable = func
self.executor: futures.ThreadPoolExecutor = futures.ThreadPoolExecutor(
- max_workers=10, thread_name_prefix=f"Listener-TPE-{self.__class__.__name__}"
+ max_workers=10, thread_name_prefix=f"TPE-Listener-{self.__class__.__name__}"
)
self.connection: pika.BlockingConnection | None = None
diff --git a/mula/scheduler/clients/http/external/katalogus.py b/mula/scheduler/clients/http/external/katalogus.py
index ba174259de5..e63e9a68860 100644
--- a/mula/scheduler/clients/http/external/katalogus.py
+++ b/mula/scheduler/clients/http/external/katalogus.py
@@ -63,7 +63,7 @@ def flush_plugin_cache(self) -> None:
self.plugin_cache.expiration_enabled = True
- self.logger.debug("Flushed the katalogus plugin cache for organisations")
+ self.logger.debug("Flushed the katalogus plugin cache for organisations", plugin_cache=self.plugin_cache.cache)
def flush_boefje_cache(self) -> None:
"""boefje.consumes -> plugin type boefje"""
@@ -99,7 +99,9 @@ def flush_boefje_cache(self) -> None:
self.boefje_cache.expiration_enabled = True
- self.logger.debug("Flushed the katalogus boefje type cache for organisations")
+ self.logger.debug(
+ "Flushed the katalogus boefje type cache for organisations", boefje_cache=self.boefje_cache.cache
+ )
def flush_normalizer_cache(self) -> None:
"""normalizer.consumes -> plugin type normalizer"""
@@ -129,7 +131,10 @@ def flush_normalizer_cache(self) -> None:
self.normalizer_cache.expiration_enabled = True
- self.logger.debug("Flushed the katalogus normalizer type cache for organisations")
+ self.logger.debug(
+ "Flushed the katalogus normalizer type cache for organisations",
+ normalizer_cache=self.normalizer_cache.cache,
+ )
@exception_handler
def get_boefjes(self) -> list[Boefje]:
@@ -203,6 +208,8 @@ def _get_from_cache() -> Plugin:
return dict_utils.deep_get(self.plugin_cache, [organisation_id, plugin_id])
try:
+ if self.plugin_cache.is_empty():
+ self.flush_plugin_cache()
return _get_from_cache()
except dict_utils.ExpiredError:
self.flush_plugin_cache()
@@ -214,6 +221,8 @@ def _get_from_cache() -> list[Plugin]:
return dict_utils.deep_get(self.boefje_cache, [organisation_id, boefje_type])
try:
+ if self.boefje_cache.is_empty():
+ self.flush_boefje_cache()
return _get_from_cache()
except dict_utils.ExpiredError:
self.flush_boefje_cache()
@@ -225,6 +234,8 @@ def _get_from_cache() -> list[Plugin]:
return dict_utils.deep_get(self.normalizer_cache, [organisation_id, normalizer_type])
try:
+ if self.normalizer_cache.is_empty():
+ self.flush_normalizer_cache()
return _get_from_cache()
except dict_utils.ExpiredError:
self.flush_normalizer_cache()
diff --git a/mula/scheduler/config/settings.py b/mula/scheduler/config/settings.py
index f095350b8bb..9db1d567e1f 100644
--- a/mula/scheduler/config/settings.py
+++ b/mula/scheduler/config/settings.py
@@ -130,7 +130,7 @@ class Settings(BaseSettings):
)
# Queue settings
- pq_maxsize: int = Field(1000, description="How many items a priority queue can hold (0 is infinite)")
+ pq_maxsize: int = Field(0, description="How many items a priority queue can hold (0 is infinite)")
pq_interval: int = Field(
60, description="Interval in seconds of the execution of the `` method of the `scheduler.Scheduler` class"
diff --git a/mula/scheduler/models/__init__.py b/mula/scheduler/models/__init__.py
index a5390ad6ede..9dd3cf8ffcc 100644
--- a/mula/scheduler/models/__init__.py
+++ b/mula/scheduler/models/__init__.py
@@ -3,10 +3,10 @@
from .events import RawData, RawDataReceivedEvent
from .health import ServiceHealth
from .normalizer import Normalizer
-from .ooi import OOI, MutationOperationType, ScanProfile, ScanProfileMutation
+from .ooi import OOI, MutationOperationType, RunOn, ScanProfile, ScanProfileMutation
from .organisation import Organisation
from .plugin import Plugin
from .queue import Queue
from .schedule import Schedule, ScheduleDB
-from .scheduler import Scheduler
+from .scheduler import Scheduler, SchedulerType
from .task import BoefjeTask, NormalizerTask, ReportTask, Task, TaskDB, TaskStatus
diff --git a/mula/scheduler/models/ooi.py b/mula/scheduler/models/ooi.py
index 94edb1570ce..4f7feed63ce 100644
--- a/mula/scheduler/models/ooi.py
+++ b/mula/scheduler/models/ooi.py
@@ -32,3 +32,4 @@ class ScanProfileMutation(BaseModel):
operation: MutationOperationType
primary_key: str
value: OOI | None
+ client_id: str
diff --git a/mula/scheduler/models/organisation.py b/mula/scheduler/models/organisation.py
index 58032cb5f30..dc819d297f6 100644
--- a/mula/scheduler/models/organisation.py
+++ b/mula/scheduler/models/organisation.py
@@ -3,4 +3,4 @@
class Organisation(BaseModel):
id: str
- name: str
+ name: str | None = None
diff --git a/mula/scheduler/models/schedule.py b/mula/scheduler/models/schedule.py
index 4ba0c54bc71..941150da3ec 100644
--- a/mula/scheduler/models/schedule.py
+++ b/mula/scheduler/models/schedule.py
@@ -10,26 +10,19 @@
from scheduler.utils import GUID, cron
from .base import Base
-from .task import Task
class Schedule(BaseModel):
model_config = ConfigDict(from_attributes=True, validate_assignment=True)
id: uuid.UUID = Field(default_factory=uuid.uuid4)
-
scheduler_id: str
-
+ organisation: str
hash: str | None = Field(None, max_length=32)
-
data: dict | None = None
-
enabled: bool = True
-
schedule: str | None = None
- tasks: list[Task] = []
-
deadline_at: datetime | None = None
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
modified_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
@@ -57,21 +50,14 @@ class ScheduleDB(Base):
__tablename__ = "schedules"
id = Column(GUID, primary_key=True)
-
scheduler_id = Column(String, nullable=False)
-
+ organisation = Column(String, nullable=False)
hash = Column(String(32), nullable=True, unique=True)
-
data = Column(JSONB, nullable=False)
-
enabled = Column(Boolean, nullable=False, default=True)
-
schedule = Column(String, nullable=True)
-
tasks = relationship("TaskDB", back_populates="schedule")
deadline_at = Column(DateTime(timezone=True), nullable=True)
-
created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())
-
modified_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now())
diff --git a/mula/scheduler/models/scheduler.py b/mula/scheduler/models/scheduler.py
index 9c75c923743..e1d0f7c7b77 100644
--- a/mula/scheduler/models/scheduler.py
+++ b/mula/scheduler/models/scheduler.py
@@ -1,14 +1,24 @@
+import enum
from datetime import datetime
from typing import Any
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict
+
+
+class SchedulerType(str, enum.Enum):
+ """Enum for scheduler types."""
+
+ UNKNOWN = "unknown"
+ BOEFJE = "boefje"
+ NORMALIZER = "normalizer"
+ REPORT = "report"
class Scheduler(BaseModel):
- """Representation of a schedulers.Scheduler instance. Used for
- unmarshalling of schedulers to a JSON representation."""
+ model_config = ConfigDict(from_attributes=True, use_enum_values=True)
- id: str | None = None
- enabled: bool | None = None
- priority_queue: dict[str, Any] | None = None
+ id: str
+ type: SchedulerType
+ item_type: Any
+ qsize: int = 0
last_activity: datetime | None = None
diff --git a/mula/scheduler/models/task.py b/mula/scheduler/models/task.py
index dee0014e86c..c438dc87760 100644
--- a/mula/scheduler/models/task.py
+++ b/mula/scheduler/models/task.py
@@ -46,19 +46,13 @@ class Task(BaseModel):
model_config = ConfigDict(from_attributes=True, use_enum_values=True)
id: uuid.UUID = Field(default_factory=uuid.uuid4)
-
- scheduler_id: str | None = None
-
+ scheduler_id: str
schedule_id: uuid.UUID | None = None
-
+ organisation: str
priority: int | None = 0
-
status: TaskStatus = TaskStatus.PENDING
-
type: str | None = None
-
hash: str | None = Field(None, max_length=32)
-
data: dict = Field(default_factory=dict)
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
@@ -69,24 +63,18 @@ class TaskDB(Base):
__tablename__ = "tasks"
id = Column(GUID, primary_key=True)
-
scheduler_id = Column(String, nullable=False)
-
schedule_id = Column(GUID, ForeignKey("schedules.id", ondelete="SET NULL"), nullable=True)
- schedule = relationship("ScheduleDB", back_populates="tasks")
-
+ organisation = Column(String, nullable=False)
type = Column(String, nullable=False)
-
hash = Column(String(32), index=True)
-
priority = Column(Integer)
-
data = Column(JSONB, nullable=False)
-
status = Column(Enum(TaskStatus), nullable=False, default=TaskStatus.PENDING)
- created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())
+ schedule = relationship("ScheduleDB", back_populates="tasks")
+ created_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now())
modified_at = Column(DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now())
diff --git a/mula/scheduler/schedulers/errors.py b/mula/scheduler/schedulers/errors.py
new file mode 100644
index 00000000000..d20f03018e0
--- /dev/null
+++ b/mula/scheduler/schedulers/errors.py
@@ -0,0 +1,21 @@
+import functools
+
+from scheduler.clients.errors import ExternalServiceError
+from scheduler.schedulers.queue.errors import QueueFullError
+
+
+def exception_handler(func):
+ @functools.wraps(func)
+ def inner_function(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
+ except ExternalServiceError as exc:
+ self.logger.exception("An exception occurred", exc=exc)
+ return None
+ except QueueFullError as exc:
+ self.logger.exception("Queue is full", exc=exc)
+ return None
+ except Exception as exc:
+ raise exc
+
+ return inner_function
diff --git a/mula/scheduler/schedulers/queue/pq.py b/mula/scheduler/schedulers/queue/pq.py
index 4a1914451a6..1e7c0b77d29 100644
--- a/mula/scheduler/schedulers/queue/pq.py
+++ b/mula/scheduler/schedulers/queue/pq.py
@@ -97,7 +97,7 @@ def __init__(
self.pq_store: storage.stores.PriorityQueueStore = pq_store
self.lock: threading.Lock = threading.Lock()
- def pop(self, filters: storage.filters.FilterRequest | None = None) -> models.Task | None:
+ def pop(self, filters: storage.filters.FilterRequest | None = None) -> tuple[list[models.Task], int]:
"""Remove and return the highest priority item from the queue.
Optionally apply filters to the queue.
@@ -113,14 +113,13 @@ def pop(self, filters: storage.filters.FilterRequest | None = None) -> models.Ta
if self.empty():
raise QueueEmptyError(f"Queue {self.pq_id} is empty.")
- item = self.pq_store.pop(self.pq_id, filters)
- if item is None:
- return None
+ items, count = self.pq_store.pop(self.pq_id, filters)
+ if items is None:
+ return ([], 0)
- item.status = models.TaskStatus.DISPATCHED
- self.pq_store.update(self.pq_id, item)
+ self.pq_store.bulk_update_status(self.pq_id, [item.id for item in items], models.TaskStatus.DISPATCHED)
- return item
+ return items, count
def push(self, task: models.Task) -> models.Task:
"""Push an item onto the queue.
@@ -202,7 +201,17 @@ def push(self, task: models.Task) -> models.Task:
task.status = models.TaskStatus.QUEUED
item_db = self.pq_store.push(task)
else:
- self.pq_store.update(self.pq_id, task)
+ # Get the item from the queue and update it
+ stored_item_data = self.get_item_by_identifier(task)
+ if stored_item_data is None:
+ raise ItemNotFoundError(f"Item {task} not found in datastore {self.pq_id}")
+
+ # Update the item with the new data
+ patch_data = task.dict(exclude_unset=True)
+ updated_task = stored_item_data.model_copy(update=patch_data)
+
+ # Update the item in the queue
+ self.pq_store.update(self.pq_id, updated_task)
item_db = self.get_item_by_identifier(task)
if not item_db:
diff --git a/mula/scheduler/schedulers/rankers/boefje.py b/mula/scheduler/schedulers/rankers/boefje.py
index f951aea4f84..06b65e98cc1 100644
--- a/mula/scheduler/schedulers/rankers/boefje.py
+++ b/mula/scheduler/schedulers/rankers/boefje.py
@@ -1,4 +1,3 @@
-import random
from datetime import datetime, timedelta, timezone
from typing import Any
@@ -53,6 +52,4 @@ class BoefjeRankerTimeBased(Ranker):
"""
def rank(self, obj: Any) -> int:
- minimum = datetime.today() + timedelta(days=1)
- maximum = minimum + timedelta(days=7)
- return random.randint(int(minimum.timestamp()), int(maximum.timestamp())) # noqa: S311
+ return int(obj.created_at.timestamp())
diff --git a/mula/scheduler/schedulers/scheduler.py b/mula/scheduler/schedulers/scheduler.py
index a9115abbc9d..251360c23e2 100644
--- a/mula/scheduler/schedulers/scheduler.py
+++ b/mula/scheduler/schedulers/scheduler.py
@@ -18,41 +18,38 @@
class Scheduler(abc.ABC):
- """The Scheduler class combines the priority queue.
- The scheduler is responsible for populating the queue, and ranking tasks.
+ """The scheduler base class that all schedulers should inherit from.
Attributes:
logger:
- The logger for the class
+ The logger instance.
ctx:
Application context of shared data (e.g. configuration, external
services connections).
- queue:
- A queue.PriorityQueue instance
- callback:
- A callback function to call when the scheduler is stopped.
scheduler_id:
- The id of the scheduler.
+ The id of the scheduler.
max_tries:
The maximum number of retries for an item to be pushed to
the queue.
- enabled:
- Whether the scheduler is enabled or not.
- _last_activity:
+ create_schedule:
+ Whether to create a schedule for a task.
+ last_activity:
The last activity of the scheduler.
+ queue:
+ A queues.PriorityQueue instance
listeners:
- A dict of connector.Listener instances, used for listening to
- external events.
+ A dictionary of listeners, typically AMQP listeners on which
+ event messages are received.
+ threads:
+ A list of threads that are running, typically long running
+ processes.
lock:
- A threading.Lock instance used for locking
+ A threading lock
stop_event_threads:
- A threading.Event object used for communicating a stop
- event across threads.
- threads:
- A dict of ThreadRunner instances, used for running processes
- concurrently.
+ A threading event to stop the running threads.
"""
+ TYPE: models.SchedulerType = models.SchedulerType.UNKNOWN
ITEM_TYPE: Any = None
def __init__(
@@ -60,36 +57,16 @@ def __init__(
ctx: context.AppContext,
scheduler_id: str,
queue: PriorityQueue | None = None,
- callback: Callable[..., None] | None = None,
max_tries: int = -1,
create_schedule: bool = False,
auto_calculate_deadline: bool = True,
):
- """Initialize the Scheduler.
-
- Args:
- ctx:
- Application context of shared data (e.g. configuration, external
- services connections).
- scheduler_id:
- The id of the scheduler.
- queue:
- A queue.PriorityQueue instance
- callback:
- A callback function to call when the scheduler is stopped.
- max_tries:
- The maximum number of retries for an item to be pushed to
- the queue.
- """
-
self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
self.ctx: context.AppContext = ctx
- self.callback: Callable[[], Any] | None = callback
# Properties
self.scheduler_id: str = scheduler_id
self.max_tries: int = max_tries
- self.enabled: bool = True
self.create_schedule: bool = create_schedule
self.auto_calculate_deadline: bool = auto_calculate_deadline
self._last_activity: datetime | None = None
@@ -106,9 +83,9 @@ def __init__(
self.listeners: dict[str, clients.amqp.Listener] = {}
# Threads
+ self.threads: list[thread.ThreadRunner] = []
self.lock: threading.Lock = threading.Lock()
self.stop_event_threads: threading.Event = threading.Event()
- self.threads: list[thread.ThreadRunner] = []
@abc.abstractmethod
def run(self) -> None:
@@ -184,6 +161,7 @@ def push_item_to_queue_with_timeout(
while not self.is_space_on_queue() and (tries < max_tries or max_tries == -1):
self.logger.debug(
"Queue %s is full, waiting for space",
+ self.queue.pq_id,
queue_id=self.queue.pq_id,
queue_qsize=self.queue.qsize(),
scheduler_id=self.scheduler_id,
@@ -207,16 +185,6 @@ def push_item_to_queue(self, item: models.Task, create_schedule: bool = True) ->
QueueFullError: When the queue is full.
InvalidItemError: When the item is invalid.
"""
- if not self.is_enabled():
- self.logger.warning(
- "Scheduler is disabled, not pushing item to queue %s",
- self.queue.pq_id,
- item_id=item.id,
- queue_id=self.queue.pq_id,
- scheduler_id=self.scheduler_id,
- )
- raise NotAllowedError("Scheduler is disabled")
-
try:
if item.type is None:
item.type = self.ITEM_TYPE.type
@@ -316,7 +284,9 @@ def post_push(self, item: models.Task, create_schedule: bool = True) -> models.T
schedule_db = self.ctx.datastores.schedule_store.get_schedule_by_hash(item.hash)
if schedule_db is None:
- schedule = models.Schedule(scheduler_id=self.scheduler_id, hash=item.hash, data=item.data)
+ schedule = models.Schedule(
+ scheduler_id=self.scheduler_id, hash=item.hash, data=item.data, organisation=item.organisation
+ )
schedule_db = self.ctx.datastores.schedule_store.create_schedule(schedule)
if schedule_db is None:
@@ -364,9 +334,10 @@ def post_push(self, item: models.Task, create_schedule: bool = True) -> models.T
return item
- def pop_item_from_queue(self, filters: storage.filters.FilterRequest | None = None) -> models.Task | None:
+ def pop_item_from_queue(
+ self, filters: storage.filters.FilterRequest | None = None
+ ) -> tuple[list[models.Task], int]:
"""Pop an item from the queue.
-
Args:
filters: Optional filters to apply when popping an item.
@@ -377,38 +348,26 @@ def pop_item_from_queue(self, filters: storage.filters.FilterRequest | None = No
NotAllowedError: When the scheduler is disabled.
QueueEmptyError: When the queue is empty.
"""
- if not self.is_enabled():
- self.logger.warning(
- "Scheduler is disabled, not popping item from queue",
- queue_id=self.queue.pq_id,
- queue_qsize=self.queue.qsize(),
- scheduler_id=self.scheduler_id,
- )
- raise NotAllowedError("Scheduler is disabled")
-
try:
- item = self.queue.pop(filters)
+ items, count = self.queue.pop(filters)
except QueueEmptyError as exc:
raise exc
- if item is not None:
+ if items is not None:
self.logger.debug(
- "Popped item %s from queue %s with priority %s",
- item.id,
+ "Popped %s item(s) from queue %s",
+ count,
self.queue.pq_id,
- item.priority,
- item_id=item.id,
queue_id=self.queue.pq_id,
scheduler_id=self.scheduler_id,
)
- self.post_pop(item)
+ self.post_pop(items)
- return item
+ return items, count
- def post_pop(self, item: models.Task) -> None:
+ def post_pop(self, items: list[models.Task]) -> None:
"""After an item is popped from the queue, we execute this function
-
Args:
item: An item from the queue
"""
@@ -435,54 +394,7 @@ def calculate_deadline(self, task: models.Task) -> datetime:
return adjusted_time
- def enable(self) -> None:
- """Enable the scheduler.
-
- This will start the scheduler, and start all listeners and threads.
- """
- if self.is_enabled():
- self.logger.debug("Scheduler is already enabled")
- return
-
- self.logger.info("Enabling scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id)
- self.enabled = True
- self.stop_event_threads.clear()
- self.run()
-
- self.logger.info("Enabled scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id)
-
- def disable(self) -> None:
- """Disable the scheduler.
-
- This will stop all listeners and threads, and clear the queue, and any
- tasks that were on the queue will be set to CANCELLED.
- """
- if not self.is_enabled():
- self.logger.warning("Scheduler already disabled: %s", self.scheduler_id, scheduler_id=self.scheduler_id)
- return
-
- self.logger.info("Disabling scheduler: %s", self.scheduler_id)
- self.enabled = False
-
- self.stop_listeners()
- self.stop_threads()
- self.queue.clear()
-
- # Get all tasks that were on the queue and set them to CANCELLED
- tasks, _ = self.ctx.datastores.task_store.get_tasks(
- scheduler_id=self.scheduler_id, status=models.TaskStatus.QUEUED
- )
- task_ids = [task.id for task in tasks]
- self.ctx.datastores.task_store.cancel_tasks(scheduler_id=self.scheduler_id, task_ids=task_ids)
-
- self.logger.info("Disabled scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id)
-
- def stop(self, callback: bool = True) -> None:
- """Stop the scheduler.
-
- Args:
- callback: Whether to call the callback function.
- """
+ def stop(self) -> None:
self.logger.info("Stopping scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id)
# First, stop the listeners, when those are running in a thread and
@@ -491,9 +403,6 @@ def stop(self, callback: bool = True) -> None:
self.stop_listeners()
self.stop_threads()
- if self.callback and callback:
- self.callback(self.scheduler_id) # type: ignore [call-arg]
-
self.logger.info("Stopped scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id)
def stop_listeners(self) -> None:
@@ -510,14 +419,6 @@ def stop_threads(self) -> None:
self.threads = []
- def is_enabled(self) -> bool:
- """Check if the scheduler is enabled.
-
- Returns:
- True if the scheduler is enabled, False otherwise.
- """
- return self.enabled
-
def is_space_on_queue(self) -> bool:
"""Check if there is space on the queue.
@@ -550,15 +451,8 @@ def dict(self) -> dict[str, Any]:
"""Get a dict representation of the scheduler."""
return {
"id": self.scheduler_id,
- "enabled": self.enabled,
- "priority_queue": {
- "id": self.queue.pq_id,
- "item_type": self.queue.item_type.type,
- "maxsize": self.queue.maxsize,
- "qsize": self.queue.qsize(),
- "allow_replace": self.queue.allow_replace,
- "allow_updates": self.queue.allow_updates,
- "allow_priority_updates": self.queue.allow_priority_updates,
- },
+ "type": self.TYPE.value,
+ "item_type": self.ITEM_TYPE.__name__,
+ "qsize": self.queue.qsize(),
"last_activity": self.last_activity,
}
diff --git a/mula/scheduler/schedulers/schedulers/boefje.py b/mula/scheduler/schedulers/schedulers/boefje.py
index 7310853f26a..3b344f93711 100644
--- a/mula/scheduler/schedulers/schedulers/boefje.py
+++ b/mula/scheduler/schedulers/schedulers/boefje.py
@@ -1,85 +1,43 @@
import uuid
-from collections.abc import Callable
from concurrent import futures
from datetime import datetime, timedelta, timezone
-from types import SimpleNamespace
-from typing import Any
+from typing import Any, Literal
-import structlog
from opentelemetry import trace
+from pydantic import ValidationError
-from scheduler import clients, context, storage, utils
+from scheduler import clients, context, models, utils
from scheduler.clients.errors import ExternalServiceError
-from scheduler.models import (
- OOI,
- Boefje,
- BoefjeTask,
- MutationOperationType,
- Organisation,
- Plugin,
- ScanProfileMutation,
- Task,
- TaskStatus,
-)
+from scheduler.models import MutationOperationType
from scheduler.models.ooi import RunOn
-from scheduler.schedulers import Scheduler
-from scheduler.schedulers.queue import PriorityQueue, QueueFullError
-from scheduler.schedulers.rankers import BoefjeRanker
+from scheduler.schedulers import Scheduler, rankers
+from scheduler.schedulers.errors import exception_handler
from scheduler.storage import filters
+from scheduler.storage.errors import StorageError
tracer = trace.get_tracer(__name__)
class BoefjeScheduler(Scheduler):
- """A KAT specific implementation of a Boefje scheduler. It extends
- the `Scheduler` class by adding an `organisation` attribute.
+ """Scheduler implementation for the creation of BoefjeTask models.
Attributes:
- logger: A logger instance.
- organisation: The organisation that this scheduler is for.
+ ranker: The ranker to calculate the priority of a task.
"""
- ITEM_TYPE: Any = BoefjeTask
+ ID: Literal["boefje"] = "boefje"
+ TYPE: models.SchedulerType = models.SchedulerType.BOEFJE
+ ITEM_TYPE: Any = models.BoefjeTask
- def __init__(
- self,
- ctx: context.AppContext,
- scheduler_id: str,
- organisation: Organisation,
- queue: PriorityQueue | None = None,
- callback: Callable[..., None] | None = None,
- ):
+ def __init__(self, ctx: context.AppContext):
"""Initializes the BoefjeScheduler.
Args:
- ctx: The application context.
- scheduler_id: The id of the scheduler.
- organisation: The organisation that this scheduler is for.
- queue: The queue to use for this scheduler.
- callback: The callback function to call when a task is completed.
+ ctx (context.AppContext): Application context of shared data (e.g.
+ configuration, external services connections).
"""
- self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
- self.organisation: Organisation = organisation
-
- self.queue = queue or PriorityQueue(
- pq_id=scheduler_id,
- maxsize=ctx.config.pq_maxsize,
- item_type=self.ITEM_TYPE,
- allow_priority_updates=True,
- pq_store=ctx.datastores.pq_store,
- )
-
- super().__init__(
- ctx=ctx,
- queue=self.queue,
- scheduler_id=scheduler_id,
- callback=callback,
- create_schedule=True,
- auto_calculate_deadline=True,
- )
-
- # Priority ranker
- self.priority_ranker = BoefjeRanker(self.ctx)
+ super().__init__(ctx=ctx, scheduler_id=self.ID, create_schedule=True, auto_calculate_deadline=True)
+ self.ranker = rankers.BoefjeRankerTimeBased(self.ctx)
def run(self) -> None:
"""The run method is called when the scheduler is started. It will
@@ -97,242 +55,174 @@ def run(self) -> None:
- Rescheduling; when a task has passed its deadline, we need to
reschedule it.
"""
- # Scan profile mutations
- self.listeners["scan_profile_mutations"] = clients.ScanProfileMutation(
+ self.listeners["mutations"] = clients.ScanProfileMutation(
dsn=str(self.ctx.config.host_raw_data),
- queue=f"{self.organisation.id}__scan_profile_mutations",
- func=self.push_tasks_for_scan_profile_mutations,
+ queue="scan_profile_mutations",
+ func=self.process_mutations,
prefetch_count=self.ctx.config.rabbitmq_prefetch_count,
)
- self.run_in_thread(
- name=f"BoefjeScheduler-{self.scheduler_id}-mutations",
- target=self.listeners["scan_profile_mutations"].listen,
- loop=False,
- )
-
- # New Boefjes
- self.run_in_thread(
- name=f"BoefjeScheduler-{self.scheduler_id}-new_boefjes",
- target=self.push_tasks_for_new_boefjes,
- interval=60.0,
- )
-
- # Rescheduling
- self.run_in_thread(
- name=f"scheduler-{self.scheduler_id}-reschedule", target=self.push_tasks_for_rescheduling, interval=60.0
- )
+ self.run_in_thread(name="BoefjeScheduler-mutations", target=self.listeners["mutations"].listen, loop=False)
+ self.run_in_thread(name="BoefjeScheduler-new_boefjes", target=self.process_new_boefjes, interval=60.0)
+ self.run_in_thread(name="BoefjeScheduler-rescheduling", target=self.process_rescheduling, interval=60.0)
self.logger.info(
- "Boefje scheduler started for %s",
- self.organisation.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- item_type=self.queue.item_type.__name__,
+ "Boefje scheduler started", scheduler_id=self.scheduler_id, item_type=self.queue.item_type.__name__
)
- @tracer.start_as_current_span("boefje_push_tasks_for_scan_profile_mutations")
- def push_tasks_for_scan_profile_mutations(self, body: bytes) -> None:
+ @tracer.start_as_current_span("BoefjeScheduler.process_mutations")
+ def process_mutations(self, body: bytes) -> None:
"""Create tasks for oois that have a scan level change.
Args:
mutation: The mutation that was received.
"""
- # Convert body into a ScanProfileMutation
- mutation = ScanProfileMutation.model_validate_json(body)
-
- self.logger.debug(
- "Received scan level mutation %s for: %s",
- mutation.operation,
- mutation.primary_key,
- ooi_primary_key=mutation.primary_key,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
-
- # There should be an OOI in value
- ooi = mutation.value
- if ooi is None:
+ try:
+ # Convert body into a ScanProfileMutation
+ mutation = models.ScanProfileMutation.model_validate_json(body)
self.logger.debug(
- "Mutation value is None, skipping", organisation_id=self.organisation.id, scheduler_id=self.scheduler_id
- )
- return
-
- if mutation.operation == MutationOperationType.DELETE:
- # When there are tasks of the ooi are on the queue, we need to
- # remove them from the queue.
- items, _ = self.ctx.datastores.pq_store.get_items(
+ "Received scan level mutation %s for: %s",
+ mutation.operation,
+ mutation.primary_key,
+ ooi_primary_key=mutation.primary_key,
scheduler_id=self.scheduler_id,
- filters=filters.FilterRequest(
- filters=[filters.Filter(column="data", field="input_ooi", operator="eq", value=ooi.primary_key)]
- ),
)
- # Delete all items for this ooi, update all tasks for this ooi
- # to cancelled.
- for item in items:
- task = self.ctx.datastores.task_store.get_task(item.id)
- if task is None:
- continue
+ # There should be an OOI in value
+ ooi = mutation.value
+ if ooi is None:
+ self.logger.debug("Mutation value is None, skipping", scheduler_id=self.scheduler_id)
+ return
+
+ # When the mutation is a delete operation, we need to remove all tasks
+ if mutation.operation == models.MutationOperationType.DELETE:
+ items, _ = self.ctx.datastores.pq_store.get_items(
+ scheduler_id=self.scheduler_id,
+ filters=filters.FilterRequest(
+ filters=[filters.Filter(column="data", field="input_ooi", operator="eq", value=ooi.primary_key)]
+ ),
+ )
- task.status = TaskStatus.CANCELLED
- self.ctx.datastores.task_store.update_task(task)
+ # Delete all items for this ooi, update all tasks for this ooi
+ # to cancelled.
+ for item in items:
+ task = self.ctx.datastores.task_store.get_task(item.id)
+ if task is None:
+ continue
- return
+ task.status = models.TaskStatus.CANCELLED
+ self.ctx.datastores.task_store.update_task(task)
- # What available boefjes do we have for this ooi?
- boefjes = self.get_boefjes_for_ooi(ooi)
- if not boefjes:
- self.logger.debug(
- "No boefjes available for %s",
- ooi.primary_key,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
+ return
+
+ # What available boefjes do we have for this ooi?
+ boefjes = self.get_boefjes_for_ooi(ooi, mutation.client_id)
+ if not boefjes:
+ self.logger.debug("No boefjes available for %s", ooi.primary_key, scheduler_id=self.scheduler_id)
+ return
+ except (StorageError, ValidationError):
+ self.logger.exception("Error occurred while processing mutation", scheduler_id=self.scheduler_id)
return
- with futures.ThreadPoolExecutor(
- thread_name_prefix=f"BoefjeScheduler-TPE-{self.scheduler_id}-mutations"
- ) as executor:
- for boefje in boefjes:
- # Is the boefje allowed to run on the ooi?
- if not self.has_boefje_permission_to_run(boefje, ooi):
- self.logger.debug(
- "Boefje not allowed to run on ooi",
- boefje_id=boefje.id,
- boefje_name=boefje.name,
- ooi_primary_key=ooi.primary_key,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- continue
+ # Create tasks for the boefjes
+ boefje_tasks = []
+ for boefje in boefjes:
+ if not self.has_boefje_permission_to_run(boefje, ooi):
+ self.logger.debug(
+ "Boefje not allowed to run on ooi",
+ boefje_id=boefje.id,
+ ooi_primary_key=ooi.primary_key,
+ scheduler_id=self.scheduler_id,
+ )
+ continue
- create_schedule = True
- run_task = True
-
- # What type of run boefje is it?
- if boefje.run_on:
- create_schedule = False
- run_task = False
- if mutation.operation == MutationOperationType.CREATE:
- run_task = RunOn.CREATE in boefje.run_on
- elif mutation.operation == MutationOperationType.UPDATE:
- run_task = RunOn.UPDATE in boefje.run_on
-
- if not run_task:
- self.logger.debug(
- "Based on boefje run on type, skipping",
- boefje_id=boefje.id,
- ooi_primary_key=ooi.primary_key,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- continue
+ create_schedule, run_task = True, True
- boefje_task = BoefjeTask(
- boefje=Boefje.model_validate(boefje.model_dump()),
+ # What type of run boefje is it?
+ if boefje.run_on:
+ create_schedule = False
+ run_task = False
+ if mutation.operation == MutationOperationType.CREATE:
+ run_task = RunOn.CREATE in boefje.run_on
+ elif mutation.operation == MutationOperationType.UPDATE:
+ run_task = RunOn.UPDATE in boefje.run_on
+
+ if not run_task:
+ self.logger.debug(
+ "Based on boefje run on type, skipping",
+ boefje_id=boefje.id,
+ ooi_primary_key=ooi.primary_key,
+ organisation_id=mutation.client_id,
+ scheduler_id=self.scheduler_id,
+ )
+ continue
+
+ boefje_tasks.append(
+ models.BoefjeTask(
+ boefje=models.Boefje.model_validate(boefje.model_dump()),
input_ooi=ooi.primary_key if ooi else None,
- organization=self.organisation.id,
+ organization=mutation.client_id,
)
+ )
+ with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-mutations") as executor:
+ for boefje_task in boefje_tasks:
executor.submit(
self.push_boefje_task,
boefje_task,
+ mutation.client_id,
create_schedule,
- self.push_tasks_for_scan_profile_mutations.__name__,
+ self.process_mutations.__name__,
)
- @tracer.start_as_current_span("boefje_push_tasks_for_new_boefjes")
- def push_tasks_for_new_boefjes(self) -> None:
+ @tracer.start_as_current_span("BoefjeScheduler.process_new_boefjes")
+ def process_new_boefjes(self) -> None:
"""When new boefjes are added or enabled we find the ooi's that
boefjes can run on, and create tasks for it."""
- new_boefjes = None
+ boefje_tasks = []
+
+ # TODO: this should be optimized see #3357
try:
- new_boefjes = self.ctx.services.katalogus.get_new_boefjes_by_org_id(self.organisation.id)
+ orgs = self.ctx.services.katalogus.get_organisations()
except ExternalServiceError:
- self.logger.error(
- "Failed to get new boefjes for organisation: %s from katalogus",
- self.organisation.name,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
+ self.logger.exception("Error occurred while processing new boefjes", scheduler_id=self.scheduler_id)
return
- if new_boefjes is None or not new_boefjes:
- self.logger.debug(
- "No new boefjes for organisation: %s",
- self.organisation.name,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- return
+ for org in orgs:
+ try:
+ # Get new boefjes for organisation
+ new_boefjes = self.ctx.services.katalogus.get_new_boefjes_by_org_id(org.id)
+ if not new_boefjes:
+ self.logger.debug("No new boefjes found for organisation", organisation_id=org.id)
+ continue
- self.logger.debug(
- "Received new boefjes",
- boefjes=[boefje.name for boefje in new_boefjes],
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
+ # Get all oois for the new boefjes
+ for boefje in new_boefjes:
+ oois = self.get_oois_for_boefje(boefje, org.id)
+ for ooi in oois:
+ boefje_task = models.BoefjeTask(
+ boefje=models.Boefje.model_validate(boefje.dict()),
+ input_ooi=ooi.primary_key,
+ organization=org.id,
+ )
- for boefje in new_boefjes:
- if not boefje.consumes:
- self.logger.debug(
- "No consumes found for boefje: %s",
- boefje.name,
- boefje_id=boefje.id,
- organisation_id=self.organisation.id,
+ boefje_tasks.append((boefje_task, org.id))
+ except ExternalServiceError:
+ self.logger.warning(
+ "Error occurred while processing new boefjes",
+ organisation_id=org.id,
scheduler_id=self.scheduler_id,
)
continue
- oois_by_object_type: list[OOI] = []
- try:
- oois_by_object_type = self.ctx.services.octopoes.get_objects_by_object_types(
- self.organisation.id, boefje.consumes, list(range(boefje.scan_level, 5))
- )
- except ExternalServiceError as exc:
- self.logger.error(
- "Could not get oois for organisation: %s from octopoes",
- self.organisation.name,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc,
+ with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-new_boefjes") as executor:
+ for boefje_task, org_id in boefje_tasks:
+ executor.submit(
+ self.push_boefje_task, boefje_task, org_id, self.create_schedule, self.process_new_boefjes.__name__
)
- continue
-
- with futures.ThreadPoolExecutor(
- thread_name_prefix=f"BoefjeScheduler-TPE-{self.scheduler_id}-new_boefjes"
- ) as executor:
- for ooi in oois_by_object_type:
- if not self.has_boefje_permission_to_run(boefje, ooi):
- self.logger.debug(
- "Boefje not allowed to run on ooi",
- boefje_id=boefje.id,
- ooi_primary_key=ooi.primary_key,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- continue
-
- boefje_task = BoefjeTask(
- boefje=Boefje.model_validate(boefje.dict()),
- input_ooi=ooi.primary_key,
- organization=self.organisation.id,
- )
-
- executor.submit(self.push_boefje_task, boefje_task, self.push_tasks_for_new_boefjes.__name__)
-
- @tracer.start_as_current_span("boefje_push_tasks_for_rescheduling")
- def push_tasks_for_rescheduling(self):
- if self.queue.full():
- self.logger.warning(
- "Boefjes queue is full, not populating with new tasks",
- queue_qsize=self.queue.qsize(),
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- return
+ @tracer.start_as_current_span("BoefjeScheduler.process_rescheduling")
+ def process_rescheduling(self):
try:
schedules, _ = self.ctx.datastores.schedule_store.get_schedules(
filters=filters.FilterRequest(
@@ -343,247 +233,168 @@ def push_tasks_for_rescheduling(self):
]
)
)
- except storage.errors.StorageError as exc_db:
- self.logger.error(
- "Could not get schedules for rescheduling %s",
- self.scheduler_id,
- scheduler_id=self.scheduler_id,
- organisation_id=self.organisation.id,
- exc_info=exc_db,
- )
- raise exc_db
-
- if not schedules:
- self.logger.debug(
- "No schedules tasks found for scheduler: %s",
- self.scheduler_id,
- scheduler_id=self.scheduler_id,
- organisation_id=self.organisation.id,
- )
+ if not schedules:
+ self.logger.debug(
+ "No schedules tasks found for scheduler: %s", self.scheduler_id, scheduler_id=self.scheduler_id
+ )
+ return
+ except StorageError:
+ self.logger.exception("Error occurred while processing rescheduling", scheduler_id=self.scheduler_id)
return
- with futures.ThreadPoolExecutor(
- thread_name_prefix=f"BoefjeScheduler-TPE-{self.scheduler_id}-rescheduling"
- ) as executor:
+ with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-rescheduling") as executor:
for schedule in schedules:
- boefje_task = BoefjeTask.model_validate(schedule.data)
-
- # Plugin still exists?
try:
+ boefje_task = models.BoefjeTask.model_validate(schedule.data)
+
+ # Plugin still exists?
plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id(
- boefje_task.boefje.id, self.organisation.id
+ boefje_task.boefje.id, schedule.organisation
)
if not plugin:
self.logger.info(
"Boefje does not exist anymore, skipping and disabling schedule",
boefje_id=boefje_task.boefje.id,
schedule_id=schedule.id,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
schedule.enabled = False
self.ctx.datastores.schedule_store.update_schedule(schedule)
continue
- except ExternalServiceError as exc_plugin:
- self.logger.error(
- "Could not get plugin %s from katalogus",
- boefje_task.boefje.id,
- boefje_id=boefje_task.boefje.id,
- schedule_id=schedule.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc_plugin,
- )
- continue
- # Plugin still enabled?
- if not plugin.enabled:
- self.logger.debug(
- "Boefje is disabled, skipping",
- boefje_id=boefje_task.boefje.id,
- schedule_id=schedule.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- schedule.enabled = False
- self.ctx.datastores.schedule_store.update_schedule(schedule)
- continue
-
- # Plugin a boefje?
- if plugin.type != "boefje":
- # We don't disable the schedule, since we should've gotten
- # schedules for boefjes only.
- self.logger.warning(
- "Plugin is not a boefje, skipping",
- plugin_id=plugin.id,
- schedule_id=schedule.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- continue
-
- # When the boefje task has an ooi, we need to do some additional
- # checks.
- ooi = None
- if boefje_task.input_ooi:
- # OOI still exists?
- try:
- ooi = self.ctx.services.octopoes.get_object(boefje_task.organization, boefje_task.input_ooi)
- if not ooi:
- self.logger.info(
- "OOI does not exist anymore, skipping and disabling schedule",
- ooi_primary_key=boefje_task.input_ooi,
- schedule_id=schedule.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- schedule.enabled = False
- self.ctx.datastores.schedule_store.update_schedule(schedule)
- continue
- except ExternalServiceError as exc_ooi:
- self.logger.error(
- "Could not get ooi %s from octopoes",
- boefje_task.input_ooi,
- ooi_primary_key=boefje_task.input_ooi,
- schedule_id=schedule.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc_ooi,
- )
- continue
-
- # Boefje still consuming ooi type?
- if ooi.object_type not in plugin.consumes:
+ # Plugin still enabled?
+ if not plugin.enabled:
self.logger.debug(
- "Boefje does not consume ooi anymore, skipping",
+ "Boefje is disabled, skipping",
boefje_id=boefje_task.boefje.id,
- ooi_primary_key=ooi.primary_key,
- organisation_id=self.organisation.id,
+ schedule_id=schedule.id,
scheduler_id=self.scheduler_id,
)
schedule.enabled = False
self.ctx.datastores.schedule_store.update_schedule(schedule)
continue
- # TODO: do we want to disable the schedule when a
- # boefje is not allowed to scan an ooi?
-
- # Boefje allowed to scan ooi?
- if not self.has_boefje_permission_to_run(plugin, ooi):
- self.logger.info(
- "Boefje not allowed to scan ooi, skipping and disabling schedule",
- boefje_id=boefje_task.boefje.id,
- ooi_primary_key=ooi.primary_key,
+ # Plugin a boefje?
+ if plugin.type != "boefje":
+ # We don't disable the schedule, since we should've gotten
+ # schedules for boefjes only.
+ self.logger.warning(
+ "Plugin is not a boefje, skipping",
+ plugin_id=plugin.id,
schedule_id=schedule.id,
- organisation_id=self.organisation.id,
+ organisation_id=schedule.organisation,
scheduler_id=self.scheduler_id,
)
- schedule.enabled = False
- self.ctx.datastores.schedule_store.update_schedule(schedule)
continue
- new_boefje_task = BoefjeTask(
- boefje=Boefje.model_validate(plugin.dict()),
- input_ooi=ooi.primary_key if ooi else None,
- organization=self.organisation.id,
- )
+ # When the boefje task has an ooi, we need to do some additional
+ # checks.
+ ooi = None
+ if boefje_task.input_ooi:
+ # OOI still exists?
+ ooi = self.ctx.services.octopoes.get_object(boefje_task.organization, boefje_task.input_ooi)
+ if not ooi:
+ self.logger.info(
+ "OOI does not exist anymore, skipping and disabling schedule",
+ ooi_primary_key=boefje_task.input_ooi,
+ schedule_id=schedule.id,
+ organisation_id=schedule.organisation,
+ scheduler_id=self.scheduler_id,
+ )
+ schedule.enabled = False
+ self.ctx.datastores.schedule_store.update_schedule(schedule)
+ continue
- executor.submit(self.push_boefje_task, new_boefje_task, self.push_tasks_for_rescheduling.__name__)
+ # Boefje still consuming ooi type?
+ if ooi.object_type not in plugin.consumes:
+ self.logger.debug(
+ "Boefje does not consume ooi anymore, skipping",
+ boefje_id=boefje_task.boefje.id,
+ ooi_primary_key=ooi.primary_key,
+ organisation_id=schedule.organisation,
+ scheduler_id=self.scheduler_id,
+ )
+ schedule.enabled = False
+ self.ctx.datastores.schedule_store.update_schedule(schedule)
+ continue
- @tracer.start_as_current_span("boefje_push_task")
- def push_boefje_task(self, boefje_task: BoefjeTask, create_schedule: bool = True, caller: str = "") -> None:
- """Given a Boefje and OOI create a BoefjeTask and push it onto
- the queue.
+ # TODO: do we want to disable the schedule when a
+ # boefje is not allowed to scan an ooi?
- Args:
- boefje: Boefje to run.
- ooi: OOI to run Boefje on.
- caller: The name of the function that called this function, used for logging.
+ # Boefje allowed to scan ooi?
+ if not self.has_boefje_permission_to_run(plugin, ooi):
+ self.logger.info(
+ "Boefje not allowed to scan ooi, skipping and disabling schedule",
+ boefje_id=boefje_task.boefje.id,
+ ooi_primary_key=ooi.primary_key,
+ schedule_id=schedule.id,
+ organisation_id=schedule.organisation,
+ scheduler_id=self.scheduler_id,
+ )
+ schedule.enabled = False
+ self.ctx.datastores.schedule_store.update_schedule(schedule)
+ continue
- """
- self.logger.debug(
- "Pushing boefje task",
- task_hash=boefje_task.hash,
- boefje_id=boefje_task.boefje.id,
- ooi_primary_key=boefje_task.input_ooi,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
- )
+ new_boefje_task = models.BoefjeTask(
+ boefje=models.Boefje.model_validate(plugin.dict()),
+ input_ooi=ooi.primary_key if ooi else None,
+ organization=schedule.organisation,
+ )
+ except (StorageError, ValidationError, ExternalServiceError):
+ self.logger.exception(
+ "Error occurred while processing rescheduling",
+ schedule_id=schedule.id,
+ scheduler_id=self.scheduler_id,
+ )
+ continue
- try:
- grace_period_passed = self.has_boefje_task_grace_period_passed(boefje_task)
- if not grace_period_passed:
- self.logger.debug(
- "Task has not passed grace period: %s",
- boefje_task.hash,
- task_hash=boefje_task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
+ executor.submit(
+ self.push_boefje_task,
+ new_boefje_task,
+ schedule.organisation,
+ self.create_schedule,
+ self.process_rescheduling.__name__,
)
- return
- except Exception as exc_grace_period:
- self.logger.warning(
- "Could not check if grace period has passed: %s",
+
+ @exception_handler
+ @tracer.start_as_current_span("BoefjeScheduler.push_boefje_task")
+ def push_boefje_task(
+ self, boefje_task: models.BoefjeTask, organisation_id: str, create_schedule: bool = True, caller: str = ""
+ ) -> None:
+ grace_period_passed = self.has_boefje_task_grace_period_passed(boefje_task)
+ if not grace_period_passed:
+ self.logger.debug(
+ "Task has not passed grace period: %s",
boefje_task.hash,
task_hash=boefje_task.hash,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
caller=caller,
- exc_info=exc_grace_period,
)
return
- try:
- is_stalled = self.has_boefje_task_stalled(boefje_task)
- if is_stalled:
- self.logger.debug(
- "Task is stalled: %s",
- boefje_task.hash,
- task_hash=boefje_task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
- )
-
- # Update task in datastore to be failed
- task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(boefje_task.hash)
- task_db.status = TaskStatus.FAILED
- self.ctx.datastores.task_store.update_task(task_db)
- except Exception as exc_stalled:
- self.logger.warning(
- "Could not check if task is stalled: %s",
+ is_stalled = self.has_boefje_task_stalled(boefje_task)
+ if is_stalled:
+ self.logger.debug(
+ "Task is stalled: %s",
boefje_task.hash,
- boefje_task_hash=boefje_task.hash,
- organisation_id=self.organisation.id,
+ task_hash=boefje_task.hash,
scheduler_id=self.scheduler_id,
caller=caller,
- exc_info=exc_stalled,
)
- return
- try:
- is_running = self.has_boefje_task_started_running(boefje_task)
- if is_running:
- self.logger.debug(
- "Task is still running: %s",
- boefje_task.hash,
- task_hash=boefje_task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
- )
- return
- except Exception as exc_running:
- self.logger.warning(
- "Could not check if task is running: %s",
+ # Update task in datastore to be failed
+ task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(boefje_task.hash)
+ task_db.status = models.TaskStatus.FAILED
+ self.ctx.datastores.task_store.update_task(task_db)
+
+ is_running = self.has_boefje_task_started_running(boefje_task)
+ if is_running:
+ self.logger.debug(
+ "Task is still running: %s",
boefje_task.hash,
task_hash=boefje_task.hash,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
caller=caller,
- exc_info=exc_running,
)
return
@@ -592,39 +403,24 @@ def push_boefje_task(self, boefje_task: BoefjeTask, create_schedule: bool = True
"Task is already on queue: %s",
boefje_task.hash,
task_hash=boefje_task.hash,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
caller=caller,
exc_info=True,
)
return
- latest_task = self.ctx.datastores.task_store.get_latest_task_by_hash(boefje_task.hash)
- score = self.priority_ranker.rank(SimpleNamespace(latest_task=latest_task, task=boefje_task))
-
- task = Task(
+ task = models.Task(
id=boefje_task.id,
scheduler_id=self.scheduler_id,
+ organisation=organisation_id,
type=self.ITEM_TYPE.type,
- priority=score,
hash=boefje_task.hash,
data=boefje_task.model_dump(),
)
- try:
- self.push_item_to_queue_with_timeout(item=task, max_tries=self.max_tries, create_schedule=create_schedule)
- except QueueFullError:
- self.logger.warning(
- "Could not add task to queue, queue was full: %s",
- boefje_task.hash,
- task_hash=boefje_task.hash,
- queue_qsize=self.queue.qsize(),
- queue_maxsize=self.queue.maxsize,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
- )
- return
+ task.priority = self.ranker.rank(task)
+
+ self.push_item_to_queue_with_timeout(item=task, max_tries=self.max_tries, create_schedule=create_schedule)
self.logger.info(
"Created boefje task",
@@ -632,15 +428,15 @@ def push_boefje_task(self, boefje_task: BoefjeTask, create_schedule: bool = True
task_hash=task.hash,
boefje_id=boefje_task.boefje.id,
ooi_primary_key=boefje_task.input_ooi,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
+ organisation_id=organisation_id,
caller=caller,
)
- def push_item_to_queue(self, item: Task, create_schedule: bool = True) -> Task:
+ def push_item_to_queue(self, item: models.Task, create_schedule: bool = True) -> models.Task:
"""Some boefje scheduler specific logic before pushing the item to the
queue."""
- boefje_task = BoefjeTask.model_validate(item.data)
+ boefje_task = models.BoefjeTask.model_validate(item.data)
# Check if id's are unique and correctly set. Same id's are necessary
# for the task runner.
@@ -652,8 +448,7 @@ def push_item_to_queue(self, item: Task, create_schedule: bool = True) -> Task:
return super().push_item_to_queue(item=item, create_schedule=create_schedule)
- @tracer.start_as_current_span("boefje_has_boefje_permission_to_run")
- def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool:
+ def has_boefje_permission_to_run(self, boefje: models.Plugin, ooi: models.OOI) -> bool:
"""Checks whether a boefje is allowed to run on an ooi.
Args:
@@ -665,22 +460,14 @@ def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool:
"""
if boefje.enabled is False:
self.logger.debug(
- "Boefje: %s is disabled",
- boefje.name,
- boefje_id=boefje.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
+ "Boefje: %s is disabled", boefje.name, boefje_id=boefje.id, scheduler_id=self.scheduler_id
)
return False
boefje_scan_level = boefje.scan_level
if boefje_scan_level is None:
self.logger.warning(
- "No scan level found for boefje: %s",
- boefje.id,
- boefje_id=boefje.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
+ "No scan level found for boefje: %s", boefje.id, boefje_id=boefje.id, scheduler_id=self.scheduler_id
)
return False
@@ -693,7 +480,6 @@ def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool:
"No scan_profile found for ooi: %s",
ooi.primary_key,
ooi_primary_key=ooi.primary_key,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return False
@@ -704,7 +490,6 @@ def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool:
"No scan level found for ooi: %s",
ooi.primary_key,
ooi_primary_key=ooi.primary_key,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return False
@@ -722,15 +507,13 @@ def has_boefje_permission_to_run(self, boefje: Plugin, ooi: OOI) -> bool:
ooi_scan_level,
boefje_id=boefje.id,
ooi_primary_key=ooi.primary_key,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return False
return True
- @tracer.start_as_current_span("boefje_has_boefje_task_started_running")
- def has_boefje_task_started_running(self, task: BoefjeTask) -> bool:
+ def has_boefje_task_started_running(self, task: models.BoefjeTask) -> bool:
"""Check if the same task is already running.
Args:
@@ -740,44 +523,17 @@ def has_boefje_task_started_running(self, task: BoefjeTask) -> bool:
True if the task is still running, False otherwise.
"""
# Is task still running according to the datastore?
- task_db = None
- try:
- task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
- except Exception as exc_db:
- self.logger.error(
- "Could not get latest task by hash: %s",
- task.hash,
- task_id=task.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc_db,
- )
- raise exc_db
-
- if task_db is not None and task_db.status not in [TaskStatus.FAILED, TaskStatus.COMPLETED]:
+ task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
+ if task_db is not None and task_db.status not in [models.TaskStatus.FAILED, models.TaskStatus.COMPLETED]:
self.logger.debug(
- "Task is still running, according to the datastore",
- task_id=task_db.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
+ "Task is still running, according to the datastore", task_id=task_db.id, scheduler_id=self.scheduler_id
)
return True
# Is task running according to bytes?
- try:
- task_bytes = self.ctx.services.bytes.get_last_run_boefje(
- boefje_id=task.boefje.id, input_ooi=task.input_ooi, organization_id=task.organization
- )
- except ExternalServiceError as exc:
- self.logger.error(
- "Failed to get last run boefje from bytes",
- boefje_id=task.boefje.id,
- input_ooi_primary_key=task.input_ooi,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc,
- )
- raise exc
+ task_bytes = self.ctx.services.bytes.get_last_run_boefje(
+ boefje_id=task.boefje.id, input_ooi=task.input_ooi, organization_id=task.organization
+ )
# Task has been finished (failed, or succeeded) according to
# the datastore, but we have no results of it in bytes, meaning
@@ -786,7 +542,7 @@ def has_boefje_task_started_running(self, task: BoefjeTask) -> bool:
if (
task_bytes is None
and task_db is not None
- and task_db.status in [TaskStatus.COMPLETED, TaskStatus.FAILED]
+ and task_db.status in [models.TaskStatus.COMPLETED, models.TaskStatus.FAILED]
and (
task_db.modified_at is not None
and task_db.modified_at
@@ -798,24 +554,19 @@ def has_boefje_task_started_running(self, task: BoefjeTask) -> bool:
"please review the bytes logs for more information regarding "
"this error.",
task_id=task_db.id,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
raise RuntimeError("Task has been finished, but no results found in bytes")
if task_bytes is not None and task_bytes.ended_at is None and task_bytes.started_at is not None:
self.logger.debug(
- "Task is still running, according to bytes",
- task_id=task_bytes.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
+ "Task is still running, according to bytes", task_id=task_bytes.id, scheduler_id=self.scheduler_id
)
return True
return False
- @tracer.start_as_current_span("boefje_is_task_stalled")
- def has_boefje_task_stalled(self, task: BoefjeTask) -> bool:
+ def has_boefje_task_stalled(self, task: models.BoefjeTask) -> bool:
"""Check if the same task is stalled.
Args:
@@ -824,23 +575,10 @@ def has_boefje_task_stalled(self, task: BoefjeTask) -> bool:
Returns:
True if the task is stalled, False otherwise.
"""
- task_db = None
- try:
- task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
- except Exception as exc_db:
- self.logger.warning(
- "Could not get latest task by hash: %s",
- task.hash,
- task_hash=task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc_db,
- )
- raise exc_db
-
+ task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
if (
task_db is not None
- and task_db.status == TaskStatus.DISPATCHED
+ and task_db.status == models.TaskStatus.DISPATCHED
and (
task_db.modified_at is not None
and datetime.now(timezone.utc)
@@ -851,8 +589,7 @@ def has_boefje_task_stalled(self, task: BoefjeTask) -> bool:
return False
- @tracer.start_as_current_span("boefje_has_boefje_task_grace_period_passed")
- def has_boefje_task_grace_period_passed(self, task: BoefjeTask) -> bool:
+ def has_boefje_task_grace_period_passed(self, task: models.BoefjeTask) -> bool:
"""Check if the grace period has passed for a task in both the
datastore and bytes.
@@ -866,24 +603,13 @@ def has_boefje_task_grace_period_passed(self, task: BoefjeTask) -> bool:
True if the grace period has passed, False otherwise.
"""
# Does boefje have an interval specified?
- plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id(task.boefje.id, self.organisation.id)
+ plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id(task.boefje.id, task.organization)
if plugin is not None and plugin.interval is not None and plugin.interval > 0:
timeout = timedelta(minutes=plugin.interval)
else:
timeout = timedelta(seconds=self.ctx.config.pq_grace_period)
- try:
- task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
- except Exception as exc_db:
- self.logger.warning(
- "Could not get latest task by hash: %s",
- task.hash,
- task_hash=task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc_db,
- )
- raise exc_db
+ task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
# Has grace period passed according to datastore?
if task_db is not None and datetime.now(timezone.utc) - task_db.modified_at < timeout:
@@ -891,24 +617,13 @@ def has_boefje_task_grace_period_passed(self, task: BoefjeTask) -> bool:
"Task has not passed grace period, according to the datastore",
task_id=task_db.id,
task_hash=task.hash,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return False
- try:
- task_bytes = self.ctx.services.bytes.get_last_run_boefje(
- boefje_id=task.boefje.id, input_ooi=task.input_ooi, organization_id=task.organization
- )
- except ExternalServiceError as exc_bytes:
- self.logger.error(
- "Failed to get last run boefje from bytes",
- boefje_id=task.boefje.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc_bytes,
- )
- raise exc_bytes
+ task_bytes = self.ctx.services.bytes.get_last_run_boefje(
+ boefje_id=task.boefje.id, input_ooi=task.input_ooi, organization_id=task.organization
+ )
# Did the grace period pass, according to bytes?
if (
@@ -920,14 +635,13 @@ def has_boefje_task_grace_period_passed(self, task: BoefjeTask) -> bool:
"Task has not passed grace period, according to bytes",
task_id=task_bytes.id,
task_hash=task.hash,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return False
return True
- def get_boefjes_for_ooi(self, ooi: OOI) -> list[Plugin]:
+ def get_boefjes_for_ooi(self, ooi: models.OOI, organisation: str) -> list[models.Plugin]:
"""Get available all boefjes (enabled and disabled) for an ooi.
Args:
@@ -936,24 +650,13 @@ def get_boefjes_for_ooi(self, ooi: OOI) -> list[Plugin]:
Returns:
A list of Plugin of type Boefje that can be run on the ooi.
"""
- try:
- boefjes = self.ctx.services.katalogus.get_boefjes_by_type_and_org_id(ooi.object_type, self.organisation.id)
- except ExternalServiceError:
- self.logger.error(
- "Could not get boefjes for object_type: %s",
- ooi.object_type,
- object_type=ooi.object_type,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- return []
+ boefjes = self.ctx.services.katalogus.get_boefjes_by_type_and_org_id(ooi.object_type, organisation)
if boefjes is None:
self.logger.debug(
"No boefjes found for type: %s",
ooi.object_type,
input_ooi_primary_key=ooi.primary_key,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return []
@@ -964,30 +667,52 @@ def get_boefjes_for_ooi(self, ooi: OOI) -> list[Plugin]:
ooi,
input_ooi_primary_key=ooi.primary_key,
boefjes=[boefje.id for boefje in boefjes],
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return boefjes
- def set_cron(self, item: Task) -> str | None:
+ def get_oois_for_boefje(self, boefje: models.Plugin, organisation: str) -> list[models.OOI]:
+ oois = []
+
+ oois_by_object_type = self.ctx.services.octopoes.get_objects_by_object_types(
+ organisation,
+ boefje.consumes,
+ list(range(boefje.scan_level, 5)), # type: ignore
+ )
+
+ # Filter OOIs based on permission
+ for ooi in oois_by_object_type:
+ if not self.has_boefje_permission_to_run(boefje, ooi):
+ self.logger.debug(
+ "Boefje not allowed to run on ooi",
+ boefje_id=boefje.id,
+ ooi_primary_key=ooi.primary_key,
+ scheduler_id=self.scheduler_id,
+ )
+ continue
+ oois.append(ooi)
+
+ return oois
+
+ def set_cron(self, item: models.Task) -> str | None:
"""Override Schedule.set_cron() when a boefje specifies a schedule for
execution (cron expression) we schedule for its execution"""
# Does a boefje have a schedule defined?
plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id(
- utils.deep_get(item.data, ["boefje", "id"]), self.organisation.id
+ utils.deep_get(item.data, ["boefje", "id"]), item.organisation
)
if plugin is None or plugin.cron is None:
return super().set_cron(item)
return plugin.cron
- def calculate_deadline(self, task: Task) -> datetime:
+ def calculate_deadline(self, task: models.Task) -> datetime:
"""Override Scheduler.calculate_deadline() to calculate the deadline
for a task and based on the boefje interval."""
# Does the boefje have an interval defined?
plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id(
- utils.deep_get(task.data, ["boefje", "id"]), self.organisation.id
+ utils.deep_get(task.data, ["boefje", "id"]), task.organisation
)
if plugin is not None and plugin.interval is not None and plugin.interval > 0:
return datetime.now(timezone.utc) + timedelta(minutes=plugin.interval)
diff --git a/mula/scheduler/schedulers/schedulers/normalizer.py b/mula/scheduler/schedulers/schedulers/normalizer.py
index d1dff6e7c56..ff0918ef991 100644
--- a/mula/scheduler/schedulers/schedulers/normalizer.py
+++ b/mula/scheduler/schedulers/schedulers/normalizer.py
@@ -1,62 +1,39 @@
import uuid
-from collections.abc import Callable
from concurrent import futures
from types import SimpleNamespace
-from typing import Any
+from typing import Any, Literal
-import structlog
from opentelemetry import trace
+from pydantic import ValidationError
from scheduler import clients, context, models
from scheduler.clients.errors import ExternalServiceError
-from scheduler.models import Normalizer, NormalizerTask, Organisation, Plugin, RawDataReceivedEvent, Task, TaskStatus
-from scheduler.schedulers import Scheduler
-from scheduler.schedulers.queue import PriorityQueue, QueueFullError
-from scheduler.schedulers.rankers import NormalizerRanker
+from scheduler.schedulers import Scheduler, rankers
+from scheduler.schedulers.errors import exception_handler
tracer = trace.get_tracer(__name__)
class NormalizerScheduler(Scheduler):
- """A KAT specific implementation of a Normalizer scheduler. It extends
- the `Scheduler` class by adding a `organisation` attribute.
+ """Scheduler implementation for the creation of NormalizerTask models.
Attributes:
- logger: A logger instance.
- organisation: The organisation that this scheduler is for.
+ ranker: The ranker to calculate the priority of a task.
"""
- ITEM_TYPE: Any = NormalizerTask
-
- def __init__(
- self,
- ctx: context.AppContext,
- scheduler_id: str,
- organisation: Organisation,
- queue: PriorityQueue | None = None,
- callback: Callable[..., None] | None = None,
- ):
- self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
- self.organisation: Organisation = organisation
-
- self.queue = queue or PriorityQueue(
- pq_id=scheduler_id,
- maxsize=ctx.config.pq_maxsize,
- item_type=self.ITEM_TYPE,
- allow_priority_updates=True,
- pq_store=ctx.datastores.pq_store,
- )
+ ID: Literal["normalizer"] = "normalizer"
+ TYPE: models.SchedulerType = models.SchedulerType.NORMALIZER
+ ITEM_TYPE: Any = models.NormalizerTask
- super().__init__(
- ctx=ctx,
- queue=self.queue,
- scheduler_id=scheduler_id,
- callback=callback,
- create_schedule=False,
- auto_calculate_deadline=False,
- )
+ def __init__(self, ctx: context.AppContext):
+ """Initializes the NormalizerScheduler.
- self.ranker = NormalizerRanker(ctx=self.ctx)
+ Args:
+ ctx (context.AppContext): Application context of shared data (e.g.
+ configuration, external services connections).
+ """
+ super().__init__(ctx=ctx, scheduler_id=self.ID, create_schedule=False, auto_calculate_deadline=False)
+ self.ranker = rankers.NormalizerRanker(ctx=self.ctx)
def run(self) -> None:
"""The run method is called when the scheduler is started. It will
@@ -68,166 +45,113 @@ def run(self) -> None:
for each normalizer that is registered for the mime type of the raw
file.
"""
- listener = clients.RawData(
+ self.listeners["raw_data"] = clients.RawData(
dsn=str(self.ctx.config.host_raw_data),
- queue=f"{self.organisation.id}__raw_file_received",
- func=self.push_tasks_for_received_raw_data,
+ queue="raw_file_received",
+ func=self.process_raw_data,
prefetch_count=self.ctx.config.rabbitmq_prefetch_count,
)
- self.listeners["raw_data"] = listener
-
- self.run_in_thread(
- name=f"NormalizerScheduler-{self.scheduler_id}-raw_file",
- target=self.listeners["raw_data"].listen,
- loop=False,
- )
+ self.run_in_thread(name="NormalizerScheduler-raw_file", target=self.listeners["raw_data"].listen, loop=False)
self.logger.info(
- "Normalizer scheduler started for %s",
- self.organisation.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- item_type=self.queue.item_type.__name__,
+ "Normalizer scheduler started", scheduler_id=self.scheduler_id, item_type=self.queue.item_type.__name__
)
- @tracer.start_as_current_span("normalizer_push_task_for_received_raw_data")
- def push_tasks_for_received_raw_data(self, body: bytes) -> None:
+ @tracer.start_as_current_span("NormalizerScheduler.process_raw_data")
+ def process_raw_data(self, body: bytes) -> None:
"""Create tasks for the received raw data.
Args:
latest_raw_data: A `RawData` object that was received from the
message queue.
"""
- # Convert body into a RawDataReceivedEvent
- latest_raw_data = RawDataReceivedEvent.model_validate_json(body)
-
- self.logger.debug(
- "Received raw data %s",
- latest_raw_data.raw_data.id,
- raw_data_id=latest_raw_data.raw_data.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
+ try:
+ # Convert body into a RawDataReceivedEvent
+ latest_raw_data = models.RawDataReceivedEvent.model_validate_json(body)
+ self.logger.debug(
+ "Received raw data %s",
+ latest_raw_data.raw_data.id,
+ raw_data_id=latest_raw_data.raw_data.id,
+ scheduler_id=self.scheduler_id,
+ )
+ except ValidationError:
+ self.logger.exception("Failed to validate raw data", scheduler_id=self.scheduler_id)
+ return
# Check if the raw data doesn't contain an error mime-type,
# we don't need to create normalizers when the raw data returned
# an error.
- for mime_type in latest_raw_data.raw_data.mime_types:
- if mime_type.get("value", "").startswith("error/"):
- self.logger.debug(
- "Skipping raw data %s with error mime type",
- latest_raw_data.raw_data.id,
- mime_type=mime_type.get("value"),
- raw_data_id=latest_raw_data.raw_data.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- return
-
- # Get all normalizers for the mime types of the raw data
- normalizers: dict[str, Plugin] = {}
- for mime_type in latest_raw_data.raw_data.mime_types:
- normalizers_by_mime_type: list[Plugin] = self.get_normalizers_for_mime_type(mime_type.get("value"))
-
- for normalizer in normalizers_by_mime_type:
- normalizers[normalizer.id] = normalizer
-
- if not normalizers:
+ if self.has_raw_data_errors(latest_raw_data.raw_data):
self.logger.debug(
- "No normalizers found for raw data %s",
+ "Skipping raw data %s with error mime type",
latest_raw_data.raw_data.id,
raw_data_id=latest_raw_data.raw_data.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
)
+ return
- with futures.ThreadPoolExecutor(
- thread_name_prefix=f"NormalizerScheduler-TPE-{self.scheduler_id}-raw_data"
- ) as executor:
- for normalizer in normalizers.values():
- if not self.has_normalizer_permission_to_run(normalizer):
- self.logger.debug(
- "Normalizer is not allowed to run: %s",
- normalizer.id,
- normalizer_id=normalizer.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- continue
-
- normalizer_task = NormalizerTask(
- normalizer=Normalizer.model_validate(normalizer.model_dump()), raw_data=latest_raw_data.raw_data
- )
+ # Get all unique normalizers for the mime types of the raw data
+ normalizers: dict[str, models.Plugin] = {}
+ for mime_type in latest_raw_data.raw_data.mime_types:
+ normalizers_by_mime_type = self.get_normalizers_for_mime_type(
+ mime_type.get("value"), latest_raw_data.organization
+ )
- executor.submit(
- self.push_normalizer_task, normalizer_task, self.push_tasks_for_received_raw_data.__name__
- )
+ self.logger.debug(
+ "Found normalizers for mime type",
+ mime_type=mime_type.get("value"),
+ normalizers=normalizers_by_mime_type,
+ )
- @tracer.start_as_current_span("normalizer_push_task")
- def push_normalizer_task(self, normalizer_task: models.NormalizerTask, caller: str = "") -> None:
- """Given a normalizer and raw data, create a task and push it to the
- queue.
+ for normalizer in normalizers_by_mime_type:
+ normalizers[normalizer.id] = normalizer
+
+ unique_normalizers = list(normalizers.values())
- Args:
- normalizer: The normalizer to create a task for.
- raw_data: The raw data to create a task for.
- caller: The name of the function that called this function, used for logging.
- """
self.logger.debug(
- "Pushing normalizer task",
- task_id=normalizer_task.id,
- normalizer_id=normalizer_task.normalizer.id,
- organisation_id=self.organisation.id,
+ "Found normalizers for raw data",
+ raw_data_id=latest_raw_data.raw_data.id,
+ mime_types=[mime_type.get("value") for mime_type in latest_raw_data.raw_data.mime_types],
+ normalizers=[normalizer.id for normalizer in unique_normalizers],
scheduler_id=self.scheduler_id,
- caller=caller,
)
- try:
- plugin = self.ctx.services.katalogus.get_plugin_by_id_and_org_id(
- normalizer_task.normalizer.id, self.organisation.id
- )
- if not self.has_normalizer_permission_to_run(plugin):
+ # Create tasks for the normalizers
+ normalizer_tasks = []
+ for normalizer in unique_normalizers:
+ if not self.has_normalizer_permission_to_run(normalizer):
self.logger.debug(
- "Task is not allowed to run: %s",
- normalizer_task.id,
- task_id=normalizer_task.id,
- organisation_id=self.organisation.id,
+ "Normalizer is not allowed to run: %s",
+ normalizer.id,
+ normalizer_id=normalizer.id,
scheduler_id=self.scheduler_id,
- caller=caller,
)
- return
- except ExternalServiceError:
- self.logger.warning(
- "Could not get plugin by id: %s",
- normalizer_task.normalizer.id,
- task_id=normalizer_task.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
+ continue
+
+ normalizer_task = models.NormalizerTask(
+ normalizer=models.Normalizer.model_validate(normalizer.model_dump()), raw_data=latest_raw_data.raw_data
)
- return
- try:
- if self.has_normalizer_task_started_running(normalizer_task):
- self.logger.debug(
- "Task is still running: %s",
- normalizer_task.id,
- task_id=normalizer_task.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
+ normalizer_tasks.append(normalizer_task)
+
+ with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-raw_data") as executor:
+ for normalizer_task in normalizer_tasks:
+ executor.submit(
+ self.push_normalizer_task, normalizer_task, latest_raw_data.organization, self.create_schedule
)
- return
- except Exception:
- self.logger.warning(
- "Could not check if task is running: %s",
+
+ @exception_handler
+ @tracer.start_as_current_span("NormalizerScheduler.push_normalizer_task")
+ def push_normalizer_task(
+ self, normalizer_task: models.NormalizerTask, organisation_id: str, create_schedule: bool, caller: str = ""
+ ) -> None:
+ if self.has_normalizer_task_started_running(normalizer_task):
+ self.logger.debug(
+ "Task is still running: %s",
normalizer_task.id,
task_id=normalizer_task.id,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
caller=caller,
- exc_info=True,
)
return
@@ -236,37 +160,23 @@ def push_normalizer_task(self, normalizer_task: models.NormalizerTask, caller: s
"Task is already on queue: %s",
normalizer_task.id,
task_id=normalizer_task.id,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
caller=caller,
)
return
- score = self.ranker.rank(SimpleNamespace(raw_data=normalizer_task.raw_data, task=normalizer_task))
-
- task = Task(
+ task = models.Task(
id=normalizer_task.id,
scheduler_id=self.scheduler_id,
- type=self.ITEM_TYPE.type,
- priority=score,
+ organisation=organisation_id,
+ type=normalizer_task.type,
hash=normalizer_task.hash,
data=normalizer_task.model_dump(),
)
- try:
- self.push_item_to_queue_with_timeout(item=task, max_tries=self.max_tries)
- except QueueFullError:
- self.logger.warning(
- "Could not add task to queue, queue was full: %s",
- task.id,
- task_id=task.id,
- queue_qsize=self.queue.qsize(),
- queue_maxsize=self.queue.maxsize,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
- )
- return
+ task.priority = self.ranker.rank(SimpleNamespace(raw_data=normalizer_task.raw_data, task=normalizer_task))
+
+ self.push_item_to_queue_with_timeout(task, self.max_tries, create_schedule=create_schedule)
self.logger.info(
"Created normalizer task",
@@ -274,15 +184,15 @@ def push_normalizer_task(self, normalizer_task: models.NormalizerTask, caller: s
task_hash=task.hash,
normalizer_id=normalizer_task.normalizer.id,
raw_data_id=normalizer_task.raw_data.id,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
+ organisation_id=organisation_id,
caller=caller,
)
- def push_item_to_queue(self, item: Task, create_schedule: bool = True) -> Task:
+ def push_item_to_queue(self, item: models.Task, create_schedule: bool = True) -> models.Task:
"""Some normalizer scheduler specific logic before pushing the item to the
queue."""
- normalizer_task = NormalizerTask.model_validate(item.data)
+ normalizer_task = models.NormalizerTask.model_validate(item.data)
# Check if id's are unique and correctly set. Same id's are necessary
# for the task runner.
@@ -294,8 +204,7 @@ def push_item_to_queue(self, item: Task, create_schedule: bool = True) -> Task:
return super().push_item_to_queue(item=item, create_schedule=create_schedule)
- @tracer.start_as_current_span("normalizer_has_normalizer_permission_to_run")
- def has_normalizer_permission_to_run(self, normalizer: Plugin) -> bool:
+ def has_normalizer_permission_to_run(self, normalizer: models.Plugin) -> bool:
"""Check if the task is allowed to run.
Args:
@@ -306,18 +215,13 @@ def has_normalizer_permission_to_run(self, normalizer: Plugin) -> bool:
"""
if not normalizer.enabled:
self.logger.debug(
- "Normalizer: %s is disabled",
- normalizer.id,
- normalizer_id=normalizer.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
+ "Normalizer: %s is disabled", normalizer.id, normalizer_id=normalizer.id, scheduler_id=self.scheduler_id
)
return False
return True
- @tracer.start_as_current_span("normalizer_has_normalizer_task_started_running")
- def has_normalizer_task_started_running(self, task: NormalizerTask) -> bool:
+ def has_normalizer_task_started_running(self, task: models.NormalizerTask) -> bool:
"""Check if the same task is already running.
Args:
@@ -328,33 +232,32 @@ def has_normalizer_task_started_running(self, task: NormalizerTask) -> bool:
"""
# Get the last tasks that have run or are running for the hash
# of this particular NormalizerTask.
- try:
- task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
- except Exception as exc_db:
- self.logger.error(
- "Could not get latest task by hash: %s",
- task.hash,
- task_id=task.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc_db,
- )
- raise exc_db
+ task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
# Is task still running according to the datastore?
- if task_db is not None and task_db.status not in [TaskStatus.COMPLETED, TaskStatus.FAILED]:
+ if task_db is not None and task_db.status not in [models.TaskStatus.COMPLETED, models.TaskStatus.FAILED]:
self.logger.debug(
"Task is still running, according to the datastore",
task_id=task_db.id,
task_hash=task.hash,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return True
return False
- def get_normalizers_for_mime_type(self, mime_type: str) -> list[Plugin]:
+ def has_raw_data_errors(self, raw_data: models.RawData) -> bool:
+ """Check if the raw data contains errors.
+
+ Args:
+ raw_data: The raw data to check.
+
+ Returns:
+ True if the raw data contains errors, False otherwise.
+ """
+ return any(mime_type.get("value", "").startswith("error/") for mime_type in raw_data.mime_types)
+
+ def get_normalizers_for_mime_type(self, mime_type: str, organisation: str) -> list[models.Plugin]:
"""Get available normalizers for a given mime type.
Args:
@@ -364,37 +267,17 @@ def get_normalizers_for_mime_type(self, mime_type: str) -> list[Plugin]:
A list of Plugins of type normalizer for the given mime type.
"""
try:
- normalizers = self.ctx.services.katalogus.get_normalizers_by_org_id_and_type(
- self.organisation.id, mime_type
- )
+ normalizers = self.ctx.services.katalogus.get_normalizers_by_org_id_and_type(organisation, mime_type)
except ExternalServiceError:
- self.logger.warning(
- "Could not get normalizers for mime_type: %s [mime_type=%s, organisation_id=%s, scheduler_id=%s]",
- mime_type,
- mime_type,
- self.organisation.id,
- self.scheduler_id,
- )
- return []
-
- if normalizers is None:
- self.logger.debug(
- "No normalizer found for mime_type: %s",
+ self.logger.error(
+ "Failed to get normalizers for mime type %s",
mime_type,
mime_type=mime_type,
- organisation_id=self.organisation.id,
scheduler_id=self.scheduler_id,
)
return []
- self.logger.debug(
- "Found %d normalizers for mime_type: %s",
- len(normalizers),
- mime_type,
- mime_type=mime_type,
- normalizers=[normalizer.id for normalizer in normalizers],
- organisation_=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
+ if normalizers is None:
+ return []
return normalizers
diff --git a/mula/scheduler/schedulers/schedulers/report.py b/mula/scheduler/schedulers/schedulers/report.py
index 05f4d5d3e43..3c58c1518e6 100644
--- a/mula/scheduler/schedulers/schedulers/report.py
+++ b/mula/scheduler/schedulers/schedulers/report.py
@@ -1,176 +1,97 @@
-from collections.abc import Callable
from concurrent import futures
from datetime import datetime, timezone
-from typing import Any
+from typing import Any, Literal
-import structlog
from opentelemetry import trace
-from scheduler import context, storage
-from scheduler.models import Organisation, ReportTask, Task, TaskStatus
+from scheduler import context, models
from scheduler.schedulers import Scheduler
-from scheduler.schedulers.queue import PriorityQueue, QueueFullError
+from scheduler.schedulers.errors import exception_handler
from scheduler.storage import filters
tracer = trace.get_tracer(__name__)
class ReportScheduler(Scheduler):
- ITEM_TYPE: Any = ReportTask
-
- def __init__(
- self,
- ctx: context.AppContext,
- scheduler_id: str,
- organisation: Organisation,
- queue: PriorityQueue | None = None,
- callback: Callable[..., None] | None = None,
- ):
- self.logger: structlog.BoundLogger = structlog.get_logger(__name__)
- self.organisation = organisation
- self.queue = queue or PriorityQueue(
- pq_id=scheduler_id,
- maxsize=ctx.config.pq_maxsize,
- item_type=self.ITEM_TYPE,
- allow_priority_updates=True,
- pq_store=ctx.datastores.pq_store,
- )
+ """Scheduler implementation for the creation of ReportTask models."""
- super().__init__(
- ctx=ctx,
- queue=self.queue,
- scheduler_id=scheduler_id,
- callback=callback,
- create_schedule=True,
- auto_calculate_deadline=False,
- )
+ ID: Literal["report"] = "report"
+ TYPE: models.SchedulerType = models.SchedulerType.REPORT
+ ITEM_TYPE: Any = models.ReportTask
+
+ def __init__(self, ctx: context.AppContext):
+ """Initializes the ReportScheduler.
+
+ Args:
+ ctx (context.AppContext): Application context of shared data (e.g.
+ configuration, external services connections).
+ """
+ super().__init__(ctx=ctx, scheduler_id=self.ID, create_schedule=True, auto_calculate_deadline=False)
def run(self) -> None:
+ """The run method is called when the scheduler is started. It will
+ start the rescheduling process for the ReportTask models that are
+ scheduled.
+ """
# Rescheduling
- self.run_in_thread(
- name=f"scheduler-{self.scheduler_id}-reschedule", target=self.push_tasks_for_rescheduling, interval=60.0
+ self.run_in_thread(name="ReportScheduler-rescheduling", target=self.process_rescheduling, interval=60.0)
+ self.logger.info(
+ "Report scheduler started", scheduler_id=self.scheduler_id, item_type=self.queue.item_type.__name__
)
- @tracer.start_as_current_span(name="report_push_tasks_for_rescheduling")
- def push_tasks_for_rescheduling(self):
- if self.queue.full():
- self.logger.warning(
- "Report queue is full, not populating with new tasks",
- queue_qsize=self.queue.qsize(),
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
+ @tracer.start_as_current_span(name="ReportScheduler.process_rescheduling")
+ def process_rescheduling(self):
+ schedules, _ = self.ctx.datastores.schedule_store.get_schedules(
+ filters=filters.FilterRequest(
+ filters=[
+ filters.Filter(column="scheduler_id", operator="eq", value=self.scheduler_id),
+ filters.Filter(column="deadline_at", operator="lt", value=datetime.now(timezone.utc)),
+ filters.Filter(column="enabled", operator="eq", value=True),
+ ]
)
- return
-
- try:
- schedules, _ = self.ctx.datastores.schedule_store.get_schedules(
- filters=filters.FilterRequest(
- filters=[
- filters.Filter(column="scheduler_id", operator="eq", value=self.scheduler_id),
- filters.Filter(column="deadline_at", operator="lt", value=datetime.now(timezone.utc)),
- filters.Filter(column="enabled", operator="eq", value=True),
- ]
- )
- )
- except storage.errors.StorageError as exc_db:
- self.logger.error(
- "Could not get schedules for rescheduling %s",
- self.scheduler_id,
- scheduler_id=self.scheduler_id,
- organisation_id=self.organisation.id,
- exc_info=exc_db,
- )
- raise exc_db
-
- with futures.ThreadPoolExecutor(
- thread_name_prefix=f"ReportScheduler-TPE-{self.scheduler_id}-rescheduling"
- ) as executor:
- for schedule in schedules:
- report_task = ReportTask.model_validate(schedule.data)
- executor.submit(self.push_report_task, report_task, self.push_tasks_for_rescheduling.__name__)
-
- def push_report_task(self, report_task: ReportTask, caller: str = "") -> None:
- self.logger.debug(
- "Pushing report task",
- task_hash=report_task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
)
- if self.has_report_task_started_running(report_task):
- self.logger.debug(
- "Report task already running",
- task_hash=report_task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
- )
- return
+ # Create report tasks for the schedules
+ report_tasks = []
+ for schedule in schedules:
+ report_task = models.ReportTask.model_validate(schedule.data)
+ report_tasks.append(report_task)
+
+ with futures.ThreadPoolExecutor(thread_name_prefix=f"TPE-{self.scheduler_id}-rescheduling") as executor:
+ for report_task in report_tasks:
+ executor.submit(
+ self.push_report_task,
+ report_task,
+ report_task.organisation_id,
+ self.create_schedule,
+ self.process_rescheduling.__name__,
+ )
+ @exception_handler
+ @tracer.start_as_current_span("ReportScheduler.push_report_task")
+ def push_report_task(
+ self, report_task: models.ReportTask, organisation_id: str, create_schedule: bool, caller: str = ""
+ ) -> None:
if self.is_item_on_queue_by_hash(report_task.hash):
- self.logger.debug(
- "Report task already on queue",
- task_hash=report_task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
- )
+ self.logger.debug("Report task already on queue", scheduler_id=self.scheduler_id, caller=caller)
return
- task = Task(
+ task = models.Task(
scheduler_id=self.scheduler_id,
+ organisation=organisation_id,
priority=int(datetime.now().timestamp()),
type=self.ITEM_TYPE.type,
hash=report_task.hash,
data=report_task.model_dump(),
)
- try:
- self.push_item_to_queue_with_timeout(task, self.max_tries)
- except QueueFullError:
- self.logger.warning(
- "Could not add task %s to queue, queue was full",
- report_task.hash,
- task_hash=report_task.hash,
- queue_qsize=self.queue.qsize(),
- queue_maxsize=self.queue.maxsize,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- caller=caller,
- )
- return
+ self.push_item_to_queue_with_timeout(task, self.max_tries)
self.logger.info(
- "Report task pushed to queue",
+ "Created report task",
task_id=task.id,
- task_hash=report_task.hash,
- organisation_id=self.organisation.id,
+ task_hash=task.hash,
scheduler_id=self.scheduler_id,
+ organisation_id=organisation_id,
caller=caller,
)
-
- def has_report_task_started_running(self, task: ReportTask) -> bool:
- task_db = None
- try:
- task_db = self.ctx.datastores.task_store.get_latest_task_by_hash(task.hash)
- except storage.errors.StorageError as exc_db:
- self.logger.error(
- "Could not get latest task by hash %s",
- task.hash,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- exc_info=exc_db,
- )
- raise exc_db
-
- if task_db is not None and task_db.status not in [TaskStatus.FAILED, TaskStatus.COMPLETED]:
- self.logger.debug(
- "Task is still running, according to the datastore",
- task_id=task_db.id,
- organisation_id=self.organisation.id,
- scheduler_id=self.scheduler_id,
- )
- return True
-
- return False
diff --git a/mula/scheduler/server/handlers/__init__.py b/mula/scheduler/server/handlers/__init__.py
index 302806efaa3..2aea97fa01f 100644
--- a/mula/scheduler/server/handlers/__init__.py
+++ b/mula/scheduler/server/handlers/__init__.py
@@ -1,6 +1,5 @@
from .health import HealthAPI
from .metrics import MetricsAPI
-from .queues import QueueAPI
from .root import RootAPI
from .schedulers import SchedulerAPI
from .schedules import ScheduleAPI
diff --git a/mula/scheduler/server/handlers/queues.py b/mula/scheduler/server/handlers/queues.py
deleted file mode 100644
index 461c897c5e9..00000000000
--- a/mula/scheduler/server/handlers/queues.py
+++ /dev/null
@@ -1,103 +0,0 @@
-from typing import Any
-
-import fastapi
-import structlog
-from fastapi import status
-
-from scheduler import context, models, schedulers, storage
-from scheduler.schedulers.queue import NotAllowedError, QueueEmptyError, QueueFullError
-from scheduler.server import serializers
-from scheduler.server.errors import BadRequestError, ConflictError, NotFoundError, TooManyRequestsError
-
-
-class QueueAPI:
- def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]) -> None:
- self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
- self.api: fastapi.FastAPI = api
- self.ctx: context.AppContext = ctx
- self.schedulers: dict[str, schedulers.Scheduler] = s
-
- self.api.add_api_route(
- path="/queues",
- endpoint=self.list,
- methods=["GET"],
- response_model=list[models.Queue],
- response_model_exclude_unset=True,
- status_code=status.HTTP_200_OK,
- description="List all queues",
- )
-
- self.api.add_api_route(
- path="/queues/{queue_id}",
- endpoint=self.get,
- methods=["GET"],
- response_model=models.Queue,
- status_code=status.HTTP_200_OK,
- description="Get a queue",
- )
-
- self.api.add_api_route(
- path="/queues/{queue_id}/pop",
- endpoint=self.pop,
- methods=["POST"],
- response_model=models.Task | None,
- status_code=status.HTTP_200_OK,
- description="Pop an item from a queue",
- )
-
- self.api.add_api_route(
- path="/queues/{queue_id}/push",
- endpoint=self.push,
- methods=["POST"],
- response_model=models.Task | None,
- status_code=status.HTTP_201_CREATED,
- description="Push an item to a queue",
- )
-
- def list(self) -> Any:
- return [models.Queue(**s.queue.dict(include_pq=False)) for s in self.schedulers.copy().values()]
-
- def get(self, queue_id: str) -> Any:
- s = self.schedulers.get(queue_id)
- if s is None:
- raise NotFoundError(f"queue not found, by queue_id: {queue_id}")
-
- return models.Queue(**s.queue.dict())
-
- def pop(self, queue_id: str, filters: storage.filters.FilterRequest | None = None) -> Any:
- s = self.schedulers.get(queue_id)
- if s is None:
- raise NotFoundError(f"queue not found, by queue_id: {queue_id}")
-
- try:
- item = s.pop_item_from_queue(filters)
- except QueueEmptyError:
- return None
-
- if item is None:
- raise NotFoundError("could not pop item from queue, check your filters")
-
- return models.Task(**item.model_dump())
-
- def push(self, queue_id: str, item_in: serializers.Task) -> Any:
- s = self.schedulers.get(queue_id)
- if s is None:
- raise NotFoundError(f"queue not found, by queue_id: {queue_id}")
-
- # Load default values
- new_item = models.Task(**item_in.model_dump(exclude_unset=True))
-
- # Set values
- if new_item.scheduler_id is None:
- new_item.scheduler_id = s.scheduler_id
-
- try:
- pushed_item = s.push_item_to_queue(new_item)
- except ValueError:
- raise BadRequestError("malformed item")
- except QueueFullError:
- raise TooManyRequestsError("queue is full")
- except NotAllowedError:
- raise ConflictError("queue is not allowed to push items")
-
- return pushed_item
diff --git a/mula/scheduler/server/handlers/schedulers.py b/mula/scheduler/server/handlers/schedulers.py
index 9358dcec45a..65ca2ac1c9a 100644
--- a/mula/scheduler/server/handlers/schedulers.py
+++ b/mula/scheduler/server/handlers/schedulers.py
@@ -4,12 +4,14 @@
import structlog
from fastapi import status
-from scheduler import context, models, schedulers
-from scheduler.server.errors import BadRequestError, NotFoundError
+from scheduler import context, models, schedulers, storage
+from scheduler.schedulers.queue import NotAllowedError, QueueFullError
+from scheduler.server import serializers, utils
+from scheduler.server.errors import BadRequestError, ConflictError, NotFoundError, TooManyRequestsError
class SchedulerAPI:
- def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]) -> None:
+ def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]):
self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
self.api: fastapi.FastAPI = api
self.ctx: context.AppContext = ctx
@@ -19,7 +21,7 @@ def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, s
path="/schedulers",
endpoint=self.list,
methods=["GET"],
- response_model=list[models.Scheduler],
+ response_model=list[serializers.Scheduler],
status_code=status.HTTP_200_OK,
description="List all schedulers",
)
@@ -28,51 +30,80 @@ def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, s
path="/schedulers/{scheduler_id}",
endpoint=self.get,
methods=["GET"],
- response_model=models.Scheduler,
+ response_model=serializers.Scheduler,
status_code=status.HTTP_200_OK,
description="Get a scheduler",
)
self.api.add_api_route(
- path="/schedulers/{scheduler_id}",
- endpoint=self.patch,
- methods=["PATCH"],
- response_model=models.Scheduler,
+ path="/schedulers/{scheduler_id}/push",
+ endpoint=self.push,
+ methods=["POST"],
+ response_model=models.Task,
+ status_code=status.HTTP_201_CREATED,
+ description="Push a task to a scheduler",
+ )
+
+ self.api.add_api_route(
+ path="/schedulers/{scheduler_id}/pop",
+ endpoint=self.pop,
+ methods=["POST"],
+ response_model=utils.PaginatedResponse,
status_code=status.HTTP_200_OK,
- description="Update a scheduler",
+ description="Pop a task from a scheduler",
)
- def list(self) -> Any:
- return [models.Scheduler(**s.dict()) for s in self.schedulers.values()]
+ def list(self) -> list[serializers.Scheduler]:
+ return [serializers.Scheduler(**s.dict()) for s in self.schedulers.values()]
def get(self, scheduler_id: str) -> Any:
s = self.schedulers.get(scheduler_id)
if s is None:
raise NotFoundError(f"Scheduler {scheduler_id} not found")
- return models.Scheduler(**s.dict())
+ return serializers.Scheduler(**s.dict())
+
+ def pop(
+ self,
+ request: fastapi.Request,
+ scheduler_id: str,
+ offset: int = 0,
+ limit: int = 100,
+ filters: storage.filters.FilterRequest | None = None,
+ ) -> utils.PaginatedResponse:
+ """Pop a page of tasks for a scheduler and mark them as dispatched.
+
+ Args:
+ request: Incoming request, forwarded to ``utils.paginate``.
+ scheduler_id: Id of the scheduler whose tasks are popped.
+ offset: Number of matching tasks to skip.
+ limit: Maximum number of tasks to return.
+ filters: Optional extra filters forwarded to the priority queue store.
+
+ Returns:
+ The paginated response built by ``utils.paginate`` from the popped
+ tasks and the count reported by the store.
+ """
+ # NOTE(review): unlike get() and push(), scheduler_id is not looked up in
+ # self.schedulers here; it is passed straight to the store - confirm an
+ # unknown id should yield an empty page rather than a NotFoundError.
+ results, count = self.ctx.datastores.pq_store.pop(
+ scheduler_id=scheduler_id, offset=offset, limit=limit, filters=filters
+ )
+
+ # Update status for popped items
+ # NOTE(review): this is a second, separate store call after the select
+ # above - verify that concurrent callers cannot receive the same tasks.
+ self.ctx.datastores.pq_store.bulk_update_status(
+ scheduler_id, [item.id for item in results], models.TaskStatus.DISPATCHED
+ )
+
+ return utils.paginate(request, results, count, offset, limit)
- def patch(self, scheduler_id: str, item: models.Scheduler) -> Any:
+ def push(self, scheduler_id: str, item: serializers.TaskPush) -> Any:
s = self.schedulers.get(scheduler_id)
if s is None:
raise NotFoundError(f"Scheduler {scheduler_id} not found")
- stored_scheduler_model = models.Scheduler(**s.dict())
- patch_data = item.model_dump(exclude_unset=True)
- if len(patch_data) == 0:
- raise BadRequestError("no data to patch")
+ if item.scheduler_id is not None and item.scheduler_id != scheduler_id:
+ raise BadRequestError("scheduler_id in item does not match the scheduler_id in the path")
- updated_scheduler = stored_scheduler_model.model_copy(update=patch_data)
+ # Set scheduler_id if not set
+ if item.scheduler_id is None:
+ item.scheduler_id = scheduler_id
- # We update the patched attributes, since the schedulers are kept
- # in memory.
- for attr, value in patch_data.items():
- setattr(s, attr, value)
+ # Load default values
+ new_item = models.Task(**item.model_dump(exclude_unset=True))
- # Enable or disable the scheduler if needed.
- if updated_scheduler.enabled:
- s.enable()
- elif not updated_scheduler.enabled:
- s.disable()
+ try:
+ pushed_item = s.push_item_to_queue(new_item)
+ except ValueError:
+ raise BadRequestError("malformed item")
+ except QueueFullError:
+ raise TooManyRequestsError("queue is full")
+ except NotAllowedError:
+ raise ConflictError("queue is not allowed to push items")
- return updated_scheduler
+ return pushed_item
diff --git a/mula/scheduler/server/handlers/schedules.py b/mula/scheduler/server/handlers/schedules.py
index 895a50c9b24..e67fa0f9bc6 100644
--- a/mula/scheduler/server/handlers/schedules.py
+++ b/mula/scheduler/server/handlers/schedules.py
@@ -12,13 +12,11 @@
class ScheduleAPI:
- def __init__(
- self, api: fastapi.FastAPI, ctx: context.AppContext, schedulers: dict[str, schedulers.Scheduler]
- ) -> None:
- self.logger: structlog.BoundLogger = structlog.get_logger(__name__)
- self.api = api
- self.ctx = ctx
- self.schedulers = schedulers
+ def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]):
+ self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
+ self.api: fastapi.FastAPI = api
+ self.ctx: context.AppContext = ctx
+ self.schedulers: dict[str, schedulers.Scheduler] = s
self.api.add_api_route(
path="/schedules",
@@ -113,8 +111,8 @@ def create(self, schedule: serializers.ScheduleCreate) -> Any:
try:
new_schedule = models.Schedule(**schedule.model_dump())
- except ValueError:
- raise ValidationError("validation error")
+ except ValueError as exc:
+ raise ValidationError(exc)
s = self.schedulers.get(new_schedule.scheduler_id)
if s is None:
@@ -123,8 +121,8 @@ def create(self, schedule: serializers.ScheduleCreate) -> Any:
# Validate data with task type of the scheduler
try:
instance = s.ITEM_TYPE.model_validate(new_schedule.data)
- except ValueError:
- raise BadRequestError("validation error")
+ except ValueError as exc:
+ raise BadRequestError(exc)
# Create hash for schedule with task type
new_schedule.hash = instance.hash
diff --git a/mula/scheduler/server/handlers/tasks.py b/mula/scheduler/server/handlers/tasks.py
index 46b6cc7469a..ac933085b8c 100644
--- a/mula/scheduler/server/handlers/tasks.py
+++ b/mula/scheduler/server/handlers/tasks.py
@@ -34,14 +34,6 @@ def __init__(self, api: fastapi.FastAPI, ctx: context.AppContext) -> None:
description="Get task status counts for all schedulers in last 24 hours",
)
- self.api.add_api_route(
- path="/tasks/stats/{scheduler_id}",
- endpoint=self.stats,
- methods=["GET"],
- status_code=status.HTTP_200_OK,
- description="Get task status counts for a scheduler in last 24 hours",
- )
-
self.api.add_api_route(
path="/tasks/{task_id}",
endpoint=self.get,
@@ -163,5 +155,7 @@ def patch(self, task_id: uuid.UUID, item: serializers.Task) -> Any:
return updated_task
- def stats(self, scheduler_id: str | None = None) -> dict[str, dict[str, int]] | None:
- return self.ctx.datastores.task_store.get_status_count_per_hour(scheduler_id)
+ def stats(
+ self, scheduler_id: str | None = None, organisation_id: str | None = None
+ ) -> dict[str, dict[str, int]] | None:
+ """Return task status counts per hour from the task store.
+
+ Args:
+ scheduler_id: Optional scheduler id, forwarded to the store as a filter.
+ organisation_id: Optional organisation id, forwarded to the store as a
+ filter.
+
+ Returns:
+ The result of ``task_store.get_status_count_per_hour`` unchanged.
+ """
+ return self.ctx.datastores.task_store.get_status_count_per_hour(scheduler_id, organisation_id)
diff --git a/mula/scheduler/server/serializers/__init__.py b/mula/scheduler/server/serializers/__init__.py
index a4d3c0b20c4..ac706a15163 100644
--- a/mula/scheduler/server/serializers/__init__.py
+++ b/mula/scheduler/server/serializers/__init__.py
@@ -1,2 +1,3 @@
from .schedule import ScheduleCreate, SchedulePatch
-from .task import Task, TaskStatus
+from .scheduler import Scheduler
+from .task import Task, TaskPush, TaskStatus
diff --git a/mula/scheduler/server/serializers/schedule.py b/mula/scheduler/server/serializers/schedule.py
index 5e3c0a0bbb9..e614b623f50 100644
--- a/mula/scheduler/server/serializers/schedule.py
+++ b/mula/scheduler/server/serializers/schedule.py
@@ -7,11 +7,9 @@ class ScheduleCreate(BaseModel):
model_config = ConfigDict(from_attributes=True)
scheduler_id: str
-
+ organisation: str
data: dict
-
schedule: str | None = None
-
deadline_at: datetime | None = None
@@ -20,11 +18,7 @@ class SchedulePatch(BaseModel):
model_config = ConfigDict(from_attributes=True)
hash: str | None = Field(None, max_length=32)
-
data: dict | None = None
-
enabled: bool | None = None
-
schedule: str | None = None
-
deadline_at: datetime | None = None
diff --git a/mula/scheduler/server/serializers/scheduler.py b/mula/scheduler/server/serializers/scheduler.py
new file mode 100644
index 00000000000..f267e98909d
--- /dev/null
+++ b/mula/scheduler/server/serializers/scheduler.py
@@ -0,0 +1,11 @@
+from datetime import datetime
+
+from pydantic import BaseModel
+
+
+class Scheduler(BaseModel):
+ """API representation of a scheduler.
+
+ Used as the response model of the ``/schedulers`` endpoints, where it is
+ built from the keyword arguments returned by a scheduler's ``dict()``.
+ """
+
+ id: str
+ type: str
+ item_type: str
+ # presumably the current size of the scheduler's queue - TODO confirm
+ qsize: int = 0
+ last_activity: datetime | None = None
diff --git a/mula/scheduler/server/serializers/task.py b/mula/scheduler/server/serializers/task.py
index 3a4e6fc3846..cc2aafbfdac 100644
--- a/mula/scheduler/server/serializers/task.py
+++ b/mula/scheduler/server/serializers/task.py
@@ -34,21 +34,20 @@ class Task(BaseModel):
model_config = ConfigDict(from_attributes=True, use_enum_values=True)
id: uuid.UUID | None = None
-
scheduler_id: str | None = None
-
schedule_id: uuid.UUID | None = None
-
+ organisation: str | None = None
priority: int | None = None
-
status: TaskStatus | None = None
-
type: str | None = None
-
hash: str | None = None
-
data: dict | None = None
-
created_at: datetime | None = None
-
modified_at: datetime | None = None
+
+
+class TaskPush(BaseModel):
+ """Request body accepted when pushing a task to a scheduler.
+
+ ``organisation`` and ``data`` are required; ``scheduler_id`` and
+ ``priority`` may be omitted.
+ """
+
+ # Optional: the push handler fills it in from the URL path when missing and
+ # rejects a value that differs from the path.
+ scheduler_id: str | None = None
+ organisation: str
+ priority: int | None = None
+ data: dict
diff --git a/mula/scheduler/server/server.py b/mula/scheduler/server/server.py
index b39cf1fca5c..2c08ebcc156 100644
--- a/mula/scheduler/server/server.py
+++ b/mula/scheduler/server/server.py
@@ -19,7 +19,7 @@ class Server:
api: A fastapi.FastAPI object used for exposing API endpoints.
"""
- def __init__(self, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]):
+ def __init__(self, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]) -> None:
"""Initializer of the Server class.
Args:
@@ -45,7 +45,6 @@ def __init__(self, ctx: context.AppContext, s: dict[str, schedulers.Scheduler]):
# Set up API endpoints
handlers.SchedulerAPI(self.api, self.ctx, s)
- handlers.QueueAPI(self.api, self.ctx, s)
handlers.ScheduleAPI(self.api, self.ctx, s)
handlers.TaskAPI(self.api, self.ctx)
handlers.MetricsAPI(self.api, self.ctx)
diff --git a/mula/scheduler/storage/connection.py b/mula/scheduler/storage/connection.py
index dc381191528..4787afe44bf 100644
--- a/mula/scheduler/storage/connection.py
+++ b/mula/scheduler/storage/connection.py
@@ -10,7 +10,7 @@
class DBConn:
def __init__(self, dsn: str, pool_size: int = 25):
- self.logger: structlog.BoundLogger = structlog.get_logger(__name__)
+ self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
self.dsn = dsn
self.pool_size = pool_size
diff --git a/mula/scheduler/storage/migrations/versions/0009_add_organisation.py b/mula/scheduler/storage/migrations/versions/0009_add_organisation.py
new file mode 100644
index 00000000000..5136c2a4a15
--- /dev/null
+++ b/mula/scheduler/storage/migrations/versions/0009_add_organisation.py
@@ -0,0 +1,48 @@
+"""Add organisation column to schedules and tasks
+
+Revision ID: 0009
+Revises: 0008
+Create Date: 2024-12-10 15:21:27.445743
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "0009"
+down_revision = "0008"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ """Add a required ``organisation`` column to ``schedules`` and ``tasks``.
+
+ The column is first added as nullable, then backfilled from each row's
+ JSON ``data`` payload, and only afterwards altered to be non-nullable.
+ """
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.add_column("schedules", sa.Column("organisation", sa.String(), nullable=True))
+ op.add_column("tasks", sa.Column("organisation", sa.String(), nullable=True))
+
+ # Backfill the new column. Schedules try three JSON paths in turn (each
+ # guarded by IS NOT NULL); tasks select the JSON path by their type
+ # (boefje, normalizer, report).
+ # NOTE(review): rows matched by none of these statements keep a NULL
+ # organisation, which would presumably make the nullable=False step below
+ # fail - confirm no such rows exist before running this migration.
+ conn = op.get_bind()
+ conn.execute(
+ sa.text(
+ """
+UPDATE schedules SET organisation = data->>'organization' WHERE data->>'organization' IS NOT NULL;
+UPDATE schedules SET organisation = data->'raw_data'->'boefje_meta'->>'organization' WHERE data->'raw_data'->'boefje_meta'->>'organization' IS NOT NULL;
+UPDATE schedules SET organisation = data->>'organisation_id' WHERE data->>'organisation_id' IS NOT NULL;
+
+UPDATE tasks SET organisation = data->>'organization' WHERE type = 'boefje';
+UPDATE tasks SET organisation = data->'raw_data'->'boefje_meta'->>'organization' WHERE type = 'normalizer';
+UPDATE tasks SET organisation = data->>'organisation_id' WHERE type = 'report';
+""" # noqa: E501
+ )
+ )
+
+ # Only after the backfill is the column made mandatory.
+ op.alter_column("schedules", "organisation", nullable=False)
+ op.alter_column("tasks", "organisation", nullable=False)
+ # ### end Alembic commands ###
+
+
+def downgrade():
+ """Drop the ``organisation`` column from ``tasks`` and ``schedules``."""
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.drop_column("tasks", "organisation")
+ op.drop_column("schedules", "organisation")
+ # ### end Alembic commands ###
diff --git a/mula/scheduler/storage/storage.py b/mula/scheduler/storage/storage.py
deleted file mode 100644
index 7fe2f8d1438..00000000000
--- a/mula/scheduler/storage/storage.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import json
-from functools import partial
-
-import sqlalchemy
-import structlog
-
-from scheduler.config import settings
-
-from .errors import StorageError
-
-
-class DBConn:
- def __init__(self, dsn: str, pool_size: int = 25):
- self.logger: structlog.BoundLogger = structlog.getLogger(__name__)
-
- self.dsn = dsn
- self.pool_size = pool_size
-
- def connect(self) -> None:
- db_uri_redacted = sqlalchemy.engine.make_url(name_or_url=self.dsn).render_as_string(hide_password=True)
-
- pool_size = settings.Settings().db_connection_pool_size
-
- self.logger.debug(
- "Connecting to database %s with pool size %s...",
- self.dsn,
- pool_size,
- dsn=db_uri_redacted,
- pool_size=pool_size,
- )
-
- try:
- serializer = partial(json.dumps, default=str)
- self.engine = sqlalchemy.create_engine(
- self.dsn,
- pool_pre_ping=True,
- pool_size=pool_size,
- pool_recycle=300,
- json_serializer=serializer,
- connect_args={"options": "-c timezone=utc"},
- )
- except sqlalchemy.exc.SQLAlchemyError as e:
- self.logger.error("Failed to connect to database %s: %s", self.dsn, e, dsn=db_uri_redacted)
- raise StorageError("Failed to connect to database.")
-
- self.logger.debug("Connected to database %s.", db_uri_redacted, dsn=db_uri_redacted)
-
- try:
- self.session = sqlalchemy.orm.sessionmaker(bind=self.engine)
- except sqlalchemy.exc.SQLAlchemyError as e:
- self.logger.error("Failed to create session: %s", e)
- raise StorageError("Failed to create session.")
diff --git a/mula/scheduler/storage/stores/pq.py b/mula/scheduler/storage/stores/pq.py
index feb62bd01c7..b7c8951225c 100644
--- a/mula/scheduler/storage/stores/pq.py
+++ b/mula/scheduler/storage/stores/pq.py
@@ -1,8 +1,10 @@
from uuid import UUID
+from sqlalchemy import exc
+
from scheduler import models
from scheduler.storage import DBConn
-from scheduler.storage.errors import exception_handler
+from scheduler.storage.errors import StorageError, exception_handler
from scheduler.storage.filters import FilterRequest, apply_filter
from scheduler.storage.utils import retry
@@ -15,25 +17,33 @@ def __init__(self, dbconn: DBConn) -> None:
@retry()
@exception_handler
- def pop(self, scheduler_id: str, filters: FilterRequest | None = None) -> models.Task | None:
+ def pop(
+ self, scheduler_id: str | None = None, offset: int = 0, limit: int = 100, filters: FilterRequest | None = None
+ ) -> tuple[list[models.Task], int]:
with self.dbconn.session.begin() as session:
- query = (
- session.query(models.TaskDB)
- .filter(models.TaskDB.status == models.TaskStatus.QUEUED)
- .order_by(models.TaskDB.priority.asc())
- .order_by(models.TaskDB.created_at.asc())
- .filter(models.TaskDB.scheduler_id == scheduler_id)
- )
+ query = session.query(models.TaskDB).filter(models.TaskDB.status == models.TaskStatus.QUEUED)
+
+ if scheduler_id is not None:
+ query = query.filter(models.TaskDB.scheduler_id == scheduler_id)
if filters is not None:
query = apply_filter(models.TaskDB, query, filters)
- item_orm = query.first()
+ try:
+ count = query.count()
+ item_orm = (
+ query.order_by(models.TaskDB.priority.asc())
+ .order_by(models.TaskDB.created_at.asc())
+ .offset(offset)
+ .limit(limit)
+ .all()
+ )
+ except exc.ProgrammingError as e:
+ raise StorageError(f"Invalid filter: {e}") from e
- if item_orm is None:
- return None
+ items = [models.Task.model_validate(item_orm) for item_orm in item_orm]
- return models.Task.model_validate(item_orm)
+ return items, count
@retry()
@exception_handler
@@ -188,3 +198,14 @@ def clear(self, scheduler_id: str) -> None:
.filter(models.TaskDB.scheduler_id == scheduler_id)
.delete(),
)
+
+    @retry()
+    @exception_handler
+    def bulk_update_status(self, scheduler_id: str, item_ids: list[UUID], status: models.TaskStatus) -> None:
+        """Set the status of several tasks of one scheduler in a single UPDATE.
+
+        Args:
+            scheduler_id: Only tasks belonging to this scheduler are updated.
+            item_ids: Ids of the tasks to update. An empty list is a no-op.
+            status: The status to store; written using the enum member's name.
+        """
+        # Nothing to update: skip the transaction and the UPDATE with an empty
+        # IN list (callers invoke this even when they popped no items).
+        if not item_ids:
+            return
+
+        with self.dbconn.session.begin() as session:
+            (
+                session.query(models.TaskDB)
+                .filter(models.TaskDB.scheduler_id == scheduler_id)
+                .filter(models.TaskDB.id.in_([str(item_id) for item_id in item_ids]))
+                # Bulk UPDATE; objects already loaded in the session are not refreshed.
+                .update({"status": status.name}, synchronize_session=False)
+            )
diff --git a/mula/scheduler/storage/stores/task.py b/mula/scheduler/storage/stores/task.py
index 437e10ca538..d2a83adcfe8 100644
--- a/mula/scheduler/storage/stores/task.py
+++ b/mula/scheduler/storage/stores/task.py
@@ -136,7 +136,9 @@ def cancel_tasks(self, scheduler_id: str, task_ids: list[str]) -> None:
@retry()
@exception_handler
- def get_status_count_per_hour(self, scheduler_id: str | None = None) -> dict[str, dict[str, int]] | None:
+ def get_status_count_per_hour(
+ self, scheduler_id: str | None = None, organisation_id: str | None = None
+ ) -> dict[str, dict[str, int]] | None:
with self.dbconn.session.begin() as session:
query = (
session.query(
@@ -152,6 +154,9 @@ def get_status_count_per_hour(self, scheduler_id: str | None = None) -> dict[str
if scheduler_id is not None:
query = query.filter(models.TaskDB.scheduler_id == scheduler_id)
+ if organisation_id is not None:
+ query = query.filter(models.TaskDB.organisation == organisation_id)
+
results = query.all()
response: dict[str, dict[str, int]] = {}
@@ -166,7 +171,9 @@ def get_status_count_per_hour(self, scheduler_id: str | None = None) -> dict[str
@retry()
@exception_handler
- def get_status_counts(self, scheduler_id: str | None = None) -> dict[str, int] | None:
+ def get_status_counts(
+ self, scheduler_id: str | None = None, organisation_id: str | None = None
+ ) -> dict[str, int] | None:
with self.dbconn.session.begin() as session:
query = (
session.query(models.TaskDB.status, func.count(models.TaskDB.id).label("count"))
@@ -177,6 +184,9 @@ def get_status_counts(self, scheduler_id: str | None = None) -> dict[str, int] |
if scheduler_id is not None:
query = query.filter(models.TaskDB.scheduler_id == scheduler_id)
+ if organisation_id is not None:
+ query = query.filter(models.TaskDB.organisation == organisation_id)
+
results = query.all()
response = {k.value: 0 for k in models.TaskStatus}
diff --git a/mula/scheduler/utils/dict_utils.py b/mula/scheduler/utils/dict_utils.py
index af5bda651cf..6084ec058c9 100644
--- a/mula/scheduler/utils/dict_utils.py
+++ b/mula/scheduler/utils/dict_utils.py
@@ -34,6 +34,10 @@ def get(self, key: str, default: Any | None = None) -> Any:
except KeyError:
return default
+ def is_empty(self) -> bool:
+ """Return True when the cache currently holds no entries.
+
+ The length is read while holding ``self.lock``.
+ """
+ with self.lock:
+ return len(self.cache) == 0
+
def reset(self) -> None:
with self.lock:
self.cache.clear()
diff --git a/mula/tests/integration/test_api.py b/mula/tests/integration/test_api.py
index 6eaa82086c2..0c927d6a3dd 100644
--- a/mula/tests/integration/test_api.py
+++ b/mula/tests/integration/test_api.py
@@ -64,7 +64,7 @@ def tearDown(self):
self.dbconn.engine.dispose()
-class APITestCase(APITemplateTestCase):
+class APISchedulerEndpointTestCase(APITemplateTestCase):
def test_get_schedulers(self):
response = self.client.get("/schedulers")
self.assertEqual(response.status_code, 200)
@@ -78,78 +78,12 @@ def test_get_scheduler_malformed_id(self):
response = self.client.get("/schedulers/123.123")
self.assertEqual(response.status_code, 404)
- def test_patch_scheduler(self):
- self.assertTrue(self.scheduler.is_enabled())
- response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"enabled": False})
- self.assertEqual(200, response.status_code)
- self.assertFalse(response.json().get("enabled"))
- self.assertFalse(self.scheduler.is_enabled())
-
- def test_patch_scheduler_attr_not_found(self):
- response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"not_found": "not found"})
- self.assertEqual(response.status_code, 400)
- self.assertEqual(response.json(), {"detail": "Bad request error occurred: no data to patch"})
-
- def test_patch_scheduler_not_found(self):
- mock_id = uuid.uuid4()
- response = self.client.patch(f"/schedulers/{mock_id}", json={"enabled": False})
- self.assertEqual(response.status_code, 404)
- self.assertEqual(response.json(), {"detail": f"Resource not found: Scheduler {mock_id} not found"})
-
- def test_patch_scheduler_disable(self):
- self.assertTrue(self.scheduler.is_enabled())
- response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"enabled": False})
- self.assertEqual(200, response.status_code)
- self.assertFalse(response.json().get("enabled"))
- self.assertFalse(self.scheduler.is_enabled())
-
- # Try to push to queue
- item = create_task_in(0)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item)
- self.assertNotEqual(response.status_code, 201)
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- def test_patch_scheduler_enable(self):
- # Disable queue first
- self.assertTrue(self.scheduler.is_enabled())
- response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"enabled": False})
- self.assertEqual(200, response.status_code)
- self.assertFalse(response.json().get("enabled"))
- self.assertFalse(self.scheduler.is_enabled())
-
- # Enable again
- response = self.client.patch(f"/schedulers/{self.scheduler.scheduler_id}", json={"enabled": True})
- self.assertEqual(200, response.status_code)
- self.assertTrue(response.json().get("enabled"))
- self.assertTrue(self.scheduler.is_enabled())
-
- # Try to push to queue
- self.assertEqual(0, self.scheduler.queue.qsize())
- item = create_task_in(1)
-
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item)
- self.assertEqual(response.status_code, 201)
- self.assertEqual(1, self.scheduler.queue.qsize())
-
- def test_get_queues(self):
- response = self.client.get("/queues")
- self.assertEqual(response.status_code, 200)
-
- def test_get_queue(self):
- response = self.client.get(f"/queues/{self.scheduler.scheduler_id}")
- self.assertEqual(response.status_code, 200)
- self.assertEqual(response.json().get("id"), self.scheduler.scheduler_id)
-
- def test_get_queue_malformed_id(self):
- response = self.client.get("/queues/123.123")
- self.assertEqual(response.status_code, 404)
-
def test_push_queue(self):
self.assertEqual(0, self.scheduler.queue.qsize())
- item = create_task_in(1)
+ item = create_task_in(1, self.organisation.id)
- response_post = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item)
+ response_post = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=item)
self.assertEqual(201, response_post.status_code)
self.assertEqual(1, self.scheduler.queue.qsize())
self.assertIsNotNone(response_post.json().get("id"))
@@ -166,23 +100,24 @@ def test_push_queue(self):
def test_push_incorrect_item_type(self):
response = self.client.post(
- f"/queues/{self.scheduler.scheduler_id}/push", json={"priority": 0, "item": "not a task"}
+ f"/schedulers/{self.scheduler.scheduler_id}/push", json={"organisation": self.organisation.id, "data": {}}
)
self.assertEqual(response.status_code, 400)
+ self.assertEqual(response.json(), {"detail": "Bad request error occurred: malformed item"})
def test_push_queue_full(self):
# Set maxsize of the queue to 1
self.scheduler.queue.maxsize = 1
# Add one task to the queue
- first_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item)
+ first_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
# Try to add another task to the queue through the api
- second_item = create_task_in(2)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item)
+ second_item = create_task_in(2, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item)
self.assertEqual(response.status_code, 429)
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -191,14 +126,14 @@ def test_push_queue_full_high_priority(self):
self.scheduler.queue.maxsize = 1
# Add one task to the queue
- first_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item)
+ first_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
# Try to add another task to the queue through the api
- second_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item)
+ second_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(2, self.scheduler.queue.qsize())
@@ -212,13 +147,13 @@ def test_push_replace_not_allowed(self):
self.scheduler.queue.allow_priority_updates = False
# Add one task to the queue
- initial_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ initial_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
# Add the same item again through the api
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
# The queue should still have one item
self.assertEqual(response.status_code, 409)
@@ -230,13 +165,13 @@ def test_push_replace_allowed(self):
self.scheduler.queue.allow_replace = True
# Add one task to the queue
- initial_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ initial_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
# Add the same item again through the api
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", json=response.json())
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", json=response.json())
# The queue should have one item
self.assertEqual(response.status_code, 201)
@@ -252,8 +187,8 @@ def test_push_updates_not_allowed(self):
self.scheduler.queue.allow_priority_updates = False
# Add one task to the queue
- initial_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ initial_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -262,7 +197,9 @@ def test_push_updates_not_allowed(self):
updated_item.data["name"] = "updated-name"
# Try to update the item through the api
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json())
+ response = self.client.post(
+ f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()
+ )
# The queue should still have one item
self.assertEqual(response.status_code, 409)
@@ -274,8 +211,8 @@ def test_push_updates_allowed(self):
self.scheduler.queue.allow_updates = True
# Add one task to the queue
- initial_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ initial_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -284,7 +221,9 @@ def test_push_updates_allowed(self):
updated_item.data["name"] = "updated-name"
# Try to update the item through the api
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json())
+ response = self.client.post(
+ f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()
+ )
self.assertEqual(response.status_code, 201)
# The queue should have one item
@@ -301,8 +240,8 @@ def test_push_priority_updates_not_allowed(self):
self.scheduler.queue.allow_priority_updates = False
# Add one task to the queue
- initial_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ initial_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -311,7 +250,9 @@ def test_push_priority_updates_not_allowed(self):
updated_item.priority = 2
# Try to update the item through the api
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json())
+ response = self.client.post(
+ f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()
+ )
# The queue should still have one item
self.assertEqual(response.status_code, 409)
@@ -328,8 +269,8 @@ def test_update_priority_higher(self):
self.scheduler.queue.allow_priority_updates = True
# Add one task to the queue
- initial_item = create_task_in(2)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ initial_item = create_task_in(2, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
self.assertEqual(response.status_code, 201)
# Update priority of the item
@@ -337,7 +278,9 @@ def test_update_priority_higher(self):
updated_item.priority = 1
# Try to update the item through the api
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json())
+ response = self.client.post(
+ f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()
+ )
self.assertEqual(response.status_code, 201)
# The queue should have one item
@@ -356,8 +299,8 @@ def test_update_priority_lower(self):
self.scheduler.queue.allow_priority_updates = True
# Add one task to the queue
- initial_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ initial_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
self.assertEqual(response.status_code, 201)
# Update priority of the item
@@ -365,7 +308,9 @@ def test_update_priority_lower(self):
updated_item.priority = 2
# Try to update the item through the api
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json())
+ response = self.client.post(
+ f"/schedulers/{self.scheduler.scheduler_id}/push", data=updated_item.model_dump_json()
+ )
self.assertEqual(response.status_code, 201)
# The queue should have one item
@@ -376,135 +321,227 @@ def test_update_priority_lower(self):
def test_pop_queue(self):
# Add one task to the queue
- initial_item = create_task_in(1)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=initial_item)
+ initial_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=initial_item)
initial_item_id = response.json().get("id")
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/pop")
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop")
self.assertEqual(200, response.status_code)
- self.assertEqual(initial_item_id, response.json().get("id"))
+ self.assertEqual(1, response.json().get("count"))
+ self.assertEqual(initial_item_id, response.json().get("results")[0].get("id"))
self.assertEqual(0, self.scheduler.queue.qsize())
+ # Status of the item should be DISPATCHED
+ get_item = self.client.get(f"/tasks/{initial_item_id}")
+ self.assertEqual(get_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower())
+
+ def test_pop_queue_multiple(self):
+ # Add one task to the queue
+ first_item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item)
+ first_item_id = response.json().get("id")
+ self.assertEqual(response.status_code, 201)
+ self.assertEqual(1, self.scheduler.queue.qsize())
+
+ # Add second item to the queue
+ second_item = create_task_in(2, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item)
+ second_item_id = response.json().get("id")
+ self.assertEqual(response.status_code, 201)
+ self.assertEqual(2, self.scheduler.queue.qsize())
+
+ # Should get two items, and queue should be empty
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop")
+ self.assertEqual(200, response.status_code)
+ self.assertEqual(2, response.json().get("count"))
+ self.assertEqual(first_item_id, response.json().get("results")[0].get("id"))
+ self.assertEqual(second_item_id, response.json().get("results")[1].get("id"))
+ self.assertEqual(0, self.scheduler.queue.qsize())
+
+ # Status of the items should be DISPATCHED
+ get_first_item = self.client.get(f"/tasks/{first_item_id}")
+ get_second_item = self.client.get(f"/tasks/{second_item_id}")
+ self.assertEqual(get_first_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower())
+ self.assertEqual(get_second_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower())
+
+ def test_pop_queue_multiple_pagination(self):
+ # Add 10 tasks to the queue
+ for i in range(10):
+ item = create_task_in(1, self.organisation.id)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=item)
+ self.assertEqual(response.status_code, 201)
+
+ # Should get 5 items, and queue should have 5 items
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop?limit=5")
+ self.assertEqual(200, response.status_code)
+ self.assertEqual(10, response.json().get("count"))
+ self.assertEqual(5, self.scheduler.queue.qsize())
+ self.assertEqual(5, len(response.json().get("results")))
+
+ # Status of the items should be DISPATCHED
+ for item in response.json().get("results"):
+ get_item = self.client.get(f"/tasks/{item.get('id')}")
+ self.assertEqual(get_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower())
+
+ # Should get 5 items, and queue should be empty
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop?limit=5")
+ self.assertEqual(200, response.status_code)
+ self.assertEqual(5, response.json().get("count"))
+ self.assertEqual(0, self.scheduler.queue.qsize())
+
+ # Status of the items should be DISPATCHED
+ for item in response.json().get("results"):
+ get_item = self.client.get(f"/tasks/{item.get('id')}")
+ self.assertEqual(get_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower())
+
def test_pop_queue_not_found(self):
mock_id = uuid.uuid4()
- response = self.client.post(f"/queues/{mock_id}/pop")
- self.assertEqual(404, response.status_code)
- self.assertEqual({"detail": f"Resource not found: queue not found, by queue_id: {mock_id}"}, response.json())
+ response = self.client.post(f"/schedulers/{mock_id}/pop")
+ self.assertEqual(200, response.status_code)
+ self.assertEqual(0, response.json().get("count"))
- def test_pop_queue_filters(self):
+ def test_pop_queue_filters_two_items(self):
# Add one task to the queue
- first_item = create_task_in(1, data=functions.TestModel(id="123", name="test"))
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item)
+ first_item = create_task_in(1, self.organisation.id, data=functions.TestModel(id="123", name="test"))
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item)
first_item_id = response.json().get("id")
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
# Add second item to the queue
- second_item = create_task_in(2, data=functions.TestModel(id="456", name="test"))
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item)
+ second_item = create_task_in(2, self.organisation.id, data=functions.TestModel(id="456", name="test"))
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item)
second_item_id = response.json().get("id")
self.assertEqual(response.status_code, 201)
self.assertEqual(2, self.scheduler.queue.qsize())
- # Should get the first item
+ # Should get two items, and queue should be empty
response = self.client.post(
- f"/queues/{self.scheduler.scheduler_id}/pop",
+ f"/schedulers/{self.scheduler.scheduler_id}/pop",
json={"filters": [{"column": "data", "field": "name", "operator": "eq", "value": "test"}]},
)
self.assertEqual(200, response.status_code)
- self.assertEqual(first_item_id, response.json().get("id"))
+ self.assertEqual(2, response.json().get("count"))
+ self.assertEqual(first_item_id, response.json().get("results")[0].get("id"))
+ self.assertEqual(second_item_id, response.json().get("results")[1].get("id"))
+ self.assertEqual(0, self.scheduler.queue.qsize())
+
+ # Status of the items should be DISPATCHED
+ get_first_item = self.client.get(f"/tasks/{first_item_id}")
+ get_second_item = self.client.get(f"/tasks/{second_item_id}")
+ self.assertEqual(get_first_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower())
+ self.assertEqual(get_second_item.json().get("status"), models.TaskStatus.DISPATCHED.name.lower())
+
+ def test_pop_queue_filters_one_item(self):
+ # Add one task to the queue
+ first_item = create_task_in(1, self.organisation.id, data=functions.TestModel(id="123", name="test"))
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item)
+ first_item_id = response.json().get("id")
+ self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
- # Should not return any items
+ # Add second item to the queue
+ second_item = create_task_in(2, self.organisation.id, data=functions.TestModel(id="456", name="test"))
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item)
+ second_item_id = response.json().get("id")
+ self.assertEqual(response.status_code, 201)
+ self.assertEqual(2, self.scheduler.queue.qsize())
+
+ # Should get the first item, and should still be an item on the queue
response = self.client.post(
- f"/queues/{self.scheduler.scheduler_id}/pop",
+ f"/schedulers/{self.scheduler.scheduler_id}/pop",
json={"filters": [{"column": "data", "field": "id", "operator": "eq", "value": "123"}]},
)
- self.assertEqual(404, response.status_code)
- self.assertEqual(
- response.json(), {"detail": "Resource not found: could not pop item from queue, check your filters"}
- )
+ self.assertEqual(200, response.status_code)
+ self.assertEqual(1, response.json().get("count"))
+ self.assertEqual(first_item_id, response.json().get("results")[0].get("id"))
self.assertEqual(1, self.scheduler.queue.qsize())
- # Should get the second item
+ # Should get the second item, and should be no items on the queue
response = self.client.post(
- f"/queues/{self.scheduler.scheduler_id}/pop",
- json={"filters": [{"column": "data", "field": "name", "operator": "eq", "value": "test"}]},
+ f"/schedulers/{self.scheduler.scheduler_id}/pop",
+ json={"filters": [{"column": "data", "field": "id", "operator": "eq", "value": "456"}]},
)
self.assertEqual(200, response.status_code)
- self.assertEqual(second_item_id, response.json().get("id"))
+ self.assertEqual(1, response.json().get("count"))
+ self.assertEqual(second_item_id, response.json().get("results")[0].get("id"))
self.assertEqual(0, self.scheduler.queue.qsize())
def test_pop_queue_filters_nested(self):
# Add one task to the queue
- first_item = create_task_in(1, data=functions.TestModel(id="123", name="test", categories=["foo", "bar"]))
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item)
+ first_item = create_task_in(
+ 1, self.organisation.id, data=functions.TestModel(id="123", name="test", categories=["foo", "bar"])
+ )
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item)
first_item_id = response.json().get("id")
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
# Add second item to the queue
- second_item = create_task_in(2, data=functions.TestModel(id="456", name="test", categories=["baz", "bat"]))
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item)
+ second_item = create_task_in(
+ 2, self.organisation.id, data=functions.TestModel(id="456", name="test", categories=["baz", "bat"])
+ )
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item)
second_item_id = response.json().get("id")
self.assertEqual(response.status_code, 201)
self.assertEqual(2, self.scheduler.queue.qsize())
# Should get the first item
response = self.client.post(
- f"/queues/{self.scheduler.scheduler_id}/pop",
+ f"/schedulers/{self.scheduler.scheduler_id}/pop",
json={
"filters": [{"column": "data", "operator": "@>", "value": json.dumps({"categories": ["foo", "bar"]})}]
},
)
self.assertEqual(200, response.status_code)
- self.assertEqual(first_item_id, response.json().get("id"))
+ self.assertEqual(first_item_id, response.json().get("results")[0].get("id"))
self.assertEqual(1, self.scheduler.queue.qsize())
# Should not return any items
response = self.client.post(
- f"/queues/{self.scheduler.scheduler_id}/pop",
+ f"/schedulers/{self.scheduler.scheduler_id}/pop",
json={
"filters": [{"column": "data", "operator": "@>", "value": json.dumps({"categories": ["foo", "bar"]})}]
},
)
-
- self.assertEqual(404, response.status_code)
- self.assertEqual(
- response.json(), {"detail": "Resource not found: could not pop item from queue, check your filters"}
- )
+ self.assertEqual(200, response.status_code)
+ self.assertEqual(0, response.json().get("count"))
self.assertEqual(1, self.scheduler.queue.qsize())
# Should get the second item
response = self.client.post(
- f"/queues/{self.scheduler.scheduler_id}/pop",
+ f"/schedulers/{self.scheduler.scheduler_id}/pop",
json={
"filters": [{"column": "data", "operator": "@>", "value": json.dumps({"categories": ["baz", "bat"]})}]
},
)
self.assertEqual(200, response.status_code)
- self.assertEqual(second_item_id, response.json().get("id"))
+ self.assertEqual(second_item_id, response.json().get("results")[0].get("id"))
self.assertEqual(0, self.scheduler.queue.qsize())
def test_pop_queue_filters_nested_contained_by(self):
# Add one task to the queue
- first_item = create_task_in(1, data=functions.TestModel(id="123", name="test", categories=["foo", "bar"]))
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item)
+ first_item = create_task_in(
+ 1, self.organisation.id, data=functions.TestModel(id="123", name="test", categories=["foo", "bar"])
+ )
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item)
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
# Add second item to the queue
- second_item = create_task_in(2, data=functions.TestModel(id="456", name="test", categories=["baz", "bat"]))
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item)
+ second_item = create_task_in(
+ 2, self.organisation.id, data=functions.TestModel(id="456", name="test", categories=["baz", "bat"])
+ )
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item)
second_item_id = response.json().get("id")
self.assertEqual(response.status_code, 201)
self.assertEqual(2, self.scheduler.queue.qsize())
# Test contained by
response = self.client.post(
- f"/queues/{self.scheduler.scheduler_id}/pop",
+ f"/schedulers/{self.scheduler.scheduler_id}/pop",
json={
"filters": [
{"column": "data", "operator": "<@", "field": "categories", "value": json.dumps(["baz", "bat"])}
@@ -513,13 +550,14 @@ def test_pop_queue_filters_nested_contained_by(self):
)
self.assertEqual(200, response.status_code)
- self.assertEqual(second_item_id, response.json().get("id"))
+ self.assertEqual(second_item_id, response.json().get("results")[0].get("id"))
self.assertEqual(1, self.scheduler.queue.qsize())
def test_pop_empty(self):
"""When queue is empty it should return an empty response"""
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/pop")
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/pop")
self.assertEqual(200, response.status_code)
+ self.assertEqual(0, response.json().get("count"))
class APITasksEndpointTestCase(APITemplateTestCase):
@@ -529,9 +567,10 @@ def setUp(self):
# Add one task to the queue
first_item = create_task_in(
1,
+ self.organisation.id,
data=functions.TestModel(id="123", name="test", child=functions.TestModel(id="123.123", name="test.child")),
)
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=first_item)
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=first_item)
initial_item_id = response.json().get("id")
self.assertEqual(response.status_code, 201)
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -539,8 +578,8 @@ def setUp(self):
self.first_item_api = self.client.get(f"/tasks/{initial_item_id}").json()
# Add second item to the queue
- second_item = create_task_in(1, data=functions.TestModel(id="456", name="test"))
- response = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=second_item)
+ second_item = create_task_in(1, self.organisation.id, data=functions.TestModel(id="456", name="test"))
+ response = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=second_item)
second_item_id = response.json().get("id")
self.assertEqual(response.status_code, 201)
self.assertEqual(2, self.scheduler.queue.qsize())
@@ -548,8 +587,8 @@ def setUp(self):
self.second_item_api = self.client.get(f"/tasks/{second_item_id}").json()
def test_create_task(self):
- item = create_task_in(1)
- response_post = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item)
+ item = create_task_in(1, self.organisation.id)
+ response_post = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=item)
self.assertEqual(201, response_post.status_code)
initial_item_id = response_post.json().get("id")
@@ -574,9 +613,9 @@ def test_get_tasks(self):
def test_get_task(self):
# First add a task
- item = create_task_in(1)
+ item = create_task_in(1, self.organisation.id)
- response_post = self.client.post(f"/queues/{self.scheduler.scheduler_id}/push", data=item)
+ response_post = self.client.post(f"/schedulers/{self.scheduler.scheduler_id}/push", data=item)
self.assertEqual(201, response_post.status_code)
initial_item_id = response_post.json().get("id")
@@ -732,7 +771,10 @@ def test_get_tasks_stats(self):
response = self.client.get("/tasks/stats")
self.assertEqual(200, response.status_code)
- response = self.client.get(f"/tasks/stats/{self.first_item_api.get('scheduler_id')}")
+ response = self.client.get(f"/tasks/stats?scheduler_id={self.first_item_api.get('scheduler_id')}")
+ self.assertEqual(200, response.status_code)
+
+ response = self.client.get(f"/tasks/stats?organisation_id={self.first_item_api.get('organisation_id')}")
self.assertEqual(200, response.status_code)
@@ -740,20 +782,22 @@ class APIScheduleEndpointTestCase(APITemplateTestCase):
def setUp(self):
super().setUp()
- self.first_item = functions.create_item(self.scheduler.scheduler_id, 1)
+ self.first_item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
self.first_schedule = self.mock_ctx.datastores.schedule_store.create_schedule(
models.Schedule(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
hash=self.first_item.hash,
data=self.first_item.data,
deadline_at=datetime.now(timezone.utc) + timedelta(days=1),
)
)
- self.second_item = functions.create_item(self.scheduler.scheduler_id, 1)
+ self.second_item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
self.second_schedule = self.mock_ctx.datastores.schedule_store.create_schedule(
models.Schedule(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
hash=self.second_item.hash,
data=self.second_item.data,
deadline_at=datetime.now(timezone.utc) + timedelta(days=2),
@@ -886,9 +930,15 @@ def test_list_schedules_min_and_max_created_at(self):
self.assertEqual(str(self.first_schedule.id), response.json()["results"][0]["id"])
def test_post_schedule(self):
- item = functions.create_item(self.scheduler.scheduler_id, 1)
+ item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
response = self.client.post(
- "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": item.data}
+ "/schedules",
+ json={
+ "scheduler_id": item.scheduler_id,
+ "organisation": self.organisation.id,
+ "schedule": "*/5 * * * *",
+ "data": item.data,
+ },
)
self.assertEqual(201, response.status_code)
self.assertEqual(item.hash, response.json().get("hash"))
@@ -904,10 +954,16 @@ def test_post_schedule(self):
def test_post_schedule_explicit_deadline_at(self):
"""When a schedule is created, the deadline_at should be set if it is provided."""
- item = functions.create_item(self.scheduler.scheduler_id, 1)
+ item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
now = datetime.now(timezone.utc)
response = self.client.post(
- "/schedules", json={"scheduler_id": item.scheduler_id, "data": item.data, "deadline_at": now.isoformat()}
+ "/schedules",
+ json={
+ "scheduler_id": item.scheduler_id,
+ "organisation": self.organisation.id,
+ "data": item.data,
+ "deadline_at": now.isoformat(),
+ },
)
self.assertEqual(201, response.status_code)
self.assertIsNone(response.json().get("schedule"))
@@ -920,54 +976,92 @@ def test_post_schedule_explicit_deadline_at(self):
def test_post_schedule_schedule_and_deadline_at_none(self):
"""When a schedule is created, both schedule and deadline_at should not be None."""
- item = functions.create_item(self.scheduler.scheduler_id, 1)
- response = self.client.post("/schedules", json={"scheduler_id": item.scheduler_id, "data": item.data})
+ item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
+ response = self.client.post(
+ "/schedules",
+ json={"scheduler_id": item.scheduler_id, "organisation": self.organisation.id, "data": item.data},
+ )
self.assertEqual(400, response.status_code)
self.assertEqual(
{"detail": "Bad request error occurred: Either deadline_at or schedule must be provided"}, response.json()
)
def test_post_schedule_invalid_schedule(self):
- item = functions.create_item(self.scheduler.scheduler_id, 1)
+ item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
response = self.client.post(
- "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "invalid", "data": item.data}
+ "/schedules",
+ json={
+ "scheduler_id": item.scheduler_id,
+ "organisation": self.organisation.id,
+ "schedule": "invalid",
+ "data": item.data,
+ },
)
self.assertEqual(400, response.status_code)
self.assertIn("validation error", response.json().get("detail"))
def test_post_schedule_invalid_scheduler_id(self):
- item = functions.create_item(self.scheduler.scheduler_id, 1)
+ item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
response = self.client.post(
- "/schedules", json={"scheduler_id": "invalid", "schedule": "*/5 * * * *", "data": item.data}
+ "/schedules",
+ json={
+ "scheduler_id": "invalid",
+ "organisation": self.organisation.id,
+ "schedule": "*/5 * * * *",
+ "data": item.data,
+ },
)
self.assertEqual(400, response.status_code)
self.assertEqual({"detail": "Bad request error occurred: Scheduler invalid not found"}, response.json())
def test_post_schedule_invalid_data(self):
- item = functions.create_item(self.scheduler.scheduler_id, 1)
+ item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
response = self.client.post(
- "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": "invalid"}
+ "/schedules",
+ json={
+ "scheduler_id": item.scheduler_id,
+ "organisation": self.organisation.id,
+ "schedule": "*/5 * * * *",
+ "data": "invalid",
+ },
)
self.assertEqual(422, response.status_code)
def test_post_schedule_invalid_data_type(self):
- item = functions.create_item(self.scheduler.scheduler_id, 1)
+ item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
response = self.client.post(
"/schedules",
- json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": {"invalid": "invalid"}},
+ json={
+ "scheduler_id": item.scheduler_id,
+ "organisation": self.organisation.id,
+ "schedule": "*/5 * * * *",
+ "data": {"invalid": "invalid"},
+ },
)
self.assertEqual(400, response.status_code)
self.assertIn("validation error", response.json().get("detail"))
def test_post_schedule_hash_already_exists(self):
- item = functions.create_item(self.scheduler.scheduler_id, 1)
+ item = functions.create_task(self.scheduler.scheduler_id, self.organisation.id)
response = self.client.post(
- "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": item.data}
+ "/schedules",
+ json={
+ "scheduler_id": item.scheduler_id,
+ "organisation": self.organisation.id,
+ "schedule": "*/5 * * * *",
+ "data": item.data,
+ },
)
self.assertEqual(201, response.status_code)
response = self.client.post(
- "/schedules", json={"scheduler_id": item.scheduler_id, "schedule": "*/5 * * * *", "data": item.data}
+ "/schedules",
+ json={
+ "scheduler_id": item.scheduler_id,
+ "organisation": self.organisation.id,
+ "schedule": "*/5 * * * *",
+ "data": item.data,
+ },
)
self.assertEqual(409, response.status_code)
self.assertIn("schedule with the same hash already exists", response.json().get("detail"))
diff --git a/mula/tests/integration/test_app.py b/mula/tests/integration/test_app.py
index b75f0576883..aa8add3bb5a 100644
--- a/mula/tests/integration/test_app.py
+++ b/mula/tests/integration/test_app.py
@@ -40,105 +40,15 @@ def tearDown(self):
models.Base.metadata.drop_all(self.dbconn.engine)
self.dbconn.engine.dispose()
- def test_monitor_orgs_add(self):
- """Test that when a new organisation is added, a new scheduler is created"""
- # Arrange
- self.mock_ctx.services.katalogus.organisations = {
- "org-1": OrganisationFactory(id="org-1"),
- "org-2": OrganisationFactory(id="org-2"),
- }
-
- # Act
- self.app.monitor_organisations()
-
- # Assert: six schedulers should have been created for two organisations
- self.assertEqual(6, len(self.app.schedulers.keys()))
- self.assertEqual(6, len(self.app.server.schedulers.keys()))
-
- scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()}
- self.assertEqual({"org-1", "org-2"}, scheduler_org_ids)
-
- def test_monitor_orgs_remove(self):
- """Test that when an organisation is removed, the scheduler is removed"""
- # Arrange
- self.mock_ctx.services.katalogus.organisations = {
- "org-1": OrganisationFactory(id="org-1"),
- "org-2": OrganisationFactory(id="org-2"),
- }
-
- # Act
- self.app.monitor_organisations()
-
- # Assert: six schedulers should have been created for two organisations
- self.assertEqual(6, len(self.app.schedulers.keys()))
- self.assertEqual(6, len(self.app.server.schedulers.keys()))
-
- scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()}
- self.assertEqual({"org-1", "org-2"}, scheduler_org_ids)
-
- # Arrange
- self.mock_ctx.services.katalogus.organisations = {}
-
- # Act
- self.app.monitor_organisations()
-
- # Assert
- self.assertEqual(0, len(self.app.schedulers.keys()))
- self.assertEqual(0, len(self.app.server.schedulers.keys()))
-
- scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()}
- self.assertEqual(set(), scheduler_org_ids)
-
- def test_monitor_orgs_add_and_remove(self):
- """Test that when an organisation is added and removed, the scheduler
- is removed"""
- # Arrange
- self.mock_ctx.services.katalogus.organisations = {
- "org-1": OrganisationFactory(id="org-1"),
- "org-2": OrganisationFactory(id="org-2"),
- }
-
- # Act
- self.app.monitor_organisations()
-
- # Assert: six schedulers should have been created for two organisations
- self.assertEqual(6, len(self.app.schedulers.keys()))
- self.assertEqual(6, len(self.app.server.schedulers.keys()))
-
- scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()}
- self.assertEqual({"org-1", "org-2"}, scheduler_org_ids)
-
- # Arrange
- self.mock_ctx.services.katalogus.organisations = {
- "org-1": OrganisationFactory(id="org-1"),
- "org-3": OrganisationFactory(id="org-3"),
- }
-
- # Act
- self.app.monitor_organisations()
-
- # Assert
- self.assertEqual(6, len(self.app.schedulers.keys()))
- self.assertEqual(6, len(self.app.server.schedulers.keys()))
-
- scheduler_org_ids = {s.organisation.id for s in self.app.schedulers.values()}
- self.assertEqual({"org-1", "org-3"}, scheduler_org_ids)
-
def test_shutdown(self):
"""Test that the app shuts down gracefully"""
# Arrange
self.mock_ctx.services.katalogus.organisations = {"org-1": OrganisationFactory(id="org-1")}
-
self.app.start_schedulers()
- self.app.start_monitors()
# Shutdown the app
self.app.shutdown()
- # Assert that the schedulers have been stopped
- for s in self.app.schedulers.copy().values():
- self.assertFalse(s.is_alive())
-
# Assert that all threads have been stopped
# for thread in self.app.threads:
for t in threading.enumerate():
diff --git a/mula/tests/integration/test_boefje_scheduler.py b/mula/tests/integration/test_boefje_scheduler.py
index 9fcc9585ce6..b6411a3eea4 100644
--- a/mula/tests/integration/test_boefje_scheduler.py
+++ b/mula/tests/integration/test_boefje_scheduler.py
@@ -56,10 +56,10 @@ def setUp(self):
)
# Scheduler
+ self.scheduler = schedulers.BoefjeScheduler(self.mock_ctx)
+
+ # Organisation
self.organisation = OrganisationFactory()
- self.scheduler = schedulers.BoefjeScheduler(
- ctx=self.mock_ctx, scheduler_id=self.organisation.id, organisation=self.organisation
- )
def tearDown(self):
self.scheduler.stop()
@@ -88,6 +88,21 @@ def setUp(self):
def tearDown(self):
mock.patch.stopall()
+ def test_run(self):
+ """When the scheduler is started, the run method should be called.
+ And the scheduler should start the threads.
+ """
+ # Act
+ self.scheduler.run()
+
+ # Assert: threads started
+ thread_ids = ["BoefjeScheduler-mutations", "BoefjeScheduler-new_boefjes", "BoefjeScheduler-rescheduling"]
+ for thread in self.scheduler.threads:
+ self.assertIn(thread.name, thread_ids)
+ self.assertTrue(thread.is_alive())
+
+ self.scheduler.stop()
+
def test_is_allowed_to_run(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -156,7 +171,9 @@ def test_has_boefje_task_started_running_datastore_running(self):
boefje = BoefjeFactory()
boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id)
- task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task)
+ task = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id
+ )
# Mock
self.mock_get_latest_task_by_hash.return_value = task
@@ -180,6 +197,7 @@ def test_has_boefje_task_started_running_datastore_not_running(self):
task_db_first = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
status=models.TaskStatus.COMPLETED,
type=models.BoefjeTask.type,
@@ -191,6 +209,7 @@ def test_has_boefje_task_started_running_datastore_not_running(self):
task_db_second = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
type=models.BoefjeTask.type,
hash=boefje_task.hash,
@@ -294,11 +313,12 @@ def test_has_boefje_task_started_running_stalled_before_grace_period(self):
task_db = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
+ status=models.TaskStatus.DISPATCHED,
type=models.BoefjeTask.type,
hash=boefje_task.hash,
data=boefje_task.model_dump(),
- status=models.TaskStatus.DISPATCHED,
created_at=datetime.now(timezone.utc),
modified_at=datetime.now(timezone.utc),
)
@@ -321,6 +341,7 @@ def test_has_boefje_task_started_running_stalled_after_grace_period(self):
task_db = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
status=models.TaskStatus.DISPATCHED,
type=models.BoefjeTask.type,
@@ -351,6 +372,7 @@ def test_has_boefje_task_started_running_mismatch_before_grace_period(self):
task_db = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
status=models.TaskStatus.COMPLETED,
type=models.BoefjeTask.type,
@@ -384,6 +406,7 @@ def test_has_boefje_task_started_running_mismatch_after_grace_period(self):
task_db = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
status=models.TaskStatus.COMPLETED,
type=models.BoefjeTask.type,
@@ -412,6 +435,7 @@ def test_has_boefje_task_grace_period_passed_datastore_passed(self):
task_db = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
status=models.TaskStatus.COMPLETED,
type=models.BoefjeTask.type,
@@ -443,6 +467,7 @@ def test_has_boefje_task_grace_period_passed_datastore_not_passed(self):
task_db = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
status=models.TaskStatus.COMPLETED,
type=models.BoefjeTask.type,
@@ -472,6 +497,7 @@ def test_has_boefje_task_grace_period_passed_bytes_passed(self):
task_db = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
status=models.TaskStatus.COMPLETED,
type=models.BoefjeTask.type,
@@ -507,6 +533,7 @@ def test_has_boefje_task_grace_period_passed_bytes_not_passed(self):
task_db = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
status=models.TaskStatus.COMPLETED,
type=models.BoefjeTask.type,
@@ -531,7 +558,7 @@ def test_has_boefje_task_grace_period_passed_bytes_not_passed(self):
# Assert
self.assertFalse(has_passed)
- def test_push_task(self):
+ def test_push_boefje_task(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
@@ -549,12 +576,12 @@ def test_push_task(self):
self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type])
# Act
- self.scheduler.push_boefje_task(boefje_task)
+ self.scheduler.push_boefje_task(boefje_task, self.organisation.id)
# Assert
self.assertEqual(1, self.scheduler.queue.qsize())
- def test_push_task_no_ooi(self):
+ def test_push_boefje_task_no_ooi(self):
# Arrange
boefje = BoefjeFactory()
@@ -568,7 +595,7 @@ def test_push_task_no_ooi(self):
self.mock_get_plugin.return_value = PluginFactory(scan_level=0)
# Act
- self.scheduler.push_boefje_task(boefje_task)
+ self.scheduler.push_boefje_task(boefje_task, self.organisation.id)
# Assert
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -578,7 +605,7 @@ def test_push_task_no_ooi(self):
@mock.patch("scheduler.schedulers.BoefjeScheduler.has_boefje_task_grace_period_passed")
@mock.patch("scheduler.schedulers.BoefjeScheduler.is_item_on_queue_by_hash")
@mock.patch("scheduler.context.AppContext.datastores.task_store.get_latest_task_by_hash")
- def test_push_task_queue_full(
+ def test_push_boefje_task_queue_full(
self,
mock_get_latest_task_by_hash,
mock_is_item_on_queue_by_hash,
@@ -610,15 +637,15 @@ def test_push_task_queue_full(
self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type])
# Act
- self.scheduler.push_boefje_task(boefje_task)
+ self.scheduler.push_boefje_task(boefje_task, self.organisation.id)
# Assert
self.assertEqual(1, self.scheduler.queue.qsize())
with capture_logs() as cm:
- self.scheduler.push_boefje_task(boefje_task)
+ self.scheduler.push_boefje_task(boefje_task, self.organisation.id)
- self.assertIn("Could not add task to queue, queue was full", cm[-1].get("event"))
+ self.assertIn("Queue is full", cm[-1].get("event"))
self.assertEqual(1, self.scheduler.queue.qsize())
@mock.patch("scheduler.schedulers.BoefjeScheduler.has_boefje_task_stalled")
@@ -627,7 +654,7 @@ def test_push_task_queue_full(
@mock.patch("scheduler.schedulers.BoefjeScheduler.has_boefje_task_grace_period_passed")
@mock.patch("scheduler.schedulers.BoefjeScheduler.is_item_on_queue_by_hash")
@mock.patch("scheduler.context.AppContext.datastores.task_store.get_tasks_by_hash")
- def test_push_task_stalled(
+ def test_push_boefje_task_stalled(
self,
mock_get_tasks_by_hash,
mock_is_item_on_queue_by_hash,
@@ -646,6 +673,7 @@ def test_push_task_stalled(
task = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
type=models.BoefjeTask.type,
hash=boefje_task.hash,
@@ -654,13 +682,11 @@ def test_push_task_stalled(
modified_at=datetime.now(timezone.utc),
)
- item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task)
-
# Mocks
self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type])
# Act
- self.scheduler.push_item_to_queue(item)
+ self.scheduler.push_item_to_queue(task)
# Assert: task should be on priority queue
task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data)
@@ -669,16 +695,16 @@ def test_push_task_stalled(
self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id)
# Assert: task should be in datastore, and queued
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
+ task_db = self.mock_ctx.datastores.task_store.get_task(task.id)
+ self.assertEqual(task_db.id, task.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
# Act
self.scheduler.pop_item_from_queue()
# Assert: task should be in datastore, and dispatched
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
+ task_db = self.mock_ctx.datastores.task_store.get_task(task.id)
+ self.assertEqual(task_db.id, task.id)
self.assertEqual(task_db.status, models.TaskStatus.DISPATCHED)
# Mocks
@@ -691,11 +717,11 @@ def test_push_task_stalled(
mock_get_tasks_by_hash.return_value = None
# Act
- self.scheduler.push_boefje_task(boefje_task)
+ self.scheduler.push_boefje_task(boefje_task, self.organisation.id)
# Assert: task should be in datastore, and failed
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
+ task_db = self.mock_ctx.datastores.task_store.get_task(task.id)
+ self.assertEqual(task_db.id, task.id)
self.assertEqual(task_db.status, models.TaskStatus.FAILED)
# Assert: new task should be queued
@@ -715,6 +741,7 @@ def test_post_push(self):
task = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
type=models.BoefjeTask.type,
hash=boefje_task.hash,
@@ -723,12 +750,10 @@ def test_post_push(self):
modified_at=datetime.now(timezone.utc),
)
- item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task)
-
self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type])
# Act
- self.scheduler.push_item_to_queue(item)
+ self.scheduler.push_item_to_queue(task)
# Task should be on priority queue
task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data)
@@ -737,8 +762,8 @@ def test_post_push(self):
self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id)
# Task should be in datastore, and queued
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
+ task_db = self.mock_ctx.datastores.task_store.get_task(task.id)
+ self.assertEqual(task_db.id, task.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
# Schedule should be in datastore
@@ -764,6 +789,7 @@ def test_post_push_boefje_cron(self):
task = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
type=models.BoefjeTask.type,
hash=boefje_task.hash,
@@ -772,12 +798,10 @@ def test_post_push_boefje_cron(self):
modified_at=datetime.now(timezone.utc),
)
- item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task)
-
self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type], cron=cron)
# Act
- self.scheduler.push_item_to_queue(item)
+ self.scheduler.push_item_to_queue(task)
# Task should be on priority queue
task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data)
@@ -786,8 +810,8 @@ def test_post_push_boefje_cron(self):
self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id)
# Task should be in datastore, and queued
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
+ task_db = self.mock_ctx.datastores.task_store.get_task(task.id)
+ self.assertEqual(task_db.id, task.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
# Schedule should be in datastore
@@ -819,6 +843,7 @@ def test_post_push_boefje_interval(self):
task = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
type=models.BoefjeTask.type,
hash=boefje_task.hash,
@@ -827,12 +852,10 @@ def test_post_push_boefje_interval(self):
modified_at=datetime.now(timezone.utc),
)
- item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task)
-
self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type], interval=1500)
# Act
- self.scheduler.push_item_to_queue(item)
+ self.scheduler.push_item_to_queue(task)
# Task should be on priority queue
task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data)
@@ -841,8 +864,8 @@ def test_post_push_boefje_interval(self):
self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id)
# Task should be in datastore, and queued
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
+ task_db = self.mock_ctx.datastores.task_store.get_task(task.id)
+ self.assertEqual(task_db.id, task.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
# Schedule should be in datastore
@@ -871,6 +894,7 @@ def test_post_pop(self):
task = models.Task(
scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
priority=1,
type=models.BoefjeTask.type,
hash=boefje_task.hash,
@@ -879,13 +903,11 @@ def test_post_pop(self):
modified_at=datetime.now(timezone.utc),
)
- item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task)
-
# Mocks
self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type])
# Act
- self.scheduler.push_item_to_queue(item)
+ self.scheduler.push_item_to_queue(task)
# Assert: task should be on priority queue
task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data)
@@ -894,109 +916,18 @@ def test_post_pop(self):
self.assertEqual(boefje_task.boefje.id, task_pq.boefje.id)
# Assert: task should be in datastore, and queued
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
+ task_db = self.mock_ctx.datastores.task_store.get_task(task.id)
+ self.assertEqual(task_db.id, task.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
# Act
self.scheduler.pop_item_from_queue()
# Assert: task should be in datastore, and queued
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
+ task_db = self.mock_ctx.datastores.task_store.get_task(task.id)
+ self.assertEqual(task_db.id, task.id)
self.assertEqual(task_db.status, models.TaskStatus.DISPATCHED)
- def test_disable_scheduler(self):
- # Arrange: start scheduler
- self.scheduler.run()
-
- # Arrange: add tasks
- scan_profile = ScanProfileFactory(level=0)
- ooi = OOIFactory(scan_profile=scan_profile)
- boefje_task = models.BoefjeTask(
- boefje=BoefjeFactory(), input_ooi=ooi.primary_key, organization=self.organisation.id
- )
-
- # Mocks
- self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type])
-
- # Act
- task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task)
-
- item = functions.create_item(scheduler_id=self.organisation.id, priority=1, task=task)
- self.scheduler.push_item_to_queue(item)
-
- # Assert: task should be on priority queue
- pq_item = self.scheduler.queue.peek(0)
- self.assertEqual(1, self.scheduler.queue.qsize())
- self.assertEqual(pq_item.id, item.id)
-
- # Assert: task should be in datastore, and queued
- task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
- self.assertEqual(task_db.id, item.id)
- self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
-
- # Assert: listeners should be running
- self.assertGreater(len(self.scheduler.listeners), 0)
-
- # Assert: threads should be running
- self.assertGreater(len(self.scheduler.threads), 0)
-
- # Act
- self.scheduler.disable()
-
- # Listeners should be stopped
- self.assertEqual(0, len(self.scheduler.listeners))
-
- # Threads should be stopped
- self.assertEqual(0, len(self.scheduler.threads))
-
- # Queue should be empty
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- # All tasks on queue should be set to CANCELLED
- tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id)
- for task in tasks:
- self.assertEqual(task.status, models.TaskStatus.CANCELLED)
-
- # Scheduler should be disabled
- self.assertFalse(self.scheduler.is_enabled())
-
- self.scheduler.stop()
-
- def test_enable_scheduler(self):
- self.scheduler.run()
-
- # Assert: listeners should be running
- self.assertGreater(len(self.scheduler.listeners), 0)
-
- # Assert: threads should be running
- self.assertGreater(len(self.scheduler.threads), 0)
-
- # Disable scheduler first
- self.scheduler.disable()
-
- # Listeners should be stopped
- self.assertEqual(0, len(self.scheduler.listeners))
-
- # Threads should be stopped
- self.assertEqual(0, len(self.scheduler.threads))
-
- # Queue should be empty
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- # Re-enable scheduler
- self.scheduler.enable()
-
- # Threads should be started
- self.assertGreater(len(self.scheduler.threads), 0)
-
- # Scheduler should be enabled
- self.assertTrue(self.scheduler.is_enabled())
-
- # Stop the scheduler
- self.scheduler.stop()
-
def test_has_boefje_permission_to_run(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -1083,21 +1014,20 @@ def setUp(self):
def tearDown(self):
mock.patch.stopall()
- def test_push_tasks_for_scan_profile_mutations(self):
+ def test_process_mutations(self):
"""Scan level change"""
# Arrange
- scan_profile = ScanProfileFactory(level=0)
- ooi = OOIFactory(scan_profile=scan_profile)
+ ooi = OOIFactory(scan_profile=ScanProfileFactory(level=0))
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type])
mutation = models.ScanProfileMutation(
- operation="create", primary_key=ooi.primary_key, value=ooi
+ operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Task should be on priority queue
item = self.scheduler.queue.peek(0)
@@ -1111,43 +1041,45 @@ def test_push_tasks_for_scan_profile_mutations(self):
self.assertEqual(task_db.id, item.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
- def test_push_tasks_for_scan_profile_mutations_value_empty(self):
+ def test_process_mutations_value_empty(self):
"""When the value of a mutation is empty it should not push any tasks"""
# Arrange
- mutation = models.ScanProfileMutation(operation="create", primary_key="123", value=None).model_dump_json()
+ mutation = models.ScanProfileMutation(
+ operation="create", primary_key="123", value=None, client_id=self.organisation.id
+ ).model_dump_json()
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_scan_profile_mutations_no_boefjes_found(self):
+ def test_process_mutations_no_boefjes_found(self):
"""When no plugins are found for boefjes, it should return no boefje tasks"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
mutation = models.ScanProfileMutation(
- operation="create", primary_key=ooi.primary_key, value=ooi
+ operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = []
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_scan_profile_mutations_not_allowed_to_run(self):
+ def test_process_mutations_not_allowed_to_run(self):
"""When a boefje is not allowed to run, it should not be added to the queue"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type])
mutation = models.ScanProfileMutation(
- operation="create", primary_key=ooi.primary_key, value=ooi
+ operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id
).model_dump_json()
# Mocks
@@ -1155,19 +1087,19 @@ def test_push_tasks_for_scan_profile_mutations_not_allowed_to_run(self):
self.mock_has_boefje_permission_to_run.return_value = False
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_scan_profile_mutations_still_running(self):
+ def test_process_mutations_still_running(self):
"""When a boefje is still running, it should not be added to the queue"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type])
mutation = models.ScanProfileMutation(
- operation="create", primary_key=ooi.primary_key, value=ooi
+ operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id
).model_dump_json()
# Mocks
@@ -1175,30 +1107,31 @@ def test_push_tasks_for_scan_profile_mutations_still_running(self):
self.mock_has_boefje_task_started_running.return_value = True
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_scan_profile_mutations_item_on_queue(self):
+ def test_process_mutations_item_on_queue(self):
"""When a boefje is already on the queue, it should not be added to the queue"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type])
+
mutation1 = models.ScanProfileMutation(
- operation="create", primary_key=ooi.primary_key, value=ooi
+ operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id
).model_dump_json()
mutation2 = models.ScanProfileMutation(
- operation="create", primary_key=ooi.primary_key, value=ooi
+ operation="create", primary_key=ooi.primary_key, value=ooi, client_id=self.organisation.id
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation1)
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation2)
+ self.scheduler.process_mutations(mutation1)
+ self.scheduler.process_mutations(mutation2)
# Task should be on priority queue (only one)
task_pq = self.scheduler.queue.peek(0)
@@ -1211,7 +1144,7 @@ def test_push_tasks_for_scan_profile_mutations_item_on_queue(self):
task_db = self.mock_ctx.datastores.task_store.get_task(task_pq.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
- def test_push_tasks_for_scan_profile_mutations_delete(self):
+ def test_process_mutations_delete(self):
"""When an OOI is deleted it should not create tasks"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -1219,19 +1152,22 @@ def test_push_tasks_for_scan_profile_mutations_delete(self):
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type])
mutation1 = models.ScanProfileMutation(
- operation=models.MutationOperationType.DELETE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.DELETE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation1)
+ self.scheduler.process_mutations(mutation1)
# Assert
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_scan_profile_mutations_delete_on_queue(self):
+ def test_process_mutations_delete_on_queue(self):
"""When an OOI is deleted, and tasks associated with that ooi
should be removed from the queue
"""
@@ -1241,14 +1177,17 @@ def test_push_tasks_for_scan_profile_mutations_delete_on_queue(self):
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type])
mutation1 = models.ScanProfileMutation(
- operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.CREATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation1)
+ self.scheduler.process_mutations(mutation1)
# Assert: task should be on priority queue
item = self.scheduler.queue.peek(0)
@@ -1259,11 +1198,14 @@ def test_push_tasks_for_scan_profile_mutations_delete_on_queue(self):
# Arrange
mutation2 = models.ScanProfileMutation(
- operation=models.MutationOperationType.DELETE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.DELETE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation2)
+ self.scheduler.process_mutations(mutation2)
# Assert
self.assertIsNone(self.scheduler.queue.peek(0))
@@ -1274,7 +1216,7 @@ def test_push_tasks_for_scan_profile_mutations_delete_on_queue(self):
task_db = self.mock_ctx.datastores.task_store.get_task(item.id)
self.assertEqual(task_db.status, models.TaskStatus.CANCELLED)
- def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create(self):
+ def test_process_mutations_op_create_run_on_create(self):
"""When a boefje has the run_on contains the setting create,
and we receive a create mutation, it should:
@@ -1286,14 +1228,17 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create(self):
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.CREATE])
mutation = models.ScanProfileMutation(
- operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.CREATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Assert: task should be on priority queue
item = self.scheduler.queue.peek(0)
@@ -1311,7 +1256,7 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create(self):
schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule_by_hash(task_db.hash)
self.assertIsNone(schedule_db)
- def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create_update(self):
+ def test_process_mutations_op_create_run_on_create_update(self):
"""When a boefje has the run_on contains the setting create,update,
and we receive a create mutation, it should:
@@ -1323,14 +1268,17 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create_update(se
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.CREATE, RunOn.UPDATE])
mutation = models.ScanProfileMutation(
- operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.CREATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Assert: task should be on priority queue
item = self.scheduler.queue.peek(0)
@@ -1348,7 +1296,7 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_create_update(se
schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule_by_hash(task_db.hash)
self.assertIsNone(schedule_db)
- def test_push_tasks_for_scan_profile_mutations_op_create_run_on_update(self):
+ def test_process_mutations_op_create_run_on_update(self):
"""When a boefje has the run_on contains the setting update,
and we receive a create mutation, it should:
@@ -1360,19 +1308,22 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_update(self):
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.UPDATE])
mutation = models.ScanProfileMutation(
- operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.CREATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Assert: task should NOT be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_scan_profile_mutations_op_create_run_on_none(self):
+ def test_process_mutations_op_create_run_on_none(self):
"""When a boefje has the run_on is empty, and we receive a create
mutation, it should:
@@ -1384,7 +1335,10 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_none(self):
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=None)
mutation = models.ScanProfileMutation(
- operation=models.MutationOperationType.CREATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.CREATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
@@ -1392,7 +1346,7 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_none(self):
self.mock_set_cron.return_value = "0 0 * * *"
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Assert: task should be on priority queue
item = self.scheduler.queue.peek(0)
@@ -1410,7 +1364,7 @@ def test_push_tasks_for_scan_profile_mutations_op_create_run_on_none(self):
schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule(task_db.schedule_id)
self.assertIsNotNone(schedule_db)
- def test_push_tasks_for_scan_profile_mutations_op_update_run_on_create(self):
+ def test_process_mutations_op_update_run_on_create(self):
"""When a boefje has the run_on contains the setting create,
and we receive an update mutation, it should:
@@ -1422,19 +1376,22 @@ def test_push_tasks_for_scan_profile_mutations_op_update_run_on_create(self):
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.CREATE])
mutation = models.ScanProfileMutation(
- operation=models.MutationOperationType.UPDATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.UPDATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Assert: task should NOT be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_scan_profile_mutations_op_update_run_on_create_update(self):
+ def test_process_mutations_op_update_run_on_create_update(self):
"""When a boefje has the run_on contains the setting create,update,
and we receive an update mutation, it should:
@@ -1446,14 +1403,17 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_create_update(self):
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.CREATE, RunOn.UPDATE])
mutation = models.ScanProfileMutation(
- operation=models.MutationOperationType.UPDATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.UPDATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Assert: task should be on priority queue
item = self.scheduler.queue.peek(0)
@@ -1471,7 +1431,7 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_create_update(self):
schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule_by_hash(task_db.hash)
self.assertIsNone(schedule_db)
- def test_push_tasks_scan_profile_mutations_op_update_run_on_update(self):
+ def test_process_mutations_op_update_run_on_update(self):
"""When a boefje has the run_on contains the setting update,
and we receive an update mutation, it should:
@@ -1483,14 +1443,17 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_update(self):
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=[RunOn.UPDATE])
mutation = models.ScanProfileMutation(
- operation=models.MutationOperationType.UPDATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.UPDATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
self.mock_get_boefjes_for_ooi.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Assert: task should be on priority queue
item = self.scheduler.queue.peek(0)
@@ -1508,7 +1471,7 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_update(self):
schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule_by_hash(task_db.hash)
self.assertIsNone(schedule_db)
- def test_push_tasks_scan_profile_mutations_op_update_run_on_none(self):
+ def test_process_mutations_op_update_run_on_none(self):
"""When a boefje has the run_on is empty, and we receive an update
mutation, it should:
@@ -1520,7 +1483,10 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_none(self):
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type], run_on=None)
mutation = models.ScanProfileMutation(
- operation=models.MutationOperationType.UPDATE, primary_key=ooi.primary_key, value=ooi
+ operation=models.MutationOperationType.UPDATE,
+ primary_key=ooi.primary_key,
+ value=ooi,
+ client_id=self.organisation.id,
).model_dump_json()
# Mocks
@@ -1528,7 +1494,7 @@ def test_push_tasks_scan_profile_mutations_op_update_run_on_none(self):
self.mock_set_cron.return_value = "0 0 * * *"
# Act
- self.scheduler.push_tasks_for_scan_profile_mutations(mutation)
+ self.scheduler.process_mutations(mutation)
# Assert: task should be on priority queue
item = self.scheduler.queue.peek(0)
@@ -1571,21 +1537,26 @@ def setUp(self):
"scheduler.context.AppContext.services.octopoes.get_objects_by_object_types"
).start()
+ self.mock_get_organisations = mock.patch(
+ "scheduler.context.AppContext.services.katalogus.get_organisations"
+ ).start()
+
def tearDown(self):
mock.patch.stopall()
- def test_push_tasks_for_new_boefjes(self):
+ def test_process_new_boefjes(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type])
# Mocks
+ self.mock_get_organisations.return_value = [self.organisation]
self.mock_get_objects_by_object_types.return_value = [ooi]
self.mock_get_new_boefjes_by_org_id.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should be on priority queue
task_pq = self.scheduler.queue.peek(0)
@@ -1599,7 +1570,7 @@ def test_push_tasks_for_new_boefjes(self):
self.assertEqual(task_db.id, task_pq.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
- def test_push_tasks_for_new_boefjes_request_exception(self):
+ def test_process_new_boefjes_request_exception(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
@@ -1613,13 +1584,13 @@ def test_push_tasks_for_new_boefjes_request_exception(self):
self.mock_get_new_boefjes_by_org_id.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_new_boefjes()
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_new_boefjes_no_new_boefjes(self):
+ def test_process_new_boefjes_no_new_boefjes(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
@@ -1629,12 +1600,12 @@ def test_push_tasks_for_new_boefjes_no_new_boefjes(self):
self.mock_get_new_boefjes_by_org_id.return_value = []
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_new_boefjes_empty_consumes(self):
+ def test_process_new_boefjes_empty_consumes(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
@@ -1645,12 +1616,12 @@ def test_push_tasks_for_new_boefjes_empty_consumes(self):
self.mock_get_new_boefjes_by_org_id.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_new_boefjes_empty_consumes_no_ooi(self):
+ def test_process_new_boefjes_empty_consumes_no_ooi(self):
# Arrange
boefje = PluginFactory(scan_level=0, consumes=[])
@@ -1659,12 +1630,12 @@ def test_push_tasks_for_new_boefjes_empty_consumes_no_ooi(self):
self.mock_get_new_boefjes_by_org_id.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_new_boefjes_no_oois_found(self):
+ def test_process_new_boefjes_no_oois_found(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
@@ -1675,12 +1646,12 @@ def test_push_tasks_for_new_boefjes_no_oois_found(self):
self.mock_get_new_boefjes_by_org_id.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_new_boefjes_get_objects_request_exception(self):
+ def test_process_new_boefjes_get_objects_request_exception(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
@@ -1694,13 +1665,13 @@ def test_push_tasks_for_new_boefjes_get_objects_request_exception(self):
self.mock_get_new_boefjes_by_org_id.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_new_boefjes()
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_new_boefjes_not_allowed_to_run(self):
+ def test_process_new_boefjes_not_allowed_to_run(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
@@ -1712,12 +1683,12 @@ def test_push_tasks_for_new_boefjes_not_allowed_to_run(self):
self.mock_has_boefje_permission_to_run.return_value = False
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_new_boefjes_still_running(self):
+ def test_process_new_boefjes_still_running(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
@@ -1729,23 +1700,24 @@ def test_push_tasks_for_new_boefjes_still_running(self):
self.mock_has_boefje_task_started_running.return_value = True
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_new_boefjes_item_on_queue(self):
+ def test_process_new_boefjes_item_on_queue(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = PluginFactory(scan_level=0, consumes=[ooi.object_type])
# Mocks
+ self.mock_get_organisations.return_value = [self.organisation]
self.mock_get_objects_by_object_types.return_value = [ooi]
self.mock_get_new_boefjes_by_org_id.return_value = [boefje]
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Task should be on priority queue
task_pq = self.scheduler.queue.peek(0)
@@ -1760,7 +1732,7 @@ def test_push_tasks_for_new_boefjes_item_on_queue(self):
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
# Act
- self.scheduler.push_tasks_for_new_boefjes()
+ self.scheduler.process_new_boefjes()
# Should only be one task on queue
task_pq = models.BoefjeTask(**self.scheduler.queue.peek(0).data)
@@ -1792,10 +1764,10 @@ def setUp(self):
def tearDown(self):
mock.patch.stopall()
- def test_push_tasks_for_rescheduling_scheduler_id(self):
+ def test_process_rescheduling_scheduler_id(self):
pass
- def test_push_tasks_for_rescheduling(self):
+ def test_process_rescheduling(self):
"""When the deadline of schedules have passed, the resulting task should be added to the queue"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -1809,7 +1781,10 @@ def test_push_tasks_for_rescheduling(self):
)
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=boefje_task.hash,
+ data=boefje_task.model_dump(),
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -1820,7 +1795,7 @@ def test_push_tasks_for_rescheduling(self):
self.mock_get_plugin.return_value = plugin
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: new item should be on queue
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -1834,7 +1809,7 @@ def test_push_tasks_for_rescheduling(self):
self.assertIsNotNone(task_db)
self.assertEqual(peek.id, task_db.id)
- def test_push_tasks_for_rescheduling_no_ooi(self):
+ def test_process_rescheduling_no_ooi(self):
"""When the deadline has passed, and when the resulting tasks doesn't
have an OOI, it should create a task.
"""
@@ -1850,7 +1825,10 @@ def test_push_tasks_for_rescheduling_no_ooi(self):
)
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=boefje_task.hash,
+ data=boefje_task.model_dump(),
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -1861,7 +1839,7 @@ def test_push_tasks_for_rescheduling_no_ooi(self):
self.mock_get_plugin.return_value = plugin
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: new item should be on queue
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -1875,7 +1853,7 @@ def test_push_tasks_for_rescheduling_no_ooi(self):
self.assertIsNotNone(task_db)
self.assertEqual(peek.id, task_db.id)
- def test_push_tasks_for_rescheduling_ooi_not_found(self):
+ def test_process_rescheduling_ooi_not_found(self):
"""When ooi isn't found anymore for the schedule, we disable the schedule"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -1889,7 +1867,10 @@ def test_push_tasks_for_rescheduling_ooi_not_found(self):
)
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=boefje_task.hash,
+ data=boefje_task.model_dump(),
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -1900,7 +1881,7 @@ def test_push_tasks_for_rescheduling_ooi_not_found(self):
self.mock_get_plugin.return_value = plugin
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: item should not be on queue
self.assertEqual(0, self.scheduler.queue.qsize())
@@ -1909,7 +1890,7 @@ def test_push_tasks_for_rescheduling_ooi_not_found(self):
schedule_db_disabled = self.mock_ctx.datastores.schedule_store.get_schedule(schedule.id)
self.assertFalse(schedule_db_disabled.enabled)
- def test_push_tasks_for_rescheduling_boefje_not_found(self):
+ def test_process_rescheduling_boefje_not_found(self):
"""When boefje isn't found anymore for the schedule, we disable the schedule"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -1923,7 +1904,10 @@ def test_push_tasks_for_rescheduling_boefje_not_found(self):
)
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=boefje_task.hash,
+ data=boefje_task.model_dump(),
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -1934,7 +1918,7 @@ def test_push_tasks_for_rescheduling_boefje_not_found(self):
self.mock_get_plugin.return_value = None
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: item should not be on queue
self.assertEqual(0, self.scheduler.queue.qsize())
@@ -1943,7 +1927,7 @@ def test_push_tasks_for_rescheduling_boefje_not_found(self):
schedule_db_disabled = self.mock_ctx.datastores.schedule_store.get_schedule(schedule.id)
self.assertFalse(schedule_db_disabled.enabled)
- def test_push_tasks_for_rescheduling_boefje_disabled(self):
+ def test_process_rescheduling_boefje_disabled(self):
"""When boefje disabled for the schedule, we disable the schedule"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -1957,7 +1941,10 @@ def test_push_tasks_for_rescheduling_boefje_disabled(self):
)
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=boefje_task.hash,
+ data=boefje_task.model_dump(),
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -1968,7 +1955,7 @@ def test_push_tasks_for_rescheduling_boefje_disabled(self):
self.mock_get_plugin.return_value = plugin
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: item should not be on queue
self.assertEqual(0, self.scheduler.queue.qsize())
@@ -1977,7 +1964,7 @@ def test_push_tasks_for_rescheduling_boefje_disabled(self):
schedule_db_disabled = self.mock_ctx.datastores.schedule_store.get_schedule(schedule.id)
self.assertFalse(schedule_db_disabled.enabled)
- def test_push_tasks_for_rescheduling_boefje_doesnt_consume_ooi(self):
+ def test_process_rescheduling_boefje_doesnt_consume_ooi(self):
"""When boefje doesn't consume the ooi, we disable the schedule"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -1991,7 +1978,10 @@ def test_push_tasks_for_rescheduling_boefje_doesnt_consume_ooi(self):
)
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=boefje_task.hash,
+ data=boefje_task.model_dump(),
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -2002,7 +1992,7 @@ def test_push_tasks_for_rescheduling_boefje_doesnt_consume_ooi(self):
self.mock_get_plugin.return_value = plugin
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: item should not be on queue
self.assertEqual(0, self.scheduler.queue.qsize())
@@ -2011,7 +2001,7 @@ def test_push_tasks_for_rescheduling_boefje_doesnt_consume_ooi(self):
schedule_db_disabled = self.mock_ctx.datastores.schedule_store.get_schedule(schedule.id)
self.assertFalse(schedule_db_disabled.enabled)
- def test_push_tasks_for_rescheduling_boefje_cannot_scan_ooi(self):
+ def test_process_rescheduling_boefje_cannot_scan_ooi(self):
"""When boefje cannot scan the ooi, we disable the schedule"""
# Arrange
scan_profile = ScanProfileFactory(level=0)
@@ -2025,7 +2015,10 @@ def test_push_tasks_for_rescheduling_boefje_cannot_scan_ooi(self):
)
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=boefje_task.hash, data=boefje_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=boefje_task.hash,
+ data=boefje_task.model_dump(),
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -2036,7 +2029,7 @@ def test_push_tasks_for_rescheduling_boefje_cannot_scan_ooi(self):
self.mock_get_plugin.return_value = plugin
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: item should not be on queue
self.assertEqual(0, self.scheduler.queue.qsize())
diff --git a/mula/tests/integration/test_normalizer_scheduler.py b/mula/tests/integration/test_normalizer_scheduler.py
index 493b4bd3f54..ad3d72e9ea8 100644
--- a/mula/tests/integration/test_normalizer_scheduler.py
+++ b/mula/tests/integration/test_normalizer_scheduler.py
@@ -41,10 +41,10 @@ def setUp(self):
)
# Scheduler
+ self.scheduler = schedulers.NormalizerScheduler(self.mock_ctx)
+
+ # Organisation
self.organisation = OrganisationFactory()
- self.scheduler = schedulers.NormalizerScheduler(
- ctx=self.mock_ctx, scheduler_id=self.organisation.id, organisation=self.organisation
- )
def tearDown(self):
self.scheduler.stop()
@@ -64,57 +64,6 @@ def setUp(self):
"scheduler.context.AppContext.services.katalogus.get_plugin_by_id_and_org_id"
).start()
- def test_disable_scheduler(self):
- # Act
- self.scheduler.disable()
-
- # Listeners should be stopped
- self.assertEqual(0, len(self.scheduler.listeners))
-
- # Threads should be stopped
- self.assertEqual(0, len(self.scheduler.threads))
-
- # Queue should be empty
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- # All tasks on queue should be set to CANCELLED
- tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id)
- for task in tasks:
- self.assertEqual(task.status, models.TaskStatus.CANCELLED)
-
- # Scheduler should be disabled
- self.assertFalse(self.scheduler.is_enabled())
-
- def test_enable_scheduler(self):
- # Disable scheduler first
- self.scheduler.disable()
-
- # Listeners should be stopped
- self.assertEqual(0, len(self.scheduler.listeners))
-
- # Threads should be stopped
- self.assertEqual(0, len(self.scheduler.threads))
-
- # Queue should be empty
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- # All tasks on queue should be set to CANCELLED
- tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id)
- for task in tasks:
- self.assertEqual(task.status, models.TaskStatus.CANCELLED)
-
- # Re-enable scheduler
- self.scheduler.enable()
-
- # Threads should be started
- self.assertGreater(len(self.scheduler.threads), 0)
-
- # Scheduler should be enabled
- self.assertTrue(self.scheduler.is_enabled())
-
- # Stop the scheduler
- self.scheduler.stop()
-
def test_is_allowed_to_run(self):
# Arrange
plugin = PluginFactory(type="normalizer", consumes=["text/plain"])
@@ -151,7 +100,7 @@ def test_get_normalizers_for_mime_type(self, mock_get_normalizers_by_org_id_and_
mock_get_normalizers_by_org_id_and_type.return_value = [normalizer]
# Act
- result = self.scheduler.get_normalizers_for_mime_type("text/plain")
+ result = self.scheduler.get_normalizers_for_mime_type("text/plain", self.organisation.id)
# Assert
self.assertEqual(len(result), 1)
@@ -166,7 +115,7 @@ def test_get_normalizers_for_mime_type_request_exception(self, mock_get_normaliz
]
# Act
- result = self.scheduler.get_normalizers_for_mime_type("text/plain")
+ result = self.scheduler.get_normalizers_for_mime_type("text/plain", self.organisation.id)
# Assert
self.assertEqual(len(result), 0)
@@ -177,7 +126,7 @@ def test_get_normalizers_for_mime_type_response_is_none(self, mock_get_normalize
mock_get_normalizers_by_org_id_and_type.return_value = None
# Act
- result = self.scheduler.get_normalizers_for_mime_type("text/plain")
+ result = self.scheduler.get_normalizers_for_mime_type("text/plain", self.organisation.id)
# Assert
self.assertEqual(len(result), 0)
@@ -199,7 +148,11 @@ def setUp(self):
"scheduler.schedulers.NormalizerScheduler.get_normalizers_for_mime_type"
).start()
- def test_push_tasks_for_received_raw_file(self):
+ self.mock_get_plugin = mock.patch(
+ "scheduler.context.AppContext.services.katalogus.get_plugin_by_id_and_org_id"
+ ).start()
+
+ def test_process_raw_data(self):
# Arrange
ooi = OOIFactory(scan_profile=ScanProfileFactory(level=0))
boefje = BoefjeFactory()
@@ -208,7 +161,7 @@ def test_push_tasks_for_received_raw_file(self):
# Arrange: create the RawDataReceivedEvent
raw_data_event = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
@@ -217,7 +170,7 @@ def test_push_tasks_for_received_raw_file(self):
self.mock_get_normalizers_for_mime_type.return_value = [plugin]
# Act
- self.scheduler.push_tasks_for_received_raw_data(raw_data_event)
+ self.scheduler.process_raw_data(raw_data_event)
# Task should be on priority queue
task_pq = self.scheduler.queue.peek(0)
@@ -228,7 +181,7 @@ def test_push_tasks_for_received_raw_file(self):
self.assertEqual(task_db.id, task_pq.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
- def test_push_tasks_for_received_raw_file_no_normalizers_found(self):
+ def test_process_raw_data_no_normalizers_found(self):
# Arrange
ooi = OOIFactory(scan_profile=ScanProfileFactory(level=0))
boefje = BoefjeFactory()
@@ -236,7 +189,7 @@ def test_push_tasks_for_received_raw_file_no_normalizers_found(self):
raw_data_event = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
@@ -244,19 +197,21 @@ def test_push_tasks_for_received_raw_file_no_normalizers_found(self):
self.mock_get_normalizers_for_mime_type.return_value = []
# Act
- self.scheduler.push_tasks_for_received_raw_data(raw_data_event)
+ self.scheduler.process_raw_data(raw_data_event)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_received_raw_file_not_allowed_to_run(self):
+ def test_process_raw_data_not_allowed_to_run(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = BoefjeFactory()
boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id)
- task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task)
+ task = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id
+ )
self.mock_ctx.datastores.task_store.create_task(task)
boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key)
@@ -264,7 +219,7 @@ def test_push_tasks_for_received_raw_file_not_allowed_to_run(self):
# Mocks
raw_data_event = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
@@ -272,19 +227,21 @@ def test_push_tasks_for_received_raw_file_not_allowed_to_run(self):
self.mock_has_normalizer_permission_to_run.return_value = False
# Act
- self.scheduler.push_tasks_for_received_raw_data(raw_data_event)
+ self.scheduler.process_raw_data(raw_data_event)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_received_raw_file_still_running(self):
+ def test_process_raw_data_still_running(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = BoefjeFactory()
boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id)
- task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task)
+ task = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id
+ )
self.mock_ctx.datastores.task_store.create_task(task)
boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key)
@@ -292,7 +249,7 @@ def test_push_tasks_for_received_raw_file_still_running(self):
# Mocks
raw_data_event = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
@@ -301,19 +258,21 @@ def test_push_tasks_for_received_raw_file_still_running(self):
self.mock_has_normalizer_task_started_running.return_value = True
# Act
- self.scheduler.push_tasks_for_received_raw_data(raw_data_event)
+ self.scheduler.process_raw_data(raw_data_event)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_received_raw_file_still_running_exception(self):
+ def test_process_raw_data_still_running_exception(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = BoefjeFactory()
boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id)
- task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task)
+ task = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id
+ )
self.mock_ctx.datastores.task_store.create_task(task)
boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key)
@@ -321,7 +280,7 @@ def test_push_tasks_for_received_raw_file_still_running_exception(self):
# Mocks
raw_data_event = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
@@ -330,12 +289,12 @@ def test_push_tasks_for_received_raw_file_still_running_exception(self):
self.mock_has_normalizer_task_started_running.side_effect = Exception("Something went wrong")
# Act
- self.scheduler.push_tasks_for_received_raw_data(raw_data_event)
+ self.scheduler.process_raw_data(raw_data_event)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_received_raw_file_item_on_queue(self):
+ def test_process_raw_data_item_on_queue(self):
# Arrange
ooi = OOIFactory(scan_profile=ScanProfileFactory(level=0))
boefje = BoefjeFactory()
@@ -343,13 +302,13 @@ def test_push_tasks_for_received_raw_file_item_on_queue(self):
raw_data_event1 = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
raw_data_event2 = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
@@ -357,8 +316,8 @@ def test_push_tasks_for_received_raw_file_item_on_queue(self):
self.mock_get_normalizers_for_mime_type.return_value = [NormalizerFactory()]
# Act
- self.scheduler.push_tasks_for_received_raw_data(raw_data_event1)
- self.scheduler.push_tasks_for_received_raw_data(raw_data_event2)
+ self.scheduler.process_raw_data(raw_data_event1)
+ self.scheduler.process_raw_data(raw_data_event2)
# Task should be on priority queue (only one)
task_pq = self.scheduler.queue.peek(0)
@@ -369,31 +328,33 @@ def test_push_tasks_for_received_raw_file_item_on_queue(self):
self.assertEqual(task_db.id, task_pq.id)
self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
- def test_push_tasks_for_received_raw_file_error_mimetype(self):
+ def test_process_raw_data_error_mimetype(self):
# Arrange
scan_profile = ScanProfileFactory(level=0)
ooi = OOIFactory(scan_profile=scan_profile)
boefje = BoefjeFactory()
boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id)
- task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task)
+ task = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id
+ )
self.mock_ctx.datastores.task_store.create_task(task)
boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key)
raw_data_event = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "error/unknown"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
# Act
- self.scheduler.push_tasks_for_received_raw_data(raw_data_event)
+ self.scheduler.process_raw_data(raw_data_event)
# Task should not be on priority queue
self.assertEqual(0, self.scheduler.queue.qsize())
- def test_push_tasks_for_received_raw_file_queue_full(self):
+ def test_process_raw_data_queue_full(self):
events = []
for _ in range(0, 2):
# Arrange
@@ -401,14 +362,16 @@ def test_push_tasks_for_received_raw_file_queue_full(self):
ooi = OOIFactory(scan_profile=scan_profile)
boefje = BoefjeFactory()
boefje_task = models.BoefjeTask(boefje=boefje, input_ooi=ooi.primary_key, organization=self.organisation.id)
- task = functions.create_task(scheduler_id=self.scheduler.scheduler_id, data=boefje_task)
+ task = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, data=boefje_task, organisation=self.organisation.id
+ )
self.mock_ctx.datastores.task_store.create_task(task)
boefje_meta = BoefjeMetaFactory(boefje=boefje, input_ooi=ooi.primary_key)
raw_data_event = models.RawDataReceivedEvent(
raw_data=RawDataFactory(boefje_meta=boefje_meta, mime_types=[{"value": "text/plain"}]),
- organization=self.organisation.name,
+ organization=self.organisation.id,
created_at=datetime.datetime.now(),
).model_dump_json()
@@ -421,13 +384,13 @@ def test_push_tasks_for_received_raw_file_queue_full(self):
self.mock_get_normalizers_for_mime_type.return_value = [NormalizerFactory()]
# Act
- self.scheduler.push_tasks_for_received_raw_data(events[0])
+ self.scheduler.process_raw_data(events[0])
# Assert
self.assertEqual(1, self.scheduler.queue.qsize())
with capture_logs() as cm:
- self.scheduler.push_tasks_for_received_raw_data(events[1])
+ self.scheduler.process_raw_data(events[1])
- self.assertIn("Could not add task to queue, queue was full", cm[-1].get("event"))
+ self.assertIn("Queue is full", cm[-1].get("event"))
self.assertEqual(1, self.scheduler.queue.qsize())
diff --git a/mula/tests/integration/test_pq_store.py b/mula/tests/integration/test_pq_store.py
index 0ace0867758..8fd9b6e6d91 100644
--- a/mula/tests/integration/test_pq_store.py
+++ b/mula/tests/integration/test_pq_store.py
@@ -38,7 +38,7 @@ def tearDown(self):
def test_push(self):
# Arrange
- item = functions.create_item(scheduler_id=uuid.uuid4().hex, priority=1)
+ item = functions.create_task(scheduler_id=uuid.uuid4().hex, organisation=self.organisation.id, priority=1)
item.status = models.TaskStatus.QUEUED
created_item = self.mock_ctx.datastores.pq_store.push(item)
@@ -50,7 +50,7 @@ def test_push(self):
self.assertEqual(item_db.id, created_item.id)
def test_push_status_not_queued(self):
- item = functions.create_item(scheduler_id=uuid.uuid4().hex, priority=1)
+ item = functions.create_task(scheduler_id=uuid.uuid4().hex, organisation=self.organisation.id, priority=1)
item.status = models.TaskStatus.PENDING
created_item = self.mock_ctx.datastores.pq_store.push(item)
@@ -62,24 +62,26 @@ def test_push_status_not_queued(self):
def test_pop(self):
# Arrange
- item = functions.create_item(scheduler_id=uuid.uuid4().hex, priority=1)
+ item = functions.create_task(scheduler_id=uuid.uuid4().hex, organisation=self.organisation.id, priority=1)
item.status = models.TaskStatus.QUEUED
created_item = self.mock_ctx.datastores.pq_store.push(item)
- popped_item = self.mock_ctx.datastores.pq_store.pop(item.scheduler_id)
+ popped_items, count = self.mock_ctx.datastores.pq_store.pop(item.scheduler_id)
# Assert
- self.assertIsNotNone(popped_item)
- self.assertEqual(popped_item.id, created_item.id)
+ self.assertIsNotNone(popped_items)
+ self.assertEqual(count, 1)
+ self.assertEqual(popped_items[0].id, created_item.id)
def test_pop_status_not_queued(self):
# Arrange
- item = functions.create_item(scheduler_id=uuid.uuid4().hex, priority=1)
+ item = functions.create_task(scheduler_id=uuid.uuid4().hex, organisation=self.organisation.id, priority=1)
item.status = models.TaskStatus.PENDING
created_item = self.mock_ctx.datastores.pq_store.push(item)
- popped_item = self.mock_ctx.datastores.pq_store.pop(item.scheduler_id)
+ popped_items, count = self.mock_ctx.datastores.pq_store.pop(item.scheduler_id)
# Assert
self.assertIsNotNone(created_item)
- self.assertIsNone(popped_item)
+ self.assertEqual(count, 0)
+ self.assertEqual(len(popped_items), 0)
diff --git a/mula/tests/integration/test_report_scheduler.py b/mula/tests/integration/test_report_scheduler.py
index ee35f7ab25a..269d0dd9759 100644
--- a/mula/tests/integration/test_report_scheduler.py
+++ b/mula/tests/integration/test_report_scheduler.py
@@ -29,10 +29,10 @@ def setUp(self):
)
# Scheduler
+ self.scheduler = schedulers.ReportScheduler(ctx=self.mock_ctx)
+
+ # Organisation
self.organisation = OrganisationFactory()
- self.scheduler = schedulers.ReportScheduler(
- ctx=self.mock_ctx, scheduler_id=self.organisation.id, organisation=self.organisation
- )
def tearDown(self):
self.scheduler.stop()
@@ -51,48 +51,16 @@ def setUp(self):
def tearDown(self):
mock.patch.stopall()
- def test_enable_scheduler(self):
- # Disable scheduler first
- self.scheduler.disable()
-
- # Threads should be stopped
- self.assertEqual(0, len(self.scheduler.threads))
-
- # Queue should be empty
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- # Re-enable scheduler
- self.scheduler.enable()
-
- # Threads should be started
- self.assertGreater(len(self.scheduler.threads), 0)
-
- # Scheduler should be enabled
- self.assertTrue(self.scheduler.is_enabled())
-
- # Stop the scheduler
- self.scheduler.stop()
-
- def test_disable_scheduler(self):
- # Disable scheduler
- self.scheduler.disable()
-
- # Threads should be stopped
- self.assertEqual(0, len(self.scheduler.threads))
-
- # Queue should be empty
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- # Scheduler should be disabled
- self.assertFalse(self.scheduler.is_enabled())
-
- def test_push_tasks_for_rescheduling(self):
+ def test_process_rescheduling(self):
"""When the deadline of schedules have passed, the resulting task should be added to the queue"""
# Arrange
report_task = models.ReportTask(organisation_id=self.organisation.id, report_recipe_id="123")
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=report_task.hash, data=report_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ hash=report_task.hash,
+ data=report_task.model_dump(),
+ organisation=self.organisation.id,
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -101,7 +69,7 @@ def test_push_tasks_for_rescheduling(self):
self.mock_get_schedules.return_value = ([schedule_db], 1)
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: new item should be on queue
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -115,13 +83,16 @@ def test_push_tasks_for_rescheduling(self):
self.assertIsNotNone(task_db)
self.assertEqual(peek.id, task_db.id)
- def test_push_tasks_for_rescheduling_item_on_queue(self):
+ def test_process_rescheduling_item_on_queue(self):
"""When the deadline of schedules have passed, the resulting task should be added to the queue"""
# Arrange
report_task = models.ReportTask(organisation_id=self.organisation.id, report_recipe_id="123")
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, hash=report_task.hash, data=report_task.model_dump()
+ scheduler_id=self.scheduler.scheduler_id,
+ hash=report_task.hash,
+ data=report_task.model_dump(),
+ organisation=self.organisation.id,
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -130,7 +101,7 @@ def test_push_tasks_for_rescheduling_item_on_queue(self):
self.mock_get_schedules.return_value = ([schedule_db], 1)
# Act
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Assert: new item should be on queue
self.assertEqual(1, self.scheduler.queue.qsize())
@@ -145,7 +116,7 @@ def test_push_tasks_for_rescheduling_item_on_queue(self):
self.assertEqual(peek.id, task_db.id)
# Act: push again
- self.scheduler.push_tasks_for_rescheduling()
+ self.scheduler.process_rescheduling()
# Should only be one task on queue
self.assertEqual(1, self.scheduler.queue.qsize())
diff --git a/mula/tests/integration/test_schedule_store.py b/mula/tests/integration/test_schedule_store.py
index df957b82171..e6bf2e894a1 100644
--- a/mula/tests/integration/test_schedule_store.py
+++ b/mula/tests/integration/test_schedule_store.py
@@ -6,6 +6,7 @@
from scheduler import config, models, storage
from scheduler.storage import filters, stores
+from tests.factories.organisation import OrganisationFactory
from tests.utils import functions
@@ -28,27 +29,40 @@ def setUp(self):
}
)
+ # Organisation
+ self.organisation = OrganisationFactory()
+
def tearDown(self):
models.Base.metadata.drop_all(self.dbconn.engine)
self.dbconn.engine.dispose()
def test_create_schedule_calculate_deadline_at(self):
"""When a schedule is created, the deadline_at should be calculated."""
- schedule = models.Schedule(scheduler_id="test_scheduler_id", schedule="* * * * *", data={})
+ schedule = models.Schedule(
+ scheduler_id="test_scheduler_id", organisation=self.organisation.id, schedule="* * * * *", data={}
+ )
self.assertIsNotNone(schedule.deadline_at)
def test_create_schedule_explicit_deadline_at(self):
"""When a schedule is created, the deadline_at should be set if it is provided."""
now = datetime.now(timezone.utc)
- schedule = models.Schedule(scheduler_id="test_scheduler_id", data={}, deadline_at=now)
+ schedule = models.Schedule(
+ scheduler_id="test_scheduler_id", organisation=self.organisation.id, data={}, deadline_at=now
+ )
self.assertEqual(schedule.deadline_at, now)
def test_create_schedule_deadline_at_takes_precedence(self):
"""When a schedule is created, the deadline_at should be set if it is provided."""
now = datetime.now(timezone.utc)
- schedule = models.Schedule(scheduler_id="test_scheduler_id", schedule="* * * * *", data={}, deadline_at=now)
+ schedule = models.Schedule(
+ scheduler_id="test_scheduler_id",
+ schedule="* * * * *",
+ organisation=self.organisation.id,
+ data={},
+ deadline_at=now,
+ )
self.assertEqual(schedule.deadline_at, now)
@@ -56,8 +70,10 @@ def test_create_schedule(self):
# Arrange
scheduler_id = "test_scheduler_id"
- task = functions.create_item(scheduler_id, 1)
- schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
# Act
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -69,14 +85,18 @@ def test_get_schedules(self):
# Arrange
scheduler_one = "test_scheduler_one"
for i in range(5):
- task = functions.create_item(scheduler_one, 1)
- schedule = models.Schedule(scheduler_id=scheduler_one, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_one, organisation=self.organisation.id, priority=1)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_one, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
scheduler_two = "test_scheduler_two"
for i in range(5):
- task = functions.create_item(scheduler_two, 1)
- schedule = models.Schedule(scheduler_id=scheduler_two, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_two, organisation=self.organisation.id, priority=1)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_two, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
schedules_scheduler_one, schedules_scheduler_one_count = self.mock_ctx.datastores.schedule_store.get_schedules(
@@ -99,8 +119,10 @@ def test_get_schedules(self):
def test_get_schedule(self):
# Arrange
scheduler_id = "test_scheduler_id"
- task = functions.create_item(scheduler_id, 1)
- schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
# Act
@@ -113,7 +135,9 @@ def test_get_schedule_by_hash(self):
# Arrange
scheduler_id = "test_scheduler_id"
data = functions.create_test_model()
- schedule = models.Schedule(scheduler_id=scheduler_id, hash=data.hash, data=data.model_dump())
+ schedule = models.Schedule(
+ scheduler_id=scheduler_id, organisation=self.organisation.id, hash=data.hash, data=data.model_dump()
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
# Act
@@ -127,8 +151,10 @@ def test_get_schedule_by_hash(self):
def test_update_schedule(self):
# Arrange
scheduler_id = "test_scheduler_id"
- task = functions.create_item(scheduler_id, 1)
- schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
# Assert
@@ -145,8 +171,10 @@ def test_update_schedule(self):
def test_delete_schedule(self):
# Arrange
scheduler_id = "test_scheduler_id"
- task = functions.create_item(scheduler_id, 1)
- schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
# Act
@@ -160,8 +188,10 @@ def test_delete_schedule_ondelete(self):
"""When a schedule is deleted, its tasks should NOT be deleted."""
# Arrange
scheduler_id = "test_scheduler_id"
- task = functions.create_item(scheduler_id, 1)
- schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id, priority=1)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
task.schedule_id = schedule_db.id
@@ -178,11 +208,16 @@ def test_delete_schedule_ondelete(self):
self.assertIsNotNone(is_task_deleted)
self.assertIsNone(is_task_deleted.schedule_id)
+ # NOTE: skip this test until the task relationship on the model is re-enabled;
+ # it is disabled for now.
+ @unittest.skip("Disabled until task relationship is re-enabled")
def test_relationship_schedule_tasks(self):
# Arrange
scheduler_id = "test_scheduler_id"
- task = functions.create_task(scheduler_id)
- schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
task.schedule_id = schedule_db.id
@@ -198,8 +233,10 @@ def test_relationship_schedule_tasks(self):
def test_get_tasks_filter_related(self):
# Arrange
scheduler_id = "test_scheduler_id"
- task = functions.create_task(scheduler_id)
- schedule = models.Schedule(scheduler_id=scheduler_id, hash=task.hash, data=task.model_dump())
+ task = functions.create_task(scheduler_id=scheduler_id, organisation=self.organisation.id)
+ schedule = models.Schedule(
+ scheduler_id=scheduler_id, organisation=self.organisation.id, hash=task.hash, data=task.model_dump()
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
task.schedule_id = schedule_db.id
diff --git a/mula/tests/integration/test_scheduler.py b/mula/tests/integration/test_scheduler.py
index aecda637a09..bec6e2346bc 100644
--- a/mula/tests/integration/test_scheduler.py
+++ b/mula/tests/integration/test_scheduler.py
@@ -5,10 +5,10 @@
from unittest import mock
from scheduler import config, models, storage
-from scheduler.schedulers.queue import InvalidItemError, NotAllowedError, QueueEmptyError, QueueFullError
+from scheduler.schedulers.queue import InvalidItemError, QueueEmptyError, QueueFullError
from scheduler.storage import stores
-from structlog.testing import capture_logs
+from tests.factories import OrganisationFactory
from tests.mocks import item as mock_item
from tests.mocks import queue as mock_queue
from tests.mocks import scheduler as mock_scheduler
@@ -49,6 +49,9 @@ def setUp(self):
ctx=self.mock_ctx, scheduler_id=identifier, queue=queue, create_schedule=True
)
+ # Organisation
+ self.organisation = OrganisationFactory()
+
def tearDown(self):
self.scheduler.stop()
models.Base.metadata.drop_all(self.dbconn.engine)
@@ -58,7 +61,9 @@ def test_push_items_to_queue(self):
# Arrange
items = []
for i in range(10):
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=i + 1)
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=i + 1
+ )
items.append(item)
# Act
@@ -84,7 +89,9 @@ def test_push_items_to_queue(self):
def test_push_item_to_queue(self):
# Arrange
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
# Act
self.scheduler.push_item_to_queue(item)
@@ -108,7 +115,9 @@ def test_push_item_to_queue_create_schedule_false(self):
# Arrange
self.scheduler.create_schedule = False
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
# Act
self.scheduler.push_item_to_queue(item)
@@ -130,7 +139,9 @@ def test_push_item_to_queue_create_schedule_false(self):
def test_push_item_to_queue_full(self):
# Arrange
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
self.scheduler.queue.maxsize = 1
@@ -147,7 +158,9 @@ def test_push_item_to_queue_full(self):
def test_push_item_to_queue_invalid(self):
# Arrange
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
item.data = {"invalid": "data"}
# Assert
@@ -156,16 +169,24 @@ def test_push_item_to_queue_invalid(self):
def test_pop_item_from_queue(self):
# Arrange
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
self.scheduler.push_item_to_queue(item)
# Act
- popped_item = self.scheduler.pop_item_from_queue()
+ popped_items, count = self.scheduler.pop_item_from_queue()
# Assert
self.assertEqual(0, self.scheduler.queue.qsize())
- self.assertEqual(item.id, popped_item.id)
+ self.assertEqual(1, count)
+ self.assertEqual(1, len(popped_items))
+ self.assertEqual(popped_items[0].id, item.id)
+
+ # Status should be dispatched
+ task_db = self.mock_ctx.datastores.task_store.get_task(str(item.id))
+ self.assertEqual(task_db.status, models.TaskStatus.DISPATCHED)
def test_pop_item_from_queue_empty(self):
self.assertEqual(0, self.scheduler.queue.qsize())
@@ -175,7 +196,9 @@ def test_pop_item_from_queue_empty(self):
def test_post_push(self):
"""When a task is added to the queue, it should be added to the database"""
# Arrange
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
# Act
self.scheduler.push_item_to_queue(item)
@@ -207,7 +230,9 @@ def test_post_push(self):
def test_post_push_schedule_enabled(self):
# Arrange
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
# Act
self.scheduler.push_item_to_queue(item)
@@ -237,35 +262,11 @@ def test_post_push_schedule_enabled(self):
# grace period
self.assertGreater(schedule_db.deadline_at, datetime.now(timezone.utc))
- def test_post_push_schedule_disabled(self):
- # Arrange
- first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
-
- # Act
- first_item_db = self.scheduler.push_item_to_queue(first_item)
-
- initial_schedule_db = self.mock_ctx.datastores.schedule_store.get_schedule(first_item_db.schedule_id)
-
- # Pop
- self.scheduler.pop_item_from_queue()
-
- # Disable this schedule
- initial_schedule_db.enabled = False
- self.mock_ctx.datastores.schedule_store.update_schedule(initial_schedule_db)
-
- # Act
- second_item = first_item_db.model_copy()
- second_item.id = uuid.uuid4()
- second_item_db = self.scheduler.push_item_to_queue(second_item)
-
- with capture_logs() as cm:
- self.scheduler.post_push(second_item_db)
-
- self.assertIn("is disabled, not updating deadline", cm[-1].get("event"))
-
def test_post_push_schedule_update_schedule(self):
# Arrange
- first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ first_item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
# Act
first_item_db = self.scheduler.push_item_to_queue(first_item)
@@ -294,10 +295,16 @@ def test_post_push_schedule_update_schedule(self):
def test_post_push_schedule_is_not_none(self):
"""When a schedule is provided, it should be used to set the deadline"""
# Arrange
- first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ first_item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
schedule = models.Schedule(
- scheduler_id=self.scheduler.scheduler_id, schedule="0 0 * * *", hash=first_item.hash, data=first_item.data
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ schedule="0 0 * * *",
+ hash=first_item.hash,
+ data=first_item.data,
)
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
@@ -317,9 +324,16 @@ def test_post_push_schedule_is_not_none(self):
def test_post_push_schedule_is_none(self):
"""When a schedule is not provided, the deadline should be set to None"""
# Arrange
- first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ first_item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
- schedule = models.Schedule(scheduler_id=self.scheduler.scheduler_id, hash=first_item.hash, data=first_item.data)
+ schedule = models.Schedule(
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=first_item.hash,
+ data=first_item.data,
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
first_item.schedule_id = schedule_db.id
@@ -336,9 +350,16 @@ def test_post_push_schedule_auto_calculate_deadline(self):
# Arrange
self.scheduler.auto_calculate_deadline = True
- first_item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
+ first_item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
+ )
- schedule = models.Schedule(scheduler_id=self.scheduler.scheduler_id, hash=first_item.hash, data=first_item.data)
+ schedule = models.Schedule(
+ scheduler_id=self.scheduler.scheduler_id,
+ organisation=self.organisation.id,
+ hash=first_item.hash,
+ data=first_item.data,
+ )
schedule_db = self.mock_ctx.datastores.schedule_store.create_schedule(schedule)
first_item.schedule_id = schedule_db.id
@@ -354,10 +375,8 @@ def test_post_push_schedule_auto_calculate_deadline(self):
def test_post_pop(self):
"""When a task is popped from the queue, it should be removed from the database"""
# Arrange
- item = functions.create_item(
- scheduler_id=self.scheduler.scheduler_id,
- priority=1,
- task=functions.create_task(self.scheduler.scheduler_id),
+ item = functions.create_task(
+ scheduler_id=self.scheduler.scheduler_id, organisation=self.organisation.id, priority=1
)
# Act
@@ -381,101 +400,3 @@ def test_post_pop(self):
task_db = self.mock_ctx.datastores.task_store.get_task(str(item.id))
self.assertEqual(task_db.id, item.id)
self.assertEqual(task_db.status, models.TaskStatus.DISPATCHED)
-
- def test_disable_scheduler(self):
- # Arrange: start scheduler
- self.scheduler.run()
-
- # Arrange: add tasks
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
- self.scheduler.push_item_to_queue(item)
-
- # Assert: task should be on priority queue
- pq_item = self.scheduler.queue.peek(0)
- self.assertEqual(1, self.scheduler.queue.qsize())
- self.assertEqual(pq_item.id, item.id)
-
- # Assert: task should be in datastore, and queued
- task_db = self.mock_ctx.datastores.task_store.get_task(str(item.id))
- self.assertEqual(task_db.id, item.id)
- self.assertEqual(task_db.status, models.TaskStatus.QUEUED)
-
- # Assert: listeners should be running
- self.assertGreater(len(self.scheduler.listeners), 0)
-
- # Assert: threads should be running
- self.assertGreater(len(self.scheduler.threads), 0)
-
- # Act
- self.scheduler.disable()
-
- # Listeners should be stopped
- self.assertEqual(0, len(self.scheduler.listeners))
-
- # Threads should be stopped
- self.assertEqual(0, len(self.scheduler.threads))
-
- # Queue should be empty
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- # All tasks on queue should be set to CANCELLED
- tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id)
- for task in tasks:
- self.assertEqual(task.status, models.TaskStatus.CANCELLED)
-
- # Scheduler should be disabled
- self.assertFalse(self.scheduler.is_enabled())
-
- with self.assertRaises(NotAllowedError):
- self.scheduler.push_item_to_queue(item)
-
- def test_enable_scheduler(self):
- # Arrange: start scheduler
- self.scheduler.run()
-
- # Arrange: add tasks
- item = functions.create_item(scheduler_id=self.scheduler.scheduler_id, priority=1)
- self.scheduler.push_item_to_queue(item)
-
- # Assert: listeners should be running
- self.assertGreater(len(self.scheduler.listeners), 0)
-
- # Assert: threads should be running
- self.assertGreater(len(self.scheduler.threads), 0)
-
- # Disable scheduler first
- self.scheduler.disable()
-
- # Listeners should be stopped
- self.assertEqual(0, len(self.scheduler.listeners))
-
- # Threads should be stopped
- self.assertEqual(0, len(self.scheduler.threads))
-
- # Queue should be empty
- self.assertEqual(0, self.scheduler.queue.qsize())
-
- # All tasks on queue should be set to CANCELLED
- tasks, _ = self.mock_ctx.datastores.task_store.get_tasks(self.scheduler.scheduler_id)
- for task in tasks:
- self.assertEqual(task.status, models.TaskStatus.CANCELLED)
-
- # Re-enable scheduler
- self.scheduler.enable()
-
- # Threads should be started
- self.assertGreater(len(self.scheduler.threads), 0)
-
- # Scheduler should be enabled
- self.assertTrue(self.scheduler.is_enabled())
-
- # Push item to the queue
- self.scheduler.push_item_to_queue(item)
-
- # Assert: task should be on priority queue
- pq_item = self.scheduler.queue.peek(0)
- self.assertEqual(1, self.scheduler.queue.qsize())
- self.assertEqual(pq_item.id, item.id)
-
- # Stop the scheduler
- self.scheduler.stop()
diff --git a/mula/tests/integration/test_task_store.py b/mula/tests/integration/test_task_store.py
index c672fc78557..30d7cc81857 100644
--- a/mula/tests/integration/test_task_store.py
+++ b/mula/tests/integration/test_task_store.py
@@ -37,14 +37,14 @@ def tearDown(self):
self.dbconn.engine.dispose()
def test_create_task(self):
- task = functions.create_task(scheduler_id=self.organisation.id)
+ task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id)
created_task = self.mock_ctx.datastores.task_store.create_task(task)
self.assertIsNotNone(created_task)
def test_get_tasks(self):
# Arrange
for i in range(5):
- task = functions.create_task(scheduler_id=self.organisation.id)
+ task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id)
self.mock_ctx.datastores.task_store.create_task(task)
# Act
@@ -57,7 +57,7 @@ def test_get_tasks(self):
def get_tasks_by_type(self):
# Arrange
for i in range(5):
- task = functions.create_task(scheduler_id=self.organisation.id)
+ task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id)
self.mock_ctx.datastores.task_store.create_task(task)
# Act
@@ -74,7 +74,9 @@ def test_get_tasks_by_hash(self):
hashes = []
data = functions.create_test_model()
for i in range(5):
- task = functions.create_task(scheduler_id=self.organisation.id, data=data)
+ task = functions.create_task(
+ scheduler_id=self.organisation.id, organisation=self.organisation.id, data=data
+ )
self.mock_ctx.datastores.task_store.create_task(task)
hashes.append(task.hash)
@@ -89,7 +91,7 @@ def test_get_tasks_by_hash(self):
def test_get_task(self):
# Arrange
- task = functions.create_task(scheduler_id=self.organisation.id)
+ task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id)
created_task = self.mock_ctx.datastores.task_store.create_task(task)
# Act
@@ -103,7 +105,9 @@ def test_get_latest_task_by_hash(self):
hashes = []
data = functions.create_test_model()
for i in range(5):
- task = functions.create_task(scheduler_id=self.organisation.id, data=data)
+ task = functions.create_task(
+ scheduler_id=self.organisation.id, organisation=self.organisation.id, data=data
+ )
self.mock_ctx.datastores.task_store.create_task(task)
hashes.append(task.hash)
@@ -118,7 +122,7 @@ def test_get_latest_task_by_hash(self):
def test_update_task(self):
# Arrange
- task = functions.create_task(scheduler_id=self.organisation.id)
+ task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id)
created_task = self.mock_ctx.datastores.task_store.create_task(task)
# Act
@@ -131,7 +135,7 @@ def test_update_task(self):
def test_cancel_task(self):
# Arrange
- task = functions.create_task(scheduler_id=self.organisation.id)
+ task = functions.create_task(scheduler_id=self.organisation.id, organisation=self.organisation.id)
created_task = self.mock_ctx.datastores.task_store.create_task(task)
# Act
@@ -163,6 +167,7 @@ def test_get_status_counts(self):
data = functions.create_test_model()
task = models.Task(
scheduler_id=self.organisation.id,
+ organisation=self.organisation.id,
priority=1,
status=status,
type=functions.TestModel.type,
@@ -203,6 +208,7 @@ def test_get_status_count_per_hour(self):
data = functions.create_test_model()
task = models.Task(
scheduler_id=self.organisation.id,
+ organisation=self.organisation.id,
priority=1,
status=status,
type=functions.TestModel.type,
diff --git a/mula/tests/unit/test_queue.py b/mula/tests/unit/test_queue.py
index 2861d442257..c55e6a7947b 100644
--- a/mula/tests/unit/test_queue.py
+++ b/mula/tests/unit/test_queue.py
@@ -43,7 +43,7 @@ def _check_queue_empty(self):
def test_push(self):
"""When adding an item to the priority queue, the item should be
added"""
- item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(item)
item_db = self.pq_store.get(self.pq.pq_id, item.id)
@@ -57,7 +57,7 @@ def test_push_item_not_found_in_db(self, mock_push):
"""When adding an item to the priority queue, but the item is not
found in the database, the item shouldn't be added.
"""
- item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
mock_push.return_value = None
@@ -84,7 +84,7 @@ def test_push_invalid_item(self):
"""When pushing an item that can not be validated, the item shouldn't
be pushed.
"""
- item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
item.data = {"invalid": "data"}
with self.assertRaises(InvalidItemError):
@@ -100,7 +100,7 @@ def test_push_replace_not_allowed(self):
self.pq.allow_replace = False
# Add an item to the queue
- initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(initial_item)
self.assertEqual(1, self.pq.qsize())
@@ -119,7 +119,7 @@ def test_push_replace_allowed(self):
self.pq.allow_replace = True
# Add an item to the queue
- initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(initial_item)
self.assertEqual(1, self.pq.qsize())
@@ -139,7 +139,7 @@ def test_push_updates_not_allowed(self):
self.pq.allow_updates = False
# Add an item to the queue
- initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(initial_item)
self.assertEqual(1, self.pq.qsize())
@@ -164,7 +164,7 @@ def test_push_updates_allowed(self):
self.pq.allow_updates = True
# Add an item to the queue
- initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(initial_item)
self.assertEqual(1, self.pq.qsize())
@@ -189,7 +189,7 @@ def test_push_priority_updates_not_allowed(self):
self.pq.allow_priority_updates = False
# Add an item to the queue
- initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(initial_item)
self.assertEqual(1, self.pq.qsize())
@@ -215,7 +215,7 @@ def test_push_priority_updates_allowed(self):
self.pq.allow_priority_updates = True
# Add an item to the queue
- initial_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ initial_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(initial_item)
self.assertEqual(1, self.pq.qsize())
@@ -237,7 +237,7 @@ def test_remove_item(self):
removed, and the item should be removed from the entry_finder.
"""
# Add an item to the queue
- item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(item)
self.assertEqual(1, self.pq.qsize())
@@ -255,11 +255,11 @@ def test_push_maxsize_not_allowed(self):
self.pq.maxsize = 1
# Add an item to the queue
- first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(first_item)
# Add another item to the queue
- second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=2)
+ second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=2)
with self.assertRaises(_queue.Full):
self.pq.push(second_item)
@@ -280,11 +280,11 @@ def test_push_maxsize_allowed(self):
self.pq.maxsize = 0
# Add an item to the queue
- first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(first_item)
# Add another item to the queue
- second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=2)
+ second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=2)
self.pq.push(second_item)
# The queue should now have 2 items
@@ -310,11 +310,11 @@ def test_push_maxsize_allowed_high_priority(self):
self.pq.maxsize = 1
# Add an item to the queue
- first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(first_item)
# Add another item to the queue
- second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(second_item)
# The queue should now have 2 items
@@ -340,11 +340,11 @@ def test_push_maxsize_not_allowed_low_priority(self):
self.pq.maxsize = 1
# Add an item to the queue
- first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(first_item)
# Add another item to the queue
- second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=2)
+ second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=2)
with self.assertRaises(_queue.Full):
self.pq.push(second_item)
@@ -362,15 +362,15 @@ def test_pop(self):
it from the queue.
"""
# Add an item to the queue
- first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(first_item)
# The queue should now have 1 item
self.assertEqual(1, self.pq.qsize())
# Pop the item
- popped_item = self.pq.pop()
- self.assertEqual(first_item.data, popped_item.data)
+ popped_items, _ = self.pq.pop()
+ self.assertEqual(first_item.data, popped_items[0].data)
# The queue should now be empty
self.assertEqual(0, self.pq.qsize())
@@ -380,8 +380,8 @@ def test_pop_with_lock(self):
thread to pop an item.
"""
# Arrange
- first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
- second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
+ second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(first_item)
self.pq.push(second_item)
@@ -392,21 +392,21 @@ def test_pop_with_lock(self):
# it will set a timeout so we can test the lock.
def first_pop(event):
with self.pq.lock:
- item = self.pq_store.pop(self.pq.pq_id, None)
+ items, _ = self.pq_store.pop(self.pq.pq_id, None)
event.set()
time.sleep(5)
- self.pq_store.remove(self.pq.pq_id, item.id)
+ self.pq_store.remove(self.pq.pq_id, items[0].id)
- queue.put(item)
+ queue.put(items[0])
def second_pop(event):
# Wait for thread 1 to set the event before continuing
event.wait()
- item = self.pq.pop()
- queue.put(item)
+ items, _ = self.pq.pop()
+ queue.put(items[0])
# Act; with thread 1 we will create a lock on the queue, and then with
# thread 2 we try to pop an item while the lock is active.
@@ -430,8 +430,8 @@ def test_pop_without_lock(self):
NOTE: Here we test the procedure when a lock isn't set.
"""
# Arrange
- first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
- second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
+ second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(first_item)
self.pq.push(second_item)
@@ -441,21 +441,21 @@ def test_pop_without_lock(self):
# This function is similar to the pop() function of the queue, but
# it will set a timeout. We have omitted the lock here.
def first_pop(event):
- item = self.pq_store.pop(self.pq.pq_id, None)
+ items, _ = self.pq_store.pop(self.pq.pq_id, None)
event.set()
time.sleep(5)
- self.pq_store.remove(self.pq.pq_id, item.id)
+ self.pq_store.remove(self.pq.pq_id, items[0].id)
- queue.put(item)
+ queue.put(items[0])
def second_pop(event):
# Wait for thread 1 to set the event before continuing
event.wait()
- item = self.pq.pop()
- queue.put(item)
+ items, _ = self.pq.pop()
+ queue.put(items[0])
# Act; with thread 1 we won't create a lock, and then with thread 2 we
# try to pop an item while the timeout is active.
@@ -484,26 +484,26 @@ def test_pop_highest_priority(self):
priority
"""
# Add an item to the queue
- first_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ first_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(first_item)
# Add another item to the queue
- second_item = functions.create_item(scheduler_id=self.pq.pq_id, priority=2)
+ second_item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=2)
self.pq.push(second_item)
# The queue should now have 2 items
self.assertEqual(2, self.pq.qsize())
# Pop the item
- popped_item = self.pq.pop()
- self.assertEqual(first_item.priority, popped_item.priority)
+ popped_items, _ = self.pq.pop()
+ self.assertEqual(first_item.priority, popped_items[0].priority)
def test_is_item_on_queue(self):
"""When checking if an item is on the queue, it should return True if
the item is on the queue, and False if it isn't.
"""
# Add an item to the queue
- item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
self.pq.push(item)
# Check if the item is on the queue
@@ -514,7 +514,7 @@ def test_is_item_not_on_queue(self):
the item is on the queue, and False if it isn't.
"""
# Add an item to the queue
- item = functions.create_item(scheduler_id=self.pq.pq_id, priority=1)
+ item = functions.create_task(scheduler_id=self.pq.pq_id, organisation=self.pq.pq_id, priority=1)
# Check if the item is on the queue
self.assertFalse(self.pq.is_item_on_queue(item))
diff --git a/mula/tests/utils/functions.py b/mula/tests/utils/functions.py
index 8eeeb875d2d..506cbf0bf52 100644
--- a/mula/tests/utils/functions.py
+++ b/mula/tests/utils/functions.py
@@ -34,23 +34,11 @@ def create_test_model() -> TestModel:
return TestModel(id=uuid.uuid4().hex, name=uuid.uuid4().hex)
-def create_task_in(priority: int, data: TestModel | None = None) -> str:
+def create_task_in(priority: int, organisation: str, data: TestModel | None = None) -> str:
if data is None:
data = TestModel(id=uuid.uuid4().hex, name=uuid.uuid4().hex)
- return json.dumps({"priority": priority, "data": data.model_dump()})
-
-
-def create_item(scheduler_id: str, priority: int, task: models.Task | None = None) -> models.Task:
- if task is None:
- task = create_task(scheduler_id)
-
- item = models.Task(**task.model_dump())
-
- if priority is not None:
- item.priority = priority
-
- return item
+ return json.dumps({"priority": priority, "organisation": organisation, "data": data.model_dump()})
def create_schedule(scheduler_id: str, data: Any | None = None) -> models.Schedule:
@@ -58,11 +46,18 @@ def create_schedule(scheduler_id: str, data: Any | None = None) -> models.Schedu
return models.Schedule(scheduler_id=scheduler_id, hash=item.hash, data=item.model_dump())
-def create_task(scheduler_id: str, data: Any | None = None) -> models.Task:
+def create_task(scheduler_id: str, organisation: str, priority: int = 0, data: Any | None = None) -> models.Task:
if data is None:
data = TestModel(id=uuid.uuid4().hex, name=uuid.uuid4().hex)
- return models.Task(scheduler_id=scheduler_id, type=TestModel.type, hash=data.hash, data=data.model_dump())
+ return models.Task(
+ scheduler_id=scheduler_id,
+ organisation=organisation,
+ priority=priority,
+ type=TestModel.type,
+ hash=data.hash,
+ data=data.model_dump(),
+ )
def create_boefje() -> models.Boefje:
diff --git a/octopoes/octopoes/core/service.py b/octopoes/octopoes/core/service.py
index ce019172c25..1793bfd0269 100644
--- a/octopoes/octopoes/core/service.py
+++ b/octopoes/octopoes/core/service.py
@@ -264,21 +264,19 @@ def recalculate_scan_profiles(self, valid_time: datetime) -> None:
# fetch all scan profiles
all_scan_profiles = self.scan_profile_repository.list_scan_profiles(None, valid_time=valid_time)
- # cache all declared
- all_declared_scan_profiles = {
- scan_profile for scan_profile in all_scan_profiles if isinstance(scan_profile, DeclaredScanProfile)
- }
- # cache all inherited
- inherited_scan_profiles = {
- scan_profile.reference: scan_profile
- for scan_profile in all_scan_profiles
- if isinstance(scan_profile, InheritedScanProfile)
- }
-
- # track all scan level assignments
- assigned_scan_levels: dict[Reference, ScanLevel] = {
- scan_profile.reference: scan_profile.level for scan_profile in all_declared_scan_profiles
- }
+ all_declared_scan_profiles: set[DeclaredScanProfile] = set()
+ inherited_scan_profiles: dict[Reference, InheritedScanProfile] = {}
+ assigned_scan_levels: dict[Reference, ScanLevel] = {}
+ source_scan_profile_references: set[Reference] = set()
+
+ # fill profile caches
+ for scan_profile in all_scan_profiles:
+ if isinstance(scan_profile, DeclaredScanProfile):
+ all_declared_scan_profiles.add(scan_profile)
+ assigned_scan_levels[scan_profile.reference] = scan_profile.level
+ source_scan_profile_references.add(scan_profile.reference)
+ elif isinstance(scan_profile, InheritedScanProfile):
+ inherited_scan_profiles[scan_profile.reference] = scan_profile
for current_level in range(4, 0, -1):
# start point: all scan profiles with current level + all higher scan levels
@@ -331,7 +329,6 @@ def recalculate_scan_profiles(self, valid_time: datetime) -> None:
# Save all assigned scan levels
update_count = 0
- source_scan_profile_references = {sp.reference for sp in all_declared_scan_profiles}
for reference, scan_level in assigned_scan_levels.items():
# Skip source scan profiles
if reference in source_scan_profile_references:
diff --git a/octopoes/octopoes/events/manager.py b/octopoes/octopoes/events/manager.py
index dc935b4c8a4..b5600da071e 100644
--- a/octopoes/octopoes/events/manager.py
+++ b/octopoes/octopoes/events/manager.py
@@ -26,6 +26,7 @@ class ScanProfileMutation(BaseModel):
operation: OperationType
primary_key: str
value: AbstractOOI | None = None
+ client_id: str
thread_local = threading.local()
@@ -126,7 +127,9 @@ def _publish(self, event: DBEvent) -> None:
)
# publish mutations
- mutation = ScanProfileMutation(operation=event.operation_type, primary_key=event.primary_key)
+ mutation = ScanProfileMutation(
+ operation=event.operation_type, primary_key=event.primary_key, client_id=event.client
+ )
if event.operation_type != OperationType.DELETE:
mutation.value = AbstractOOI(
@@ -137,7 +140,7 @@ def _publish(self, event: DBEvent) -> None:
self.channel.basic_publish(
"",
- f"{event.client}__scan_profile_mutations",
+ "scan_profile_mutations",
mutation.model_dump_json().encode(),
properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent),
)
@@ -165,4 +168,4 @@ def _try_connect(self):
def _connect(self) -> None:
self.channel = self.channel_factory(self.queue_uri)
self.channel.queue_declare(queue=f"{self.client}__scan_profile_increments", durable=True)
- self.channel.queue_declare(queue=f"{self.client}__scan_profile_mutations", durable=True)
+ self.channel.queue_declare(queue="scan_profile_mutations", durable=True)
diff --git a/octopoes/octopoes/repositories/scan_profile_repository.py b/octopoes/octopoes/repositories/scan_profile_repository.py
index c27954e0009..b8af824bc15 100644
--- a/octopoes/octopoes/repositories/scan_profile_repository.py
+++ b/octopoes/octopoes/repositories/scan_profile_repository.py
@@ -15,6 +15,8 @@
from octopoes.xtdb.client import XTDBSession
from octopoes.xtdb.query_builder import generate_pull_query
+scan_profile_adapter = TypeAdapter(ScanProfile)
+
class ScanProfileRepository(Repository):
def __init__(self, event_manager: EventManager):
@@ -62,7 +64,7 @@ def serialize(cls, scan_profile: ScanProfile) -> dict[str, Any]:
@classmethod
def deserialize(cls, data: dict[str, Any]) -> ScanProfileBase:
- return TypeAdapter(ScanProfile).validate_python(data)
+ return scan_profile_adapter.validate_python(data)
def list_scan_profiles(self, scan_profile_type: str | None, valid_time: datetime) -> list[ScanProfileBase]:
where = {"type": self.object_type}
diff --git a/octopoes/tests/robot/robot.resource b/octopoes/tests/robot/robot.resource
index 44c41a9fe18..3109ffc5c53 100644
--- a/octopoes/tests/robot/robot.resource
+++ b/octopoes/tests/robot/robot.resource
@@ -18,7 +18,7 @@ ${RABBIT_MQ_API_URI} http://ci_user:ci_pass@localhost:29003/api
${VALID_TIME} 2022-01-01T00:00:00+00:00
${SCAN_PROFILE_INCREMENT_QUEUE} _dev__scan_profile_increments
-${SCAN_PROFILE_MUTATION_QUEUE} _dev__scan_profile_mutations
+${SCAN_PROFILE_MUTATION_QUEUE} scan_profile_mutations
*** Keywords ***
@@ -73,7 +73,7 @@ Wait For XTDB Synced
Get All Document Ids
${query} Set Variable {:query {:find [?e] :where [[?e :xt/id]]}}
- ${headers} Create Dictionary Content-Type=application/edn Accept=application/json
+ ${headers} Create Dictionary Content-Type=application/edn Accept=application/json
${response} Post ${XTDB_URI}/query data=${query} headers=${headers}
${rows} Set Variable ${response.json()}
${ids} Create List
diff --git a/octopoes/tests/test_event_manager.py b/octopoes/tests/test_event_manager.py
index 3d99c4f62c7..b3c5f938eff 100644
--- a/octopoes/tests/test_event_manager.py
+++ b/octopoes/tests/test_event_manager.py
@@ -77,10 +77,10 @@ def test_event_manager_create_empty_scan_profile(mocker, empty_scan_profile):
channel_mock.basic_publish.assert_called_once_with(
"",
- "test__scan_profile_mutations",
+ "scan_profile_mutations",
b'{"operation":"create","primary_key":"test|reference","value":{"primary_key":"test|reference",'
b'"object_type":"test","scan_profile":{"scan_profile_type":"empty","reference":"test|reference",'
- b'"level":0,"user_id":null}}}',
+ b'"level":0,"user_id":null}},"client_id":"test"}',
properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent),
)
@@ -134,12 +134,12 @@ def test_event_manager_create_declared_scan_profile(mocker, declared_scan_profil
),
mocker.call(
"",
- "test__scan_profile_mutations",
+ "scan_profile_mutations",
b'{"operation": "create", "primary_key": "test|reference", '
b'"value": {"primary_key": "test|reference", '
b'"object_type": "test", '
b'"scan_profile": {"scan_profile_type": "declared", "reference": "test|reference",\
- "level": 2, "user_id": None}}}',
+ "level": 2, "user_id": None}}, "client_id": "test"}',
properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent),
),
)
@@ -179,7 +179,7 @@ def test_event_manager_delete_empty_scan_profile(mocker, empty_scan_profile):
channel_mock.basic_publish.assert_called_once_with(
"",
- "test__scan_profile_mutations",
- b'{"operation":"delete","primary_key":"test|reference","value":null}',
+ "scan_profile_mutations",
+ b'{"operation":"delete","primary_key":"test|reference","value":null,"client_id":"test"}',
properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent),
)
diff --git a/rocky/assets/css/themes/soft/manon/collapsing-element.scss b/rocky/assets/css/themes/soft/manon/collapsing-element.scss
index ecf2afda0d2..4a11e20bd62 100644
--- a/rocky/assets/css/themes/soft/manon/collapsing-element.scss
+++ b/rocky/assets/css/themes/soft/manon/collapsing-element.scss
@@ -9,6 +9,29 @@ body header nav.collapsible {
.collapsing-element {
position: relative;
+ form {
+ &.inline {
+ width: 100%;
+ }
+
+ button {
+ background: transparent;
+ border-radius: 0;
+ border: 0;
+ font-weight: normal;
+ width: 100%;
+ max-width: 100%;
+ height: var(--header-navigation-button-min-height);
+ padding-top: var(--collapsing-element-list-item-link-padding-top);
+ padding-right: var(--collapsing-element-list-item-link-padding-right);
+ padding-bottom: var(--collapsing-element-list-item-link-padding-bottom);
+ padding-left: var(--collapsing-element-list-item-link-padding-left);
+ color: var(--collapsing-element-list-item-link-text-color);
+ justify-content: flex-start;
+ line-height: var(--header-navigation-link-line-height);
+ }
+ }
+
.collapsible {
position: static;
}
diff --git a/rocky/onboarding/views.py b/rocky/onboarding/views.py
index cfc82434c2c..4aadb196b45 100644
--- a/rocky/onboarding/views.py
+++ b/rocky/onboarding/views.py
@@ -39,7 +39,6 @@
)
from rocky.exceptions import RockyError
from rocky.messaging import clearance_level_warning_dns_report
-from rocky.scheduler import scheduler_client
from rocky.views.indemnification_add import IndemnificationAddView
from rocky.views.ooi_view import SingleOOIMixin, SingleOOITreeMixin
from rocky.views.scheduler import SchedulerView
@@ -327,17 +326,12 @@ class OnboardingSetupScanOOIDetailView(
permission_required = "tools.can_scan_organization"
task_type = "report"
- @staticmethod
- def is_scheduler_enabled(organization: Organization) -> bool:
- scheduler_id = f"report-{organization.code}"
- return scheduler_client(organization.code).is_scheduler_ready(scheduler_id)
-
def post(self, request, *args, **kwargs):
report_name_format = self.get_initial_report_name()
parent_report_type = self.get_parent_report_type()
report_recipe = self.create_report_recipe(report_name_format, parent_report_type, None)
- if self.is_scheduler_enabled(self.organization):
- self.create_report_schedule(report_recipe, datetime.now(timezone.utc) + timedelta(minutes=2))
+
+ self.create_report_schedule(report_recipe, datetime.now(timezone.utc) + timedelta(minutes=2))
return redirect(
reverse("step_report", kwargs={"organization_code": self.organization.code})
diff --git a/rocky/reports/runner/worker.py b/rocky/reports/runner/worker.py
index ecdff5d3261..ffcf880c369 100644
--- a/rocky/reports/runner/worker.py
+++ b/rocky/reports/runner/worker.py
@@ -8,7 +8,6 @@
import structlog
from django.conf import settings
from httpx import HTTPError
-from pydantic import ValidationError
from reports.runner.models import ReportRunner, WorkerManager
from reports.runner.report_runner import LocalReportRunner
@@ -76,57 +75,33 @@ def _fill_queue(self, task_queue: Queue):
return
try:
- queues = self.scheduler.get_queues()
+ p_item = self.scheduler.pop_item("report")
except HTTPError:
- # Scheduler is having issues, so make note of it and try again
- logger.exception("Getting the queues from the scheduler failed")
- time.sleep(self.poll_interval) # But not immediately
+ logger.exception("Popping task from scheduler failed")
+ time.sleep(self.poll_interval)
return
- # We do not target a specific queue since we start one runtime for all organisations
- # and queue ids contain the organisation_id
- queues = [q for q in queues if q.id.startswith("report") and q.size > 0]
-
- logger.debug("Found queues: %s", [queue.id for queue in queues])
-
- all_queues_empty = True
-
- for queue in queues:
- logger.debug("Popping from queue %s", queue.id)
-
- try:
- p_item = self.scheduler.pop_item(queue.id)
- except (HTTPError, ValidationError):
- logger.error("Popping task from scheduler failed")
- time.sleep(self.poll_interval)
- continue
-
- if not p_item:
- logger.debug("Queue %s empty", queue.id)
- continue
+ if not p_item:
+ logger.debug("Queue empty, sleeping %f seconds", self.poll_interval)
+ time.sleep(self.poll_interval)
+ return
- all_queues_empty = False
+ logger.info("Handling task[%s]", p_item.id)
- logger.info("Handling task[%s]", p_item.id)
+ try:
+ task_queue.put(p_item)
+ logger.info("Dispatched task[%s]", p_item.id)
+ except: # noqa
+ logger.error("Exiting worker...")
+ logger.info("Patching scheduler task[id=%s] to %s", p_item.id, TaskStatus.FAILED.value)
try:
- task_queue.put(p_item)
- logger.info("Dispatched task[%s]", p_item.id)
- except: # noqa
- logger.error("Exiting worker...")
- logger.info("Patching scheduler task[id=%s] to %s", p_item.id, TaskStatus.FAILED.value)
-
- try:
- self.scheduler.patch_task(p_item.id, TaskStatus.FAILED)
- logger.info("Set task status to %s in the scheduler for task[id=%s]", TaskStatus.FAILED, p_item.id)
- except HTTPError:
- logger.error("Could not patch scheduler task to %s", TaskStatus.FAILED.value)
-
- raise
+ self.scheduler.patch_task(p_item.id, TaskStatus.FAILED)
+ logger.info("Set task status to %s in the scheduler for task[id=%s]", TaskStatus.FAILED, p_item.id)
+ except HTTPError:
+ logger.error("Could not patch scheduler task to %s", TaskStatus.FAILED.value)
- if all_queues_empty:
- logger.debug("All queues empty, sleeping %f seconds", self.poll_interval)
- time.sleep(self.poll_interval)
+ raise
def _check_workers(self) -> None:
new_workers = []
diff --git a/rocky/reports/templates/report_overview/report_history_table.html b/rocky/reports/templates/report_overview/report_history_table.html
index c3d4e94f680..a8f6674ef54 100644
--- a/rocky/reports/templates/report_overview/report_history_table.html
+++ b/rocky/reports/templates/report_overview/report_history_table.html
@@ -87,24 +87,24 @@
{% for report in reports %}
- {% if report.total_asset_reports >= 1 %}
-
-
diff --git a/rocky/reports/viewsets.py b/rocky/reports/viewsets.py
index 6d055cb9b43..e75fc302927 100644
--- a/rocky/reports/viewsets.py
+++ b/rocky/reports/viewsets.py
@@ -148,7 +148,8 @@ def perform_create(self, serializer: ReportRecipeSerializer) -> None:
deadline_at = datetime.now(timezone.utc).date().isoformat()
schedule_request = ScheduleRequest(
- scheduler_id=f"report-{self.organization.code}",
+ scheduler_id="report",
+ organisation=self.organization.code,
data=report_task,
schedule=report_recipe.cron_expression,
deadline_at=deadline_at,
diff --git a/rocky/rocky/locale/de/LC_MESSAGES/django.po b/rocky/rocky/locale/de/LC_MESSAGES/django.po
index 23d11cb032c..68ec54d186b 100644
--- a/rocky/rocky/locale/de/LC_MESSAGES/django.po
+++ b/rocky/rocky/locale/de/LC_MESSAGES/django.po
@@ -1283,8 +1283,12 @@ msgstr ""
msgid "Age"
msgstr ""
-#: katalogus/templates/plugin_container_image.html tools/forms/boefje.py
-msgid "Scan frequency"
+#: katalogus/templates/plugin_container_image.html
+msgid "Scan interval"
+msgstr ""
+
+#: katalogus/templates/plugin_container_image.html
+msgid "Run on"
msgstr ""
#: katalogus/templates/plugin_container_image.html
@@ -4766,10 +4770,32 @@ msgstr ""
msgid "Output mime types"
msgstr ""
+#: tools/forms/boefje.py
+msgid "Scan type"
+msgstr ""
+
+#: tools/forms/boefje.py
+msgid "Interval amount"
+msgstr ""
+
#: tools/forms/boefje.py
msgid ""
-"Specify the scanning frequency for this Boefje in minutes. The default is 24 "
-"hours. For example: 5 minutes will let the boefje scan every 5 minutes."
+"Specify the scanning interval for this Boefje. The default is 24 hours. For "
+"example: 5 minutes will let the Boefje scan every 5 minutes."
+msgstr ""
+
+#: tools/forms/boefje.py
+msgid "Interval frequency"
+msgstr ""
+
+#: tools/forms/boefje.py
+msgid "Object creation/change"
+msgstr ""
+
+#: tools/forms/boefje.py
+msgid ""
+"Choose weather a the Boefje should run after creating and/or changing an "
+"object. "
msgstr ""
#: tools/forms/finding_type.py
@@ -5111,6 +5137,12 @@ msgid ""
"
-
-
-
- {{ report.report.name }}
-
-
-