diff --git a/docker-compose.yaml b/docker-compose.yaml
index 7882bf306..d1941626a 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -105,8 +105,10 @@ services:
       - AWS_ENDPOINT_URL=http://localhost:4566
       - S3_BUCKET=lumigator-storage
       - PYTHONPATH=/mzai/lumigator/python/mzai/backend
-      - PIP_REQS=/mzai/lumigator/python/mzai/evaluator/requirements.txt
-      - EVALUATOR_WORK_DIR=/mzai/lumigator/python/mzai/evaluator
+      - EVALUATOR_PIP_REQS=/mzai/lumigator/python/mzai/jobs/evaluator/requirements.txt
+      - EVALUATOR_WORK_DIR=/mzai/lumigator/python/mzai/jobs/evaluator
+      - INFERENCE_PIP_REQS=/mzai/lumigator/python/mzai/jobs/inference/requirements.txt
+      - INFERENCE_WORK_DIR=/mzai/lumigator/python/mzai/jobs/inference
       - RAY_DASHBOARD_PORT=8265
       - RAY_HEAD_NODE_HOST=ray
       - MISTRAL_API_KEY=${MISTRAL_API_KEY}
diff --git a/lumigator/python/mzai/backend/.python-version b/lumigator/python/mzai/backend/.python-version
index 2c0733315..2419ad5b0 100644
--- a/lumigator/python/mzai/backend/.python-version
+++ b/lumigator/python/mzai/backend/.python-version
@@ -1 +1 @@
-3.11
+3.11.9
diff --git a/lumigator/python/mzai/backend/backend/api/routes/jobs.py b/lumigator/python/mzai/backend/backend/api/routes/jobs.py
index 77c94ce17..c4a366ae0 100644
--- a/lumigator/python/mzai/backend/backend/api/routes/jobs.py
+++ b/lumigator/python/mzai/backend/backend/api/routes/jobs.py
@@ -14,14 +14,15 @@
 router = APIRouter()
 
 
-@router.post("/inference", status_code=status.HTTP_201_CREATED)
+@router.post("/inference/", status_code=status.HTTP_201_CREATED)
 def create_inference_job(
     service: JobServiceDep,
     request: JobCreate,
 ) -> JobResponse:
     return service.create_inference_job(request)
 
-@router.post("/evaluate", status_code=status.HTTP_201_CREATED)
+
+@router.post("/evaluate/", status_code=status.HTTP_201_CREATED)
 def create_evaluation_job(
     service: JobServiceDep,
     request: JobCreate,
diff --git a/lumigator/python/mzai/backend/backend/services/jobs.py b/lumigator/python/mzai/backend/backend/services/jobs.py
index 002f0bb76..d7c0f7c6e 100644
--- a/lumigator/python/mzai/backend/backend/services/jobs.py
+++ b/lumigator/python/mzai/backend/backend/services/jobs.py
@@ -66,9 +66,7 @@ def _get_results_s3_key(self, job_id: UUID) -> str:
 
         return str(
             Path(settings.S3_JOB_RESULTS_PREFIX)
-            / settings.S3_JOB_RESULTS_FILENAME.format(
-                job_name=record.name, job_id=record.id
-            )
+            / settings.S3_JOB_RESULTS_FILENAME.format(job_name=record.name, job_id=record.id)
         )
 
     def create_inference_job(self, request: JobCreate) -> JobResponse:
@@ -80,7 +78,7 @@ def create_inference_job(self, request: JobCreate) -> JobResponse:
         dataset_s3_path = self.data_service.get_dataset_s3_path(request.dataset)
 
         # set storage path
-        storage_path = f"s3://{Path(settings.S3_BUCKET) / settings.S3_JOB_RESULTS_PREFIX}/"
+        storage_path = f"s3://{ Path(settings.S3_BUCKET) / settings.S3_JOB_RESULTS_PREFIX }/"
 
         # fill up model url with default openai url
         if request.model.startswith("oai://"):
@@ -105,7 +103,6 @@ def create_inference_job(self, request: JobCreate) -> JobResponse:
             "system_prompt": request.system_prompt,
         }
 
-
         # load a config template and fill it up with config_params
         if request.config_infer_template is not None:
             config_template = request.config_infer_template
@@ -121,16 +118,13 @@ def create_inference_job(self, request: JobCreate) -> JobResponse:
             "--config": config_template.format(**config_params),
         }
 
-        #TODO Add inference module as entrypoint
-        infer_command = f"{settings.LD_PRELOAD_PREFIX} python -m inference infer huggingface"
-
         # Prepare the job configuration that will be sent to submit the ray job.
         # This includes both the command that is going to be executed and its
         # arguments defined in infer_config_args
         ray_config = JobConfig(
             job_id=record.id,
             job_type=JobType.INFERENCE,
-            command=infer_command,
+            command=settings.INFERENCE_COMMAND,
             args=infer_config_args,
         )
 
@@ -145,8 +139,8 @@ def create_inference_job(self, request: JobCreate) -> JobResponse:
             worker_gpus = settings.RAY_WORKER_GPUS
 
         runtime_env = {
-            "pip": settings.PIP_REQS,
-            "working_dir": settings.EVALUATOR_WORK_DIR,
+            "pip": settings.INFERENCE_PIP_REQS,
+            "working_dir": settings.INFERENCE_WORK_DIR,
             "env_vars": runtime_env_vars,
         }
 
@@ -215,20 +209,13 @@ def create_evaluation_job(self, request: JobCreate) -> JobResponse:
             "--config": config_template.format(**config_params),
         }
 
-        # Pre-loading libgomp with LD_PRELOAD resolves allocation issues on aarch64
-        # (see https://github.com/mozilla-ai/lumigator/issues/156). The path where
-        # libs are stored on worker nodes contains a hash that depends on the
-        # installed libraries, so we get it dynamically right before running the
-        # command (more info in settings.py)
-        eval_command = f"{settings.LD_PRELOAD_PREFIX} python -m evaluator evaluate huggingface"
-
         # Prepare the job configuration that will be sent to submit the ray job.
         # This includes both the command that is going to be executed and its
         # arguments defined in eval_config_args
         ray_config = JobConfig(
             job_id=record.id,
             job_type=JobType.EVALUATION,
-            command=eval_command,
+            command=settings.EVALUATOR_COMMAND,
             args=eval_config_args,
         )
 
@@ -243,7 +230,7 @@ def create_evaluation_job(self, request: JobCreate) -> JobResponse:
             worker_gpus = settings.RAY_WORKER_GPUS
 
         runtime_env = {
-            "pip": settings.PIP_REQS,
+            "pip": settings.EVALUATOR_PIP_REQS,
             "working_dir": settings.EVALUATOR_WORK_DIR,
             "env_vars": runtime_env_vars,
         }
@@ -298,9 +285,7 @@ def get_job_result(self, job_id: UUID) -> JobResultResponse:
             )
         return JobResultResponse.model_validate(result_record)
 
-    def get_job_result_download(
-        self, job_id: UUID
-    ) -> JobResultDownloadResponse:
+    def get_job_result_download(self, job_id: UUID) -> JobResultDownloadResponse:
         """Return job results file URL for downloading."""
         # Generate presigned download URL for the object
         result_key = self._get_results_s3_key(job_id)
diff --git a/lumigator/python/mzai/backend/backend/settings.py b/lumigator/python/mzai/backend/backend/settings.py
index 2e97f59d0..aaf134b4c 100644
--- a/lumigator/python/mzai/backend/backend/settings.py
+++ b/lumigator/python/mzai/backend/backend/settings.py
@@ -42,8 +42,24 @@ class BackendSettings(BaseSettings):
     MISTRAL_API_URL: str = "https://api.mistral.ai/v1"
     DEFAULT_SUMMARIZER_PROMPT: str = "You are a helpful assistant, expert in text summarization. For every prompt you receive, provide a summary of its contents in at most two sentences."  # noqa: E501
 
-    # Eval
+    # Eval job details
     EVALUATOR_WORK_DIR: str | None = None
+    EVALUATOR_PIP_REQS: str | None = None
+
+    @computed_field
+    @property
+    def EVALUATOR_COMMAND(self) -> str:  # noqa: N802
+        """Return the command line that runs the evaluator module (``evaluate huggingface``).
+
+        LD_PRELOAD_PREFIX is prepended to fix an issue loading libgomp (an sklearn dependency)
+        on the aarch64 ray image (see the LD_PRELOAD_PREFIX definition below for more details).
+        """
+        return f"{self.LD_PRELOAD_PREFIX} python -m evaluator evaluate huggingface"
+
+    # Inference job details. NOTE(review): command drops LD_PRELOAD_PREFIX - confirm intended
+    INFERENCE_WORK_DIR: str | None = None
+    INFERENCE_PIP_REQS: str | None = None
+    INFERENCE_COMMAND: str = "python inference.py"
 
     def inherit_ray_env(self, runtime_env_vars: Mapping[str, str]):
         for env_var_name in self.RAY_WORKER_ENV_VARS:
@@ -83,8 +99,6 @@ def RAY_WORKER_GPUS(self) -> float:  # noqa: N802
     def RAY_WORKER_GPUS_FRACTION(self) -> float:  # noqa: N802
         return float(os.environ.get(self.RAY_WORKER_GPUS_FRACTION_ENV_VAR, 1.0))
 
-    PIP_REQS: str | None = None
-
     @computed_field
     @property
     def RAY_DASHBOARD_URL(self) -> str:  # noqa: N802
diff --git a/lumigator/python/mzai/backend/backend/tests/api/routes/test_api_workflows.py b/lumigator/python/mzai/backend/backend/tests/api/routes/test_api_workflows.py
index a7a408c97..67003c91f 100644
--- a/lumigator/python/mzai/backend/backend/tests/api/routes/test_api_workflows.py
+++ b/lumigator/python/mzai/backend/backend/tests/api/routes/test_api_workflows.py
@@ -10,17 +10,19 @@
 
 @app.on_event("startup")
 def test_health_ok(local_client: TestClient):
-        response = local_client.get("/health/")
-        assert response.status_code == 200
+    response = local_client.get("/health/")
+    assert response.status_code == 200
+
 
 def test_upload_data_launch_job(local_client: TestClient, dialog_dataset):
     response = local_client.get("/health")
     assert response.status_code == 200
 
-    create_response = local_client.post("/datasets",
-            data={},
-            files={"dataset": dialog_dataset, "format": (None, DatasetFormat.JOB.value)},
-        )
+    create_response = local_client.post(
+        "/datasets/",
+        data={},
+        files={"dataset": dialog_dataset, "format": (None, DatasetFormat.JOB.value)},
+    )
 
     assert create_response.status_code == 201
 
@@ -40,9 +42,15 @@ def test_upload_data_launch_job(local_client: TestClient, dialog_dataset):
         "config_template": "string",
     }
 
-    create_experiment_response = local_client.post("/jobs/evaluate", headers=headers, json=payload
+    create_evaluation_job_response = local_client.post(
+        "/jobs/evaluate/", headers=headers, json=payload
+    )
+    assert create_evaluation_job_response.status_code == 201
+
+    create_inference_job_response = local_client.post(
+        "/jobs/inference/", headers=headers, json=payload
     )
-    assert create_experiment_response.status_code == 201
+    assert create_inference_job_response.status_code == 201
 
 
 def test_experiment_non_existing(local_client: TestClient):
diff --git a/lumigator/python/mzai/evaluator/README.md b/lumigator/python/mzai/jobs/evaluator/README.md
similarity index 100%
rename from lumigator/python/mzai/evaluator/README.md
rename to lumigator/python/mzai/jobs/evaluator/README.md
diff --git a/lumigator/python/mzai/evaluator/evaluator/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/__main__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/__main__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/__main__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/__main__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/common.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/common.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/common.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/common.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/huggingface.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/huggingface.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/huggingface.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/huggingface.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/jobs/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/jobs/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/jobs/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/jobs/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/jobs/common.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/jobs/common.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/jobs/common.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/jobs/common.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/jobs/hf_evaluate.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/jobs/hf_evaluate.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/jobs/hf_evaluate.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/jobs/hf_evaluate.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/jobs/lm_harness.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/jobs/lm_harness.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/jobs/lm_harness.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/jobs/lm_harness.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/vllm.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/vllm.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/vllm.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/vllm.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/configs/wandb.py b/lumigator/python/mzai/jobs/evaluator/evaluator/configs/wandb.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/configs/wandb.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/configs/wandb.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/constants.py b/lumigator/python/mzai/jobs/evaluator/evaluator/constants.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/constants.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/constants.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/entrypoint.py b/lumigator/python/mzai/jobs/evaluator/evaluator/entrypoint.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/entrypoint.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/entrypoint.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/asset_loader.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/asset_loader.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/asset_loader.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/asset_loader.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/common.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/common.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/common.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/common.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/conversation.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/conversation.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/conversation.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/conversation.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/hf_evaluate.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/hf_evaluate.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/hf_evaluate.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/hf_evaluate.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/lm_harness.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/lm_harness.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/lm_harness.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/lm_harness.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/metrics.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/metrics.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/evaluation/metrics.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/evaluation/metrics.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/model_clients.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/model_clients.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/model_clients.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/model_clients.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/jobs/utils.py b/lumigator/python/mzai/jobs/evaluator/evaluator/jobs/utils.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/jobs/utils.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/jobs/utils.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/paths.py b/lumigator/python/mzai/jobs/evaluator/evaluator/paths.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/paths.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/paths.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/preprocessing.py b/lumigator/python/mzai/jobs/evaluator/evaluator/preprocessing.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/preprocessing.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/preprocessing.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/conftest.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/conftest.py
similarity index 96%
rename from lumigator/python/mzai/evaluator/evaluator/tests/conftest.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/conftest.py
index 58976dd9e..e9a25be49 100644
--- a/lumigator/python/mzai/evaluator/evaluator/tests/conftest.py
+++ b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/conftest.py
@@ -12,7 +12,7 @@
 
 @pytest.fixture(scope="session")
 def examples_dir():
-    return Path(__file__).parents[1] / "examples"
+    return Path(__file__).parents[2] / "examples"
 
 
 @pytest.fixture(scope="session")
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/integration/README.md b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/README.md
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/integration/README.md
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/README.md
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/integration/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/integration/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/integration/conftest.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/conftest.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/integration/conftest.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/conftest.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/integration/test_integration_setup.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/test_integration_setup.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/integration/test_integration_setup.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/test_integration_setup.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/integration/test_lm_harness.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/test_lm_harness.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/integration/test_lm_harness.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/integration/test_lm_harness.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/README.md b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/README.md
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/README.md
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/README.md
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/create_tiny_shakespeare.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/create_tiny_shakespeare.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/create_tiny_shakespeare.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/create_tiny_shakespeare.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/data-00000-of-00001.arrow b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/data-00000-of-00001.arrow
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/data-00000-of-00001.arrow
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/data-00000-of-00001.arrow
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/dataset_info.json b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/dataset_info.json
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/dataset_info.json
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/dataset_info.json
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/state.json b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/state.json
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/state.json
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/tiny_shakespeare/state.json
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/create_xyz.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/create_xyz.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/create_xyz.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/create_xyz.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/data-00000-of-00001.arrow b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/data-00000-of-00001.arrow
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/data-00000-of-00001.arrow
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/data-00000-of-00001.arrow
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/dataset_info.json b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/dataset_info.json
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/dataset_info.json
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/dataset_info.json
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/state.json b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/state.json
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/datasets/xyz/state.json
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/datasets/xyz/state.json
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/config.json b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/config.json
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/config.json
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/config.json
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/create_tiny_gpt2.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/create_tiny_gpt2.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/create_tiny_gpt2.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/create_tiny_gpt2.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/merges.txt b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/merges.txt
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/merges.txt
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/merges.txt
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/model.safetensors b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/model.safetensors
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/model.safetensors
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/model.safetensors
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/special_tokens_map.json b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/special_tokens_map.json
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/special_tokens_map.json
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/special_tokens_map.json
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/tokenizer_config.json b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/tokenizer_config.json
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/tokenizer_config.json
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/tokenizer_config.json
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/vocab.json b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/vocab.json
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/resources/models/tiny_gpt2/vocab.json
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/resources/models/tiny_gpt2/vocab.json
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/test_utils.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/test_utils.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/test_utils.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/test_utils.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/jobs/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/jobs/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/jobs/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/jobs/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/jobs/conftest.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/jobs/conftest.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/jobs/conftest.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/jobs/conftest.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/jobs/test_lm_harness_config.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/jobs/test_lm_harness_config.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/jobs/test_lm_harness_config.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/jobs/test_lm_harness_config.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_adapter_config.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_adapter_config.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_adapter_config.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_adapter_config.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_common.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_common.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_common.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_common.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_dataset_config.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_dataset_config.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_dataset_config.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_dataset_config.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_quantization_config.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_quantization_config.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_quantization_config.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_quantization_config.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_run_config.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_run_config.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/configs/test_run_config.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/configs/test_run_config.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/jobs/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/jobs/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/jobs/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/jobs/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/jobs/test_asset_loader.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/jobs/test_asset_loader.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/jobs/test_asset_loader.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/jobs/test_asset_loader.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/test_paths.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/test_paths.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/test_paths.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/test_paths.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tests/unit/test_preprocessing.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/test_preprocessing.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tests/unit/test_preprocessing.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tests/unit/test_preprocessing.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tracking/__init__.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tracking/__init__.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tracking/__init__.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tracking/__init__.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tracking/artifact_utils.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tracking/artifact_utils.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tracking/artifact_utils.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tracking/artifact_utils.py
diff --git a/lumigator/python/mzai/evaluator/evaluator/tracking/run_utils.py b/lumigator/python/mzai/jobs/evaluator/evaluator/tracking/run_utils.py
similarity index 100%
rename from lumigator/python/mzai/evaluator/evaluator/tracking/run_utils.py
rename to lumigator/python/mzai/jobs/evaluator/evaluator/tracking/run_utils.py
diff --git a/lumigator/python/mzai/evaluator/examples/configs/evaluation/hf_evaluate_config.yaml b/lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/hf_evaluate_config.yaml
similarity index 100%
rename from lumigator/python/mzai/evaluator/examples/configs/evaluation/hf_evaluate_config.yaml
rename to lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/hf_evaluate_config.yaml
diff --git a/lumigator/python/mzai/evaluator/examples/configs/evaluation/hf_evaluate_inference_server_config.yaml b/lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/hf_evaluate_inference_server_config.yaml
similarity index 100%
rename from lumigator/python/mzai/evaluator/examples/configs/evaluation/hf_evaluate_inference_server_config.yaml
rename to lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/hf_evaluate_inference_server_config.yaml
diff --git a/lumigator/python/mzai/evaluator/examples/configs/evaluation/hf_evaluate_openai_config.yaml b/lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/hf_evaluate_openai_config.yaml
similarity index 100%
rename from lumigator/python/mzai/evaluator/examples/configs/evaluation/hf_evaluate_openai_config.yaml
rename to lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/hf_evaluate_openai_config.yaml
diff --git a/lumigator/python/mzai/evaluator/examples/configs/evaluation/lm_harness_hf_config.yaml b/lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/lm_harness_hf_config.yaml
similarity index 100%
rename from lumigator/python/mzai/evaluator/examples/configs/evaluation/lm_harness_hf_config.yaml
rename to lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/lm_harness_hf_config.yaml
diff --git a/lumigator/python/mzai/evaluator/examples/configs/evaluation/lm_harness_inference_server_config.yaml b/lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/lm_harness_inference_server_config.yaml
similarity index 100%
rename from lumigator/python/mzai/evaluator/examples/configs/evaluation/lm_harness_inference_server_config.yaml
rename to lumigator/python/mzai/jobs/evaluator/examples/configs/evaluation/lm_harness_inference_server_config.yaml
diff --git a/lumigator/python/mzai/evaluator/requirements.txt b/lumigator/python/mzai/jobs/evaluator/requirements.txt
similarity index 100%
rename from lumigator/python/mzai/evaluator/requirements.txt
rename to lumigator/python/mzai/jobs/evaluator/requirements.txt
diff --git a/lumigator/python/mzai/jobs/inference/README.md b/lumigator/python/mzai/jobs/inference/README.md
new file mode 100644
index 000000000..7ca4956d4
--- /dev/null
+++ b/lumigator/python/mzai/jobs/inference/README.md
@@ -0,0 +1 @@
+# Inference Documentation
diff --git a/lumigator/python/mzai/jobs/inference/inference.py b/lumigator/python/mzai/jobs/inference/inference.py
new file mode 100644
index 000000000..52974ba36
--- /dev/null
+++ b/lumigator/python/mzai/jobs/inference/inference.py
@@ -0,0 +1,127 @@
+"""python job to run batch inference"""
+
+import argparse
+import json
+from collections.abc import Iterable
+from pathlib import Path
+
+import s3fs
+from box import Box
+from datasets import load_from_disk
+from loguru import logger
+from model_clients import (
+    BaseModelClient,
+    MistralModelClient,
+    OpenAIModelClient,
+)
+from tqdm import tqdm
+
+
+def predict(dataset_iterable: Iterable, model_client: BaseModelClient) -> list:
+    """Run the model client over every sample and collect the predictions.
+
+    Args:
+        dataset_iterable: Iterable yielding the input samples one at a time.
+        model_client: Client whose ``predict`` method is called once per sample.
+
+    Returns:
+        The values returned by ``model_client.predict``, in input order.
+    """
+    return [model_client.predict(sample) for sample in dataset_iterable]
+
+
+def save_to_disk(local_path: Path, data_dict: dict):
+    """Serialize ``data_dict`` as JSON into ``local_path``.
+
+    Missing parent directories of ``local_path`` are created first.
+
+    Args:
+        local_path: Destination file for the JSON document.
+        data_dict: Dictionary handed to ``json.dump``.
+    """
+    logger.info(f"Storing into {local_path}...")
+    target_dir = local_path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    with local_path.open("w") as out_file:
+        json.dump(data_dict, out_file)
+
+
+def save_to_s3(config: Box, local_path: Path, storage_path: str) -> str:
+    """Upload a local results file to S3 and return its final location.
+
+    When ``storage_path`` ends with ``/`` it is treated as a prefix and the
+    object is stored under ``<prefix>/<config.name>/inference_results.json``;
+    otherwise ``storage_path`` is used verbatim as the destination.
+
+    Args:
+        config: Job configuration; ``config.name`` is read only when
+            ``storage_path`` is a prefix.
+        local_path: Local file to upload.
+        storage_path: Destination ``s3://`` URI (object path or prefix).
+
+    Returns:
+        The S3 URI the file was uploaded to.
+    """
+    s3 = s3fs.S3FileSystem()
+    if storage_path.endswith("/"):
+        # Join with "/" explicitly: S3 keys always use forward slashes, whereas
+        # pathlib would use the separator of the host operating system.
+        key_prefix = storage_path.removeprefix("s3://").rstrip("/")
+        storage_path = "s3://" + "/".join((key_prefix, config.name, "inference_results.json"))
+    logger.info(f"Storing into {storage_path}...")
+    s3.put_file(local_path, storage_path)
+    return storage_path
+
+
+def save_outputs(config: Box, inference_results: dict) -> Path | str | None:
+    """Persist inference results locally and, if requested, upload them to S3.
+
+    The results are always written to
+    ``~/.lumigator/results/<config.name>/inference_results.json`` first. When
+    ``config.evaluation.storage_path`` is an ``s3://`` URI the file is then
+    uploaded and, once the upload has succeeded, the local copy is removed.
+
+    Args:
+        config: Job configuration; reads ``config.name`` and
+            ``config.evaluation.storage_path``.
+        inference_results: Dictionary to store as JSON.
+
+    Returns:
+        The S3 location when the upload succeeded; the local file path when no
+        ``s3://`` storage path was given or when the upload failed (the local
+        copy is kept in that case); ``None`` when the results could not be
+        written locally at all.
+    """
+    storage_path = config.evaluation.storage_path
+    local_path = Path.home() / ".lumigator" / "results" / config.name / "inference_results.json"
+
+    try:
+        save_to_disk(local_path, inference_results)
+    except Exception as e:
+        # Nothing could be stored: keep the best-effort contract (log, return
+        # None) but include the traceback so the failure can be diagnosed.
+        logger.exception(e)
+        return None
+
+    if storage_path is None or not storage_path.startswith("s3://"):
+        return local_path
+
+    try:
+        uploaded_path = save_to_s3(config, local_path, storage_path)
+    except Exception as e:
+        # The upload failed but the local copy is intact, so report where it is
+        # instead of losing track of the results.
+        logger.exception(e)
+        return local_path
+
+    try:
+        local_path.unlink()
+        local_path.parent.rmdir()
+    except OSError as e:
+        # Cleanup is best-effort: the results are already stored on S3.
+        logger.warning(f"Could not remove local results at {local_path}: {e}")
+
+    # Prefer the location reported by the upload helper when it returns one;
+    # otherwise fall back to the storage path that was requested.
+    return uploaded_path if uploaded_path is not None else storage_path
+
+
+def run_inference(config: Box) -> Path:
+    """Run batch inference over a dataset and store the results.
+
+    The dataset at ``config.dataset.path`` is loaded from disk, optionally
+    truncated to ``config.evaluation.max_samples`` rows, and every entry of its
+    ``examples`` column is sent to the configured inference service. The
+    predictions are stored together with the examples, the ``ground_truth``
+    column and the engine name.
+
+    Args:
+        config: Job configuration. Reads ``dataset.path``,
+            ``evaluation.max_samples``, ``evaluation.enable_tqdm`` and
+            ``model.inference`` (``base_url`` and ``engine``).
+
+    Returns:
+        Whatever ``save_outputs`` returns for the stored results.
+
+    Raises:
+        ValueError: If ``config.model.inference`` is not set; no other kind of
+            model client is supported by this job.
+    """
+    # Fail fast with a clear message: without an inference service there is no
+    # model client to run, and the job would otherwise only break after the
+    # dataset has been loaded, with an unbound-variable error.
+    inference_config = config.model.inference
+    if inference_config is None:
+        raise ValueError("No inference service configured: `model.inference` is required")
+
+    # Load dataset given its URI
+    dataset = load_from_disk(config.dataset.path)
+
+    # Limit dataset length if max_samples is specified
+    max_samples = config.evaluation.max_samples
+    if max_samples is not None and max_samples > 0:
+        if max_samples > len(dataset):
+            logger.info(f"max_samples ({max_samples}) resized to dataset size ({len(dataset)})")
+            max_samples = len(dataset)
+        dataset = dataset.select(range(max_samples))
+
+    # Enable / disable tqdm
+    input_samples = dataset["examples"]
+    dataset_iterable = tqdm(input_samples) if config.evaluation.enable_tqdm else input_samples
+
+    # Choose which model client to use, based on the service endpoint
+    base_url = inference_config.base_url
+    if "mistral" in base_url:
+        logger.info(f"Using Mistral client. Endpoint: {base_url}")
+        model_client = MistralModelClient(base_url, config.model)
+    else:
+        logger.info(f"Using OAI client. Endpoint: {base_url}")
+        model_client = OpenAIModelClient(base_url, config.model)
+
+    # run inference
+    output = {
+        "predictions": predict(dataset_iterable, model_client),
+        "examples": input_samples,
+        "ground_truth": dataset["ground_truth"],
+        "model": inference_config.engine,
+    }
+
+    return save_outputs(config, output)
+
+
+if __name__ == "__main__":
+    # Script entry point: expects the whole job configuration as a JSON string.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=str, help="Configuration in JSON format")
+    args = parser.parse_args()
+
+    if not args.config:
+        parser.print_help()  # Print the usage message and exit
+        err_str = "No input configuration provided. Please pass one using the --config flag"
+        logger.error(err_str)
+        # Exit with a non-zero status so whatever launched the job sees the
+        # run as failed rather than as a successful no-op.
+        raise SystemExit(1)
+
+    config = json.loads(args.config)
+    # Missing keys resolve to None instead of raising, so optional settings can
+    # simply be tested against None further down.
+    result_dataset_path = run_inference(Box(config, default_box=True, default_box_attr=None))
+    logger.info(f"Inference results stored at {result_dataset_path}")
diff --git a/lumigator/python/mzai/jobs/inference/model_clients.py b/lumigator/python/mzai/jobs/inference/model_clients.py
new file mode 100644
index 000000000..a41378207
--- /dev/null
+++ b/lumigator/python/mzai/jobs/inference/model_clients.py
@@ -0,0 +1,143 @@
+import os
+import re
+from abc import ABC, abstractmethod
+
+from box import Box
+from loguru import logger
+from mistralai.client import MistralClient
+from openai import OpenAI, OpenAIError
+from openai.types import Completion
+
+
+def strip_path_prefix(path: str) -> str:
+    """Strip the 'scheme://' prefix from the start of a string.
+
+    Only a prefix made of word characters followed by ``://`` at the very
+    beginning of ``path`` is removed; strings without one are returned
+    unchanged.
+    """
+    # Raw string: "\w" inside a regular literal is an invalid escape sequence
+    # that newer Python versions warn about. ":" and "/" need no escaping.
+    pattern = r"^\w+://"
+    return re.sub(pattern, "", path)
+
+
+class BaseModelClient(ABC):
+    """Abstract base class for a model client.
+
+    Provides a uniform interface (currently just a simple ``predict`` method)
+    to models served in different ways (e.g. HF models loaded locally, OpenAI
+    endpoints, vLLM inference servers, llamafile).
+
+    Deriving from ``ABC`` is what makes the ``@abstractmethod`` markers
+    effective: a subclass can only be instantiated once it implements every
+    abstract method.
+    """
+
+    @abstractmethod
+    def __init__(self, model: str, config: Box):
+        """Used to initialize the model / inference service."""
+
+    @abstractmethod
+    def predict(self, prompt: str) -> str:
+        """Given a prompt, return a prediction."""
+
+
+class APIModelClient(BaseModelClient):
+    """General model client for APIs.
+
+    Subclasses are expected to set ``self._client`` and to implement
+    ``_chat_completion``; ``predict`` and the retry logic are shared.
+    """
+
+    def __init__(self, config: Box):
+        """Store the model configuration.
+
+        Args:
+            config: Model configuration. ``config.inference.engine`` (with any
+                ``scheme://`` prefix removed) is used as the model name and
+                ``config.inference.system_prompt`` as the system prompt.
+        """
+        self._config = config
+        self._engine = strip_path_prefix(config.inference.engine)
+        self._system = config.inference.system_prompt
+
+    @abstractmethod
+    def _chat_completion(
+        self,
+        config: Box,
+        client: OpenAI | MistralClient,
+        prompt: str,
+        system: str,
+    ) -> Completion:
+        """Connects to the API and returns a chat completion holding the model's response."""
+
+    def _get_response_with_retries(
+        self,
+        config: Box,
+        prompt: str,
+    ) -> Completion:
+        """Request a chat completion, retrying when the API call fails.
+
+        Args:
+            config: Model configuration; ``config.inference.max_retries`` sets
+                the number of attempts (a single attempt when unset).
+            prompt: User prompt to send.
+
+        Returns:
+            The chat completion returned by ``_chat_completion``.
+
+        Raises:
+            OpenAIError: If the last attempt fails as well.
+        """
+        max_retries = config.inference.max_retries
+        # Always make at least one attempt: a zero or negative setting would
+        # otherwise skip the request entirely and leave no response to return.
+        max_attempts = 1 if max_retries is None else max(1, max_retries)
+        for attempt in range(1, max_attempts + 1):
+            try:
+                return self._chat_completion(self._config, self._client, prompt, self._system)
+            except OpenAIError as e:
+                # Format the exception itself: not every OpenAIError subclass
+                # defines a ``message`` attribute.
+                logger.warning(f"{e}: Retrying ({attempt}/{max_attempts})")
+                if attempt == max_attempts:
+                    raise
+
+    def predict(self, prompt: str) -> str:
+        """Return the message content of the first choice of the completion."""
+        response = self._get_response_with_retries(self._config, prompt)
+
+        return response.choices[0].message.content
+
+
+class OpenAIModelClient(APIModelClient):
+    """Model client for models served via openai-compatible API.
+    For OpenAI models:
+    - The base_url is fixed
+    - Choose an engine name (see https://platform.openai.com/docs/models)
+    - Customize the system prompt if needed
+
+    For compatible models:
+    - Works with local/remote vLLM-served models and llamafiles
+    - Provide base_url and engine
+    - Customize the system prompt if needed
+    """
+
+    # System prompt used when the configuration does not provide one.
+    _DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
+
+    def __init__(self, base_url: str, config: Box):
+        """Create the OpenAI client pointing at ``base_url``.
+
+        Args:
+            base_url: Endpoint of the OpenAI-compatible service.
+            config: Model configuration, stored by the parent class.
+        """
+        super().__init__(config)
+        self._client = OpenAI(base_url=base_url)
+
+    def _chat_completion(
+        self,
+        config: Box,
+        client: OpenAI,
+        prompt: str,
+        system: str | None = _DEFAULT_SYSTEM_PROMPT,
+    ) -> Completion:
+        """Connects to a remote OpenAI-API-compatible endpoint
+        and returns a chat completion holding the model's response.
+
+        Args:
+            config: Model configuration providing ``max_tokens``,
+                ``frequency_penalty``, ``temperature`` and ``top_p``.
+            client: OpenAI client used to send the request.
+            prompt: Content of the user message.
+            system: Content of the system message; ``None`` selects the
+                default system prompt.
+        """
+        # The caller always passes the configured system prompt explicitly, so
+        # an unset prompt arrives here as None rather than as the default.
+        if system is None:
+            system = self._DEFAULT_SYSTEM_PROMPT
+        return client.chat.completions.create(
+            model=self._engine,
+            messages=[{"role": "system", "content": system}, {"role": "user", "content": prompt}],
+            max_tokens=config.max_tokens,
+            frequency_penalty=config.frequency_penalty,
+            temperature=config.temperature,
+            top_p=config.top_p,
+        )
+
+
+class MistralModelClient(APIModelClient):
+    """Model client for models served via Mistral API.
+    - The base_url is fixed
+    - Choose an engine name (see https://docs.mistral.ai/getting-started/models/)
+    - Customize the system prompt if needed
+    """
+
+    # System prompt used when the configuration does not provide one.
+    _DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
+
+    def __init__(self, base_url: str, config: Box):
+        """Create the Mistral client.
+
+        Args:
+            base_url: Accepted so the constructor has the same signature as
+                the OpenAI client's, but not used: the Mistral client is
+                created from the API key only.
+            config: Model configuration, stored by the parent class.
+
+        Raises:
+            KeyError: If the ``MISTRAL_API_KEY`` environment variable is unset.
+        """
+        super().__init__(config)
+        self._client = MistralClient(api_key=os.environ["MISTRAL_API_KEY"])
+
+    def _chat_completion(
+        self,
+        config: Box,
+        client: MistralClient,
+        prompt: str,
+        system: str | None = _DEFAULT_SYSTEM_PROMPT,
+    ) -> Completion:
+        """Connects to a Mistral endpoint
+        and returns a chat completion holding the model's response.
+
+        Args:
+            config: Model configuration providing ``max_tokens``,
+                ``temperature`` and ``top_p``.
+            client: Mistral client used to send the request.
+            prompt: Content of the user message.
+            system: Content of the system message; ``None`` selects the
+                default system prompt.
+        """
+        # The caller always passes the configured system prompt explicitly, so
+        # an unset prompt arrives here as None rather than as the default.
+        if system is None:
+            system = self._DEFAULT_SYSTEM_PROMPT
+        return client.chat(
+            model=self._engine,
+            messages=[{"role": "system", "content": system}, {"role": "user", "content": prompt}],
+            max_tokens=config.max_tokens,
+            temperature=config.temperature,
+            top_p=config.top_p,
+        )
diff --git a/lumigator/python/mzai/jobs/inference/requirements.txt b/lumigator/python/mzai/jobs/inference/requirements.txt
new file mode 100644
index 000000000..e20397092
--- /dev/null
+++ b/lumigator/python/mzai/jobs/inference/requirements.txt
@@ -0,0 +1,6 @@
+loguru==0.7.2
+s3fs==2024.5.0
+datasets==2.20.0
+python-box==7.2.0
+mistralai==0.4.2
+openai==1.52.0