From 5c2fc96f7716eec5334f29bce6e62d277881cdbc Mon Sep 17 00:00:00 2001 From: Nick Frasser <1693461+nfrasser@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:41:41 -0500 Subject: [PATCH] refactor(spm): initial refactor of internals to work with new API (#114) Interface remains largely the same, with some deprecations and changes to return values for asset endpoints * update unit tests to work with new api client: Mocks client functions rather than API requests. This is not the greatest, but mocking the HTTP requests would have been way more complicated * stream, dataset and type fixes * python 3.8 correct import and type issues * additional async dump stream helpers * update api and model types * initial changelog * improved stream protocol names * docstring updates and improvements * clear_cached_property for convenience --- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 32 + cryosparc/api.py | 22 +- cryosparc/api.pyi | 1650 ++++++++++++++++++++++++------ cryosparc/command.py | 271 ----- cryosparc/constants.py | 9 + cryosparc/controller.py | 82 ++ cryosparc/dataset.py | 21 +- cryosparc/dtype.py | 34 +- cryosparc/errors.py | 69 +- cryosparc/job.py | 670 ++++++------ cryosparc/model_registry.py | 6 +- cryosparc/models/api_response.py | 9 +- cryosparc/models/asset.py | 34 + cryosparc/models/event.py | 2 +- cryosparc/models/exposure.py | 112 +- cryosparc/models/external.py | 17 + cryosparc/models/job.py | 4 +- cryosparc/models/job_spec.py | 111 +- cryosparc/models/mongo.py | 13 - cryosparc/models/project.py | 3 +- cryosparc/models/service.py | 12 +- cryosparc/models/session.py | 2 +- cryosparc/models/workspace.py | 3 +- cryosparc/project.py | 125 +-- cryosparc/registry.py | 6 +- cryosparc/spec.py | 976 +++--------------- cryosparc/stream.py | 182 +++- cryosparc/stream_registry.py | 2 + cryosparc/tools.py | 632 ++++++------ cryosparc/util.py | 31 +- cryosparc/workspace.py | 101 +- pyproject.toml | 1 - tests/conftest.py | 421 ++++---- tests/test_api.py | 5 + tests/test_command.py | 5 - tests/test_job.py | 344 +++---- tests/test_project.py | 2 + tests/test_tools.py | 82 +- 39 files changed, 3102 insertions(+), 3003 deletions(-) delete mode 100644 cryosparc/command.py create mode 100644 cryosparc/constants.py create mode 100644 cryosparc/controller.py create mode 100644 cryosparc/models/asset.py create mode 100644 cryosparc/models/external.py delete mode 100644 cryosparc/models/mongo.py create mode 100644 tests/test_api.py delete mode 100644 tests/test_command.py create mode 100644 tests/test_project.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d1ffba0a..12b7c650 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,4 +28,4 @@ repos: hooks: - id: pyright additional_dependencies: - [cython, httpretty, httpx, numpy, pydantic, pytest, setuptools] + [cython, httpx, numpy, pydantic, pytest, setuptools] diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e5e4f66..76293caf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +## Next + +- BREAKING: replaced low-level `CryoSPARC.cli`, `CryoSPARC.rtp` and `CryoSPARC.vis` attributes with single unified `CryoSPARC.api` +- BREAKING: `CryoSPARC.download_asset(fileid, target)` no longer accepts a directory target. Must specify a filename. +- BREAKING: removed `CryoSPARC.get_job_specs()`. 
Use `CryoSPARC.job_register` instead +- BREAKING: `CryoSPARC.list_assets()` and `Job.list_assets()` return list of models instead of list of dictionaries, accessible with dot-notation + - OLD: `job.list_assets()[0]['filename']` + - NEW: `job.list_assets()[0].filename` +- BREAKING: `CryoSPARC.get_lanes()` now returns a list of models instead of dictionaries + - OLD: `cs.get_lanes()[0]['name']` + - NEW: `cs.get_lanes()[0].name` +- BREAKING: `CryoSPARC.get_targets` now returns a list of models instead of dictionaries + - OLD: `cs.get_targets()[0]['hostname']` + - NEW: `cs.get_targets()[0].hostname` + - Some top-level target attributes have also been moved into the `.config` attribute +- BREAKING: Restructured schema for Job models, many `Job.doc` properties have been internally rearranged +- Added: `CryoSPARC.job_register` property +- Added: `job.load_input()` and `job.load_output()` now accept `"default"`, `"passthrough"` and `"all"` keywords for their `slots` argument +- Added: `job.alloc_output()` now accepts `dtype_params` argument for fields with dynamic shapes +- Updated: Improved type definitions +- Deprecated: When adding external inputs and outputs, expanded slot definitions now expect `"name"` key instead of `"prefix"`, support for which will be removed in a future release. + - OLD: `job.add_input("particle", slots=[{"prefix": "component_mode_1", "dtype": "component", "required": True}])` + - NEW: `job.add_input("particle", slots=[{"name": "component_mode_1", "dtype": "component", "required": True}])` +- Deprecated: `license` argument no longer required when creating a `CryoSPARC` + instance, will be removed in a future release +- Deprecated: `external_job.stop()` now expects optional error string instead of boolean, support for boolean errors will be removed in a future release +- Deprecated: `CryoSPARC.get_job_sections()` will be removed in a future release, + use `CryoSPARC.job_register` instead +- Deprecated: Most functions no longer require a `refresh` argument, including + `job.set_param()`, `job.connect()`, `job.disconnect()` and `external_job.save_output()` +- Deprecated: Attributes `Project.doc`, `Workspace.doc` and `Job.doc` will be removed in a future release, use `.model` attribute instead + ## v4.6.1 - Added: Python 3.13 support diff --git a/cryosparc/api.py b/cryosparc/api.py index 2516d5bc..4cf8cb3c 100644 --- a/cryosparc/api.py +++ b/cryosparc/api.py @@ -4,7 +4,7 @@ import warnings from contextlib import contextmanager from enum import Enum -from typing import Any, Dict, Iterator, Optional, Tuple, TypedDict, Union +from typing import Any, Dict, Iterator, List, Optional, Tuple, TypedDict, Union import httpx @@ -16,7 +16,7 @@ _BASE_RESPONSE_TYPES = {"string", "integer", "number", "boolean"} -Auth = Union[str, tuple[str, str]] +Auth = Union[str, Tuple[str, str]] """ Auth token or email/password. 
""" @@ -101,7 +101,7 @@ def _construct_request(self, _path: str, _schema, *args, **kwargs) -> Tuple[str, else: streamable = None - if streamable: + if streamable is not None: if not isinstance(streamable, Streamable): raise TypeError(f"[API] {func_name}() invalid argument {streamable}; expected Streamable type") request_body = self._prepare_request_stream(streamable) @@ -155,7 +155,10 @@ def _handle_response(self, schema, res: httpx.Response): if stream_mime_type is not None: # This is a streaming type stream_class = registry.get_stream_class(stream_mime_type) assert stream_class - return stream_class.from_iterator(res.iter_bytes()) + return stream_class.from_iterator( + res.iter_bytes(), + media_type=res.headers.get("Content-Type", stream_mime_type), + ) elif "text/plain" in content_schema: return res.text elif "application/json" in content_schema: @@ -222,7 +225,7 @@ def _call(self, _method: str, _path: str, _schema, *args, **kwargs): with ctx as res: return self._handle_response(_schema, res) except httpx.HTTPStatusError as err: - raise APIError("received error response", res=err.response) + raise APIError("received error response", res=err.response) from err class APIClient(APINamespace): @@ -235,7 +238,7 @@ def __init__( base_url: Optional[str] = None, *, auth: Optional[Auth] = None, # token or email/password - headers: Dict[str, str] | None = None, + headers: Optional[Dict[str, str]] = None, timeout: float = 300, http_client: Optional[httpx.Client] = None, ): @@ -330,7 +333,7 @@ def _authorize(self, auth: Auth): self._client.headers["Authorization"] = f"{token.token_type.title()} {token.access_token}" -def sort_params_schema(path: str, param_schema: list[dict]): +def sort_params_schema(path: str, param_schema: List[dict]): """ Sort the OpenAPI endpoint parameters schema in order that path params appear in the given URI. 
@@ -399,9 +402,10 @@ def _decode_json_response(value: Any, schema: dict): if "type" in schema and schema["type"] in _BASE_RESPONSE_TYPES: return value - # Recursively decode list + # Recursively decode list or tuple if "type" in schema and schema["type"] == "array": - return [_decode_json_response(item, schema["items"]) for item in value] + collection_type, items_key = (tuple, "prefixItems") if "prefixItems" in schema else (list, "items") + return collection_type(_decode_json_response(item, schema[items_key]) for item in value) # Recursively decode object if "type" in schema and schema["type"] == "object": diff --git a/cryosparc/api.pyi b/cryosparc/api.pyi index 58e42369..61ed1bdc 100644 --- a/cryosparc/api.pyi +++ b/cryosparc/api.pyi @@ -15,13 +15,15 @@ from .models.api_response import ( WorkspaceAncestorUidsResponse, WorkspaceDescendantUidsResponse, ) +from .models.asset import GridFSAsset, GridFSFile from .models.auth import Token from .models.diagnostics import RuntimeDiagnostics from .models.event import CheckpointEvent, Event, ImageEvent, InteractiveEvent, TextEvent from .models.exposure import Exposure +from .models.external import ExternalOutputSpec from .models.job import Job, JobStatus from .models.job_register import JobRegister -from .models.job_spec import Category, OutputResult +from .models.job_spec import Category, InputSpec, InputSpecs, OutputResult, OutputSpec, OutputSpecs from .models.license import LicenseInstance, UpdateTag from .models.notification import Notification from .models.project import GenerateIntermediateResultsSettings, Project, ProjectSymlink @@ -34,6 +36,7 @@ from .models.session_params import LiveAbinitParams, LiveClass2DParams, LivePrep from .models.tag import Tag from .models.user import User from .models.workspace import Workspace +from .stream import Stream Auth = Union[str, Tuple[str, str]] """ @@ -47,7 +50,11 @@ class ConfigNamespace(APINamespace): """ Methods available in api.config, e.g., api.config.get_instance_uid(...) """ - def get_instance_uid(self) -> str: ... + def get_instance_uid(self) -> str: + """ + Gets this CryoSPARC instance's unique UID. + """ + ... def generate_new_instance_uid(self, *, force_takeover_projects: bool = False) -> str: """ Generates a new uid for the CryoSPARC instance @@ -55,10 +62,14 @@ class ConfigNamespace(APINamespace): otherwise if force_takeover_projects is False, only creates lockfile in projects that don't already have one """ ... - def set_default_job_priority(self, value: int) -> Any: ... + def set_default_job_priority(self, value: int) -> Any: + """ + Job priority + """ + ... def get_version(self) -> str: """ - Get the current CryoSPARC version (with patch suffix, if available) + Gets the current CryoSPARC version (with patch suffix, if available) """ ... def get_system_info(self) -> dict: @@ -68,14 +79,13 @@ class ConfigNamespace(APINamespace): ... def get(self, name: str, /, *, default: Any = "<>") -> Any: """ - Get config collection entry value for the given variable name. + Gets config collection entry value for the given variable name. """ ... def write(self, name: str, /, value: Any = ..., *, set_on_insert_only: bool = False) -> Any: """ - Set config collection entry. Specify `set_on_insert_only` to prevent - overwriting when the value already exists. Returns the value in the - database. + Sets config collection entry. Specify `set_on_insert_only` to prevent + overwriting when the value already exists. """ ... 
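The stubs above describe the `api.config` namespace of the new unified client (per the changelog, `CryoSPARC.api` replaces the old `cli`/`rtp`/`vis` attributes). A minimal sketch of reaching these endpoints follows; the connection arguments and the `my_custom_flag` key are hypothetical and not part of this diff:

```python
from cryosparc.tools import CryoSPARC

# Hypothetical connection details; note the changelog deprecates the
# `license` argument, so it is omitted here.
cs = CryoSPARC(host="localhost", base_port=39000,
               email="ada@example.com", password="password123")

print(cs.api.config.get_version())       # CryoSPARC version string
print(cs.api.config.get_instance_uid())  # unique instance UID

# Write a config entry only if it does not exist yet, then read it back.
cs.api.config.write("my_custom_flag", value=True, set_on_insert_only=True)
print(cs.api.config.get("my_custom_flag", default=False))
```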
@@ -83,14 +93,26 @@ class InstanceNamespace(APINamespace): """ Methods available in api.instance, e.g., api.instance.get_update_tag(...) """ - def get_update_tag(self) -> UpdateTag | None: ... - def live_enabled(self) -> bool: ... - def ecl_enabled(self) -> bool: ... + def get_update_tag(self) -> UpdateTag | None: + """ + Gets information about updating to the next CryoSPARC version, if one is available. + """ + ... + def live_enabled(self) -> bool: + """ + Checks if CryoSPARC Live is enabled + """ + ... + def ecl_enabled(self) -> bool: + """ + Checks if embedded CryoSPARC Live is enabled + """ + ... def link_log( self, type: str, /, - data: Any = {}, + data: Any = ..., *, user_id: Optional[str] = ..., project_uid: Optional[str] = ..., @@ -100,9 +122,7 @@ class InstanceNamespace(APINamespace): def get_license_usage(self) -> List[LicenseInstance]: ... def browse_files(self, *, abs_path_glob: str) -> BrowseFileResponse: """ - Backend for the file browser in the cryosparc UI. Returns a list of files - for the UI to display. - + Backend for the file browser in the cryosparc UI. .. note:: abs_path_glob could have shell vars in it (i.e. $HOME, $SCRATCH) 0. expand vars @@ -124,12 +144,12 @@ class InstanceNamespace(APINamespace): max_lines: Optional[int] = ..., ) -> str: """ - Get cryosparc service logs, filterable by date, name, function, and level + Gets cryosparc service logs, filterable by date, name, function, and level """ ... def get_runtime_diagnostics(self) -> RuntimeDiagnostics: """ - Get runtime diagnostics for the CryoSPARC instance + Gets runtime diagnostics for the CryoSPARC instance """ ... @@ -142,9 +162,9 @@ class CacheNamespace(APINamespace): Returns None if the value is not set or expired """ ... - def set(self, key: str, /, value: Any = None, *, namespace: Optional[str] = ..., ttl: int = 60) -> None: + def set(self, key: str, /, value: Any = ..., *, namespace: Optional[str] = ..., ttl: int = 60) -> None: """ - Set the given key to the given value, with a ttl (Time-to-Live) in seconds + Sets key to the given value, with a ttl (Time-to-Live) in seconds """ ... @@ -164,8 +184,16 @@ class UsersNamespace(APINamespace): Show a table of all CryoSPARC user accounts """ ... - def me(self) -> User: ... - def find_one(self, user_id: str, /) -> User: ... + def me(self) -> User: + """ + Returns the current user + """ + ... + def find_one(self, user_id: str, /) -> User: + """ + Finds a user with a matching user ID or email + """ + ... def update( self, user_id: str, @@ -183,7 +211,7 @@ class UsersNamespace(APINamespace): ... def delete(self, user_id: str, /) -> None: """ - Remove a user from the CryoSPARC. Only authenticated admins may do this. + Removes a user from the CryoSPARC. Only authenticated admins may do this. """ ... def get_role(self, user_id: str, /) -> Literal["user", "admin"]: @@ -202,7 +230,7 @@ class UsersNamespace(APINamespace): role: Literal["user", "admin"] = "user", ) -> User: """ - Create a new CryoSPARC user account. Specify ``created_by_user_id`` as the + Creates a new CryoSPARC user account. Specify ``created_by_user_id`` as the id of user who is creating the new user. The password is expected as a SHA256 hash. @@ -210,30 +238,30 @@ class UsersNamespace(APINamespace): ... def request_reset_password(self, user_id: str, /) -> None: """ - Generate a password reset token for a user with the given email. The token + Generates a password reset token for a user with the given email. The token will appear in the Admin > User Management interface. """ ... 
def register(self, user_id: str, /, body: SHA256Password, *, token: str) -> None: """ - Register user with a token (unauthenticated). + Registers user with a token (unauthenticated). """ ... def reset_password(self, user_id: str, /, body: SHA256Password, *, token: str) -> None: """ - Reset password function with a token (unauthenticated). password is expected + Resets password function with a token (unauthenticated). password is expected as a sha256 hash. """ ... def set_role(self, user_id: str, /, role: Literal["user", "admin"]) -> User: """ - Change a user's from between "user" and "admin". Only admins may do this. + Changes a user's from between "user" and "admin". Only admins may do this. This revokes all access tokens for the given used ID. """ ... def get_my_state_var(self, key: str, /) -> Any: """ - Retrieve a user's state variable such as "licenseAccepted" or + Retrieves a user's state variable such as "licenseAccepted" or "recentProjects" """ ... @@ -247,7 +275,7 @@ class UsersNamespace(APINamespace): ... def get_state_var(self, user_id: str, key: str, /) -> Any: """ - Retrieve a given user's state variable such as "licenseAccepted" or + Retrieves a given user's state variable such as "licenseAccepted" or "recentProjects" """ ... @@ -263,62 +291,189 @@ class UsersNamespace(APINamespace): ... def get_lanes(self, user_id: str, /) -> List[str]: """ - Get the lanes a user has access to + Gets the lanes a user has access to + """ + ... + def set_lanes(self, user_id: str, /, lanes: List[str]) -> User: + """ + Restrict lanes the given user ID may to queue to. Only admins and account + owners may access this function. """ ... - def set_lanes(self, user_id: str, /, lanes: List[str]) -> User: ... class ResourcesNamespace(APINamespace): """ Methods available in api.resources, e.g., api.resources.find_lanes(...) """ - def find_lanes(self) -> List[SchedulerLane]: ... - def add_lane(self, body: SchedulerLane) -> SchedulerLane: ... - def find_lane(self, name: str, /, *, type: Literal["node", "cluster", None] = ...) -> SchedulerLane: ... - def remove_lane(self, name: str, /) -> None: ... - def find_targets(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget]: ... - def find_nodes(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget[Node]]: ... + def find_lanes(self) -> List[SchedulerLane]: + """ + Finds lanes that are registered with the master scheduler. + """ + ... + def add_lane(self, body: SchedulerLane) -> SchedulerLane: + """ + Adds a new lane to the master scheduler. + """ + ... + def find_lane(self, name: str, /, *, type: Literal["node", "cluster", None] = ...) -> SchedulerLane: + """ + Finds a lane registered to the master scheduler with a given name and optional type. + """ + ... + def remove_lane(self, name: str, /) -> None: + """ + Removes the specified lane and any targets assigned under the lane in the + master scheduler. + """ + ... + def find_targets(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget]: + """ + Finds a list of targets that are registered with the master scheduler. + """ + ... + def find_nodes(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget[Node]]: + """ + Finds a list of targets with type "node" that are registered with the master scheduler. + These correspond to discrete worker hostname accessible over SSH. + """ + ... def add_node(self, body: SchedulerTarget[Node]) -> SchedulerTarget[Node]: """ - Add a node or update an existing node. Updates existing node if they share + Adds a node or updates an existing node. 
Updates existing node if they share share the same name. """ ... - def find_clusters(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget[Cluster]]: ... + def find_clusters(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget[Cluster]]: + """ + Finds a list of targets with type "cluster" that are registered with the master scheduler. + These are multi-node clusters managed by workflow managers like SLURM or PBS and are accessible via submission script. + """ + ... def add_cluster(self, body: SchedulerTarget[Cluster]) -> SchedulerTarget[Cluster]: """ - Add a cluster or update an existing cluster. Updates existing cluster if + Adds a cluster or updates an existing cluster. Updates existing cluster if they share share the same name. """ ... - def find_target_by_hostname(self, hostname: str, /) -> SchedulerTarget: ... - def find_target_by_name(self, name: str, /) -> SchedulerTarget: ... - def find_node(self, name: str, /) -> SchedulerTarget[Node]: ... - def remove_node(self, name: str, /) -> None: ... - def find_cluster(self, name: str, /) -> SchedulerTarget[Cluster]: ... - def remove_cluster(self, name: str, /) -> None: ... - def find_cluster_script(self, name: str, /) -> str: ... + def find_target_by_hostname(self, hostname: str, /) -> SchedulerTarget: + """ + Finds a target with a given hostname. + """ + ... + def find_target_by_name(self, name: str, /) -> SchedulerTarget: + """ + Finds a target with a given name. + """ + ... + def find_node(self, name: str, /) -> SchedulerTarget[Node]: + """ + Finds a node with a given name. + """ + ... + def remove_node(self, name: str, /) -> None: + """ + Removes a target worker node from the master scheduler + """ + ... + def find_cluster(self, name: str, /) -> SchedulerTarget[Cluster]: + """ + Finds a cluster with a given name. + """ + ... + def remove_cluster(self, name: str, /) -> None: + """ + Removes the specified cluster/lane and any targets assigned under the lane + in the master scheduler + + Note: This will remove any worker node associated with the specified cluster/lane. + """ + ... + def find_cluster_script(self, name: str, /) -> str: + """ + Finds the cluster script for a cluster with a given name. + """ + ... def find_cluster_template_vars(self, name: str, /) -> List[str]: """ - Compute and retrieve all variable names defined in cluster templates. + Computes and retrieves all variable names defined in cluster templates. """ ... def find_cluster_template_custom_vars(self, name: str, /) -> List[str]: """ - Compute and retrieve all custom variables names defined in cluster templates + Computes and retrieves all custom variables names defined in cluster templates (i.e., all variables not in the internal list of known variable names). """ ... - def update_node_lane(self, name: str, /, lane: str) -> SchedulerTarget[Node]: ... + def update_node_lane(self, name: str, /, lane: str) -> SchedulerTarget[Node]: + """ + Changes the lane on the given target (assumed to exist). Target type must + match lane type. + """ + ... def refresh_nodes(self) -> Any: """ Asynchronously access target worker nodes. Load latest CPU, RAM and GPU info. """ ... - def verify_cluster(self, name: str, /) -> str: ... - def update_cluster_custom_vars(self, name: str, /, value: Dict[str, str]) -> SchedulerTarget[Cluster]: ... - def update_target_cache_path(self, name: str, /, value: Optional[str]) -> SchedulerTarget: ... 
+ def verify_cluster(self, name: str, /) -> str: + """ + Ensures cluster has been properly configured by executing a generic 'info' + command + """ + ... + def update_cluster_custom_vars(self, name: str, /, value: Dict[str, str]) -> SchedulerTarget[Cluster]: + """ + Changes the custom cluster variables on the given target (assumed to exist) + """ + ... + def update_target_cache_path(self, name: str, /, value: Optional[str]) -> SchedulerTarget: + """ + Changes the cache path on the given target (assumed to exist) + """ + ... + +class AssetsNamespace(APINamespace): + """ + Methods available in api.assets, e.g., api.assets.find(...) + """ + def find(self, *, project_uid: Optional[str] = ..., job_uid: Optional[str] = ...) -> List[GridFSFile]: + """ + List assets associated with projects or jobs on the given instance. + Typically returns files creating during job runs, including plots and metadata. + """ + ... + def upload( + self, + project_uid: str, + job_uid: str, + /, + stream: Stream, + *, + filename: Optional[str] = ..., + format: Union[ + Literal["txt", "csv", "html", "json", "xml", "bild", "bld", "log"], + Literal["pdf", "gif", "jpg", "jpeg", "png", "svg"], + None, + ] = ..., + ) -> GridFSAsset: + """ + Upload a new asset associated with the given project/job. When calling + via HTTP, provide the contents of the file in the request body. At least + one of filename or format must be provided. + """ + ... + def download(self, id: str = "000000000000000000000000", /) -> Stream: + """ + Download the asset with the given ID. When calling via HTTP, file contents + will be in the response body. + """ + ... + def find_one(self, id: str = "000000000000000000000000", /) -> GridFSFile: + """ + Retrive the full details for an asset with the given ID. + """ + ... class JobsNamespace(APINamespace): """ @@ -343,8 +498,16 @@ class JobsNamespace(APINamespace): started_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., exported_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., deleted: Optional[bool] = False, - ) -> List[Job]: ... - def delete_many(self, project_job_uids: List[Tuple[str, str]], *, force: bool = False) -> List[Job]: ... + ) -> List[Job]: + """ + Finds all jobs that match the supplied query + """ + ... + def delete_many(self, project_job_uids: List[Tuple[str, str]], *, force: bool = False) -> None: + """ + Deletes the given jobs. Ignores protected jobs if `force` is `True`. + """ + ... def count( self, *, @@ -362,11 +525,21 @@ class JobsNamespace(APINamespace): started_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., exported_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., deleted: Optional[bool] = False, - ) -> int: ... - def get_active_count(self) -> int: ... - def find_in_project( - self, project_uid: str, /, *, sort: str = "created_at", order: Literal[1, -1] = 1 - ) -> List[Job]: ... + ) -> int: + """ + Counts number of jobs that match the supplied query. + """ + ... + def get_active_count(self) -> int: + """ + Counts number of active jobs. + """ + ... + def find_in_project(self, project_uid: str, /, *, sort: str = "created_at", order: Literal[1, -1] = 1) -> List[Job]: + """ + Finds all jobs in project. + """ + ... def clone_many( self, project_uid: str, @@ -375,8 +548,20 @@ class JobsNamespace(APINamespace): *, workspace_uid: Optional[str] = ..., new_workspace_title: Optional[str] = ..., - ) -> List[Job]: ... - def get_chain(self, project_uid: str, /, *, start_job_uid: str, end_job_uid: str) -> List[str]: ... 
+ ) -> List[Job]: + """ + Clones the given list of jobs. If any jobs are related, it will try to + re-create the input connections between the cloned jobs (but maintain the + same connections to jobs that were not cloned) + """ + ... + def get_chain(self, project_uid: str, /, *, start_job_uid: str, end_job_uid: str) -> List[str]: + """ + Finds the chain of jobs between start job to end job. + A job chain is the intersection of the start job's descendants and the end job's + ancestors. + """ + ... def clone_chain( self, project_uid: str, @@ -386,10 +571,18 @@ class JobsNamespace(APINamespace): end_job_uid: str, workspace_uid: Optional[str] = ..., new_workspace_title: Optional[str] = ..., - ) -> List[Job]: ... + ) -> List[Job]: + """ + Clones jobs that directly descend from the start job UID up to the end job UID. + """ + ... def find_in_workspace( self, project_uid: str, workspace_uid: str, /, *, sort: str = "created_at", order: Literal[1, -1] = 1 - ) -> List[Job]: ... + ) -> List[Job]: + """ + Finds all jobs in workspace. + """ + ... def create( self, project_uid: str, @@ -399,47 +592,89 @@ class JobsNamespace(APINamespace): *, type: str, title: str = "", - description: str = "Enter a description.", + description: str = "", created_by_job_uid: Optional[str] = ..., enable_bench: bool = False, - ) -> Job: ... - def get_final_results(self, project_uid: str, /) -> GetFinalResultsResponse: ... - def find_one(self, project_uid: str, job_uid: str, /) -> Job: ... - def delete(self, project_uid: str, job_uid: str, /, *, force: bool = False) -> Job: ... - def get_directory(self, project_uid: str, job_uid: str, /) -> str: ... - def get_log(self, project_uid: str, job_uid: str, /) -> str: ... - def get_status(self, project_uid: str, job_uid: str, /) -> JobStatus: ... + ) -> Job: + """ + Creates a new job with the given type in the project/workspace + + To see all available job types and their parameters, see the `GET projects/{project_uid}:register` endpoint + """ + ... + def get_final_results(self, project_uid: str, /) -> GetFinalResultsResponse: + """ + Gets all final results within a project, along with the ancestors and non-ancestors of those jobs. + """ + ... + def find_one(self, project_uid: str, job_uid: str, /) -> Job: + """ + Finds the job. + """ + ... + def delete(self, project_uid: str, job_uid: str, /, *, force: bool = False) -> None: + """ + Deletes a job. Will kill (if running) and clearing the job before deleting. + """ + ... + def get_directory(self, project_uid: str, job_uid: str, /) -> str: + """ + Gets the job directory for a given job. + """ + ... + def get_log(self, project_uid: str, job_uid: str, /) -> str: + """ + Returns contents of the job.log file. Returns empty string if job.log does not exist. + """ + ... + def get_log_path(self, project_uid: str, job_uid: str, /) -> str: ... + def get_input_specs(self, project_uid: str, job_uid: str, /) -> InputSpecs: ... + def get_input_spec(self, project_uid: str, job_uid: str, input_name: str, /) -> InputSpec: ... + def add_external_input(self, project_uid: str, job_uid: str, input_name: str, /, body: InputSpec) -> Job: + """ + Add or replace an external job's input. + """ + ... + def get_output_specs(self, project_uid: str, job_uid: str, /) -> OutputSpecs: ... + def get_output_fields( + self, project_uid: str, job_uid: str, output_name: str, /, dtype_params: dict = {} + ) -> List[Tuple[str, str]]: + """ + Expected dataset column definitions for given job output, excluding passthroughs. + """ + ... 
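A rough sketch of driving the job endpoints above through the unified client. `cs` is assumed to be a connected `CryoSPARC` instance, `"P3"`/`"W1"` are hypothetical project and workspace UIDs, and `"homo_abinit"`/`"particles_class_0"` are only example type and output names:

```python
# Create a job of a given type in a project/workspace (positional UIDs per the
# stub above), then inspect its directory, log and expected output columns.
job = cs.api.jobs.create("P3", "W1", type="homo_abinit", title="Created from tools")

print(job.uid)
print(cs.api.jobs.get_directory("P3", job.uid))  # absolute job directory
print(cs.api.jobs.get_log("P3", job.uid))        # "" until job.log exists

# Dataset column definitions this output is expected to produce,
# excluding passthrough slots.
fields = cs.api.jobs.get_output_fields("P3", job.uid, "particles_class_0")
```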
+ def get_output_spec(self, project_uid: str, job_uid: str, output_name: str, /) -> OutputSpec: ... + def add_external_output(self, project_uid: str, job_uid: str, output_name: str, /, body: OutputSpec) -> Job: + """ + Add or replace an external job's output. + """ + ... + def create_external_result(self, project_uid: str, workspace_uid: str, /, body: ExternalOutputSpec) -> Job: + """ + Create an external result with the given specification. Returns an external + job with the given output ready to be saved. Used with cryosparc-tools + """ + ... + def get_status(self, project_uid: str, job_uid: str, /) -> JobStatus: + """ + Gets the status of a job. + """ + ... def view(self, project_uid: str, workspace_uid: str, job_uid: str, /) -> Job: """ - Adds a project, workspace and job id to a user's "recentJobs" (recently - viewed workspaces) state key + Adds a project, workspace and job uid to a user's recently viewed jobs list + """ + ... + def set_param(self, project_uid: str, job_uid: str, param: str, /, *, value: Any) -> Job: + """ + Sets the given job parameter to the value + """ + ... + def clear_param(self, project_uid: str, job_uid: str, param: str, /) -> Job: + """ + Resets the given parameter to its default value. """ ... - def set_param(self, project_uid: str, job_uid: str, param: str, /, *, value: Any) -> Job: ... - def clear_param(self, project_uid: str, job_uid: str, param: str, /) -> Job: ... - def connect( - self, project_uid: str, job_uid: str, input_name: str, /, *, source_job_uid: str, source_output_name: str - ) -> Job: ... - def disconnect(self, project_uid: str, job_uid: str, input_name: str, connection_index: int, /) -> Job: ... - def find_output_result( - self, project_uid: str, job_uid: str, output_name: str, result_name: str, / - ) -> OutputResult: ... - def connect_result( - self, - project_uid: str, - job_uid: str, - input_name: str, - connection_index: int, - result_name: str, - /, - *, - source_job_uid: str, - source_output_name: str, - source_result_name: str, - ) -> Job: ... - def disconnect_result( - self, project_uid: str, job_uid: str, input_name: str, connection_index: int, result_name: str, / - ) -> Job: ... def load_input( self, project_uid: str, @@ -447,8 +682,8 @@ class JobsNamespace(APINamespace): input_name: str, /, *, - slots: Union[Literal["default", "passthrough", "all"], List[str]] = "default", force_join: bool = False, + slots: Union[Literal["default", "passthrough", "all"], List[str]] = "default", ) -> Dataset: """ Load job input dataset. Raises exception if no inputs are connected. @@ -461,13 +696,75 @@ class JobsNamespace(APINamespace): output_name: str, /, *, - slots: Union[Literal["default", "passthrough", "all"], List[str]] = "default", version: Union[int, str] = "F", + slots: Union[Literal["default", "passthrough", "all"], List[str]] = "default", ) -> Dataset: """ Load job output dataset. Raises exception if output is empty or does not exists. """ ... + def save_output( + self, + project_uid: str, + job_uid: str, + output_name: str, + /, + dataset: Dataset, + *, + filename: Optional[str] = ..., + version: int = 0, + ) -> Job: + """ + Save job output dataset. Job must be running or waiting. + """ + ... + def connect( + self, project_uid: str, job_uid: str, input_name: str, /, *, source_job_uid: str, source_output_name: str + ) -> Job: + """ + Connects the input slot on the child job to the output group on the + parent job. + """ + ... + def disconnect_all(self, project_uid: str, job_uid: str, input_name: str, /) -> Job: ... 
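The dataset I/O endpoints above support a simple round trip. A minimal sketch, assuming `cs` is a connected `CryoSPARC` instance and that `"P3"`, `"J42"`, `"J43"` and `"particles"` are hypothetical identifiers:

```python
# Load a job output as a Dataset, including passthrough slots.
particles = cs.api.jobs.load_output("P3", "J42", "particles", slots="all", version="F")

# ... modify the Dataset here ...

# Save it back (per the docstring above, the job must be running or waiting).
cs.api.jobs.save_output("P3", "J42", "particles", particles, version=1)

# Connect the output group to another job's input.
cs.api.jobs.connect("P3", "J43", "particles",
                    source_job_uid="J42", source_output_name="particles")
```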
+ def disconnect(self, project_uid: str, job_uid: str, input_name: str, connection_index: int, /) -> Job: + """ + Removes connected inputs on the given input. + + Optionally specify an index to disconnect a specific connection. + + Optionally provide specific results to disconnect from matching connections (other results will be preserved). + """ + ... + def find_output_result(self, project_uid: str, job_uid: str, output_name: str, result_name: str, /) -> OutputResult: + """ + Get a job's low-level output result. + """ + ... + def connect_result( + self, + project_uid: str, + job_uid: str, + input_name: str, + connection_index: int, + result_name: str, + /, + *, + source_job_uid: str, + source_output_name: str, + source_result_name: str, + ) -> Job: + """ + Adds or replaces a result within an input connection with the given output result from a different job. + """ + ... + def disconnect_result( + self, project_uid: str, job_uid: str, input_name: str, connection_index: int, result_name: str, / + ) -> Job: + """ + Removes an output result connected within the given input connection. + """ + ... def enqueue( self, project_uid: str, @@ -478,31 +775,109 @@ class JobsNamespace(APINamespace): hostname: Optional[str] = ..., gpus: List[int] = [], no_check_inputs_ready: bool = False, - ) -> Job: ... - def recalculate_intermediate_results_size(self, project_uid: str, job_uid: str, /) -> Job: ... - def recalculate_project_intermediate_results_size(self, project_uid: str, /) -> List[Job]: ... - def clear_intermediate_results( - self, project_uid: str, job_uid: str, /, *, always_keep_final: bool = True - ) -> Job: ... + ) -> Job: + """ + Adds the job to the queue for the given worker lane (default lane if not specified) + """ + ... + def recalculate_intermediate_results_size(self, project_uid: str, job_uid: str, /) -> Any: + """ + For a job, find intermediate results and recalculate their total size. + """ + ... + def recalculate_project_intermediate_results_size(self, project_uid: str, /) -> Any: + """ + Recaclulates intermediate result sizes for all jobs in a project. + """ + ... + def clear_intermediate_results(self, project_uid: str, job_uid: str, /, *, always_keep_final: bool = True) -> Any: + """ + Removes intermediate results from the job + """ + ... def export_output_results( self, project_uid: str, job_uid: str, output_name: str, /, result_names: Optional[List[str]] = ... - ) -> str: ... - def export(self, project_uid: str, job_uid: str, /) -> Job: ... + ) -> str: + """ + Prepares a job's output for import to another project or instance. Creates a folder in the project directory → exports subfolder, + then links the output's associated files there.. + Note that the returned .csg file's parent folder must be manually copied with symlinks resolved into the target project folder before importing. + """ + ... + def export(self, project_uid: str, job_uid: str, /) -> Job: + """ + Start export for the job into the project's exports directory + """ + ... def get_output_result_path( self, project_uid: str, job_uid: str, output_name: str, result_name: str, /, *, version: Union[int, str] = "F" - ) -> str: ... - def interactive_post( - self, project_uid: str, job_uid: str, /, body: dict, *, endpoint: str, timeout: int = 10 - ) -> Any: ... - def mark_completed(self, project_uid: str, job_uid: str, /) -> Job: ... - def add_event_log( - self, project_uid: str, job_uid: str, /, message: str, *, type: Literal["text", "warning", "error"] = "text" - ) -> TextEvent: ... 
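A short sketch of queueing and post-run housekeeping with the endpoints above; `cs` is assumed connected, and the UIDs and output name are hypothetical:

```python
# Queue a built job (default lane; a specific lane or hostname can also be
# given, per the docstring above).
cs.api.jobs.enqueue("P3", "J42")

# After completion, trim intermediate results but keep the final iteration.
cs.api.jobs.clear_intermediate_results("P3", "J42", always_keep_final=True)

# Prepare one output for import into another project/instance; returns the
# path of the exported result group (see the docstring above for caveats).
csg_path = cs.api.jobs.export_output_results("P3", "J42", "particles")
```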
+ ) -> str: + """ + Get the absolute path for a job output's dataset or volume density. + """ + ... + def interactive_post( + self, project_uid: str, job_uid: str, /, body: dict, *, endpoint: str, timeout: int = 10 + ) -> Any: + """ + Sends a message to an interactive job. + """ + ... + def mark_running( + self, project_uid: str, job_uid: str, /, *, status: Literal["running", "waiting"] = "running" + ) -> Job: + """ + Indicate that an external job is running or waiting. + """ + ... + def mark_completed(self, project_uid: str, job_uid: str, /) -> Job: + """ + Mark a killed or failed job as completed. + """ + ... + def mark_failed(self, project_uid: str, job_uid: str, /, *, error: Optional[str] = ...) -> Job: + """ + Manually mark a job as failed. + """ + ... + def add_event_log( + self, project_uid: str, job_uid: str, /, text: str, *, type: Literal["text", "warning", "error"] = "text" + ) -> TextEvent: + """ + Add the message to the target job's event log. + """ + ... def get_event_logs( - self, project_uid: str, job_uid: str, / - ) -> List[Union[Event, CheckpointEvent, TextEvent, ImageEvent, InteractiveEvent]]: ... - def recalculate_size(self, project_uid: str, job_uid: str, /) -> Job: ... - def clear(self, project_uid: str, job_uid: str, /, *, force: bool = False) -> Job: ... + self, project_uid: str, job_uid: str, /, *, checkpoint: Optional[int] = ... + ) -> List[Union[Event, CheckpointEvent, TextEvent, ImageEvent, InteractiveEvent]]: + """ + Gets all event logs for a job. + + Note: this may return a lot of documents. + """ + ... + def add_image_log( + self, project_uid: str, job_uid: str, /, images: List[GridFSAsset], *, text: str, flags: List[str] = ["plots"] + ) -> ImageEvent: + """ + Add an image or figure to the target job's event log. + """ + ... + def add_checkpoint(self, project_uid: str, job_uid: str, /, meta: dict) -> CheckpointEvent: + """ + Add a checkpoint the target job's event log. + """ + ... + def recalculate_size(self, project_uid: str, job_uid: str, /) -> Job: + """ + Recalculates the size of a given job's directory. + """ + ... + def clear(self, project_uid: str, job_uid: str, /, *, force: bool = False) -> Job: + """ + Clears a job to get it back to building state (do not clear params or inputs). + """ + ... def clear_many( self, *, @@ -520,7 +895,11 @@ class JobsNamespace(APINamespace): started_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., exported_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., deleted: Optional[bool] = False, - ) -> List[Job]: ... + ) -> List[Job]: + """ + Clears all jobs that matches the query. + """ + ... def clone( self, project_uid: str, @@ -529,38 +908,123 @@ class JobsNamespace(APINamespace): *, workspace_uid: Optional[str] = ..., created_by_job_uid: Optional[str] = ..., - ) -> Job: ... - def kill(self, project_uid: str, job_uid: str, /) -> Job: ... + ) -> Job: + """ + Creates a new job as a clone of the provided job. + """ + ... + def kill(self, project_uid: str, job_uid: str, /) -> Job: + """ + Kills a running job + """ + ... def set_final_result(self, project_uid: str, job_uid: str, /, *, is_final_result: bool) -> Job: """ - Sets job final result flag and updates flags for all jobs in the project + Marks a job as a final result. A job marked as a final result and its ancestor jobs are protected during data cleanup. + """ + ... + def set_title(self, project_uid: str, job_uid: str, /, *, title: str) -> Job: + """ + Sets job title. + """ + ... 
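The new `api.assets` namespace pairs with the event-log endpoints above. A sketch of uploading a plot and attaching it to a job's log, assuming `cs` is connected, `"P3"`/`"J42"` are hypothetical UIDs, and that `Stream.from_iterator` accepts an iterator of byte chunks (as its use in `api.py` earlier in this diff suggests; the full set of `Stream` helpers is not shown here):

```python
from cryosparc.stream import Stream

# Wrap the plot bytes in a Stream (construction is an assumption, see above).
with open("fsc.png", "rb") as f:
    stream = Stream.from_iterator(iter([f.read()]))

# Upload the file as a job asset, then reference it from the event log.
asset = cs.api.assets.upload("P3", "J42", stream, filename="fsc.png", format="png")
cs.api.jobs.add_image_log("P3", "J42", [asset], text="FSC curve")
cs.api.jobs.add_event_log("P3", "J42", "Uploaded FSC plot", type="text")
```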
+ def set_description(self, project_uid: str, job_uid: str, /, description: str) -> Job: + """ + Sets job description. + """ + ... + def set_priority(self, project_uid: str, job_uid: str, /, *, priority: int) -> Job: + """ + Sets job priority + """ + ... + def set_cluster_custom_vars(self, project_uid: str, job_uid: str, /, cluster_custom_vars: dict) -> Job: + """ + Sets cluster custom variables for job + """ + ... + def get_active_licenses_count(self) -> int: + """ + Gets number of acquired licenses for running jobs + """ + ... + def get_types(self) -> Any: + """ + Gets list of available job types + """ + ... + def get_categories(self) -> Any: + """ + Gets job types by category + """ + ... + def find_ancestor_uids(self, project_uid: str, job_uid: str, /, *, workspace_uid: Optional[str] = ...) -> List[str]: + """ + Finds all ancestors of a single job and return a list of their UIDs """ ... - def set_title(self, project_uid: str, job_uid: str, /, *, title: str) -> Job: ... - def set_description(self, project_uid: str, job_uid: str, /, description: str) -> Job: ... - def set_priority(self, project_uid: str, job_uid: str, /, *, priority: int) -> Job: ... - def set_cluster_custom_vars(self, project_uid: str, job_uid: str, /, cluster_custom_vars: dict) -> Job: ... - def get_active_licenses_count(self) -> int: ... - def get_types(self) -> Any: ... - def get_categories(self) -> Any: ... - def find_ancestor_uids( - self, project_uid: str, job_uid: str, /, *, workspace_uid: Optional[str] = ... - ) -> List[str]: ... def find_descendant_uids( self, project_uid: str, job_uid: str, /, *, workspace_uid: Optional[str] = ... - ) -> List[str]: ... - def link_to_workspace(self, project_uid: str, job_uid: str, workspace_uid: str, /) -> Job: ... - def unlink_from_workspace(self, project_uid: str, job_uid: str, workspace_uid: str, /) -> Job: ... - def move(self, project_uid: str, job_uid: str, /, *, from_workspace_uid: str, to_workspace_uid: str) -> Job: ... - def update_directory_symlinks( - self, project_uid: str, job_uid: str, /, *, prefix_cut: str, prefix_new: str - ) -> int: ... - def add_tag(self, project_uid: str, job_uid: str, tag_uid: str, /) -> None: ... - def remove_tag(self, project_uid: str, job_uid: str, tag_uid: str, /) -> None: ... - def import_job(self, project_uid: str, workspace_uid: str, /, *, exported_job_dir_abs: str) -> Job: ... + ) -> List[str]: + """ + Find the list of all job UIDs that this job is an ancestor of based + on its outputs. + """ + ... + def link_to_workspace(self, project_uid: str, job_uid: str, workspace_uid: str, /) -> Job: + """ + Adds a job to a workspace. + """ + ... + def unlink_from_workspace(self, project_uid: str, job_uid: str, workspace_uid: str, /) -> Job: + """ + Removes a job from a workspace. + """ + ... + def move(self, project_uid: str, job_uid: str, /, *, from_workspace_uid: str, to_workspace_uid: str) -> Job: + """ + Moves a job from one workspace to another. + """ + ... + def update_directory_symlinks(self, project_uid: str, job_uid: str, /, *, prefix_cut: str, prefix_new: str) -> int: + """ + Rewrites all symbolic links in the job directory, modifying links prefixed with `prefix_cut` to instead be prefixed with `prefix_new`. + """ + ... + def add_tag(self, project_uid: str, job_uid: str, tag_uid: str, /) -> None: + """ + Tags a job with the given tag. + """ + ... + def remove_tag(self, project_uid: str, job_uid: str, tag_uid: str, /) -> None: + """ + Removes the given tag a job. + """ + ... 
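For discovering what can be built, the changelog steers callers from the deprecated `get_job_sections()` toward the job register; a small sketch using the endpoints above (return shapes are not spelled out in this excerpt):

```python
# New property on CryoSPARC per the changelog.
register = cs.job_register

types = cs.api.jobs.get_types()            # list of available job types
categories = cs.api.jobs.get_categories()  # job types grouped by category
```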
+ def import_job(self, project_uid: str, workspace_uid: str, /, *, exported_job_dir_abs: str) -> Job: + """ + Imports the exported job directory into the project. Exported job + directory must be copied to the target project directory with all its symbolic links resolved. + By convention, the exported job directory should be located in the project directory → exports subfolder + """ + ... def import_result_group( self, project_uid: str, workspace_uid: str, /, *, csg_path: str, lane: Optional[str] = ... - ) -> Job: ... + ) -> Job: + """ + Creates and enqueues an import result group job. + """ + ... + def star_job(self, project_uid: str, job_uid: str, /) -> Job: + """ + Stars a job for a user + """ + ... + def unstar_job(self, project_uid: str, job_uid: str, /) -> Job: + """ + Unstars a job for a user + """ + ... class WorkspacesNamespace(APINamespace): """ @@ -619,28 +1083,84 @@ class WorkspacesNamespace(APINamespace): Create a new workspace """ ... - def preview_delete(self, project_uid: str, workspace_uid: str, /) -> DeleteWorkspacePreview: ... + def preview_delete(self, project_uid: str, workspace_uid: str, /) -> DeleteWorkspacePreview: + """ + Get a list of jobs that would be removed when the given workspace is deleted. + """ + ... def find_one(self, project_uid: str, workspace_uid: str, /) -> Workspace: """ Find a specific workspace in a project """ ... - def delete(self, project_uid: str, workspace_uid: str, /) -> None: ... - def set_title(self, project_uid: str, workspace_uid: str, /, *, title: str) -> Workspace: ... - def set_description(self, project_uid: str, workspace_uid: str, /, description: str) -> Workspace: ... - def view(self, project_uid: str, workspace_uid: str, /) -> Workspace: ... - def delete_async(self, project_uid: str, workspace_uid: str, /) -> Any: ... - def add_tag(self, project_uid: str, workspace_uid: str, tag_uid: str, /) -> None: ... - def remove_tag(self, project_uid: str, workspace_uid: str, tag_uid: str, /) -> None: ... + def delete(self, project_uid: str, workspace_uid: str, /) -> None: + """ + Marks the workspace as "deleted". Deletes jobs that are only linked to this workspace + and no other workspace. + """ + ... + def set_title(self, project_uid: str, workspace_uid: str, /, *, title: str) -> Workspace: + """ + Set title of a workspace + """ + ... + def set_description(self, project_uid: str, workspace_uid: str, /, description: str) -> Workspace: + """ + Set description of a workspace + """ + ... + def view(self, project_uid: str, workspace_uid: str, /) -> Workspace: + """ + Adds a workspace uid to a user's recently viewed workspaces list. + """ + ... + def delete_async(self, project_uid: str, workspace_uid: str, /) -> Any: + """ + Starts the workspace deletion task. Deletes jobs that are only linked to this workspace + and no other workspace. + """ + ... + def add_tag(self, project_uid: str, workspace_uid: str, tag_uid: str, /) -> None: + """ + Tag the given workspace with the given tag. + """ + ... + def remove_tag(self, project_uid: str, workspace_uid: str, tag_uid: str, /) -> None: + """ + Removes a tag from a workspace. + """ + ... def clear_intermediate_results( self, project_uid: str, workspace_uid: str, /, *, always_keep_final: bool = False - ) -> Workspace: ... + ) -> Any: + """ + Remove intermediate results from a workspace. + """ + ... def find_workspace_ancestor_uids( self, project_uid: str, workspace_uid: str, /, job_uids: List[str] - ) -> WorkspaceAncestorUidsResponse: ... 
+ ) -> WorkspaceAncestorUidsResponse: + """ + Finds ancestors of jobs in the workspace + """ + ... def find_workspace_descendant_uids( self, project_uid: str, workspace_uid: str, /, job_uids: List[str] - ) -> WorkspaceDescendantUidsResponse: ... + ) -> WorkspaceDescendantUidsResponse: + """ + Finds descendants of jobs in the workspace + """ + ... + def star_workspace(self, project_uid: str, workspace_uid: str, /) -> Workspace: + """ + Stars a workspace for a given user + """ + ... + def unstar_workspace(self, project_uid: str, workspace_uid: str, /) -> Workspace: + """ + Unstars a project for a given user + """ + ... class SessionsNamespace(APINamespace): """ @@ -648,20 +1168,25 @@ class SessionsNamespace(APINamespace): """ def find(self, *, project_uid: Optional[str] = ...) -> List[Session]: """ - List all sessions (optionally, in a project) + Lists all sessions (optionally, in a project) """ ... def count(self, *, project_uid: Optional[str]) -> int: """ - Count all sessions in a project + Counts all sessions in a project """ ... def find_one(self, project_uid: str, session_uid: str, /) -> Session: """ - Find a session + Finds a session + """ + ... + def delete(self, project_uid: str, session_uid: str, /) -> None: + """ + Sets the session document as "deleted" + Will throw an error if any undeleted jobs exist within the session. """ ... - def delete(self, project_uid: str, session_uid: str, /) -> None: ... def create( self, project_uid: str, @@ -672,32 +1197,70 @@ class SessionsNamespace(APINamespace): created_by_job_uid: Optional[str] = ..., ) -> Session: """ - Create a new session + Creates a new session + """ + ... + def find_exposure_groups(self, project_uid: str, session_uid: str, /) -> List[ExposureGroup]: + """ + Finds all exposure groups in a session. + """ + ... + def create_exposure_group(self, project_uid: str, session_uid: str, /) -> ExposureGroup: + """ + Creates an exposure group for a session. + """ + ... + def find_exposure_group(self, project_uid: str, session_uid: str, exposure_group_id: int, /) -> ExposureGroup: + """ + Finds an exposure group with a specific id for a session. """ ... - def find_exposure_groups(self, project_uid: str, session_uid: str, /) -> List[ExposureGroup]: ... - def create_exposure_group(self, project_uid: str, session_uid: str, /) -> ExposureGroup: ... - def find_exposure_group(self, project_uid: str, session_uid: str, exposure_group_id: int, /) -> ExposureGroup: ... def update_exposure_group( self, project_uid: str, session_uid: str, exposure_group_id: int, /, body: ExposureGroupUpdate - ) -> ExposureGroup: ... - def delete_exposure_group(self, project_uid: str, session_uid: str, exposure_group_id: int, /) -> Session: ... - def finalize_exposure_group( - self, project_uid: str, session_uid: str, exposure_group_id: int, / - ) -> ExposureGroup: ... + ) -> ExposureGroup: + """ + Updates properties of an exposure group. + """ + ... + def delete_exposure_group(self, project_uid: str, session_uid: str, exposure_group_id: int, /) -> Session: + """ + Deletes an exposure group from a session. + """ + ... + def finalize_exposure_group(self, project_uid: str, session_uid: str, exposure_group_id: int, /) -> ExposureGroup: + """ + Finalizes an exposure group. + """ + ... def start(self, project_uid: str, session_uid: str, /) -> Session: """ - Build and start a CryoSPARC Live Session. Builds file engines based on file + Builds and starts a CryoSPARC Live Session. 
Builds file engines based on file engine parameters in the session doc, builds phase one workers based on lane parameters in the session doc. """ ... - def pause(self, project_uid: str, session_uid: str, /) -> Session: ... + def pause(self, project_uid: str, session_uid: str, /) -> Session: + """ + Pauses a CryoSPARC Live Session. Gracefully stops and kills all phase one workers, file engines and phase two jobs + """ + ... def update_compute_configuration( self, project_uid: str, session_uid: str, /, body: LiveComputeResources - ) -> LiveComputeResources: ... - def add_tag(self, project_uid: str, session_uid: str, tag_uid: str, /) -> None: ... - def remove_tag(self, project_uid: str, session_uid: str, tag_uid: str, /) -> None: ... + ) -> LiveComputeResources: + """ + Updates compute configuration for a session. + """ + ... + def add_tag(self, project_uid: str, session_uid: str, tag_uid: str, /) -> None: + """ + Tags a session with the given tag. + """ + ... + def remove_tag(self, project_uid: str, session_uid: str, tag_uid: str, /) -> None: + """ + Removes the given tag from a session. + """ + ... def update_session_params( self, project_uid: str, @@ -707,7 +1270,11 @@ class SessionsNamespace(APINamespace): *, reprocess: bool = True, priority: int = 1, - ) -> Session: ... + ) -> Session: + """ + Updates a session's params. Updates each exposure inside the session with the new stage to start processing at (if there is one). + """ + ... def update_session_picker( self, project_uid: str, @@ -716,7 +1283,11 @@ class SessionsNamespace(APINamespace): *, activate_picker_type: Literal["blob", "template", "deep"], use_thresholds: bool = True, - ) -> Session: ... + ) -> Session: + """ + Updates a session's picker. + """ + ... def update_attribute_threshold( self, project_uid: str, @@ -726,26 +1297,65 @@ class SessionsNamespace(APINamespace): *, min_val: Optional[float] = ..., max_val: Optional[float] = ..., - ) -> Session: ... - def clear_session(self, project_uid: str, session_uid: str, /) -> Session: ... + ) -> Session: + """ + Updates thresholds for a given attribute. + """ + ... + def clear_session(self, project_uid: str, session_uid: str, /) -> Session: + """ + Deletes all file engine documents (removing all previously known files and + max timestamps), all Phase 1 Worker jobs and all associated + exposure documents. + """ + ... def view(self, project_uid: str, session_uid: str, /) -> Session: """ - Adds a project, workspace and job id to a user's "recentJobs" (recently - viewed workspaces) state key + Adds a project, workspace and job uid to a user's recently viewed sessions list + """ + ... + def setup_phase2_class2D(self, project_uid: str, session_uid: str, /, *, force_restart: bool = True) -> Job: + """ + Setup streaming 2D classification job for a session. + """ + ... + def enqueue_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Job: + """ + Enqueues streaming 2D Classification job for a session + """ + ... + def stop_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: + """ + Stops streaming 2D Classification job for a session + """ + ... + def clear_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: + """ + Clears streaming 2D Classification job for a session + """ + ... + def update_phase2_class2D_params(self, project_uid: str, session_uid: str, /, body: LiveClass2DParams) -> Session: + """ + Updates streaming 2D Classification job params for session + """ + ... 
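A sketch of driving a Live session's streaming 2D classification with the session endpoints above; `cs` is assumed connected and `"P3"`/`"S1"` are hypothetical project and session UIDs:

```python
# Look up the session, then set up and queue its streaming 2D classification.
session = cs.api.sessions.find_one("P3", "S1")
cs.api.sessions.setup_phase2_class2D("P3", "S1", force_restart=True)
cs.api.sessions.enqueue_phase2_class2D("P3", "S1")

# ... later, stop and clear the streaming job.
cs.api.sessions.stop_phase2_class2D("P3", "S1")
cs.api.sessions.clear_phase2_class2D("P3", "S1")
```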
+ def invert_template_phase2_class2D(self, project_uid: str, session_uid: str, template_idx: int, /) -> Session: + """ + Inverts selected template for the streaming 2D Classification job of a job + """ + ... + def invert_all_templates_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: + """ + Inverts all templates for a session's streaming 2D classification job """ ... - def setup_phase2_class2D(self, project_uid: str, session_uid: str, /, *, force_restart: bool = True) -> Job: ... - def enqueue_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Job: ... - def stop_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: ... - def clear_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: ... - def update_phase2_class2D_params( - self, project_uid: str, session_uid: str, /, body: LiveClass2DParams - ) -> Session: ... - def invert_template_phase2_class2D(self, project_uid: str, session_uid: str, template_idx: int, /) -> Session: ... - def invert_all_templates_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: ... def set_all_templates_phase2_class2D( self, project_uid: str, session_uid: str, direction: Literal["select", "deselect"], / - ) -> Session: ... + ) -> Session: + """ + Sets all templates in the session's streaming 2D Classification job + """ + ... def select_direction_template_phase2_class2D( self, project_uid: str, @@ -755,8 +1365,16 @@ class SessionsNamespace(APINamespace): *, dimension: str, direction: Literal["above", "below"] = "above", - ) -> Session: ... - def start_extract_manual(self, project_uid: str, session_uid: str, /) -> None: ... + ) -> Session: + """ + Sets all templates above or below an index for a session's streaming 2D Classification + """ + ... + def start_extract_manual(self, project_uid: str, session_uid: str, /) -> None: + """ + Extracts manual picks from a session + """ + ... def set_session_exposure_processing_priority( self, project_uid: str, @@ -764,7 +1382,11 @@ class SessionsNamespace(APINamespace): /, *, exposure_processing_priority: Literal["normal", "oldest", "latest", "alternate"], - ) -> Session: ... + ) -> Session: + """ + Sets session exposure processing priority + """ + ... def update_picking_threshold_values( self, project_uid: str, @@ -775,9 +1397,21 @@ class SessionsNamespace(APINamespace): ncc_value: float, power_min_value: float, power_max_value: float, - ) -> Session: ... - def reset_attribute_threshold(self, project_uid: str, session_uid: str, attribute: str, /) -> Session: ... - def reset_all_attribute_thresholds(self, project_uid: str, session_uid: str, /) -> Session: ... + ) -> Session: + """ + Updates picking threshold values for a session + """ + ... + def reset_attribute_threshold(self, project_uid: str, session_uid: str, attribute: str, /) -> Session: + """ + Resets attribute threshold for a session + """ + ... + def reset_all_attribute_thresholds(self, project_uid: str, session_uid: str, /) -> Session: + """ + Resets all attribute thresholds for a session + """ + ... def setup_template_creation_class2D( self, project_uid: str, @@ -789,32 +1423,106 @@ class SessionsNamespace(APINamespace): num_mics: int, override_particle_diameter_A: Optional[float] = ..., uid_lte: Optional[int] = ..., - ) -> Session: ... - def set_template_creation_job(self, project_uid: str, session_uid: str, /, *, job_uid: str) -> Session: ... - def enqueue_template_creation_class2D(self, project_uid: str, session_uid: str, /) -> Job: ... 
- def clear_template_creation_class2D(self, project_uid: str, session_uid: str, /) -> Session: ... - def toggle_template_creation_template( - self, project_uid: str, session_uid: str, template_idx: int, / - ) -> Session: ... - def toggle_template_creation_all_templates(self, project_uid: str, session_uid: str, /) -> Session: ... + ) -> Session: + """ + Setup template creation class2D job for a session + """ + ... + def set_template_creation_job(self, project_uid: str, session_uid: str, /, *, job_uid: str) -> Session: + """ + Set template creation class2D job for a session + """ + ... + def enqueue_template_creation_class2D(self, project_uid: str, session_uid: str, /) -> Job: + """ + Enqueues template creation class2D job for a session + """ + ... + def clear_template_creation_class2D(self, project_uid: str, session_uid: str, /) -> Session: + """ + Clears template creation class2D job for a session + """ + ... + def toggle_template_creation_template(self, project_uid: str, session_uid: str, template_idx: int, /) -> Session: + """ + Toggles template for template creation job at a specific index for a session's template creation job + """ + ... + def toggle_template_creation_all_templates(self, project_uid: str, session_uid: str, /) -> Session: + """ + Toggles templates for all templates for a session's template creation job + """ + ... def select_template_creation_all( self, project_uid: str, session_uid: str, direction: Literal["select", "deselect"], / - ) -> Session: ... + ) -> Session: + """ + Selects or deselects all templates for a template creation job in a session + """ + ... def select_template_creation_in_direction( self, project_uid: str, session_uid: str, template_idx: int, direction: Literal["above", "below"], / - ) -> Session: ... - def setup_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Job: ... - def set_phase2_abinit_job(self, project_uid: str, session_uid: str, /, *, job_uid: str) -> Session: ... - def enqueue_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Job: ... - def clear_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Session: ... - def update_phase2_abinit_params(self, project_uid: str, session_uid: str, /, body: LiveAbinitParams) -> Session: ... - def select_phase2_abinit_volume(self, project_uid: str, session_uid: str, /, *, volume_name: str) -> Session: ... - def stop_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Session: ... - def clear_phase2_refine(self, project_uid: str, session_uid: str, /) -> Session: ... + ) -> Session: + """ + Selects all templates above or below an index in a template creation job for a session + """ + ... + def setup_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Job: + """ + Setup Ab-Initio Reconstruction job for a session + """ + ... + def set_phase2_abinit_job(self, project_uid: str, session_uid: str, /, *, job_uid: str) -> Session: + """ + Sets an Ab-Initio Reconstruction job for the session + """ + ... + def enqueue_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Job: + """ + Enqueues Ab-Initio Reconstruction job for a session + """ + ... + def clear_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Session: + """ + Clears Ab-Initio Reconstruction job for a session + """ + ... + def update_phase2_abinit_params(self, project_uid: str, session_uid: str, /, body: LiveAbinitParams) -> Session: + """ + Updates Ab-Initio Reconstruction parameters for the session + """ + ... 
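The ab-initio stage follows the same setup/enqueue pattern. A hedged sketch under the same assumptions (`api` is an `APIClient`, "P3"/"S1" are placeholder UIDs):

>>> abinit_job = api.sessions.setup_phase2_abinit("P3", "S1")
>>> api.sessions.enqueue_phase2_abinit("P3", "S1")
>>> api.sessions.clear_phase2_abinit("P3", "S1")  # discard the streaming job again if needed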
+ def select_phase2_abinit_volume(self, project_uid: str, session_uid: str, /, *, volume_name: str) -> Session: + """ + Selects a volume for an Ab-Initio Reconstruction job in a session + """ + ... + def stop_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Session: + """ + Stops an Ab-Initio Reconstruction job for a session. + """ + ... + def clear_phase2_refine(self, project_uid: str, session_uid: str, /) -> Session: + """ + Clears streaming Homogenous Refinement job for a session + """ + ... def setup_phase2_refine(self, project_uid: str, session_uid: str, /) -> Job: ... - def enqueue_phase2_refine(self, project_uid: str, session_uid: str, /) -> Job: ... - def stop_phase2_refine(self, project_uid: str, session_uid: str, /) -> Session: ... - def update_phase2_refine_params(self, project_uid: str, session_uid: str, /, body: LiveRefineParams) -> Session: ... + def enqueue_phase2_refine(self, project_uid: str, session_uid: str, /) -> Job: + """ + Enqueues a streaming Homogenous Refinement job for a session + """ + ... + def stop_phase2_refine(self, project_uid: str, session_uid: str, /) -> Session: + """ + Stops a streaming Homogenous Refinement job for a session + """ + ... + def update_phase2_refine_params(self, project_uid: str, session_uid: str, /, body: LiveRefineParams) -> Session: + """ + Updates parameters for a streaming Homogenous Refinement job for a session + """ + ... def create_and_enqueue_dump_particles( self, project_uid: str, @@ -825,12 +1533,28 @@ class SessionsNamespace(APINamespace): num_mics: Optional[int] = ..., uid_lte: Optional[int] = ..., test_only: bool = False, - ) -> Job: ... + ) -> Job: + """ + Creates and enqueues a dump particles job for a session + """ + ... def create_and_enqueue_dump_exposures( self, project_uid: str, session_uid: str, /, *, export_ignored: bool = False - ) -> Job: ... - def get_data_management_stats(self, project_uid: str, /) -> Dict[str, DataManagementStats]: ... - def mark_session_completed(self, project_uid: str, session_uid: str, /) -> Session: ... + ) -> Job: + """ + Creates and enqueues a dump exposures job for a session + """ + ... + def get_data_management_stats(self, project_uid: str, /) -> Dict[str, DataManagementStats]: + """ + Gets the data management stats of all sessions in a project. + """ + ... + def mark_session_completed(self, project_uid: str, session_uid: str, /) -> Session: + """ + Marks the session as completed + """ + ... def change_session_data_management_state( self, project_uid: str, @@ -839,54 +1563,118 @@ class SessionsNamespace(APINamespace): *, datatype: Literal["micrographs", "raw", "particles", "metadata", "thumbnails"], status: Literal["active", "archiving", "archived", "deleted", "deleting", "missing", "calculating"], - ) -> Session: ... - def update_session_datatype_sizes(self, project_uid: str, session_uid: str, /) -> int: ... + ) -> Session: + """ + Updates data management status of a session's datatype + """ + ... + def update_session_datatype_sizes(self, project_uid: str, session_uid: str, /) -> int: + """ + Updates the session's data_management information with the current size of each datatype. + """ + ... def get_datatype_size( self, project_uid: str, session_uid: str, datatype: Literal["micrographs", "raw", "particles", "metadata", "thumbnails"], /, - ) -> int: ... + ) -> int: + """ + Gets the total size of a datatype inside a session in bytes. + """ + ... 
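A short sketch of the data-management helpers above (same assumptions: `api` is an `APIClient`, UIDs are placeholders):

>>> api.sessions.mark_session_completed("P3", "S1")
>>> api.sessions.get_datatype_size("P3", "S1", "particles")  # size in bytes
>>> stats = api.sessions.get_data_management_stats("P3")

Note that treating the returned dictionary as keyed by session UID is an assumption; the stub only guarantees a Dict[str, DataManagementStats].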
def delete_live_datatype( self, project_uid: str, session_uid: str, datatype: Literal["micrographs", "raw", "particles", "metadata", "thumbnails"], /, - ) -> Job | None: ... - def update_all_sessions_datatype_sizes(self, project_uid: str, /) -> None: ... + ) -> Job | None: + """ + Deletes a specific datatype inside a session. + """ + ... + def update_all_sessions_datatype_sizes(self, project_uid: str, /) -> None: + """ + Asynchronously updates the datatype sizes of all sessions within a project + """ + ... def get_datatype_file_paths( self, project_uid: str, session_uid: str, datatype: Literal["micrographs", "raw", "particles", "metadata", "thumbnails"], /, - ) -> List[str]: ... - def get_configuration_profiles(self) -> List[SessionConfigProfile]: ... - def create_configuration_profile(self, body: SessionConfigProfileBody) -> SessionConfigProfile: ... - def apply_configuration_profile( - self, project_uid: str, session_uid: str, /, *, configuration_id: str - ) -> Session: ... + ) -> List[str]: + """ + Gets all the file paths associated with a specific datatype inside a session as a list + """ + ... + def get_configuration_profiles(self) -> List[SessionConfigProfile]: + """ + Gets all session configuration profiles + """ + ... + def create_configuration_profile(self, body: SessionConfigProfileBody) -> SessionConfigProfile: + """ + Creates a session configuration profile + """ + ... + def apply_configuration_profile(self, project_uid: str, session_uid: str, /, *, configuration_id: str) -> Session: + """ + Applies a configuration profile to a session, overwriting its resources, parameters, and exposure group + """ + ... def update_configuration_profile( self, configuration_id: str, /, body: SessionConfigProfileBody - ) -> SessionConfigProfile: ... - def delete_configuration_profile(self, configuration_id: str, /) -> None: ... - def compact_session(self, project_uid: str, session_uid: str, /) -> Any: ... - def restore_session(self, project_uid: str, session_uid: str, /, body: LiveComputeResources) -> Any: ... - def get_session_base_params(self) -> Any: ... + ) -> SessionConfigProfile: + """ + Updates a configuration profile + """ + ... + def delete_configuration_profile(self, configuration_id: str, /) -> None: + """ + Deletes a configuration profile + """ + ... + def compact_session(self, project_uid: str, session_uid: str, /) -> Any: + """ + Compacts a session by clearing each exposure group and its related files for each exposure in the session. + Also clears gridfs data. + """ + ... + def restore_session(self, project_uid: str, session_uid: str, /, body: LiveComputeResources) -> Any: + """ + Restores exposures of a compacted session. Starts phase 1 worker(s) on the specified lane to restore each exposure by re-processing starting from motion correction, skipping the + picking stage. + """ + ... + def get_session_base_params(self) -> Any: + """ + Gets base session parameters + """ + ... class ProjectsNamespace(APINamespace): """ Methods available in api.projects, e.g., api.projects.check_directory(...) """ - def check_directory(self, *, path: str) -> str: ... - def get_title_slug(self, *, title: str) -> str: ... + def check_directory(self, *, path: str) -> str: + """ + Checks if a candidate project directory exists, and if it is readable and writeable. + """ + ... + def get_title_slug(self, *, title: str) -> str: + """ + Returns a slugified version of a project title. + """ + ... 
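Both helpers are plain string utilities that are useful before calling `create`. A sketch, assuming `api` is an `APIClient`; the path and title are made up:

>>> api.projects.check_directory(path="/data/cryosparc_projects")
>>> api.projects.get_title_slug(title="My New Project")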
def find( self, *, sort: str = "created_at", - order: Literal[1, -1] = 1, + order: int = 1, uid: Optional[List[str]] = ..., project_dir: Optional[str] = ..., owner_user_id: Optional[str] = ..., @@ -894,8 +1682,17 @@ class ProjectsNamespace(APINamespace): archived: Optional[bool] = ..., detached: Optional[bool] = ..., hidden: Optional[bool] = ..., - ) -> List[Project]: ... - def create(self, *, title: str, description: Optional[str] = ..., parent_dir: str) -> Project: ... + ) -> List[Project]: + """ + Finds projects matching the filter. + """ + ... + def create(self, *, title: str, description: Optional[str] = ..., parent_dir: str) -> Project: + """ + Creates a new project, project directory and creates a new document in + the project collection + """ + ... def count( self, *, @@ -906,38 +1703,181 @@ class ProjectsNamespace(APINamespace): archived: Optional[bool] = ..., detached: Optional[bool] = ..., hidden: Optional[bool] = ..., - ) -> int: ... - def set_title(self, project_uid: str, /, *, title: str) -> Project: ... - def set_description(self, project_uid: str, /, description: str) -> Project: ... - def view(self, project_uid: str, /) -> Project: ... - def get_job_register(self, project_uid: str, /) -> JobRegister: ... - def preview_delete(self, project_uid: str, /) -> DeleteProjectPreview: ... - def find_one(self, project_uid: str, /) -> Project: ... - def delete(self, project_uid: str, /) -> None: ... - def delete_async(self, project_uid: str, /) -> Any: ... - def get_directory(self, project_uid: str, /) -> str: ... - def get_owner_id(self, project_uid: str, /) -> str: ... - def set_owner(self, project_uid: str, user_id: str, /) -> Project: ... - def add_user_access(self, project_uid: str, user_id: str, /) -> Project: ... - def remove_user_access(self, project_uid: str, user_id: str, /) -> Project: ... - def refresh_size(self, project_uid: str, /) -> Project: ... - def refresh_size_async(self, project_uid: str, /) -> Any: ... - def get_symlinks(self, project_uid: str, /) -> List[ProjectSymlink]: ... - def set_default_param(self, project_uid: str, name: str, /, value: Union[bool, int, float, str]) -> Project: ... - def clear_default_param(self, project_uid: str, name: str, /) -> Project: ... + ) -> int: + """ + Counts the number of projects matching the filter. + """ + ... + def set_title(self, project_uid: str, /, *, title: str) -> Project: + """ + Sets the title of a project. + """ + ... + def set_description(self, project_uid: str, /, description: str) -> Project: + """ + Sets the description of a project. + """ + ... + def view(self, project_uid: str, /) -> Project: + """ + Adds a project uid to a user's recently viewed projects list. + """ + ... + def mkdir(self, project_uid: str, /, *, parents: bool = False, exist_ok: bool = False, path: str = "") -> str: + """ + Create a directory in the project directory at the given path. + """ + ... + def cp(self, project_uid: str, /, *, source: str, path: str = "") -> str: + """ + Copy the source file or directory to the project directory at the given + path. Returns the absolute path of the copied file. + """ + ... + def symlink(self, project_uid: str, /, *, source: str, path: str = "") -> str: + """ + Create a symlink from the source path in the project directory at the given path. + """ + ... + def upload_file(self, project_uid: str, /, stream: Stream, *, overwrite: bool = False, path: str = "") -> str: + """ + Upload a file to the project directory at the given path. Returns absolute + path of the uploaded file. 
+ + Path may be specified as a filename, a relative path inside the project + directory, or a full absolute path. + """ + ... + def download_file(self, project_uid: str, /, *, path: str = "") -> Stream: + """ + Download a file from the project directory at the given path. + """ + ... + def ls(self, project_uid: str, /, *, recursive: bool = False, path: str = "") -> List[str]: + """ + List files in the project directory. Note that enabling recursive will + include parent directories in the result. + """ + ... + def get_job_register(self, project_uid: str, /) -> JobRegister: + """ + Gets the job register model for the project. The same for every project. + """ + ... + def preview_delete(self, project_uid: str, /) -> DeleteProjectPreview: + """ + Retrieves the workspaces and jobs that would be affected when the project is deleted. + """ + ... + def find_one(self, project_uid: str, /) -> Project: + """ + Finds a project by its UID + """ + ... + def delete(self, project_uid: str, /) -> None: + """ + Deletes the project, its full directory, and all associated workspaces, sessions, jobs and results. + """ + ... + def delete_async(self, project_uid: str, /) -> Any: + """ + Starts project deletion task. Will delete the project, its full directory, and all associated workspaces, sessions, jobs and results. + """ + ... + def get_directory(self, project_uid: str, /) -> str: + """ + Gets the project's absolute directory with all environment variables in the + path resolved + """ + ... + def get_owner_id(self, project_uid: str, /) -> str: + """ + Get user account ID for the owner of a project. + """ + ... + def set_owner(self, project_uid: str, user_id: str, /) -> Project: + """ + Sets owner of the project to the user + """ + ... + def add_user_access(self, project_uid: str, user_id: str, /) -> Project: + """ + Grants access to another user to view and edit the project. + May only be called by project owners and administrators. + """ + ... + def remove_user_access(self, project_uid: str, user_id: str, /) -> Project: + """ + Removes a user's access from a project. + """ + ... + def refresh_size(self, project_uid: str, /) -> Project: + """ + Walks the project directory and update the project size with the sum + of all the file sizes. + """ + ... + def refresh_size_async(self, project_uid: str, /) -> Any: + """ + Starts project size recalculation asynchronously. + """ + ... + def get_symlinks(self, project_uid: str, /) -> List[ProjectSymlink]: + """ + Gets all symbolic links in the project directory + """ + ... + def set_default_param(self, project_uid: str, name: str, /, value: Union[bool, int, float, str]) -> Project: + """ + Sets a default value for a parameter name globally for the project + """ + ... + def clear_default_param(self, project_uid: str, name: str, /) -> Project: + """ + Clears the per-project default value for a parameter name. + """ + ... def claim_instance_ownership(self, project_uid: str, /, *, force: bool = False) -> None: ... - def claim_all_instance_ownership(self, *, force: bool = False) -> None: ... - def archive(self, project_uid: str, /) -> Project: ... - def unarchive(self, project_uid: str, /, *, path: str) -> Project: ... - def detach(self, project_uid: str, /) -> Project: ... - def attach(self, *, path: str) -> Project: ... + def claim_all_instance_ownership(self, *, force: bool = False) -> None: + """ + Claims ownership of all projects in instance. Call when upgrading from an older CryoSPARC version that did not support project locks. + """ + ... 
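The project file-system endpoints (`mkdir`, `cp`, `symlink`, `upload_file`, `download_file`, `ls`) all resolve paths relative to the project directory. A hedged sketch, assuming `api` is an `APIClient` and that `Stream.load` accepts a local file path as it does elsewhere in this patch:

>>> from cryosparc.stream import Stream
>>> api.projects.mkdir("P3", path="notes", exist_ok=True)
>>> api.projects.upload_file("P3", Stream.load("readme.txt"), path="notes/readme.txt")
>>> api.projects.ls("P3", path="notes")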
+ def archive(self, project_uid: str, /) -> Project: + """ + Archives a project. This means that the project can no longer be modified + and jobs cannot be created or run. Once archived, the project directory may + be safely moved to long-term storage. + """ + ... + def unarchive(self, project_uid: str, /, *, path: str) -> Project: + """ + Reverses archive operation. + """ + ... + def detach(self, project_uid: str, /) -> Project: + """ + Detaches a project, removing its lockfile. This hides the project from the interface and allows other + instances to attach and run this project. + """ + ... + def attach(self, *, path: str) -> Project: + """ + Attaches a project directory at a specified path and writes a new + lockfile. Must be run on a project directory without a lockfile. + """ + ... def move(self, project_uid: str, /, *, path: str) -> Project: """ - Rename the project directory for the given project. Provide either the new + Renames the project directory for a project. Provide either the new directory name or the full new directory path. """ ... - def get_next_exposure_group_id(self, project_uid: str, /) -> int: ... + def get_next_exposure_group_id(self, project_uid: str, /) -> int: + """ + Gets next exposure group ID + """ + ... def cleanup_data( self, project_uid: str, @@ -950,29 +1890,93 @@ class ProjectsNamespace(APINamespace): clear_categories: List[Category] = [], clear_types: List[str] = [], clear_statuses: List[JobStatus] = [], - ) -> int: ... - def add_tag(self, project_uid: str, tag_uid: str, /) -> None: ... - def remove_tag(self, project_uid: str, tag_uid: str, /) -> None: ... - def get_generate_intermediate_results_settings( - self, project_uid: str, / - ) -> GenerateIntermediateResultsSettings: ... + ) -> Any: + """ + Cleanup project or workspace data, clearing/deleting jobs based on final result status, sections, types, or job status + """ + ... + def add_tag(self, project_uid: str, tag_uid: str, /) -> None: + """ + Tags a project with the given tag. + """ + ... + def remove_tag(self, project_uid: str, tag_uid: str, /) -> None: + """ + Removes the given tag from a project. + """ + ... + def get_generate_intermediate_results_settings(self, project_uid: str, /) -> GenerateIntermediateResultsSettings: + """ + Gets generate intermediate result settings. + """ + ... def set_generate_intermediate_results_settings( self, project_uid: str, /, body: GenerateIntermediateResultsSettings - ) -> Project: ... - def clear_intermediate_results(self, project_uid: str, /, *, always_keep_final: bool = True) -> Project: ... - def get_generate_intermediate_results_job_types(self) -> List[str]: ... + ) -> Project: + """ + Sets settings for intermediate result generation. + """ + ... + def clear_intermediate_results(self, project_uid: str, /, *, always_keep_final: bool = True) -> Any: + """ + Removes intermediate results from the project. + """ + ... + def get_generate_intermediate_results_job_types(self) -> List[str]: + """ + Gets intermediate result job types + """ + ... + def star_project(self, project_uid: str, /) -> Project: + """ + Stars a project for a user + """ + ... + def unstar_project(self, project_uid: str, /) -> Project: + """ + Unstars a project for a user + """ + ... class ExposuresNamespace(APINamespace): """ Methods available in api.exposures, e.g., api.exposures.reset_manual_reject_exposures(...) """ - def reset_manual_reject_exposures(self, project_uid: str, session_uid: str, /) -> List[Exposure]: ... 
- def reset_all_exposures(self, project_uid: str, session_uid: str, /) -> None: ... - def reset_failed_exposures(self, project_uid: str, session_uid: str, /) -> None: ... - def reset_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: ... - def manual_reject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: ... - def manual_unreject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: ... - def toggle_manual_reject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: ... + def reset_manual_reject_exposures(self, project_uid: str, session_uid: str, /) -> List[Exposure]: + """ + Resets manual rejection status on all exposures in a session. + """ + ... + def reset_all_exposures(self, project_uid: str, session_uid: str, /) -> None: + """ + Resets all exposures in a session to initial state. + """ + ... + def reset_failed_exposures(self, project_uid: str, session_uid: str, /) -> None: + """ + Resets all failed exposures in a session to initial state. + """ + ... + def reset_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: + """ + Resets exposure to intial state. + """ + ... + def manual_reject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: + """ + Manually rejects exposure. + """ + ... + def manual_unreject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: + """ + Manually unrejects exposure. + """ + ... + def toggle_manual_reject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: + """ + Toggles manual rejection state on exposure. + """ + ... def reprocess_single_exposure( self, project_uid: str, @@ -981,11 +1985,21 @@ class ExposuresNamespace(APINamespace): /, body: LivePreprocessingParams, *, - picker_type: Literal["blob", "template", "deep"], - ) -> Exposure: ... + picker_type: Literal["blob", "template"], + ) -> Exposure: + """ + Reprocesses a single micrograph with the passed parameters. If there is a test micrograph + in the session already that is not the same micrograph that the user is currently trying to test, it will be reset + back to the "ctf" stage without the test flag. + """ + ... def add_manual_pick( self, project_uid: str, session_uid: str, exposure_uid: int, /, *, x_frac: float, y_frac: float - ) -> Exposure: ... + ) -> Exposure: + """ + Adds a manual pick to an exposure. + """ + ... def remove_manual_pick( self, project_uid: str, @@ -996,7 +2010,11 @@ class ExposuresNamespace(APINamespace): x_frac: float, y_frac: float, dist_frac: float = 0.02, - ) -> Exposure: ... + ) -> Exposure: + """ + Removes manual pick from an exposure + """ + ... def get_individual_picks( self, project_uid: str, @@ -1004,7 +2022,11 @@ class ExposuresNamespace(APINamespace): exposure_uid: int, picker_type: Literal["blob", "template", "manual"], /, - ) -> List[List[float]]: ... + ) -> List[List[float]]: + """ + Gets list of picks from an exposure + """ + ... class TagsNamespace(APINamespace): """ @@ -1018,7 +2040,11 @@ class TagsNamespace(APINamespace): created_by_user_id: Optional[str] = ..., type: Optional[List[Literal["general", "project", "workspace", "session", "job"]]] = ..., uid: Optional[str] = ..., - ) -> List[Tag]: ... + ) -> List[Tag]: + """ + Finds tags that match the given query. + """ + ... 
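Exposure curation takes the numeric exposure UID positionally and addresses picks by fractional coordinates; tags are queried through `api.tags`. A sketch with placeholder UIDs and coordinates, assuming `api` is an `APIClient`:

>>> api.exposures.manual_reject_exposure("P3", "S1", 42)
>>> api.exposures.add_manual_pick("P3", "S1", 42, x_frac=0.25, y_frac=0.75)
>>> api.tags.find(type=["session"])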
def create( self, *, @@ -1043,7 +2069,11 @@ class TagsNamespace(APINamespace): description: Optional[str] = ..., created_by_workflow: Optional[str] = ..., title: Optional[str], - ) -> Tag: ... + ) -> Tag: + """ + Creates a new tag + """ + ... def update( self, tag_uid: str, @@ -1068,16 +2098,36 @@ class TagsNamespace(APINamespace): ] = ..., description: Optional[str] = ..., title: Optional[str], - ) -> Tag: ... - def delete(self, tag_uid: str, /) -> None: ... - def get_tags_by_type(self) -> Dict[str, List[Tag]]: ... - def get_tag_count_by_type(self) -> Dict[str, int]: ... + ) -> Tag: + """ + Updates the title, colour and/or description of the given tag UID + """ + ... + def delete(self, tag_uid: str, /) -> None: + """ + Deletes a given tag + """ + ... + def get_tags_by_type(self) -> Dict[str, List[Tag]]: + """ + Gets all tags as a dictionary, where the types are the keys + """ + ... + def get_tag_count_by_type(self) -> Dict[str, int]: + """ + Gets a count of all tags by type + """ + ... class NotificationsNamespace(APINamespace): """ Methods available in api.notifications, e.g., api.notifications.deactivate_notification(...) """ - def deactivate_notification(self, notification_id: str, /) -> Notification: ... + def deactivate_notification(self, notification_id: str, /) -> Notification: + """ + Deactivates a notification + """ + ... class BlueprintsNamespace(APINamespace): """ @@ -1177,6 +2227,7 @@ class APIClient: cache: CacheNamespace users: UsersNamespace resources: ResourcesNamespace + assets: AssetsNamespace jobs: JobsNamespace workspaces: WorkspacesNamespace sessions: SessionsNamespace @@ -1217,6 +2268,11 @@ class APIClient: ... def keycloak_login(self, *, keycloak_access_token: str) -> Token: ... def verify_app_session(self, body: AppSession) -> str: ... + def job_register(self) -> JobRegister: + """ + Get a specification of available job types and their schemas. + """ + ... def start_and_migrate(self, *, license_id: str) -> Any: """ Start up CryoSPARC for the first time and perform database migrations diff --git a/cryosparc/command.py b/cryosparc/command.py deleted file mode 100644 index 7d25a6e8..00000000 --- a/cryosparc/command.py +++ /dev/null @@ -1,271 +0,0 @@ -""" -Provides classes and functions for communicating with CryoSPARC's command -servers. Generally should not be used directly. -""" - -import json -import os -import socket -import time -import uuid -from contextlib import contextmanager -from typing import Optional, Type -from urllib.error import HTTPError, URLError -from urllib.parse import urlencode -from urllib.request import Request, urlopen -from warnings import warn - -from .errors import CommandError - -MAX_ATTEMPTS = int(os.getenv("CRYOSPARC_COMMAND_RETRIES", 3)) -RETRY_INTERVAL = int(os.getenv("CRYOSPARC_COMMAND_RETRY_SECONDS", 30)) - - -class CommandClient: - """ - Class for communicating with CryoSPARC's ``command_core``, - ``command_vis`` and ``command_rtp`` HTTP services. - - Upon initialization, gets a list of available JSONRPC_ endpoints and creates - corresponding instance methods for each one. Reference of available methods - for the ``command_core`` service (a.k.a. "cli") is available in the - `CryoSPARC Guide`_. - - Args: - service (str, optional): Label for CryoSPARC Command service that this - instance connects to and communicates with, e.g., ``command_core``, - ``command_vis`` or ``command_rtp`` - host (str, optional): Domain name or IP address of CryoSPARC master. - Defaults to "localhost". - port (int, optional): Command server base port. 
Defaults to 39002. - url (str, optional): Base URL path prefix for all requests (e.g., "/v1"). - Defaults to "". - timeout (int, optional): How long to wait for a request to complete - before timing out, in seconds. Defaults to 300. - headers (dict, optional): Default HTTP headers to send with every - request. Defaults to {}. - cls (Type[JSONEncoder], optional): Class to handle JSON encoding of - special Python objects, such as numpy arrays. Defaults to None. - - Attributes: - - service (str): label of CryoSPARC Command service this instance connects to - and communicates with - - Examples: - - Connect to ``command_core`` - - >>> from cryosparc.command import CommandClient - >>> cli = CommandClient( - ... host="csmaster", - ... port=39002, - ... headers={"License-ID": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"} - ... ) - - Queue a job - - >>> cli.enqueue_job(project_uid="P3", job_uid="J42", lane="csworker") - "launched" - - .. _JSONRPC: - https://www.jsonrpc.org - - .. _CryoSPARC Guide: - https://guide.cryosparc.com/setup-configuration-and-management/management-and-monitoring/cli - - """ - - Error = CommandError - service: str - - def __init__( - self, - service: str = "command", - host: str = "localhost", - port: int = 39002, - url: str = "", - timeout: int = 300, - headers: dict = {}, - cls: Optional[Type[json.JSONEncoder]] = None, - ): - self.service = service - self._url = f"http://{host}:{port}{url}" - self._cls = cls - self._timeout = timeout - self._headers = headers - self._reload() # attempt connection immediately to gather methods - - def _get_callable(self, key): - def func(*args, **kwargs): - params = kwargs if len(kwargs) else args - data = {"jsonrpc": "2.0", "method": key, "params": params, "id": str(uuid.uuid4())} - res = None - try: - with make_json_request(self, "/api", data=data, _stacklevel=4) as request: - res = json.loads(request.read()) - except CommandError as err: - raise CommandError( - f'Encounted error from JSONRPC function "{key}" with params {params}', - url=self._url, - code=err.code, - data=err.data, - ) from err - - if not res: - raise CommandError( - f'JSON response not received from JSONRPC function "{key}" with params {params}', - url=self._url, - ) - elif "error" in res: - error = res["error"] - raise CommandError( - f'Encountered {error.get("name", "Error")} from JSONRPC function "{key}" with params {params}:\n' - f"{format_server_error(error)}", - url=self._url, - code=error.get("code"), - data=error.get("data"), - ) - else: - return res["result"] # OK - - return func - - def _reload(self): - system = self._get_callable("system.describe")() - self._endpoints = [p["name"] for p in system["procs"]] - for key in self._endpoints: - setattr(self, key, self._get_callable(key)) - - def __call__(self): - self._reload() - - -@contextmanager -def make_request( - client: CommandClient, - method: str = "POST", - url: str = "", - *, - query: dict = {}, - data=None, - headers: dict = {}, - _stacklevel=2, # controls warning line number -): - """ - Create a raw HTTP request/response context with the given command client. - - Args: - client (CommandClient): command client instance - method (str, optional): HTTP method. Defaults to "POST". - url (str, optional): URL to append to the client's initialized URL. Defaults to "". - query (dict, optional): Query string parameters. Defaults to {}. - data (any, optional): Request body data. Usually in binary. Defaults to None. - headers (dict, optional): HTTP headers. Defaults to {}. 
- - Raises: - CommandError: General error such as timeout, URL or HTTP - - Yields: - http.client.HTTPResponse: Use with a context manager to get HTTP response - - Example: - - >>> from cryosparc.command import CommandClient, make_request - >>> cli = CommandClient() - >>> with make_request(cli, url="/download_file", query={'path': '/file.txt'}) as response: - ... data = response.read() - - """ - url = f"{client._url}{url}{'?' + urlencode(query) if query else ''}" - headers = {"Originator": "client", **client._headers, **headers} - attempt = 1 - error_reason = "" - code = 500 - resdata = None - while attempt < MAX_ATTEMPTS: - request = Request(url, data=data, headers=headers, method=method) - response = None - try: - with urlopen(request, timeout=client._timeout) as response: - yield response - return - except HTTPError as error: # command server reported an error - code = error.code - error_reason = ( - f"HTTP Error {error.code} {error.reason}; " - f"please check cryosparcm log {client.service} for additional information." - ) - if error.readable(): - resdata = error.read() - error_reason += f"\nResponse from server: {resdata}" - if resdata and error.headers.get_content_type() == "application/json": - resdata = json.loads(resdata) - - warn(f"*** {type(client).__name__}: ({url}) {error_reason}", stacklevel=_stacklevel) - break - except URLError as error: # command server may be down - error_reason = f"URL Error {error.reason}" - warn( - f"*** {type(client).__name__}: ({url}) {error_reason}, attempt {attempt} of {MAX_ATTEMPTS}. " - f"Retrying in {RETRY_INTERVAL} seconds", - stacklevel=_stacklevel, - ) - time.sleep(RETRY_INTERVAL) - attempt += 1 - except (TimeoutError, socket.timeout): # slow network connection or request - error_reason = "Timeout Error" - warn( - f"*** {type(client).__name__}: command ({url}) " - f"did not reply within timeout of {client._timeout} seconds, " - f"attempt {attempt} of {MAX_ATTEMPTS}", - stacklevel=_stacklevel, - ) - attempt += 1 - - raise CommandError(error_reason, url=url, code=code, data=resdata) - - -def make_json_request(client: CommandClient, url="", *, query={}, data=None, headers={}, _stacklevel=3): - """ - Similar to ``make_request``, except sends request body data JSON and - receives arbitrary response. - - Args: - client (CommandClient): command client instance - url (str, optional): URL path to append to the client's initialized root - URL. Defaults to "". - query (dict, optional): Query string parameters. Defaults to {}. - data (any, optional): JSON-encodable request body. Defaults to None. - headers (dict, optional): HTTP headers. Defaults to {}. - - Yields: - http.client.HTTPResponse: Use with a context manager to get HTTP response - - Raises: - CommandError: General error such as timeout, URL or HTTP - - Example: - - >>> from cryosparc.command import CommandClient, make_json_request - >>> cli = CommandClient() - >>> with make_json_request(cli, url="/download_file", data={'path': '/file.txt'}) as response: - ... 
data = response.read() - - """ - headers = {"Content-Type": "application/json", **headers} - data = json.dumps(data, cls=client._cls).encode() - return make_request(client, url=url, query=query, data=data, headers=headers, _stacklevel=_stacklevel) - - -def format_server_error(error): - """ - :meta private: - """ - err = error["message"] if "message" in error else str(error) - if "data" in error and error["data"]: - if isinstance(error["data"], dict) and "traceback" in error["data"]: - err += "\n" + error["data"]["traceback"] - else: - err += "\n" + str(error["data"]) - return err diff --git a/cryosparc/constants.py b/cryosparc/constants.py new file mode 100644 index 00000000..5c5c5e30 --- /dev/null +++ b/cryosparc/constants.py @@ -0,0 +1,9 @@ +ONE_MIB = 2**20 +""" +Bytes in 1 mebibyte +""" + +EIGHT_MIB = 2**23 +""" +Bytes in 8 mebibytes +""" diff --git a/cryosparc/controller.py b/cryosparc/controller.py new file mode 100644 index 00000000..9d039e09 --- /dev/null +++ b/cryosparc/controller.py @@ -0,0 +1,82 @@ +""" +Core base classes and utilities for other cryosparc-tools modules. +""" + +# NOTE: This file should only include utilities required only by cryosparc-tools +# CryoSPARC should not depend on anything in this file. +import warnings +from abc import ABC, abstractmethod +from typing import Any, Dict, Generic, Optional, TypeVar, Union + +from pydantic import BaseModel + +from .models.job_spec import InputSlot, OutputSlot +from .spec import SlotSpec + +# API model +M = TypeVar("M", bound=BaseModel) + + +class Controller(ABC, Generic[M]): + """ + Abstract base class for Project, Workspace, Job classes and any other types + that have underlying Mongo database documents. + + Generic type argument D is a typed dictionary definition for a Mongo + document. + + :meta private: + """ + + _model: Optional[M] = None + + @property + def model(self) -> M: + """ + Representation of entitity data. Contents may change in CryoSPARC + over time, use use :py:meth:`refresh` to update. + """ + if not self._model: + self.refresh() + assert self._model, "Could not refresh database document" + return self._model + + @model.setter + def model(self, model: M): + self._model = model + + @model.deleter + def model(self): + self._model = None + + @property + def doc(self) -> Dict[str, Any]: + warnings.warn(".doc attribute is deprecated. Use .model attribute instead.", DeprecationWarning, stacklevel=2) + return self.model.model_dump(by_alias=True) + + @abstractmethod + def refresh(self): + # Must be implemented in subclasses + return self + + +def as_input_slot(spec: Union[SlotSpec, InputSlot]) -> InputSlot: + if isinstance(spec, str): + spec, required = (spec[1:], False) if spec[0] == "?" 
else (spec, True) + return InputSlot(name=spec, dtype=spec, required=required) + elif isinstance(spec, dict) and "dtype" in spec: + dtype = spec["dtype"] + name = spec.get("name") or spec.get("prefix") or dtype + required = spec.get("required", True) + return InputSlot(name=name, dtype=dtype, required=required) + return spec + + +def as_output_slot(spec: Union[SlotSpec, OutputSlot]) -> OutputSlot: + if isinstance(spec, str): + return OutputSlot(name=spec, dtype=spec) + elif isinstance(spec, dict) and "dtype" in spec: + dtype = spec["dtype"] + name = spec.get("name") or spec.get("prefix") or dtype + return OutputSlot(name=name, dtype=dtype) + return spec diff --git a/cryosparc/dataset.py b/cryosparc/dataset.py index c32d10fa..49cce16f 100644 --- a/cryosparc/dataset.py +++ b/cryosparc/dataset.py @@ -33,8 +33,8 @@ Callable, Collection, Dict, - Generator, Generic, + Iterator, List, Literal, Mapping, @@ -69,7 +69,7 @@ ) from .errors import DatasetLoadError from .row import R, Row, Spool -from .stream import AsyncBinaryIO, Streamable +from .stream import AsyncReadable, Streamable from .util import bopen, default_rng, random_integers, u32bytesle, u32intle if TYPE_CHECKING: @@ -108,7 +108,9 @@ class Dataset(Streamable, MutableMapping[str, Column], Generic[R]): """ Accessor class for working with CryoSPARC .cs files. - A dataset may be initialized with ``Dataset(data)`` where ``data`` is + Load a dataset from disk with ``Dataset.load("/path/to/dataset.cs")``. + + Initialize a new dataset with ``Dataset(data)`` where ``data`` is one of the following: * A size of items to allocate (e.g., 42) @@ -570,6 +572,7 @@ def load( prefixes: Optional[Sequence[str]] = None, fields: Optional[Sequence[str]] = None, cstrs: bool = False, + media_type: Optional[str] = None, # for interface, otherwise unused ): """ Read a dataset from path or file handle. @@ -720,7 +723,13 @@ def _load_stream( return dset @classmethod - async def from_async_stream(cls, stream: AsyncBinaryIO): + async def from_async_stream(cls, stream: AsyncReadable, *, media_type: Optional[str] = None): + prefix = await stream.read(6) + if prefix != FORMAT_MAGIC_PREFIXES[CSDAT_FORMAT]: + raise DatasetLoadError( + f"Incorrect async dataset stream format {prefix}. " + "Only CSDAT-formatted datasets may be loaded as async streams" + ) headersize = u32intle(await stream.read(4)) header = decode_dataset_header(await stream.read(headersize)) @@ -751,7 +760,7 @@ async def from_async_stream(cls, stream: AsyncBinaryIO): dset.to_pystrs() return dset - def save(self, file: Union[str, PurePath, IO[bytes]], format: int = DEFAULT_FORMAT): + def save(self, file: Union[str, PurePath, IO[bytes]], *, format: int = DEFAULT_FORMAT): """ Save a dataset to the given path or I/O buffer. @@ -779,7 +788,7 @@ def save(self, file: Union[str, PurePath, IO[bytes]], format: int = DEFAULT_FORM else: raise TypeError(f"Invalid dataset save format for {file}: {format}") - def stream(self, compression: Literal["lz4", None] = None) -> Generator[bytes, None, None]: + def stream(self, compression: Literal["lz4", None] = None) -> Iterator[bytes]: """ Generate a binary representation for this dataset. Results may be written to a file or buffer to be sent over the network. 
diff --git a/cryosparc/dtype.py b/cryosparc/dtype.py index 88818f5f..5bf5cce2 100644 --- a/cryosparc/dtype.py +++ b/cryosparc/dtype.py @@ -3,41 +3,17 @@ """ import json -from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Sequence, Tuple, Type, TypedDict, Union +from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Sequence, Type, TypedDict, Union import numpy as n from .core import Data, DsetType +from .errors import DatasetLoadError +from .spec import DType, Field if TYPE_CHECKING: from numpy.typing import DTypeLike, NDArray -Shape = Tuple[int, ...] -"""A numpy shape tuple from ndarray.shape""" - -DType = Union[str, Tuple[str, Shape]] -""" - - Can just be a single string such as "f4", "3u4" or "O". - A datatype description of a ndarray entry. - - Can also be the a tuple with a string datatype name and its shape. For - example, the following dtypes are equivalent. - - - "3u4" - - " DatasetHeader: compressed_fields=compressed_fields, ) except Exception as e: - raise ValueError(f"Incorrect dataset field format: {data.decode() if isinstance(data, bytes) else data}") from e + raise DatasetLoadError( + f"Incorrect dataset field format: {data.decode() if isinstance(data, bytes) else data}" + ) from e diff --git a/cryosparc/errors.py b/cryosparc/errors.py index 0a7b377f..e024bf8d 100644 --- a/cryosparc/errors.py +++ b/cryosparc/errors.py @@ -3,9 +3,7 @@ """ import json -from typing import TYPE_CHECKING, Any, List, TypedDict - -from .spec import Datafield, Datatype, SlotSpec +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from httpx import Response @@ -48,71 +46,6 @@ def __str__(self): return s -class CommandError(Exception): - """ - Raised by failed request to a CryoSPARC command server. - """ - - code: int - data: Any - - def __init__(self, reason: str, *args: object, url: str = "", code: int = 500, data: Any = None) -> None: - msg = f"*** ({url}, code {code}) {reason}" - super().__init__(msg, *args) - self.code = code - self.data = data - - -class SlotsValidation(TypedDict): - """ - Data from validation error when specifying external result input/output slots. - - :meta private: - """ - - type: Datatype - valid: List[SlotSpec] - invalid: List[Datafield] - valid_dtypes: List[str] - - -class InvalidSlotsError(ValueError): - """ - Raised by functions that accept slots arguments when CryoSPARC reports that - given slots are not valid. - """ - - def __init__(self, caller: str, validation: SlotsValidation): - type = validation["type"] - valid_slots = validation["valid"] - invalid_slots = validation["invalid"] - valid_dtypes = validation["valid_dtypes"] - msg = "\n".join( - [ - f"Unknown {type} slot dtype(s): {', '.join(s['dtype'] for s in invalid_slots)}. " - "Only the following slot dtypes are valid:", - "", - ] - + [f" - {t}" for t in valid_dtypes] - + [ - "", - "If adding a dynamic result such as alignments_class_#, specify a " - "slots=... argument with a full data field specification:", - "", - f" {caller}(... , slots=[", - " ...", - ] - + [f" {repr(s)}," for s in valid_slots] - + [ - " {'dtype': '', 'prefix': '%s', 'required': True}," % s["dtype"] - for s in invalid_slots - ] - + [" ...", " ])"] - ) - - return super().__init__(msg) - - class ExternalJobError(Exception): """ Raised during external job lifecycle failures diff --git a/cryosparc/job.py b/cryosparc/job.py index 3514033d..9323159f 100644 --- a/cryosparc/job.py +++ b/cryosparc/job.py @@ -2,37 +2,48 @@ Defines the Job and External job classes for accessing CryoSPARC jobs. 
""" -import json -import math import re -import urllib.parse +import traceback +import warnings from contextlib import contextmanager from io import BytesIO from pathlib import PurePath, PurePosixPath from time import sleep, time -from typing import IO, TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Pattern, Union, overload - -import numpy as n +from typing import ( + IO, + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Literal, + Optional, + Pattern, + Sequence, + Tuple, + Union, + overload, +) -from .command import CommandError, make_json_request, make_request +from .controller import Controller, as_input_slot, as_output_slot from .dataset import DEFAULT_FORMAT, Dataset -from .errors import ExternalJobError, InvalidSlotsError +from .errors import ExternalJobError +from .models.asset import GridFSAsset, GridFSFile +from .models.job import Job, JobStatus +from .models.job_spec import InputSpec, OutputSpec from .spec import ( ASSET_CONTENT_TYPES, IMAGE_CONTENT_TYPES, TEXT_CONTENT_TYPES, - AssetDetails, AssetFormat, Datatype, - EventLogAsset, ImageFormat, - JobDocument, - JobStatus, - MongoController, + LoadableSlots, SlotSpec, TextFormat, ) -from .util import bopen, first, print_table +from .stream import Stream +from .util import first, print_table if TYPE_CHECKING: from numpy.typing import ArrayLike, NDArray @@ -46,17 +57,20 @@ """ -class Job(MongoController[JobDocument]): +class JobController(Controller[Job]): """ Accessor class to a job in CryoSPARC with ability to load inputs and - outputs, add to job log, download job files. Should be instantiated - through `CryoSPARC.find_job`_ or `Project.find_job`_. + outputs, add to job log, download job files. Should be created with + :py:meth:`cs.find_job() ` or + :py:meth:`project.find_job() `. + + Arguments: + job (tuple[str, str] | Job): either _(Project UID, Job UID)_ tuple or + Job model, e.g. ``("P3", "J42")`` Attributes: - uid (str): Job unique ID, e.g., "J42" - project_uid (str): Project unique ID, e.g., "P3" - doc (JobDocument): All job data from the CryoSPARC database. Database - contents may change over time, use the `refresh`_ method to update. + model (Workspace): All job data from the CryoSPARC database. + Contents may change over time, use :py:method:`refresh` to update. Examples: @@ -82,44 +96,60 @@ class Job(MongoController[JobDocument]): >>> job.queue() >>> job.status "queued" + """ - .. _CryoSPARC.find_job: - tools.html#cryosparc.tools.CryoSPARC.find_job - - .. _Project.find_job: - project.html#cryosparc.project.Project.find_job - - .. _refresh: - #cryosparc.job.Job.refresh + uid: str + """ + Job unique ID, e.g., "J42" + """ + project_uid: str + """ + Project unique ID, e.g., "P3" """ - def __init__(self, cs: "CryoSPARC", project_uid: str, uid: str) -> None: + def __init__(self, cs: "CryoSPARC", job: Union[Tuple[str, str], Job]) -> None: self.cs = cs - self.project_uid = project_uid - self.uid = uid + if isinstance(job, tuple): + self.project_uid, self.uid = job + self.refresh() + else: + self.project_uid = job.project_uid + self.uid = job.uid + self.model = job @property def type(self) -> str: """ Job type key """ - return self.doc["job_type"] + return self.model.spec.type @property def status(self) -> JobStatus: """ JobStatus: scheduling status. """ - return self.doc["status"] + return self.model.status + + @property + def full_spec(self): + """ + The full specification for job inputs, outputs and parameters, as + defined in the job register. 
+ """ + spec = first(spec for spec in self.cs.job_register.specs if spec.type == self.type) + if not spec: + raise RuntimeError(f"Could not find job specification for type {type}") + return spec def refresh(self): """ Reload this job from the CryoSPARC database. Returns: - Job: self + JobController: self """ - self._doc = self.cs.cli.get_job(self.project_uid, self.uid) # type: ignore + self.model = self.cs.api.jobs.find_one(self.project_uid, self.uid) return self def dir(self) -> PurePosixPath: @@ -129,7 +159,7 @@ def dir(self) -> PurePosixPath: Returns: Path: job directory Pure Path instance """ - return PurePosixPath(self.cs.cli.get_job_dir_abs(self.project_uid, self.uid)) # type: ignore + return PurePosixPath(self.cs.api.jobs.get_directory(self.project_uid, self.uid)) def queue( self, @@ -140,13 +170,13 @@ def queue( ): """ Queue a job to a target lane. Available lanes may be queried with - `CryoSPARC.get_lanes`_. + `:py:meth:`cs.get_lanes() `. Optionally specify a hostname for a node or cluster in the given lane. Optionally specify specific GPUs indexes to use for computation. Available hostnames for a given lane may be queried with - `CryoSPARC.get_targets`_. + `:py:meth:`cs.get_targets() `. Args: lane (str, optional): Configuried compute lane to queue to. Leave @@ -172,36 +202,16 @@ def queue( >>> job.queue("worker") >>> job.status "queued" - - .. _CryoSPARC.get_lanes: - tools.html#cryosparc.tools.CryoSPARC.get_lanes - .. _CryoSPARC.get_targets: - tools.html#cryosparc.tools.CryoSPARC.get_targets """ if cluster_vars: - self.cs.cli.set_cluster_job_custom_vars( # type: ignore - project_uid=self.project_uid, - job_uid=self.uid, - cluster_job_custom_vars=cluster_vars, - ) - self.cs.cli.enqueue_job( # type: ignore - project_uid=self.project_uid, - job_uid=self.uid, - lane=lane, - user_id=self.cs.user_id, - hostname=hostname, - gpus=gpus if gpus else False, - ) - self.refresh() + self.cs.api.jobs.set_cluster_custom_vars(self.project_uid, self.uid, cluster_vars) + self.model = self.cs.api.jobs.enqueue(self.project_uid, self.uid, lane=lane, hostname=hostname, gpus=gpus) def kill(self): """ Kill this job. """ - self.cs.cli.kill_job( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, killed_by_user_id=self.cs.user_id - ) - self.refresh() + self.model = self.cs.api.jobs.kill(self.project_uid, self.uid) def wait_for_status(self, status: Union[JobStatus, Iterable[JobStatus]], *, timeout: Optional[int] = None) -> str: """ @@ -231,12 +241,10 @@ def wait_for_status(self, status: Union[JobStatus, Iterable[JobStatus]], *, time """ statuses = {status} if isinstance(status, str) else set(status) tic = time() - self.refresh() - while self.status not in statuses: + while self.refresh().status not in statuses: if timeout is not None and time() - tic > timeout: break sleep(5) - self.refresh() return self.status def wait_for_done(self, *, error_on_incomplete: bool = False, timeout: Optional[int] = None) -> str: @@ -271,8 +279,8 @@ def interact(self, action: str, body: Any = {}, *, timeout: int = 10, refresh: b refresh (bool, optional): If True, refresh the job document after posting. Defaults to False. """ - result: Any = self.cs.cli.interactive_post( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, endpoint=action, body=body, timeout=timeout + result: Any = self.cs.api.jobs.interactive_post( + self.project_uid, self.uid, body=body, endpoint=action, timeout=timeout ) if refresh: self.refresh() @@ -282,10 +290,9 @@ def clear(self): """ Clear this job and reset to building status. 
""" - self.cs.cli.clear_job(self.project_uid, self.uid) # type: ignore - self.refresh() + self.model = self.cs.api.jobs.clear(self.project_uid, self.uid) - def set_param(self, name: str, value: Any, *, refresh: bool = True) -> bool: + def set_param(self, name: str, value: Any, **kwargs) -> bool: """ Set the given param name on the current job to the given value. Only works if the job is in "building" status. @@ -293,8 +300,6 @@ def set_param(self, name: str, value: Any, *, refresh: bool = True) -> bool: Args: name (str): Param name, as defined in the job document's ``params_base``. value (any): Target parameter value. - refresh (bool, optional): Auto-refresh job document after - connecting. Defaults to True. Returns: bool: False if the job encountered a build error. @@ -308,14 +313,12 @@ def set_param(self, name: str, value: Any, *, refresh: bool = True) -> bool: >>> job.set_param("compute_num_gpus", 4) True """ - result: bool = self.cs.cli.job_set_param( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, param_name=name, param_new_value=value - ) - if refresh: - self.refresh() - return result + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) + self.model = self.cs.api.jobs.set_param(self.project_uid, self.uid, name, value=value) + return True - def connect(self, target_input: str, source_job_uid: str, source_output: str, *, refresh: bool = True) -> bool: + def connect(self, target_input: str, source_job_uid: str, source_output: str, **kwargs) -> bool: """ Connect the given input for this job to an output with given job UID and name. @@ -326,8 +329,6 @@ def connect(self, target_input: str, source_job_uid: str, source_output: str, *, source_job_uid (str): Job UID to connect from, e.g., "J42" source_output (str): Job output name to connect from , e.g., "particles" - refresh (bool, optional): Auto-refresh job document after - connecting. Defaults to True. Returns: bool: False if the job encountered a build error. @@ -343,17 +344,16 @@ def connect(self, target_input: str, source_job_uid: str, source_output: str, *, >>> job.connect("input_micrographs", "J2", "micrographs") """ - assert source_job_uid != self.uid, f"Cannot connect job {self.uid} to itself" - result: bool = self.cs.cli.job_connect_group( # type: ignore - project_uid=self.project_uid, - source_group=f"{source_job_uid}.{source_output}", - dest_group=f"{self.uid}.{target_input}", + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) + if source_job_uid == self.uid: + raise ValueError(f"Cannot connect job {self.uid} to itself") + self.model = self.cs.api.jobs.connect( + self.project_uid, self.uid, target_input, source_job_uid=source_job_uid, source_output_name=source_output ) - if refresh: - self.refresh() - return result + return True - def disconnect(self, target_input: str, connection_idx: Optional[int] = None, *, refresh: bool = True): + def disconnect(self, target_input: str, connection_idx: Optional[int] = None, **kwargs): """ Clear the given job input group. @@ -362,39 +362,26 @@ def disconnect(self, target_input: str, connection_idx: Optional[int] = None, *, connection_idx (int, optional): Connection index to clear. Set to 0 to clear the first connection, 1 for the second, etc. If unspecified, clears all connections. Defaults to None. - refresh (bool, optional): Auto-refresh job document after - connecting. Defaults to True. 
- """ - if connection_idx is None: - # Clear all input connections - input_group = first(group for group in self.doc["input_slot_groups"] if group["name"] == target_input) - if not input_group: - raise ValueError(f"Unknown input group {target_input} for job {self.project_uid}-{self.uid}") - for _ in input_group["connections"]: - self.cs.cli.job_connected_group_clear( # type: ignore - project_uid=self.project_uid, - dest_group=f"{self.uid}.{target_input}", - connect_idx=0, - ) - else: - self.cs.cli.job_connected_group_clear( # type: ignore - project_uid=self.project_uid, - dest_group=f"{self.uid}.{target_input}", - connect_idx=connection_idx, - ) + """ + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) - if refresh: - self.refresh() + if connection_idx is None: # Clear all input connections + self.model = self.cs.api.jobs.disconnect_all(self.project_uid, self.uid, target_input) + else: + self.model = self.cs.api.jobs.disconnect(self.project_uid, self.uid, target_input, connection_idx) - def load_input(self, name: str, slots: Iterable[str] = []): + def load_input(self, name: str, slots: LoadableSlots = "all"): """ Load the dataset connected to the job's input with the given name. Args: name (str): Input to load - fields (list[str], optional): List of specific slots to load, such - as ``movie_blob`` or ``locations``, or all slots if not - specified. Defaults to []. + slots (Literal["default", "passthrough", "all"] | list[str], optional): + List of specific slots to load, such as ``movie_blob`` or + ``locations``, or all slots if not specified (including + passthrough). May also specify as keyword. Defaults to + "all". Raises: TypeError: If the job doesn't have the given input or the dataset @@ -403,27 +390,19 @@ def load_input(self, name: str, slots: Iterable[str] = []): Returns: Dataset: Loaded dataset """ - job = self.doc - group = first(s for s in job["input_slot_groups"] if s["name"] == name) - if not group: - raise TypeError(f"Job {self.project_uid}-{self.uid} does not have an input {name}") - - data = {"project_uid": self.project_uid, "job_uid": self.uid, "input_name": name, "slots": list(slots)} - with make_json_request(self.cs.vis, "/load_job_input", data=data) as response: - mime = response.headers.get("Content-Type") - if mime != "application/x-cryosparc-dataset": - raise TypeError(f"Unable to load dataset for job {self.project_uid}-{self.uid} input {name}") - return Dataset.load(response) + return self.cs.api.jobs.load_input(self.project_uid, self.uid, name, slots=slots) - def load_output(self, name: str, slots: Iterable[str] = [], version: Union[int, Literal["F"]] = "F"): + def load_output(self, name: str, slots: LoadableSlots = "all", version: Union[int, Literal["F"]] = "F"): """ Load the dataset for the job's output with the given name. Args: name (str): Output to load - slots (list[str], optional): List of specific slots to load, - such as ``movie_blob`` or ``locations``, or all slots if - not specified (including passthrough). Defaults to []. + slots (Literal["default", "passthrough", "all"] | list[str], optional): + List of specific slots to load, such as ``movie_blob`` or + ``locations``, or all slots if not specified (including + passthrough). May also specify as keyword. Defaults to + "all". version (int | Literal["F"], optional): Specific output version to load. Use this to load the output at different stages of processing. Leave unspecified to load final verion. 
Defaults to @@ -435,38 +414,7 @@ def load_output(self, name: str, slots: Iterable[str] = [], version: Union[int, Returns: Dataset: Loaded dataset """ - job = self.doc - slots = set(slots) - version = -1 if version == "F" else version - results = [ - result - for result in job["output_results"] - if result["group_name"] == name and (not slots or result["name"] in slots) - ] - if not slots: - # Requested all slots, but auto-filter results with no provided meta - # files - results = [result for result in results if result["metafiles"]] - if not results: - raise TypeError(f"Job {self.project_uid}-{self.uid} does not have any results for output {name}") - - metafiles = [] - for r in results: - if r["metafiles"]: - metafile = r["metafiles"][0 if r["passthrough"] else version] - if metafile not in metafiles: - metafiles.append(metafile) - else: - raise ValueError( - ( - f"Cannot load output {name} slot {r['name']} because " - "output does not have an associated dataset file. " - "Please exclude this output from the requested slots." - ) - ) - - datasets = [self.cs.download_dataset(self.project_uid, f) for f in metafiles] - return Dataset.innerjoin(*datasets) + return self.cs.api.jobs.load_output(self.project_uid, self.uid, name, slots=slots, version=version) def log(self, text: str, level: Literal["text", "warning", "error"] = "text"): """ @@ -480,9 +428,8 @@ def log(self, text: str, level: Literal["text", "warning", "error"] = "text"): Returns: str: Created log event ID """ - return self.cs.cli.job_send_streamlog( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, message=text, error=level != "text" - ) + event = self.cs.api.jobs.add_event_log(self.project_uid, self.uid, text, type=level) + return event.id def log_checkpoint(self, meta: dict = {}): """ @@ -494,9 +441,8 @@ def log_checkpoint(self, meta: dict = {}): Returns: str: Created checkpoint event ID """ - return self.cs.cli.job_checkpoint_streamlog( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, meta=meta - ) + event = self.cs.api.jobs.add_checkpoint(self.project_uid, self.uid, meta) + return event.id def log_plot( self, @@ -563,10 +509,8 @@ def log_plot( raw_data_format=raw_data_format, savefig_kw=savefig_kw, ) - - return self.cs.cli.job_send_streamlog( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, message=text, flags=flags, imgfiles=imgfiles - ) + event = self.cs.api.jobs.add_image_log(self.project_uid, self.uid, imgfiles, text=text, flags=flags) + return event.id def list_files(self, prefix: Union[str, PurePosixPath] = "", recursive: bool = False) -> List[str]: """ @@ -656,7 +600,7 @@ def download_mrc(self, path: Union[str, PurePosixPath]): path = PurePosixPath(self.uid) / path return self.cs.download_mrc(self.project_uid, path) - def list_assets(self) -> List[AssetDetails]: + def list_assets(self) -> List[GridFSFile]: """ Get a list of files available in the database for this job. Returns a list with details about the assets. Each entry is a dict with a ``_id`` @@ -664,9 +608,9 @@ def list_assets(self) -> List[AssetDetails]: method. 
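A brief sketch of the reworked dataset-loading and event-log helpers above (output and slot names are illustrative): ``slots`` now accepts ``"default"``, ``"passthrough"``, ``"all"`` or an explicit list, and the log helpers return the created event's ID:

>>> particles = job.load_output("particles", slots="default")
>>> everything = job.load_output("particles", slots="all", version="F")
>>> subset = job.load_input("particles", slots=["location", "blob"])
>>> event_id = job.log("Loaded %d particles" % len(particles))
>>> job.log("Something looks off", level="warning")
>>> checkpoint_id = job.log_checkpoint()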
Returns: - list[AssetDetails]: Asset details + list[GridFSFile]: Asset details """ - return self.cs.vis.list_job_files(project_uid=self.project_uid, job_uid=self.uid) # type: ignore + return self.cs.list_assets(self.project_uid, self.uid) def download_asset(self, fileid: str, target: Union[str, PurePath, IO[bytes]]): """ @@ -675,11 +619,11 @@ def download_asset(self, fileid: str, target: Union[str, PurePath, IO[bytes]]): Args: fileid (str): GridFS file object ID - target (str | Path | IO): Local file path, directory path or - writeable file handle to write response data. + target (str | Path | IO): Local file path or writeable file handle + to write response data. Returns: - Path | IO: resulting target path or file handle. + str | Path | IO: resulting target path or file handle. """ return self.cs.download_asset(fileid, target) @@ -711,7 +655,7 @@ def upload_asset( file: Union[str, PurePath, IO[bytes]], filename: Optional[str] = None, format: Optional[AssetFormat] = None, - ) -> EventLogAsset: + ) -> GridFSAsset: """ Upload an image or text file to the current job. Specify either an image (PNG, JPG, GIF, PDF, SVG), text file (TXT, CSV, JSON, XML) or a binary @@ -741,33 +685,20 @@ def upload_asset( Returns: EventLogAsset: Dictionary including details about uploaded asset. """ + ext = None if format: - assert format in ASSET_CONTENT_TYPES, f"Invalid asset format {format}" + ext = format elif filename: - ext = filename.split(".")[-1] - assert ext in ASSET_CONTENT_TYPES, f"Invalid asset format {ext}" - format = ext + ext = filename.split(".")[-1].lower() elif isinstance(file, (str, PurePath)): file = PurePath(file) filename = file.name - ext = filename.split(".")[-1] - assert ext in ASSET_CONTENT_TYPES, f"Invalid asset format {ext}" - format = ext + ext = file.suffix[1:].lower() else: - raise ValueError("Must specify filename or format when saving binary asset handle") - - with bopen(file) as f: - url = f"/projects/{self.project_uid}/jobs/{self.uid}/files" - query = {"format": format} - if filename: - query["filename"] = filename - - with make_request(self.cs.vis, url=url, query=query, data=f) as res: - assert res.status >= 200 and res.status < 300, ( - f"Could not upload project {self.project_uid} asset {file}.\n" - f"Response from CryoSPARC: {res.read().decode()}" - ) - return json.loads(res.read()) + raise ValueError("Must specify filename or format when saving binary asset") + if ext not in ASSET_CONTENT_TYPES: + raise ValueError(f"Invalid asset format {ext}") + return self.cs.api.assets.upload(self.project_uid, self.uid, Stream.load(file), filename=filename, format=ext) def upload_plot( self, @@ -778,7 +709,7 @@ def upload_plot( raw_data_file: Union[str, PurePath, IO[bytes], None] = None, raw_data_format: Optional[TextFormat] = None, savefig_kw: dict = dict(bbox_inches="tight", pad_inches=0), - ) -> List[EventLogAsset]: + ) -> List[GridFSAsset]: """ Upload the given figure. Returns a list of the created asset objects. Avoid using directly; use ``log_plot`` instead. 
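For the asset endpoints refactored above, results are now Pydantic models accessed with dot notation, and ``download_asset`` expects a concrete file name rather than a directory (paths and field access below are illustrative):

>>> assets = job.list_assets()
>>> names = [a.filename for a in assets]          # dot access instead of dict keys
>>> job.download_asset(assets[0].id, "/tmp/" + assets[0].filename)
>>> job.upload_asset("/path/to/report.pdf")       # format inferred from the extension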
See ``log_plot`` @@ -814,7 +745,8 @@ def upload_plot( basename = name or "figure" if hasattr(figure, "savefig"): # matplotlib plot for fmt in formats: - assert fmt in IMAGE_CONTENT_TYPES, f"Invalid figure format {fmt}" + if fmt not in IMAGE_CONTENT_TYPES: + raise ValueError(f"Invalid figure format {fmt}") filename = f"{basename}.{fmt}" data = BytesIO() figure.savefig(data, format=fmt, **savefig_kw) # type: ignore @@ -823,13 +755,15 @@ def upload_plot( elif isinstance(figure, (str, PurePath)): # file path; assume format from filename path = PurePath(figure) basename = path.stem - fmt = str(figure).split(".")[-1] - assert fmt in IMAGE_CONTENT_TYPES, f"Invalid figure format {fmt}" + fmt = path.suffix[1:].lower() + if fmt not in IMAGE_CONTENT_TYPES: + raise ValueError(f"Invalid figure format {fmt}") filename = f"{name or path.stem}.{fmt}" figdata.append((figure, filename, fmt)) else: # Binary IO fmt = first(iter(formats)) - assert fmt in IMAGE_CONTENT_TYPES, f"Invalid or unspecified figure format {fmt}" + if fmt not in IMAGE_CONTENT_TYPES: + raise ValueError(f"Invalid or unspecified figure format {fmt}") filename = f"{basename}.{fmt}" figdata.append((figure, filename, fmt)) @@ -844,17 +778,13 @@ def upload_plot( raw_data_path = PurePath(raw_data_file) raw_data_filename = raw_data_path.name ext = raw_data_format or raw_data_filename.split(".")[-1] - assert ext in TEXT_CONTENT_TYPES, f"Invalid raw data filename {raw_data_file}" + if ext not in TEXT_CONTENT_TYPES: + raise ValueError(f"Invalid raw data filename {raw_data_file}") raw_data_format = ext - assets = [] - for data, filename, fmt in figdata: - asset = self.upload_asset(data, filename=filename, format=fmt) - assets.append(asset) - + assets = [self.upload_asset(data, filename, fmt) for data, filename, fmt in figdata] if raw_data_file: - raw_data_format = raw_data_format or "txt" - asset = self.upload_asset(raw_data_file, filename=raw_data_filename, format=raw_data_format) + asset = self.upload_asset(raw_data_file, raw_data_filename, raw_data_format or "txt") assets.append(asset) return assets @@ -1010,13 +940,13 @@ def subprocess( args = args if isinstance(args, str) else list(map(str, args)) with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, **kwargs) as proc: - assert proc.stdout, f"Subprocess {args} has not standard output" + assert proc.stdout, f"Subprocess {args} has no standard output" if checkpoint: self.log_checkpoint() - self.log("======= Forwarding subprocess output for the following command =======") + self.log("─────── Forwarding subprocess output for the following command ───────") self.log(str(args)) - self.log("======================================================================") + self.log("──────────────────────────────────────────────────────────────────────") for line in proc.stdout: line = line.rstrip() @@ -1034,7 +964,7 @@ def subprocess( self.log(msg, level="error") raise RuntimeError(msg) - self.log("======================= Subprocess complete. =========================") + self.log("─────────────────────── Subprocess complete. 
─────────────────────────") def print_param_spec(self): """ @@ -1058,10 +988,11 @@ def print_param_spec(self): """ headings = ["Param", "Title", "Type", "Default"] rows = [] - for key, details in self.doc["params_base"].items(): - if details["hidden"]: + for key, details in self.full_spec.params: + if details.get("hidden") is True: continue - rows.append([key, details["title"], details["type"], repr(details["value"])]) + type = (details["anyOf"][0] if "anyOf" in details else details).get("type", "Any") + rows.append([key, details["title"], type, repr(details.get("default", None))]) print_table(headings, rows) def print_input_spec(self): @@ -1089,21 +1020,22 @@ def print_input_spec(self): | | | | alignments2D | alignments2D | ✕ | | | | alignments3D | alignments3D | ✕ """ + specs = self.cs.api.jobs.get_input_specs(self.project_uid, self.uid) headings = ["Input", "Title", "Type", "Required?", "Input Slots", "Slot Types", "Slot Required?"] rows = [] - for group in self.doc["input_slot_groups"]: - name, title, type = group["name"], group["title"], group["type"] - required = f"✓ ({group['count_min']}" if group["count_min"] else "✕ (0" - if group["count_max"] in {None, 0, 0.0, math.inf, n.inf}: + for key, spec in specs.root.items(): + name, title, type = key, spec.title, spec.type + required = f"✓ ({spec.count_min}" if spec.count_min else "✕ (0" + if spec.count_max in (0, "inf"): required += "+)" # unlimited connections - elif group["count_min"] == group["count_max"]: + elif spec.count_min == spec.count_max: required += ")" else: - required += f"-{group['count_max']})" - for slot in group["slots"]: - slot_required = "✕" if slot["optional"] else "✓" - rows.append([name, title, type, required, slot["name"], slot["type"].split(".").pop(), slot_required]) - name, title, type, required = ("",) * 4 + required += f"-{spec.count_max})" + for slot in spec.slots: + slot = as_input_slot(slot) + rows.append([name, title, type, required, slot.name, slot.dtype, "✓" if slot.required else "✕"]) + name, title, type, required = ("",) * 4 # only show group info on first iter print_table(headings, rows) def print_output_spec(self): @@ -1129,17 +1061,19 @@ def print_output_spec(self): particles | Particles | particle | blob | blob | | | ctf | ctf """ + specs = self.cs.api.jobs.get_output_specs(self.project_uid, self.uid) headings = ["Output", "Title", "Type", "Result Slots", "Result Types"] rows = [] - for group in self.doc["output_result_groups"]: - name, title, type = group["name"], group["title"], group["type"] - for result in group["contains"]: - rows.append([name, title, type, result["name"], result["type"].split(".").pop()]) + for key, spec in specs.root.items(): + name, title, type = key, spec.title, spec.type + for slot in spec.slots: + slot = as_output_slot(slot) + rows.append([name, title, type, slot.name, slot.dtype]) name, title, type = "", "", "" # only these print once per group print_table(headings, rows) -class ExternalJob(Job): +class ExternalJobController(JobController): """ Mutable custom output job with customizeble input slots and output results. Use External jobs to save data save cryo-EM data generated by a software @@ -1149,14 +1083,10 @@ class ExternalJob(Job): an input. Its outputs must be created manually and may be configured to passthrough inherited input fields, just as with regular CryoSPARC jobs. - Create a new External Job with `Project.create_external_job`_. ExternalJob - is a subclass of `Job`_ and inherits all its methods. 
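As a usage sketch for the external-job methods updated in the hunks below (UIDs, slot names and row counts are illustrative), expanded slot definitions now use the ``"name"`` key rather than the deprecated ``"prefix"``, and ``add_output`` with ``alloc`` returns a pre-allocated dataset:

>>> job = project.create_external_job("W1", title="Custom Picker")
>>> job.add_input(
...     type="particle",
...     name="input_particles",
...     min=1,
...     slots=[{"name": "component_mode_1", "dtype": "component", "required": True}],
... )
'input_particles'
>>> particles = job.add_output(
...     type="particle",
...     name="picked_particles",
...     slots=["location", "pick_stats"],
...     alloc=10000,  # returns a Dataset with 10000 allocated rows instead of the name
... )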
- - Attributes: - uid (str): Job unique ID, e.g., "J42" - project_uid (str): Project unique ID, e.g., "P3" - doc (JobDocument): All job data from the CryoSPARC database. Database - contents may change over time, use the `refresh`_ method to update. + Create a new External Job with :py:meth:`project.create_external_job() `. + or :py:meth:`workspace.create_external_job() `. + ``ExternalJobController`` is a subclass of :py:class:`JobController` + and inherits all its methods and attributes. Examples: @@ -1175,25 +1105,22 @@ class ExternalJob(Job): ... ) ... dset['movie_blob/path'] = ... # populate dataset ... job.save_output(output_name, dset) - - .. _Job: - #cryosparc.job.Job - - .. _refresh: - #cryosparc.job.Job.refresh - - .. _Project.create_external_job: - project.html#cryosparc.project.Project.create_external_job """ + def __init__(self, cs: "CryoSPARC", job: Union[Tuple[str, str], Job]) -> None: + super().__init__(cs, job) + if self.model.spec.type != "snowflake": + raise TypeError(f"Job {self.model.project_uid}-{self.model.uid} is not an external job") + def add_input( self, type: Datatype, name: Optional[str] = None, min: int = 0, max: Union[int, Literal["inf"]] = "inf", - slots: Iterable[SlotSpec] = [], + slots: Sequence[SlotSpec] = [], title: Optional[str] = None, + desc: Optional[str] = None, ): """ Add an input slot to the current job. May be connected to zero or more @@ -1202,17 +1129,21 @@ def add_input( Args: type (Datatype): cryo-EM data type for this output, e.g., "particle" name (str, optional): Output name key, e.g., "picked_particles". - Defaults to None. + Same as ``type`` if not specified. Defaults to None. min (int, optional): Minimum number of required input connections. Defaults to 0. max (int | Literal["inf"], optional): Maximum number of input connections. Specify ``"inf"`` for unlimited connections. Defaults to "inf". slots (list[SlotSpec], optional): List of slots that should - be connected to this input, such as ``"location"`` or ``"blob"`` + be connected to this input, such as ``"location"`` or ``"blob"``. + When connecting the input, if the source job output is missing + these slots, the external job cannot start or accept outputs. Defaults to []. title (str, optional): Human-readable title for this input. Defaults - to None. + to name. + desc (str, optional): Human-readable description for this input. + Defaults to None. Raises: CommandError: General CryoSPARC network access error such as @@ -1245,51 +1176,38 @@ def add_input( f'Invalid input name "{name}"; may only contain letters, numbers and underscores, ' "and must start with a letter" ) - try: - self.cs.vis.add_external_job_input( # type: ignore - project_uid=self.project_uid, - job_uid=self.uid, + if any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. 
Use 'name' instead.", DeprecationWarning, stacklevel=2) + if not name: + name = type + if not title: + title = name + self.model = self.cs.api.jobs.add_external_input( + self.project_uid, + self.uid, + name, + InputSpec( type=type, - name=name, - min=min, - max=max, - slots=slots, title=title, - ) - except CommandError as err: - if err.code == 422 and err.data and "slots" in err.data: - raise InvalidSlotsError("add_input", err.data["slots"]) from err - raise - self.refresh() - return self.doc["input_slot_groups"][-1]["name"] + description=desc or "", + slots=[as_input_slot(slot) for slot in slots], + count_min=min, + count_max=max, + ), + ) + return name + # fmt: off @overload - def add_output( - self, - type: Datatype, - name: Optional[str] = ..., - slots: List[SlotSpec] = ..., - passthrough: Optional[str] = ..., - title: Optional[str] = ..., - *, - alloc: Literal[None] = None, - ) -> str: ... + def add_output(self, type: Datatype, name: Optional[str] = ..., slots: Sequence[SlotSpec] = ..., passthrough: Optional[str] = ..., title: Optional[str] = ...) -> str: ... @overload - def add_output( - self, - type: Datatype, - name: Optional[str] = ..., - slots: List[SlotSpec] = ..., - passthrough: Optional[str] = ..., - title: Optional[str] = ..., - *, - alloc: Union[int, Dataset] = ..., - ) -> Dataset: ... + def add_output(self, type: Datatype, name: Optional[str] = ..., slots: Sequence[SlotSpec] = ..., passthrough: Optional[str] = ..., title: Optional[str] = ..., *, alloc: Union[int, Dataset]) -> Dataset: ... + # fmt: on def add_output( self, type: Datatype, name: Optional[str] = None, - slots: List[SlotSpec] = [], + slots: Sequence[SlotSpec] = [], passthrough: Optional[str] = None, title: Optional[str] = None, *, @@ -1359,9 +1277,9 @@ def add_output( ... type="particle", ... name="particle_alignments", ... slots=[ - ... {"dtype": "alignments3D", "prefix": "alignments_class_0", "required": True}, - ... {"dtype": "alignments3D", "prefix": "alignments_class_1", "required": True}, - ... {"dtype": "alignments3D", "prefix": "alignments_class_2", "required": True}, + ... {"name": "alignments_class_0", "dtype": "alignments3D", "required": True}, + ... {"name": "alignments_class_1", "dtype": "alignments3D", "required": True}, + ... {"name": "alignments_class_2", "dtype": "alignments3D", "required": True}, ... ] ... ) "particle_alignments" @@ -1371,23 +1289,19 @@ def add_output( f'Invalid output name "{name}"; may only contain letters, numbers and underscores, ' "and must start with a letter" ) - try: - self.cs.vis.add_external_job_output( # type: ignore - project_uid=self.project_uid, - job_uid=self.uid, - type=type, - name=name, - slots=slots, - passthrough=passthrough, - title=title, - ) - except CommandError as err: - if err.code == 422 and err.data and "slots" in err.data: - raise InvalidSlotsError("add_output", err.data["slots"]) from err - raise - self.refresh() - result_name = self.doc["output_result_groups"][-1]["name"] - return result_name if alloc is None else self.alloc_output(result_name, alloc) + if any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. 
Use 'name' instead.", DeprecationWarning, stacklevel=2) + if not name: + name = type + if not title: + title = name + self.model = self.cs.api.jobs.add_external_output( + self.project_uid, + self.uid, + name, + OutputSpec(type=type, title=title, slots=[as_output_slot(slot) for slot in slots], passthrough=passthrough), + ) + return name if alloc is None else self.alloc_output(name, alloc) def connect( self, @@ -1395,15 +1309,15 @@ def connect( source_job_uid: str, source_output: str, *, - slots: List[SlotSpec] = [], - title: str = "", - desc: str = "", - refresh: bool = True, + slots: Sequence[SlotSpec] = [], + title: Optional[str] = None, + desc: Optional[str] = None, + **kwargs, ) -> bool: """ Connect the given input for this job to an output with given job UID and name. If this input does not exist, it will be added with the given - slots. At least one slot must be specified if the input does not exist. + slots. Args: target_input (str): Input name to connect into. Will be created if @@ -1411,14 +1325,14 @@ def connect( source_job_uid (str): Job UID to connect from, e.g., "J42" source_output (str): Job output name to connect from , e.g., "particles" - slots (list[SlotSpec], optional): List of slots to add to - created input. All if not specified. Defaults to []. + slots (list[SlotSpec], optional): List of input slots (e.g., + "particle" or "blob") to explicitly required for the created + input. If the given source job is missing these slots, the + external job cannot start or accept outputs. Defaults to []. title (str, optional): Human readable title for created input. - Defaults to "". + Defaults to target input name. desc (str, optional): Human readable description for created input. Defaults to "". - refresh (bool, optional): Auto-refresh job document after - connecting. Defaults to True. Raises: CommandError: General CryoSPARC network access error such as @@ -1436,27 +1350,25 @@ def connect( >>> job.connect("input_micrographs", "J2", "micrographs") """ - assert source_job_uid != self.uid, f"Cannot connect job {self.uid} to itself" - try: - self.cs.vis.connect_external_job( # type: ignore - project_uid=self.project_uid, - source_job_uid=source_job_uid, - source_output=source_output, - target_job_uid=self.uid, - target_input=target_input, - slots=slots, - title=title, - desc=desc, - ) - except CommandError as err: - if err.code == 422 and err.data and "slots" in err.data: - raise InvalidSlotsError("connect", err.data["slots"]) from err - raise - if refresh: - self.refresh() - return True - - def alloc_output(self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0) -> Dataset: + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) + if source_job_uid == self.uid: + raise ValueError(f"Cannot connect job {self.uid} to itself") + source_job = self.cs.api.jobs.find_one(self.project_uid, source_job_uid) + if source_output not in source_job.spec.outputs.root: + raise ValueError(f"Source job {source_job_uid} does not have output {source_output}") + output = source_job.spec.outputs.root[source_output] + if target_input not in self.model.spec.inputs.root: + if any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. 
Use 'name' instead.", DeprecationWarning, stacklevel=2) + # convert to prevent from warning again + slots = [as_input_slot(slot) for slot in slots] # type: ignore + self.add_input(output.type, target_input, min=1, slots=slots, title=title, desc=desc) + return super().connect(target_input, source_job_uid, source_output) + + def alloc_output( + self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0, *, dtype_params: Dict[str, Any] = {} + ) -> Dataset: """ Allocate an empty dataset for the given output with the given name. Initialize with the given number of empty rows. The result may be @@ -1469,6 +1381,9 @@ def alloc_output(self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0) - (B) a numpy array of numbers to use for UIDs in the allocated dataset or (C) a dataset from which to inherit unique row IDs (useful for allocating passthrough outputs). Defaults to 0. + dtype_params (dict, optional): Data type parameters when allocating + results with dynamic column sizes such as ``particle`` -> + ``alignments3D_multi``. Defaults to {}. Returns: Dataset: Empty dataset with the given number of rows @@ -1497,17 +1412,7 @@ def alloc_output(self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0) - ]) """ - expected_fields = [] - for result in self.doc["output_results"]: - if result["group_name"] != name or result["passthrough"]: - continue - prefix = result["name"] - for field, dtype in result["min_fields"]: - expected_fields.append((f"{prefix}/{field}", dtype)) - - if not expected_fields: - raise ValueError(f"No such output {name} on {self.project_uid}-{self.uid}") - + expected_fields = self.cs.api.jobs.get_output_fields(self.project_uid, self.uid, name, dtype_params) if isinstance(alloc, int): return Dataset.allocate(alloc, expected_fields) elif isinstance(alloc, Dataset): @@ -1515,15 +1420,16 @@ def alloc_output(self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0) - else: return Dataset({"uid": alloc}).add_fields(expected_fields) - def save_output(self, name: str, dataset: Dataset, *, refresh: bool = True): + def save_output(self, name: str, dataset: Dataset, *, version: int = 0, **kwargs): """ Save output dataset to external job. Args: name (str): Name of output on this job. dataset (Dataset): Value of output with only required fields. - refresh (bool, Optional): Auto-refresh job document after saving. - Defaults to True + version (int, optional): Version number, when saving multiple + intermediate iterations. Only the last saved version is kept. + Defaults to 0. Examples: @@ -1536,13 +1442,9 @@ def save_output(self, name: str, dataset: Dataset, *, refresh: bool = True): >>> job.save_output("picked_particles", particles) """ - - url = f"/external/projects/{self.project_uid}/jobs/{self.uid}/outputs/{urllib.parse.quote_plus(name)}/dataset" - with make_request(self.cs.vis, url=url, data=dataset.stream(compression="lz4")) as res: - result = res.read().decode() - assert res.status >= 200 and res.status < 400, f"Save output failed with message: {result}" - if refresh: - self.refresh() + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) + self.model = self.cs.api.jobs.save_output(self.project_uid, self.uid, name, dataset, version=version) def start(self, status: Literal["running", "waiting"] = "waiting"): """ @@ -1551,24 +1453,24 @@ def start(self, status: Literal["running", "waiting"] = "waiting"): Args: status (str, optional): "running" or "waiting". Defaults to "waiting". 
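A sketch of the new keyword arguments on the surrounding methods (output, slot and dtype-parameter names are illustrative): ``alloc_output`` can take ``dtype_params`` for outputs with dynamic column shapes, and ``save_output`` can record numbered intermediate versions:

>>> alignments = job.alloc_output("particle_alignments", alloc=5000, dtype_params={"K": 3})
>>> job.save_output("particle_alignments", alignments, version=0)
>>> job.save_output("particle_alignments", alignments, version=1)  # only the last saved version is kept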
""" - assert status in {"running", "waiting"}, f"Invalid start status {status}" - assert self.doc["status"] not in { - "running", - "waiting", - }, f"Job {self.project_uid}-{self.uid} is already in running status" - self.cs.cli.run_external_job(self.project_uid, self.uid, status) # type: ignore - self.refresh() + self.model = self.cs.api.jobs.mark_running(self.project_uid, self.uid, status=status) - def stop(self, error=False): + def stop(self, error: str = ""): """ - Set job status to "completed" or "failed" + Set job status to "completed" or "failed" if there was an error. Args: - error (bool, optional): Job completed with errors. Defaults to False. - """ - status = "failed" if error else "completed" - self.cs.cli.set_job_status(self.project_uid, self.uid, status) # type: ignore - self.refresh() + error (str, optional): Error message, will add to event log and set + job to status to failed if specified. Defaults to "". + """ + if isinstance(error, bool): # allowed bool in previous version + warnings.warn("error should be specified as a string", DeprecationWarning, stacklevel=2) + error = "An error occurred" if error else "" + self.model = self.cs.api.jobs.kill(self.project_uid, self.uid) + if error: + self.model = self.cs.api.jobs.mark_failed(self.project_uid, self.uid, error=error) + else: + self.model = self.cs.api.jobs.mark_completed(self.project_uid, self.uid) @contextmanager def run(self): @@ -1588,21 +1490,21 @@ def run(self): ... job.save_output(...) """ - error = False - self.start("running") + error = "" try: + self.start("running") yield self except Exception: - error = True + error = traceback.format_exc() raise finally: - self.stop(error) # TODO: Write Error to job log, if possible + self.stop(error=error) def queue( self, lane: Optional[str] = None, hostname: Optional[str] = None, - gpus: List[int] = [], + gpus: Sequence[int] = [], cluster_vars: Dict[str, Any] = {}, ): raise ExternalJobError( diff --git a/cryosparc/model_registry.py b/cryosparc/model_registry.py index a1b79068..d9473859 100644 --- a/cryosparc/model_registry.py +++ b/cryosparc/model_registry.py @@ -3,17 +3,18 @@ from .models import ( api_request, api_response, + asset, auth, diagnostics, event, exposure, + external, gpu, instance, job, job_register, job_spec, license, - mongo, notification, project, scheduler_lane, @@ -41,8 +42,9 @@ register_model_module(event) register_model_module(user) register_model_module(session_params) +register_model_module(external) register_model_module(project) -register_model_module(mongo) +register_model_module(asset) register_model_module(signature) register_model_module(instance) register_model_module(job_register) diff --git a/cryosparc/models/api_response.py b/cryosparc/models/api_response.py index 1924c256..4445a86f 100644 --- a/cryosparc/models/api_response.py +++ b/cryosparc/models/api_response.py @@ -24,11 +24,13 @@ class BrowseFileResponse(BaseModel): type: str -class DeleteProjectJobPreview(BaseModel): +class DeleteJobPreview(BaseModel): project_uid: str uid: str workspace_uids: List[str] status: str + title: str + type: str class DeleteProjectWorkspacePreview(BaseModel): @@ -38,13 +40,12 @@ class DeleteProjectWorkspacePreview(BaseModel): class DeleteProjectPreview(BaseModel): - jobs: List[DeleteProjectJobPreview] + jobs: List[DeleteJobPreview] workspaces: List[DeleteProjectWorkspacePreview] class DeleteWorkspacePreview(BaseModel): - jobs_in_one_workspace: List[str] - jobs_in_multiple_workspaces: List[str] + jobs: List[DeleteJobPreview] class 
GetFinalResultsResponse(BaseModel): diff --git a/cryosparc/models/asset.py b/cryosparc/models/asset.py new file mode 100644 index 00000000..1a5b41ba --- /dev/null +++ b/cryosparc/models/asset.py @@ -0,0 +1,34 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Literal, Optional, Union + +from pydantic import BaseModel, Field + + +class GridFSAsset(BaseModel): + """ + Information about an uploaded GridFS file. + """ + + fileid: str + filename: str + filetype: str + + +class GridFSFile(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + filename: str + contentType: Union[ + Literal["text/plain", "text/csv", "text/html", "application/json", "application/xml", "application/x-troff"], + Literal["application/pdf", "image/gif", "image/jpeg", "image/png", "image/svg+xml"], + str, + ] + uploadDate: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + length: int + chunkSize: int + md5: Optional[str] = None + project_uid: str + job_uid: str diff --git a/cryosparc/models/event.py b/cryosparc/models/event.py index ea48a362..bc4b5db2 100644 --- a/cryosparc/models/event.py +++ b/cryosparc/models/event.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, Field -from .mongo import GridFSAsset +from .asset import GridFSAsset class CheckpointEvent(BaseModel): diff --git a/cryosparc/models/exposure.py b/cryosparc/models/exposure.py index 2d464868..47bbf161 100644 --- a/cryosparc/models/exposure.py +++ b/cryosparc/models/exposure.py @@ -1,7 +1,7 @@ # THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY # SEE dev/api_generate_models.py import datetime -from typing import Any, List, Literal, Optional, Union +from typing import Any, List, Literal, Optional, Tuple, Union from pydantic import BaseModel, Field @@ -9,31 +9,31 @@ class CTF(BaseModel): - accel_kv: float = 0 - amp_contrast: float = 0 - cross_corr_ctffind4: float = 0 - cs_mm: float = 0 - ctf_fit_to_A: float = 0 - df1_A: float = 0 - df2_A: float = 0 - df_angle_rad: float = 0 - exp_group_id: int = 0 - fig_of_merit_gctf: float = 0 - path: str = "." - phase_shift_rad: float = 0 - type: str = "" + accel_kv: float + amp_contrast: float + cross_corr_ctffind4: float + cs_mm: float + ctf_fit_to_A: float + df1_A: float + df2_A: float + df_angle_rad: float + exp_group_id: int + fig_of_merit_gctf: float + path: str + phase_shift_rad: float + type: str class CtfStats(BaseModel): - cross_corr: int = 0 - ctf_fit_to_A: float = 0 - df_range: List[Any] = [0, 0] - df_tilt_normal: List[Any] = [0, 0] - diag_image_path: str = "." - fit_data_path: str = "." - ice_thickness_rel: float = 0 - spectrum_dim: int = 0 - type: str = "" + cross_corr: int + ctf_fit_to_A: float + df_range: List[Any] + df_tilt_normal: List[Any] + diag_image_path: str + fit_data_path: str + ice_thickness_rel: float + spectrum_dim: int + type: str class ECLExposureProperties(BaseModel): @@ -41,61 +41,61 @@ class ECLExposureProperties(BaseModel): class StatBlob(BaseModel): - binfactor: int = 0 - idx: int = 0 - path: str = "." - psize_A: float = 0 - shape: List[int] = [0, 0] + binfactor: int + idx: int + path: str + psize_A: float + shape: Tuple[int, int] class GainRefBlob(BaseModel): - flip_x: int = 0 - flip_y: int = 0 - idx: int = 0 - path: str = "." 
- rotate_num: int = 0 - shape: List[int] = [] + flip_x: int + flip_y: int + idx: int + path: str + rotate_num: int + shape: List[int] class MicrographBlob(BaseModel): - format: str = "" - idx: int = 0 - is_background_subtracted: bool = False - path: str = "." - psize_A: float = 0 - shape: List[int] = [0, 0] + format: str + idx: int + is_background_subtracted: bool + path: str + psize_A: float + shape: List[int] class MovieBlob(BaseModel): - format: str = "" + format: str has_defect_file: bool = False import_sig: int = 0 - is_gain_corrected: bool = False - path: str = "." - psize_A: float = 0 - shape: List[int] = [] + is_gain_corrected: bool + path: str + psize_A: float + shape: List[int] class MScopeParams(BaseModel): - accel_kv: float = 0 + accel_kv: float beam_shift: List[int] = [0, 0] beam_shift_known: int = 0 - cs_mm: float = 0 + cs_mm: float defect_path: Optional[str] = None - exp_group_id: int = 0 + exp_group_id: int neg_stain: int = 0 - phase_plate: int = 0 + phase_plate: int total_dose_e_per_A2: float = 0 class MotionData(BaseModel): - frame_end: int = 0 - frame_start: int = 0 - idx: int = 0 - path: str = "." - psize_A: float = 0 - type: str = "" - zero_shift_frame: int = 0 + frame_end: int + frame_start: int + idx: int + path: str + psize_A: float + type: str + zero_shift_frame: int class ExposureElement(BaseModel): diff --git a/cryosparc/models/external.py b/cryosparc/models/external.py new file mode 100644 index 00000000..d81514f5 --- /dev/null +++ b/cryosparc/models/external.py @@ -0,0 +1,17 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import Optional + +from pydantic import BaseModel + +from .job_spec import OutputRef, OutputSpec + + +class ExternalOutputSpec(BaseModel): + """ + Specification for an external job with a single output. + """ + + name: str + spec: OutputSpec + connection: Optional[OutputRef] = None diff --git a/cryosparc/models/job.py b/cryosparc/models/job.py index f6f4027a..86113cb8 100644 --- a/cryosparc/models/job.py +++ b/cryosparc/models/job.py @@ -60,7 +60,7 @@ class Job(BaseModel): job_dir_size_last_updated: Optional[datetime.datetime] = None run_as_user: Optional[str] = None title: str = "" - description: str = "Enter a description." + description: str = "" status: JobStatus = "building" created_by_user_id: Optional[str] = None created_by_job_uid: Optional[str] = None @@ -141,6 +141,6 @@ class Job(BaseModel): last_exported_location: Optional[str] = None last_exported_version: Optional[str] = None tags: List[str] = [] - imported: bool = False imported_at: Optional[datetime.datetime] = None deleted_at: Optional[datetime.datetime] = None + starred_by: List[str] = [] diff --git a/cryosparc/models/job_spec.py b/cryosparc/models/job_spec.py index 664255a6..d6ead994 100644 --- a/cryosparc/models/job_spec.py +++ b/cryosparc/models/job_spec.py @@ -5,17 +5,7 @@ from pydantic import BaseModel, ConfigDict, RootModel BuilderTag = Literal[ - "new", - "beta", - "legacy", - "interactive", - "gpuEnabled", - "multiGpu", - "utility", - "import", - "live", - "benchmark", - "wrapper", + "new", "interactive", "gpuEnabled", "multiGpu", "utility", "import", "live", "benchmark", "wrapper" ] """ Visual indicators for jobs in the builder. @@ -63,6 +53,46 @@ class InputResult(BaseModel): class Connection(BaseModel): + """ + Job input connection details. 
+ """ + + job_uid: str + output: str + type: Literal[ + "exposure", + "particle", + "template", + "volume", + "volume_multi", + "mask", + "live", + "ml_model", + "symmetry_candidate", + "flex_mesh", + "flex_model", + "hyperparameter", + "denoise_model", + "annotation_model", + ] + results: List[InputResult] = [] + + +class OutputSlot(BaseModel): + """ + Specification of an output slot in the job configuration. Part of a group + """ + + name: str + dtype: str + + +class OutputSpec(BaseModel): + """ + Used for outputs with some generated data based on data forwarded from + input inheritance + """ + type: Literal[ "exposure", "particle", @@ -79,9 +109,20 @@ class Connection(BaseModel): "denoise_model", "annotation_model", ] + title: str + description: str = "" + slots: List[Union[OutputSlot, str]] = [] + passthrough: Optional[str] = None + passthrough_exclude_slots: List[str] = [] + + +class OutputRef(BaseModel): + """ + Minimal name reference to a specific job output + """ + job_uid: str output: str - results: List[InputResult] = [] class InputSlot(BaseModel): @@ -136,6 +177,7 @@ class Params(BaseModel): model_config = ConfigDict(extra="allow") if TYPE_CHECKING: + def __init__(self, **kwargs: Any) -> None: ... def __getattr__(self, key: str) -> Any: ... @@ -194,44 +236,6 @@ class JobSpec(BaseModel): """ -class OutputSlot(BaseModel): - """ - Specification of an output slot in the job configuration. Part of a group - """ - - name: str - dtype: str - - -class OutputSpec(BaseModel): - """ - Used for outputs with some generated data based on data forwarded from - input inheritance - """ - - type: Literal[ - "exposure", - "particle", - "template", - "volume", - "volume_multi", - "mask", - "live", - "ml_model", - "symmetry_candidate", - "flex_mesh", - "flex_model", - "hyperparameter", - "denoise_model", - "annotation_model", - ] - title: str - description: str = "" - slots: List[Union[OutputSlot, str]] = [] - passthrough: Optional[str] = None - passthrough_exclude_slots: List[str] = [] - - class OutputSpecs(RootModel): root: Dict[str, OutputSpec] = {} @@ -244,3 +248,10 @@ class JobRegisterError(BaseModel): type: str message: str traceback: str + + +class ResourceSpec(BaseModel): + cpu: int = 1 + gpu: int = 0 + ram: int = 1 + ssd: bool = False diff --git a/cryosparc/models/mongo.py b/cryosparc/models/mongo.py deleted file mode 100644 index 47623860..00000000 --- a/cryosparc/models/mongo.py +++ /dev/null @@ -1,13 +0,0 @@ -# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY -# SEE dev/api_generate_models.py -from pydantic import BaseModel - - -class GridFSAsset(BaseModel): - """ - Information about an uploaded GridFS file. 
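The spec models above are plain Pydantic models; as an assumed construction sketch (output and slot names illustrative), an external output specification can be built directly from them, matching how the new ``cs.api`` endpoints consume these types:

>>> from cryosparc.models.external import ExternalOutputSpec
>>> from cryosparc.models.job_spec import OutputRef, OutputSpec
>>> spec = ExternalOutputSpec(
...     name="picked_particles",
...     spec=OutputSpec(type="particle", title="Picked particles", slots=["location", "pick_stats"]),
...     connection=OutputRef(job_uid="J2", output="micrographs"),
... )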
- """ - - fileid: str - filename: str - filetype: str diff --git a/cryosparc/models/project.py b/cryosparc/models/project.py index be514280..3c956529 100644 --- a/cryosparc/models/project.py +++ b/cryosparc/models/project.py @@ -52,13 +52,14 @@ class Project(BaseModel): generate_intermediate_results_settings: GenerateIntermediateResultsSettings = GenerateIntermediateResultsSettings() last_exp_group_id_used: Optional[int] = None develop_run_as_user: Optional[str] = None - imported: bool = False + imported_at: Optional[datetime.datetime] = None import_status: Optional[Literal["importing", "complete", "failed"]] = None project_stats: ProjectStats = ProjectStats() last_archived_version: Optional[str] = None last_detached_version: Optional[str] = None is_cleanup_in_progress: bool = False tags: List[str] = [] + starred_by: List[str] = [] class ProjectSymlink(BaseModel): diff --git a/cryosparc/models/service.py b/cryosparc/models/service.py index a0e1c3a0..1b94e107 100644 --- a/cryosparc/models/service.py +++ b/cryosparc/models/service.py @@ -3,17 +3,7 @@ from typing import Literal LoggingService = Literal[ - "app", - "database", - "cache", - "api", - "scheduler", - "command_core", - "command_rtp", - "command_vis", - "app_api", - "supervisord", - "update", + "app", "database", "cache", "api", "scheduler", "command_vis", "app_api", "supervisord", "update" ] """ Same as Service, but also includes supervisord and update logs. diff --git a/cryosparc/models/session.py b/cryosparc/models/session.py index 96ce1e5f..c876c275 100644 --- a/cryosparc/models/session.py +++ b/cryosparc/models/session.py @@ -221,7 +221,7 @@ class Session(BaseModel): workspace_stats: WorkspaceStats = WorkspaceStats() notes: str = "" notes_lock: Optional[str] = None - imported: bool = False + imported_at: Optional[datetime.datetime] = None workspace_type: str = "live" session_uid: str session_dir: str diff --git a/cryosparc/models/workspace.py b/cryosparc/models/workspace.py index df61a98f..9e8b8625 100644 --- a/cryosparc/models/workspace.py +++ b/cryosparc/models/workspace.py @@ -39,10 +39,11 @@ class Workspace(BaseModel): workspace_stats: WorkspaceStats = WorkspaceStats() notes: str = "" notes_lock: Optional[str] = None - imported: bool = False + imported_at: Optional[datetime.datetime] = None workspace_type: Literal["base", "live"] = "base" model_config = ConfigDict(extra="allow") if TYPE_CHECKING: + def __init__(self, **kwargs: Any) -> None: ... def __getattr__(self, key: str) -> Any: ... 
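A small sketch of how the model changes above surface through the controllers (UIDs illustrative): the deprecated ``.doc`` dictionaries give way to typed ``.model`` attributes, and the removed ``imported`` flags are replaced by ``imported_at`` timestamps:

>>> project = cs.find_project("P3")
>>> title = project.model.title                  # typed attribute access, not project.doc["title"]
>>> imported_at = project.model.imported_at      # None unless the project was imported
>>> workspace = cs.find_workspace("P3", "W1")
>>> kind = workspace.model.workspace_type        # "base" or "live"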
diff --git a/cryosparc/project.py b/cryosparc/project.py index 755a899c..a343f1e9 100644 --- a/cryosparc/project.py +++ b/cryosparc/project.py @@ -1,49 +1,57 @@ +import warnings from pathlib import PurePath, PurePosixPath from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from .controller import Controller, as_output_slot from .dataset import DEFAULT_FORMAT, Dataset -from .job import ExternalJob, Job +from .job import ExternalJobController, JobController +from .models.project import Project from .row import R -from .spec import Datatype, MongoController, ProjectDocument, SlotSpec -from .workspace import Workspace +from .spec import Datatype, SlotSpec +from .workspace import WorkspaceController if TYPE_CHECKING: - from numpy.typing import NDArray # type: ignore + from numpy.typing import NDArray from .tools import CryoSPARC -class Project(MongoController[ProjectDocument]): +class ProjectController(Controller[Project]): """ Accessor instance for CryoSPARC projects with ability to add workspaces, jobs - and upload/download project files. Should be instantiated through - `CryoSPARC.find_project`_. + and upload/download project files. Should be created with + :py:meth:`cs.find_project() `. - Attributes: - uid (str): Project unique ID, e.g., "P3" - doc (ProjectDocument): All project data from the CryoSPARC database. - Database contents may change over time, use the `refresh`_ method - to update. + Arguments: + project (str | Project): either Project UID or Project model, e.g. ``"P3"`` - .. _CryoSPARC.find_project: - tools.html#cryosparc.tools.CryoSPARC.find_project + Attributes: + model (Project): All project data from the CryoSPARC database. Contents + may change over time, use :py:meth:`refresh` to update. + """ - .. _refresh: - #cryosparc.project.Project.refresh + uid: str + """ + Project unique ID, e.g., "P3" """ - def __init__(self, cs: "CryoSPARC", uid: str) -> None: + def __init__(self, cs: "CryoSPARC", project: Union[str, Project]) -> None: self.cs = cs - self.uid = uid + if isinstance(project, str): + self.uid = project + self.refresh() + else: + self.uid = project.uid + self.model = project def refresh(self): """ Reload this project from the CryoSPARC database. Returns: - Project: self + ProjectController: self """ - self._doc = self.cs.cli.get_project(self.uid) # type: ignore + self.model = self.cs.api.projects.find_one(self.uid) return self def dir(self) -> PurePosixPath: @@ -53,10 +61,10 @@ def dir(self) -> PurePosixPath: Returns: Path: project directory Pure Path instance """ - path: str = self.cs.cli.get_project_dir_abs(self.uid) # type: ignore + path: str = self.cs.api.projects.get_directory(self.uid) return PurePosixPath(path) - def find_workspace(self, workspace_uid) -> Workspace: + def find_workspace(self, workspace_uid) -> WorkspaceController: """ Get a workspace accessor instance for the workspace in this project with the given UID. Fails with an error if workspace does not exist. @@ -65,12 +73,11 @@ def find_workspace(self, workspace_uid) -> Workspace: workspace_uid (str): Workspace unique ID, e.g., "W1" Returns: - Workspace: accessor instance + WorkspaceController: workspace accessor object """ - workspace = Workspace(self.cs, self.uid, workspace_uid) - return workspace.refresh() + return WorkspaceController(self.cs, (self.uid, workspace_uid)) - def find_job(self, job_uid: str) -> Job: + def find_job(self, job_uid: str) -> JobController: """ Get a job accessor instance for the job in this project with the given UID. 
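As a sketch of the new controller constructor below (UIDs illustrative), a ``ProjectController`` can wrap either a bare UID, which triggers a fetch, or an already-fetched model:

>>> from cryosparc.project import ProjectController
>>> p1 = ProjectController(cs, "P3")                               # fetches the Project model by UID
>>> p2 = ProjectController(cs, cs.api.projects.find_one("P3"))     # wraps an existing model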
Fails with an error if job does not exist. @@ -79,13 +86,11 @@ def find_job(self, job_uid: str) -> Job: job_uid (str): Job unique ID, e.g., "J42" Returns: - Job: accessor instance + JobController: job accessor instance """ - job = Job(self.cs, self.uid, job_uid) - job.refresh() - return job + return JobController(self.cs, (self.uid, job_uid)) - def find_external_job(self, job_uid: str) -> ExternalJob: + def find_external_job(self, job_uid: str) -> ExternalJobController: """ Get the External job accessor instance for an External job in this project with the given UID. Fails if the job does not exist or is not an @@ -98,11 +103,11 @@ def find_external_job(self, job_uid: str) -> ExternalJob: TypeError: If job is not an external job Returns: - ExternalJob: accessor instance + ExternalJobController: external job accessor object """ return self.cs.find_external_job(self.uid, job_uid) - def create_workspace(self, title: str, desc: Optional[str] = None) -> Workspace: + def create_workspace(self, title: str, desc: Optional[str] = None) -> WorkspaceController: """ Create a new empty workspace in this project. At least a title must be provided. @@ -112,7 +117,10 @@ def create_workspace(self, title: str, desc: Optional[str] = None) -> Workspace: desc (str, optional): Markdown text description. Defaults to None. Returns: - Workspace: created workspace instance + WorkspaceController: created workspace accessor object + + Raises: + APIError: Workspace cannot be created. """ return self.cs.create_workspace(self.uid, title, desc) @@ -122,12 +130,13 @@ def create_job( type: str, connections: Dict[str, Union[Tuple[str, str], List[Tuple[str, str]]]] = {}, params: Dict[str, Any] = {}, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> Job: + title: str = "", + desc: str = "", + ) -> JobController: """ - Create a new job with the given type. Use `CryoSPARC.get_job_sections`_ - to query available job types on the connected CryoSPARC instance. + Create a new job with the given type. Use + :py:attr:`cs.job_register ` + to find available job types on the connected CryoSPARC instance. Args: project_uid (str): Project UID to create job in, e.g., "P3" @@ -138,11 +147,14 @@ def create_job( value is a (job uid, output name) tuple. Defaults to {} params (dict[str, any], optional): Specify parameter values. Defaults to {}. - title (str, optional): Job title. Defaults to None. - desc (str, optional): Job markdown description. Defaults to None. + title (str, optional): Job title. Defaults to "". + desc (str, optional): Job markdown description. Defaults to "". Returns: - Job: created job instance. Raises error if job cannot be created. + JobController: created job accessor object. + + Raises: + APIError: Job cannot be created. Examples: @@ -161,9 +173,6 @@ def create_job( ... connections={"particles": ("J20", "particles_selected")} ... params={"abinit_K": 3} ... ) - - .. _CryoSPARC.get_job_sections: - tools.html#cryosparc.tools.CryoSPARC.get_job_sections """ return self.cs.create_job( self.uid, workspace_uid, type, connections=connections, params=params, title=title, desc=desc @@ -172,26 +181,23 @@ def create_job( def create_external_job( self, workspace_uid: str, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> ExternalJob: + title: str = "", + desc: str = "", + ) -> ExternalJobController: """ Add a new External job to this project to save generated outputs to. Args: workspace_uid (str): Workspace UID to create job in, e.g., "W3". 
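For the job-creation helpers above, a usage sketch mirroring the docstring examples (UIDs, job type and parameters illustrative):

>>> workspace = project.create_workspace("Refinement", desc="Ab-initio and refinement jobs")
>>> job = project.create_job(
...     workspace.uid,
...     "homo_abinit",
...     connections={"particles": ("J20", "particles_selected")},
...     params={"abinit_K": 3},
...     title="Ab-initio, K=3",
... )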
title (str, optional): Title for external job (recommended). - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for external job. - Defaults to None. + Defaults to "". Returns: ExternalJob: created external job instance """ - job_uid: str = self.cs.vis.create_external_job( # type: ignore - project_uid=self.uid, workspace_uid=workspace_uid, user=self.cs.user_id, title=title, desc=desc - ) - return self.find_external_job(job_uid) + return self.cs.create_external_job(self.uid, workspace_uid=workspace_uid, title=title, desc=desc) def save_external_result( self, @@ -201,8 +207,8 @@ def save_external_result( name: Optional[str] = None, slots: Optional[List[SlotSpec]] = None, passthrough: Optional[Tuple[str, str]] = None, - title: Optional[str] = None, - desc: Optional[str] = None, + title: str = "", + desc: str = "", ) -> str: """ Save the given result dataset to the project. Specify at least the @@ -268,13 +274,16 @@ def save_external_result( inherits slots from the specified output. e.g., ``("J1", "particles")``. Defaults to None. title (str, optional): Human-readable title for this output. - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for this output. Defaults - to None. + to "". Returns: str: UID of created job where this output was saved """ + if slots and any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. Use 'name' instead.", DeprecationWarning, stacklevel=2) + slots = [as_output_slot(slot) for slot in slots] # type: ignore return self.cs.save_external_result( self.uid, workspace_uid, diff --git a/cryosparc/registry.py b/cryosparc/registry.py index 593508ec..b8d35303 100644 --- a/cryosparc/registry.py +++ b/cryosparc/registry.py @@ -4,11 +4,11 @@ """ import re -from collections.abc import Iterable +import warnings from enum import Enum from inspect import isclass from types import ModuleType -from typing import Dict, Optional, Type +from typing import Dict, Iterable, Optional, Type from pydantic import BaseModel @@ -80,8 +80,6 @@ def model_for_ref(schema_ref: str) -> Optional[Type]: Returns None if ref is not found. """ - import warnings - components = schema_ref.split("/") if len(components) != 4 or components[0] != "#" or components[1] != "components" or components[2] != "schemas": warnings.warn(f"Warning: Invalid schema reference {schema_ref}", stacklevel=2) diff --git a/cryosparc/spec.py b/cryosparc/spec.py index 8482331b..32bb2acb 100644 --- a/cryosparc/spec.py +++ b/cryosparc/spec.py @@ -1,30 +1,34 @@ """ -Type specifications for CryoSPARC database entities. - -Unless otherwise noted, classes defined here represent dictionary instances -whose attributes may be accessed with dictionary key syntax. +Specification for various utility types used throughout tools and CryoSPARC. +""" -Examples: +from typing import Dict, List, Literal, Tuple, TypedDict, Union - Accessing job document details +Shape = Tuple[int, ...] +"""A numpy shape tuple from ndarray.shape""" - >>> cs = CryoSPARC() - >>> job = cs.find_job("P3", "J118") - >>> job.doc["output_results"][0]["metafiles"] - [ - "J118/J118_000_particles.cs", - "J118/J118_001_particles.cs", - "J118/J118_002_particles.cs", - "J118/J118_003_particles.cs" - ] +DType = Union[str, Tuple[str, Shape]] """ -from abc import ABC, abstractmethod -from typing import Any, Dict, Generic, List, Literal, Optional, Tuple, TypedDict, TypeVar, Union + Can just be a single string such as "f4", "3u4" or "O". 
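For the ``save_external_result`` helper updated above, a sketch assuming a particle ``Dataset`` named ``particles`` was built elsewhere (UIDs and slot names illustrative); note that expanded slot dictionaries now use the ``"name"`` key:

>>> job_uid = project.save_external_result(
...     "W1",
...     particles,
...     type="particle",
...     name="picked_particles",
...     slots=["location", {"name": "pick_stats", "dtype": "pick_stats"}],
...     passthrough=("J2", "micrographs"),
...     title="Picked particles",
... )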
+ A datatype description of a ndarray entry. -# Database document -D = TypeVar("D") + Can also be the a tuple with a string datatype name and its shape. For + example, the following dtypes are equivalent. + - "3u4" + - ">> print(details['filename']) - image.png - """ - - _id: str - """Document ID""" - - filename: str - """File name""" - - contentType: AssetContentType - """Asset content type, e.g., "image/png" """ +MICROGRAPH_CONTENT_TYPES: Dict[MicrographFormat, MicrographContentType] = { + "mrc": "image/x-mrc", + "mrcs": "image/x-mrc", + "stk": "image/x-mrc", + "tif": "image/tiff", + "tiff": "image/tiff", + "eer": "application/x-eer", + "ecc": "application/x-eer", + "bz2": "application/x-bzip2", + "cmrcbz2": "application/x-bzip2", +} - uploadDate: str # ISO formatted - """ISO 8601-formatted asset upload date""" +DATASET_CONTENT_TYPES: Dict[DatasetFormat, DatasetContentType] = { + "cs": "application/x-cryosparc-dataset", # but usually has numpy format internally + "npy": "application/x-numpy", +} - length: int # in bytes - """Size of file in bytes""" +ASSET_CONTENT_TYPES: Dict[AssetFormat, AssetContentType] = {**TEXT_CONTENT_TYPES, **IMAGE_CONTENT_TYPES} +ASSET_EXTENSIONS: Dict[AssetContentType, AssetFormat] = {v: k for k, v in ASSET_CONTENT_TYPES.items()} - chunkSize: int # in bytes - """File chunk size in bytes""" +KNOWN_CONTENT_TYPES: Dict[AnyFormat, AnyContentType] = { + **TEXT_CONTENT_TYPES, + **IMAGE_CONTENT_TYPES, + **MICROGRAPH_CONTENT_TYPES, + **DATASET_CONTENT_TYPES, +} - md5: str - """MD5 hash of asset""" - project_uid: str - """Associated project UID""" +# Slot is defined in two classes like this because it's the only way to +# make the ``required`` key optional. +class _Slot(TypedDict): + """ + :meta private: + """ - job_uid: str # also used for Session UID - """Associated job or session UID""" + name: str + dtype: str -class EventLogAsset(TypedDict): +class Slot(_Slot, total=False): """ - Dictionary item in a job event log's ``imgfiles`` property (in the - ``events`` collection). Keys may be accessed with dictionary key syntax. + Full slot dictionary specification type for items in the slots=... argument + when creating inputs or outputs. e.g., ``{"name": "ctf", "dtype": "ctf"}`` + or ``{"name": "background_blob", "dtype": "stat_blob", "required": False}`` - Examples: - - >>> print(asset['filename']) - image.png - """ + See :py:type:`SlotSpec` for details. - fileid: str - """Reference to file ``_id`` property in GridFS collection""" + Attributes: + name (str): where to find field in a corresponding .cs file e.g., + ``"background_blob"``, ``"ctf"``, ``"alignments_class_0"`` + dtype (str): name of known data type. e.g., ``"stat_blob"``, ``"ctf"``, + ``"alignments3D"``. + required (bool, optional): Whether this slot is required. Applies to + input specs only. Defaults to True. - filename: str - """File name""" + """ - filetype: AssetContentType - """File content type, e.g., "image/png" """ + required: bool class Datafield(TypedDict): """ - Definition of a prefix field within a CS file. - - Examples: - - >>> field = Datafield(dtype='alignments3D', prefix='alignments_class_0', required=False) - >>> print(field['dtype']) - alignments3D + Deprecated. Use :py:class:`Slot` instead. """ dtype: str - """Datatype-specific string from based on entry in - ``cryosparc_compute/jobs/common.py``. e.g., "movie_blob", "ctf", - "alignments2D".""" - prefix: str - """where to find field in an associated ``.cs`` file. 
e.g., - "alignments_class_1" """ - required: bool - """whether this field must necessarily exist in a corresponding - input/output. Assumed to be ``True`` if not specified""" -SlotSpec = Union[str, Datafield] -""" -A result slot specification for the slots=... argument. +SlotSpec = Union[str, Slot, Datafield] """ +A result slot specification for items in the slots=... argument when creating +inputs or outputs. +In CryoSPARC, all jobs have one or more inputs and outputs. An input or output +has some broad :py:type:`Datatype`, such as ``"exposure"`` or ``"particle"``. +Each input or output also has a list of associated "low-level" results created +at various stages of processing, such as ``"location"`` for picked particles and +``blob`` for extracted particles. A slot represents one of these low-level +results. -class InputSlot(TypedDict): - """ - Dictionary entry in Job document's ``input_slot_groups.slots`` property. - """ - - type: Datatype - """Cryo-EM native data type, e.g., "exposure", "particle" or "volume" """ - - name: str - """Input slot name, e.g., "movie_blob" or "location" """ - - title: str - """Human-readable input slot title""" - - description: str - """Human-readable description""" - - optional: bool - """If True, input is not required for the job""" - +In the CryoSPARC interface, open a job's "Inputs" or "Outputs" tab to see the +kinds of slots available. You may also download an output and load it with +:py:type:`~cryosparc.dataset.Dataset` to inspect the infomation encoded in its +results. -class ConnectionSlot(TypedDict): - """ - Slots specified entry in a Job document's ``input_slot_groups[].connections[].slots`` list. - """ - - slot_name: Optional[str] - """Passthrough slots have ``slot_name`` set to ``None``.""" - - job_uid: str - """Parent job UID source of this input slot connection.""" - - group_name: str - """Name of output group in parent job. e.g., "particles" """ - - result_name: str - """Name of output slot in parent job, e.g., "blob" """ - - result_type: str - """Type of result slot based on entry in ``cryosparc_compute/jobs/common.py``, e.g., "particle.blob" """ - - version: Union[int, Literal["F"]] - """Version number or specifier to use. Usually "F" """ - - -class Connection(TypedDict): - """ - Connection element specified in a Job document's ``input_slot_groups[].connections`` list. - """ - - job_uid: str - """Parent job UID source of main input group connection.""" - - group_name: str - """Name of output group in parent job. e.g., "particles" """ - - slots: List[ConnectionSlot] - """List of connection specifiers for each slot""" - - -class InputSlotGroup(TypedDict): - """Element specified in a Job document's ``input_slot_groups`` list.""" +Provide each slot as either a string representing a name and result type, or a +full dictionary specification. - type: Datatype - """Possible Cryo-EM data type for this group, e.g., "particle".""" +A string in the format ``""`` is a shortcut for ``{"name": "", +"dtype": "", "required": True}``. - name: str - """Input group name, e.g., "particles".""" - - title: str - """Human-readable input group title.""" - - description: str - """Human-readable input group description.""" +A string in the format ``"?"`` is a shortcut for ``{"name": "", +"dtype": "", "required": False}`` (input slots only). - count_min: int - """Minimum required output groups that may be connected to this input slot.""" +Example strings:: - count_max: Optional[int] - """Maximum allowed output groups that may be connected to this input slot. 
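Putting the ``SlotSpec`` forms above together, a sketch of requesting slots for an external job's input (names illustrative; the ``"?"`` prefix marks an optional input slot):

>>> slots = [
...     "ctf",                                                    # same as {"name": "ctf", "dtype": "ctf"}
...     "?background_blob",                                       # optional input slot
...     {"name": "alignments_class_0", "dtype": "alignments3D"},  # explicit dtype for a dynamic name
... ]
>>> job.add_input("particle", name="seed_particles", slots=slots, min=1)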
Infinity if not specified.""" + "ctf" + "micrograph_blob" + "?background_blob" - repeat_allowed: bool - """If True, the same output group may be connected twice.""" +Example equivalent full specifications:: - slots: List[InputSlot] - """List of slot definitions in the input group.""" - - connections: List[Connection] - """Connected output for this input group.""" - - -class OutputResultGroupContains(TypedDict): - """ - Elements of a Job document's ``output_result_groups[].contains`` list. - """ - - uid: str - """Result unique ID, e.g., "J42-R1".""" - - type: str - """Result type based on entry in ``cryosparc_compute/jobs/common.py``, e.g., "particle.alignments3D".""" - - name: str - """Name of output result (a.k.a. slot), e.g., "alignments_class_1".""" - - group_name: str - """Name of output group, e.g., "particles".""" - - passthrough: bool - """If True, this result is passed through as-is from an associated input.""" - - -class OutputResultGroup(TypedDict): - """ - Elements of a Job document's ``output_result_groups`` list. - """ - - uid: str - """Ouptut group unique ID, e.g., "J42-G1".""" - - type: Datatype - """Possible Cryo-EM data type for this group, e.g., "particle".""" - - name: str - """Output group name, e.g., "particles_selected" """ - - title: str - """Human-readable output group title.""" - - description: str - """Human-readable output group description.""" - - contains: List[OutputResultGroupContains] - """List of specific results (a.k.a. slots) in this output group.""" - - passthrough: Union[str, Literal[False]] - """Either ``False`` if this is a newly-created output or the name of an - input group used to forward passthrough slots for this result group.""" - - num_items: int - """Number of rows in the dataset for this result group populated by jobs when they run.""" - - summary: dict - """Context-specific details about this result populated by jobs when they run.""" - - -class OutputResult(TypedDict): - """ - Detailed schema and metadata for a Job document's ``output_results`` list. - Similar to a flattened ``output_result_groups[].contains`` but with more - details. - """ - - uid: str - """Result unique ID, e.g., "J42-R1".""" - - type: str - """Result type based on entry in ``cryosparc_compute/jobs/common.py``, e.g., "particle.alignments3D".""" - - name: str - """Name of output result (a.k.a. slot), e.g., "alignments_class_1".""" - - group_name: str - """Name of output group, e.g., "particles".""" - - title: str - """Human-readable output result title.""" - - description: str - """Human-readable output result description.""" - - versions: List[int] - """List of available intermediate result version numbers.""" - - metafiles: List[str] - """List of available intermediate result files (same size as ``versions``).""" - - min_fields: List[Tuple[str, str]] - """Minimum included dataset field definitions in this result.""" - - num_items: int - """Number of rows in the dataset for this result populated by jobs when they run.""" - - passthrough: bool - """If True, this result is passed through as-is from an associated input.""" - - -class BaseParam(TypedDict): - """ - Base parameter specification. - """ - - value: bool - """Base parameter value. 
Should not be changed.""" - - title: str - """Human-readable parameter title.""" - - desc: str - """Human-readable parameter description.""" - - order: int - """Parameter order in the builder list.""" - - section: str - """Parameter section identifier.""" - - advanced: bool - """True if this is an advanced parameter (hidden unlesss the "Advanced" - checkbox is enabled in the Job Builder".""" - - hidden: bool - """If True, this parameter is always hidden from the interface.""" - - -class Param(BaseParam): - """ - Specifies possible values for type property. Inherits from - BaseParam_. - - .. _BaseParam: - #cryosparc.spec.BaseParam - """ - - type: Literal["number", "string", "boolean"] - """Possible Parameter type.""" - - -class EnumParam(BaseParam): - """ - Additional Param keys available for enum params. Inherits from BaseParam_. - - .. _BaseParam: - #cryosparc.spec.BaseParam - """ - - type: Literal["enum"] - """Possible Parameter type.""" - - enum_keys: List[str] - """Possible enum names for display for selection. Parameter must be set to - one of these values.""" - - enum_dict: Dict[str, Any] - """Map from enum key names to their equivalent values.""" - - -class PathParam(BaseParam): - """ - Additional Param key available for path params. Inherits Inherits from - BaseParam_. - - .. _BaseParam: - #cryosparc.spec.BaseParam - """ - - type: Literal["path"] - - path_dir_allowed: bool - """If True, directories may be specified.""" - - path_file_allowed: bool - """If True, files may be specified.""" - - path_glob_allowed: bool - """If True, a wildcard string that refers to many files may be specified..""" - - -class ParamSpec(TypedDict): - """Param specification. Dictionary with single ``"value"`` key.""" - - value: Any - """Value of param.""" - - -class ParamSection(TypedDict): - """Param section specification""" - - title: str - """Parameter section title""" - desc: str - """Parameter section description""" - order: int - """Order for this parameter section to appear in the job builder""" + {"name": "ctf", "dtype": "ctf"} + {"name": "micrograph_blob", "dtype": "micrograph_blob", "required": True} + {"name": "background_blob", "dtype": "stat_blob", "required": False} +Use the full specification when the ``dtype`` cannot be inferred from the +``name`` string because it is dynamic. For example, 3D Variability job +``particles`` outputs have slots named ``"components_mode_X"`` with dtype +``"components"`` where ``X`` is a mode number:: -class ProjectLastAccessed(TypedDict, total=False): - """ - Details on when a project was last accessed. - """ - - name: str - """User account name that accessed this project.""" - - accessed_at: str - """Last access date in ISO 8601 format.""" - - -class ProjectDocument(TypedDict): - """ - Specification for a project document in the MongoDB database. - """ - - _id: str - """MongoDB ID""" - - uid: str - """Project unique ID, e.g., "J42".""" - - uid_num: int - """Project number, e.g., 42.""" - - title: str - """Human-readable Project title.""" - - description: str - """Human-readable project markdown description.""" - - project_dir: str - """Project directory on disk. 
May include unresolved shell variables.""" - - project_params_pdef: dict - """Project-level job parameter default definitions.""" - - owner_user_id: str - """Object ID of user account that created this project.""" - - created_at: str - """Project creation date in ISO 8601 format.""" - - deleted: bool - """Whether this project has been deleted from the interface.""" - - users_with_access: List[str] - """Object IDs of user accounts that may access this project.""" - - size: int - """Computed size of project on disk.""" - - last_accessed: ProjectLastAccessed - """Details about when the project was last accessed by a user account.""" - - archived: bool - """Whether this project has been marked as archived from the inteface.""" - - detached: bool - """Whether this project is detached.""" - - hidden: bool - """Whether this project is hidden.""" - - project_stats: dict - """Computed project statistics.""" - - generate_intermediate_results_settings: dict - """Project settings for generating intermediate results for specific job types.""" - - -class JobDocument(TypedDict): - """ - Specification for a Job document from the MongoDB database. - """ - - _id: str - """MongoDB ID""" - - uid: str - """Job unique ID, e.g., "J42".""" - - uid_num: int - """Job number, e.g., 42.""" - - project_uid: str - """Project unique ID, e.g., "P3".""" - - project_uid_num: int - """Project number, e.g., 3.""" - - type: str - """Job type identifier, e.g., "class2d".""" - - job_type: str - """Alias for type key""" - - title: str - """Human-readable job title.""" - - description: str - """Human-readable job markdown description.""" - - status: JobStatus - """Job scheduling status, e.g., "building", "queued", "running".""" - - created_at: str - """Job creation date in ISO 8601 format.""" - - created_by_user_id: Optional[str] - """Object ID of user account that created this job.""" - - deleted: bool - """True if the job has been marked as deleted.""" - - parents: List[str] - """List of parent jobs UIDs based on input connections.""" - - children: List[str] - """List of child job UIDs based on output connections.""" - - input_slot_groups: List[InputSlotGroup] - """Input group specifications, including schema and connection information.""" - - output_result_groups: List[OutputResultGroup] - """Output group specifications.""" - - output_results: List[OutputResult] - """Aggregated output results specification (similar to - ``output_result_groups`` with additional field information).""" - - params_base: Dict[str, Union[Param, EnumParam, PathParam]] - """Job param specification and their base values. Each key represents a - parameter name.""" - - params_spec: Dict[str, ParamSpec] - """User-specified parameter values. Each key is a parameter value. Not all - keys from ``params_base`` are included here, only ones that were explicitly - set.""" - - params_secs: Dict[str, ParamSection] - """Parameter section definitions""" - - workspace_uids: List[str] - """List of workspace UIDs this job belongs to.""" - - -class WorkspaceDocument(TypedDict): - """ - Specification for a Workspace document from the MongoDB database. - Live-related fields are not yet included. 
- """ - - _id: str - """MongoDB ID""" - - uid: str - """Workspace unique ID, e.g., "W1".""" - - uid_num: int - """Workspace number, e.g., 1.""" - - project_uid: str - """Project unique ID, e.g., "P3".""" - - project_uid_num: int - """Project number, e.g., 3.""" - - created_at: str - """Workspace creation date in ISO 8601 format.""" - - created_by_user_id: str - """Object ID of user account that created this workspace.""" - - deleted: bool - """True if the workspace has been marked as deleted.""" - - title: str - """Human-readable workspace title.""" - - description: Optional[str] - """Human-readable workspace markdown description.""" - - workspace_type: Literal["base", "live"] - """Either "live" or "base". """ - - -class ResourceSlots(TypedDict): - """ - Listings of available resources on a worker node that may be allocated for - scheduling. - """ - - CPU: List[int] - """List of available CPU core indices.""" - GPU: List[int] - """List of available GPU indices.""" - RAM: List[int] - """List of available 8GB slots.""" - - -class FixedResourceSlots(TypedDict): - """ - Available resource slots that only indicate presence, not the amount that - may be allocated. (i.e., "SSD is available or not available") - """ - - SSD: bool - """Whether this target thas an SSD""" - - -class Gpu(TypedDict): - """ - GPU details for a target. - """ - - id: int - """Index of GPU. Generally based on which PCI slot the GPU occupies.""" - name: str - """Identifiable model name for this GPU, e.g.,"GeForce RTX 3090".""" - mem: int - """Amount of memory available on this GPU, in bytes.""" - - -class SchedulerLane(TypedDict): - """ - Description for a CryoSPARC scheduler lane. - """ - - name: str - """Identifier for this lane.""" - type: Literal["node", "cluster"] - """What kind of lane this is based on how on what kind of target(s) it contains.""" - title: str - """Human-readable lane title.""" - desc: str - """Human-readable lane description.""" - - -class BaseSchedulerTarget(TypedDict): - """ - Properties shared by both node and cluster scheduler targets. - """ - - lane: str - """Lane name this target belongs to.""" - - name: str - """Identifier for this target.""" - - title: str - """Human-readable title for this target.""" - - desc: Optional[str] - """Human-readable description for this target.""" - - hostname: str - """Network machine hostname (same as name for for clusters).""" - - worker_bin_path: str - """Path to cryosparc_worker/bin/cryosparcw executable.""" - - cache_path: Optional[str] - """Path the SSD cache scratch directory, if applicable.""" - - cache_reserve_mb: int # 10G default - """Ensure at least this much space is free on the SSD scratch drive before - caching.""" - - cache_quota_mb: int - """Do not cache more than this amoun on the SSD scrath drive..""" - - -class SchedulerTargetNode(BaseSchedulerTarget): - """ - node-type scheduler target that does not include GPUs. Inherits from - BaseSchedulerTarget_. - - .. _BaseSchedulerTarget: - #cryosparc.spec.BaseSchedulerTarget - """ - - type: Literal["node"] - """Node scheduler targets have type "node".""" - - ssh_str: str - """Shell command used to access this node, e.g., ``ssh cryosparcuser@worker``.""" - - resource_slots: ResourceSlots - """Available compute resources.""" - - resource_fixed: FixedResourceSlots - """Available fixed resources.""" - - monitor_port: Optional[int] - """Not used.""" - - -class SchedulerTargetGpuNode(SchedulerTargetNode): - """ - node-type scheduler target that includes GPUs. 
Inherits from - BaseSchedulerTarget_ and SchedulerTargetNode_. - - .. _BaseSchedulerTarget: - #cryosparc.spec.BaseSchedulerTarget - .. _SchedulerTargetNode: - #cryosparc.spec.SchedulerTargetNode - """ - - gpus: List[Gpu] - """Details about GPUs available on this node.""" - - -class SchedulerTargetCluster(BaseSchedulerTarget): - """ - Cluster-type scheduler targets. Inherits from BaseSchedulerTarget_. - - .. _BaseSchedulerTarget: - #cryosparc.spec.BaseSchedulerTarget - """ - - type: Literal["cluster"] - """Cluster scheduler targets have type "cluster".""" - - script_tpl: str - """Full cluster submission script Jinja template.""" - - send_cmd_tpl: str - """Template command to access the cluster and running commands.""" - - qsub_cmd_tpl: str - """Template command to submit jobs to the cluster.""" - - qstat_cmd_tpl: str - """Template command to check the cluster job by its ID.""" - - qdel_cmd_tpl: str - """Template command to delete cluster jobs.""" - - qinfo_cmd_tpl: str - """Template command to check cluster queue info.""" - + [ + "blob", + "?locations", + {"name": "components_mode_0", "dtype": "components"}, + {"name": "components_mode_1", "dtype": "components", "required": False}, + {"name": "components_mode_2", "dtype": "components", "required": False}, + ] -SchedulerTarget = Union[SchedulerTargetNode, SchedulerTargetGpuNode, SchedulerTargetCluster] +Note that the ``required`` key only applies to input slots. """ -Scheduler target details. -""" - - -class JobSpec(TypedDict): - """ - Specification for a Job document from the CryoSPARC's job register. - """ - - name: str - """Job's machine-readable type, e.g., 'homo_abinit'.""" - title: str - """Job's human-readable name, e.g., 'Ab-Initio Reconstruction'.""" - shorttitle: str - """Short-version of name, e.g., 'Ab-Initio'.""" - description: str - """Detailed description of job type""" - - input_slot_groups: List[InputSlotGroup] - """Description of available inputs.""" - params_base: Dict[str, Union[Param, EnumParam, PathParam]] - """Description of available parameters.""" - params_secs: Dict[str, ParamSection] - """Description of parameter sections.""" - - is_interactive: bool - """If True, this job is requires interaction. "Curate Exposures" and "Select - 2D Classes" are examples of interactive jobs.""" - is_lightweight: bool - """If True, does job does not require GPUs and requires few-enough - resources that it can usually run directly on the master machine.""" - hidden: bool - """If True, job is not visible in the interface.""" - develop_only: bool - """If True, job is in development and not available to run.""" class JobSection(TypedDict): """ - Specification of available job types of a certain category. - - Examples: - - >>> { - ... "name": "refinement", - ... "title": "3D Refinement", - ... "description: "...", - ... "contains" : [ - ... "homo_refine", - ... "hetero_refine", - ... "nonuniform_refine", - ... "homo_reconstruct" - ... ] - ... } + Deprecated. Use :py:class:`~cryosparc.models.job_register.JobRegister` + instead. """ name: str - """Section identifier.""" title: str - """Human-readable section title.""" description: str - """Human-readable section description.""" contains: List[str] - """List of available job types in this category""" - - -class JobSpecSection(TypedDict): - """ - Similar to JobSection_, except each item in ``contains`` is a detailed - JobSpec_. - - - .. _JobSection: - #cryosparc.spec.JobSection - .. 
_JobSpec: - #cryosparc.spec.JobSpec - """ - - name: str - """Section identifier.""" - title: str - """Human-readable section title.""" - description: str - """Human-readable section description.""" - contains: List[JobSpec] - """List of job details available in this category""" - - -class MongoController(ABC, Generic[D]): - """ - Abstract base class for Project, Workspace, Job classes and any other types - that have underlying Mongo database documents. - - Generic type argument D is a typed dictionary definition for a Mongo - document. - - :meta private: - """ - - _doc: Optional[D] = None - - @property - def doc(self) -> D: - if not self._doc: - self.refresh() - assert self._doc, "Could not refresh database document" - return self._doc - - @abstractmethod - def refresh(self): - # Must be implemented in subclasses - return self diff --git a/cryosparc/stream.py b/cryosparc/stream.py index acd0deb7..07b8a393 100644 --- a/cryosparc/stream.py +++ b/cryosparc/stream.py @@ -4,33 +4,63 @@ from pathlib import PurePath from typing import ( IO, - Any, - AsyncGenerator, - AsyncIterable, + TYPE_CHECKING, AsyncIterator, + Awaitable, BinaryIO, - Generator, - Iterable, Iterator, Optional, Protocol, Union, + overload, ) -from typing_extensions import Self +if TYPE_CHECKING: + from typing_extensions import Buffer, Self +from .constants import EIGHT_MIB +from .util import bopen -class AsyncBinaryIO(Protocol): - async def read(self, n: Optional[int] = None) -> bytes: ... +class AsyncReadable(Protocol): + """Any object that has an async read(size) method""" -class BinaryIteratorIO(BinaryIO): + def read(self, size: int = ..., /) -> Awaitable[bytes]: ... + + +class AsyncWritable(Protocol): + """Any object that has an async write(buffer) method""" + + def write(self, b: "Buffer", /) -> Awaitable[int]: ... + + +class AsyncBinaryIterator(Protocol): + """ + Any object that asynchronously yields bytes when iterated e.g.:: + + async for chunk in obj: + print(chunk.decode()) + """ + + def __aiter__(self) -> AsyncIterator[bytes]: ... + def __anext__(self) -> Awaitable[bytes]: ... + + +class BinaryIteratorIO(BinaryIO, Iterator[bytes]): """Read through a iterator that yields bytes as if it was a file""" - def __init__(self, iter: Union[Iterator[bytes], Generator[bytes, Any, Any]]): + def __init__(self, iter: Iterator[bytes]): self._iter = iter self._left = b"" + def __iter__(self): + assert not self._left, "Cannot iterate over a stream that has already been read" + return iter(self._iter) + + def __next__(self): + assert not self._left, "Cannot iterate over a stream that has already been read" + return next(self._iter) + def readable(self): return True @@ -65,13 +95,21 @@ def read(self, n: Optional[int] = None): return b"".join(out) -class AsyncBinaryIteratorIO(AsyncBinaryIO): +class AsyncBinaryIteratorIO(AsyncReadable, AsyncBinaryIterator, AsyncIterator[bytes]): """Similar to BinaryIteratorIO except the iterator yields bytes asynchronously""" - def __init__(self, iter: Union[AsyncIterator[bytes], AsyncGenerator[bytes, Any]]): + def __init__(self, iter: AsyncBinaryIterator): self._iter = iter self._left = b"" + def __aiter__(self): + assert not self._left, "Cannot iterate over a stream that has already been read" + return self._iter.__aiter__() + + def __anext__(self): + assert not self._left, "Cannot iterate over a stream that has already been read" + return self._iter.__anext__() + def readable(self): return True @@ -120,25 +158,26 @@ def api_schema(cls): instance in the request or response body. 
""" return { - "description": f"A binary stream representing a CryoSPARC {cls.__name__}", + "description": f"A binary stream representing a {cls.__name__} class instance", "content": {cls.media_type: {"schema": {"title": cls.__name__, "type": "string", "format": "binary"}}}, } @classmethod @abstractmethod - def load(cls, file: Union[str, PurePath, IO[bytes]]) -> "Self": + def load(cls, file: Union[str, PurePath, IO[bytes]], *, media_type: Optional[str] = None) -> "Self": """ - The given stream param must at least implement an async read method + Load stream from a file path or readable byte stream. The stream must + at least implement the `read(size)` function. """ ... @classmethod - def from_iterator(cls, source: Iterator[bytes]): - return cls.load(BinaryIteratorIO(source)) + def from_iterator(cls, source: Iterator[bytes], *, media_type: Optional[str] = None): + return cls.load(BinaryIteratorIO(source), media_type=media_type) @classmethod @abstractmethod - async def from_async_stream(cls, stream: AsyncBinaryIO) -> "Self": + async def from_async_stream(cls, stream: AsyncReadable, *, media_type: Optional[str] = None) -> "Self": """ Asynchronously load from the given binary stream. The given stream parameter must at least have ``async read(n: int | None) -> bytes`` method. @@ -146,12 +185,111 @@ async def from_async_stream(cls, stream: AsyncBinaryIO) -> "Self": ... @classmethod - async def from_async_iterator(cls, iterator: Union[AsyncIterator[bytes], AsyncGenerator[bytes, None]]): - return await cls.from_async_stream(AsyncBinaryIteratorIO(iterator)) + async def from_async_iterator(cls, iterator: AsyncBinaryIterator, *, media_type: Optional[str] = None): + return await cls.from_async_stream(AsyncBinaryIteratorIO(iterator), media_type=media_type) @abstractmethod - def stream(self) -> Iterable[bytes]: ... + def stream(self) -> Iterator[bytes]: ... - async def astream(self) -> AsyncIterable[bytes]: + async def astream(self) -> AsyncIterator[bytes]: for chunk in self.stream(): yield chunk + + def save(self, file: Union[str, PurePath, IO[bytes]]): + with bopen(file, "wb") as f: + self.dump(f) + + def dump(self, file: IO[bytes]): + for chunk in self.stream(): + file.write(chunk) + + def dumps(self) -> bytes: + return b"".join(self.stream()) + + async def adump(self, file: Union[IO[bytes], AsyncWritable]): + async for chunk in self.astream(): + result = file.write(chunk) + if isinstance(result, Awaitable): + await result + + async def adumps(self) -> bytes: + from io import BytesIO + + data = BytesIO() + await self.adump(data) + return data.getvalue() + + +class Stream(Streamable): + """ + Generic stream that that leaves handling of the stream data to the caller. + May accept stream data in any streamable format, though async formats + must be consumed with async functions. + """ + + @overload + def __init__(self, *, stream: IO[bytes] = ..., media_type: Optional[str] = ...): ... + @overload + def __init__(self, *, iterator: Iterator[bytes] = ..., media_type: Optional[str] = ...): ... + @overload + def __init__(self, *, astream: AsyncReadable = ..., media_type: Optional[str] = ...): ... + @overload + def __init__(self, *, aiterator: AsyncBinaryIterator = ..., media_type: Optional[str] = ...): ... 
+ def __init__( + self, + *, + stream: Optional[IO[bytes]] = None, + iterator: Optional[Iterator[bytes]] = None, + astream: Optional[AsyncReadable] = None, + aiterator: Optional[AsyncBinaryIterator] = None, + media_type: Optional[str] = None, + ): + if (stream is not None) + (iterator is not None) + (astream is not None) + (aiterator is not None) != 1: + raise TypeError("Exactly one of stream, iterator, astream or aiterator must be provided") + self._stream = stream + self._iterator = iterator + self._astream = astream + self._aiterator = aiterator + self.media_type = media_type or self.media_type + + @property + def asynchronous(self): + return (self._astream is not None) or (self._aiterator is not None) + + @classmethod + def load(cls, file: Union[str, PurePath, IO[bytes]], *, media_type: Optional[str] = None): + stream = open(file, "rb") if isinstance(file, (str, PurePath)) else file + return cls(stream=stream, media_type=media_type) + + @classmethod + def from_iterator(cls, source: Iterator[bytes], *, media_type: Optional[str] = None): + return cls(iterator=source, media_type=media_type) + + @classmethod + async def from_async_stream(cls, stream: AsyncReadable, *, media_type: Optional[str] = None): + return cls(astream=stream, media_type=media_type) + + @classmethod + async def from_async_iterator(cls, iterator: AsyncBinaryIterator, *, media_type: Optional[str] = None): + return cls(aiterator=iterator, media_type=media_type) + + def stream(self) -> Iterator[bytes]: + if self._stream: + while chunk := self._stream.read(EIGHT_MIB): + yield chunk + elif self._iterator: + for chunk in self._iterator: + yield chunk + else: + raise TypeError("This is an asynchronous stream, must use astream() instead") + + async def astream(self) -> AsyncIterator[bytes]: + if self._stream or self._iterator: + for chunk in self.stream(): + yield chunk + elif self._astream: + while chunk := await self._astream.read(EIGHT_MIB): + yield chunk + elif self._aiterator: + async for chunk in self._aiterator: + yield chunk diff --git a/cryosparc/stream_registry.py b/cryosparc/stream_registry.py index 414a2195..5a5d751a 100644 --- a/cryosparc/stream_registry.py +++ b/cryosparc/stream_registry.py @@ -1,4 +1,6 @@ from .dataset import Dataset from .registry import register_stream_class +from .stream import Stream register_stream_class(Dataset) +register_stream_class(Stream) diff --git a/cryosparc/tools.py b/cryosparc/tools.py index 0369f189..3b73d8e7 100644 --- a/cryosparc/tools.py +++ b/cryosparc/tools.py @@ -22,36 +22,41 @@ import os import re import tempfile +import warnings +from contextlib import contextmanager +from functools import cached_property +from hashlib import sha256 from io import BytesIO -from pathlib import Path, PurePath, PurePosixPath -from typing import IO, TYPE_CHECKING, Any, Container, Dict, Iterable, List, Optional, Tuple, Union -from warnings import warn +from pathlib import PurePath, PurePosixPath +from typing import IO, TYPE_CHECKING, Any, Container, Dict, Iterable, List, Optional, Tuple, Union, get_args import numpy as n -from .errors import InvalidSlotsError +from . 
import __version__, model_registry, mrc, registry, stream_registry +from .api import APIClient +from .controller import as_output_slot +from .dataset import CSDAT_FORMAT, DEFAULT_FORMAT, Dataset +from .job import ExternalJobController, JobController +from .models.asset import GridFSFile +from .models.external import ExternalOutputSpec +from .models.job_register import JobRegister +from .models.job_spec import Category, OutputRef, OutputSpec +from .models.scheduler_lane import SchedulerLane +from .models.scheduler_target import SchedulerTarget +from .models.user import User +from .project import ProjectController +from .row import R +from .spec import Datatype, JobSection, SlotSpec +from .stream import BinaryIteratorIO, Stream +from .util import clear_cached_property, padarray, print_table, trimarray +from .workspace import WorkspaceController if TYPE_CHECKING: - from numpy.typing import NDArray # type: ignore + from numpy.typing import NDArray -from . import __version__, mrc -from .command import CommandClient, CommandError, make_json_request, make_request -from .dataset import DEFAULT_FORMAT, Dataset -from .job import ExternalJob, Job -from .project import Project -from .row import R -from .spec import ( - ASSET_EXTENSIONS, - AssetDetails, - Datatype, - JobSection, - JobSpecSection, - SchedulerLane, - SchedulerTarget, - SlotSpec, -) -from .util import bopen, noopcontext, padarray, print_table, trimarray -from .workspace import Workspace +assert stream_registry +assert model_registry +registry.finalize() # no more models may be registered after this ONE_MIB = 2**20 # bytes in one mebibyte @@ -79,29 +84,28 @@ class CryoSPARC: High-level session class for interfacing with a CryoSPARC instance. Initialize with the host and base port of the running CryoSPARC instance. - This host and (at minimum) ``base_port + 2``, ``base_port + 3`` and - ``base_port + 5`` should be accessible on the network. + This host and (at minimum) ``base_port + 2`` should be accessible on the + network. Args: - license (str, optional): CryoSPARC license key. Defaults to - ``os.getenv("CRYOSPARC_LICENSE_ID")``. + base_url (str, optional): CryoSPARC instance URL, e.g., + "http://localhost:39000" or "https://cryosparc.example.com". + Same URL used to access CryoSPARC from a web browser. host (str, optional): Hostname or IP address running CryoSPARC master. - Defaults to ``os.getenv("CRYOSPARC_MASTER_HOSTNAME", "localhost")``. - base_port (int, optional): CryoSPARC services base port number. Defaults - to ``os.getenv("CRYOSPARC_MASTER_HOSTNAME", 39000)``. + Cannot be specified with ``base_url``. Defaults to + ``os.getenv("CRYOSPARC_MASTER_HOSTNAME", "localhost")``. + base_port (int, optional): CryoSPARC services base port number. + Cannot be specified with ``base_url``. Defaults to + ``os.getenv("CRYOSPARC_BASE_PORT", 39000)``. email (str, optional): CryoSPARC user account email address. Defaults to ``os.getenv("CRYOSPARC_EMAIL")``. password (str, optional): CryoSPARC user account password address. Defaults to ``os.getenv("CRYOSPARC_PASSWORD")``. + license (str, optional): (Deprecated) CryoSPARC license key. Defaults to + ``os.getenv("CRYOSPARC_LICENSE_ID")``. timeout (int, optional): Timeout error for HTTP requests to CryoSPARC command services. Defaults to 300. - Attributes: - cli (CommandClient): HTTP/JSONRPC client for ``command_core`` service (port + 2). - vis (CommandClient): HTTP/JSONRPC client for ``command_vis`` service (port + 3). - rtp (CommandClient): HTTP/JSONRPC client for ``command_rtp`` service (port + 5). 
- user_id (str): Mongo object ID of user account performing operations for this session. - Examples: Load project job and micrographs @@ -132,59 +136,79 @@ class CryoSPARC: "J43" """ - cli: CommandClient - vis: CommandClient - rtp: CommandClient - user_id: str # session user ID + api: APIClient + """ + HTTP REST API client for ``api`` service (port + 2). + """ + + base_url: str + """ + URL used for communication CryoSPARC instance REST API. + """ def __init__( self, - license: str = os.getenv("CRYOSPARC_LICENSE_ID", ""), - host: str = os.getenv("CRYOSPARC_MASTER_HOSTNAME", "localhost"), - base_port: int = int(os.getenv("CRYOSPARC_BASE_PORT", 39000)), - email: str = os.getenv("CRYOSPARC_EMAIL", ""), - password: str = os.getenv("CRYOSPARC_PASSWORD", ""), + base_url: Optional[str] = os.getenv("CRYOSPARC_BASE_URL"), + *, + host: Optional[str] = os.getenv("CRYOSPARC_MASTER_HOSTNAME"), + base_port: Union[int, str, None] = os.getenv("CRYOSPARC_BASE_PORT"), + email: Optional[str] = os.getenv("CRYOSPARC_EMAIL"), + license: Optional[str] = os.getenv("CRYOSPARC_LICENSE_ID"), + password: Optional[str] = os.getenv("CRYOSPARC_PASSWORD"), timeout: int = 300, ): - assert LICENSE_REGEX.fullmatch(license), f"Invalid or unspecified CryoSPARC license ID {license}" - assert email, "Invalid or unspecified email" - assert password, "Invalid or unspecified password" - - self.cli = CommandClient( - service="command_core", - host=host, - port=base_port + 2, - headers={"License-ID": license}, - timeout=timeout, - ) - self.vis = CommandClient( - service="command_vis", - host=host, - port=base_port + 3, - headers={"License-ID": license}, - timeout=timeout, - ) - self.rtp = CommandClient( - service="command_rtp", - host=host, - port=base_port + 5, - headers={"License-ID": license}, - timeout=timeout, - ) + if license: + warnings.warn( + "Support for license argument and CRYOSPARC_LICENSE_ID environment variable " + "will be removed in a future release", + DeprecationWarning, + stacklevel=2, + ) + if not LICENSE_REGEX.fullmatch(license): + raise ValueError(f"Invalid CryoSPARC license ID {license}") + + if host and base_port: + if base_url: + raise TypeError("Cannot specify host and base_port when base_url is specified") + self.base_url = f"http://{host}:{int(base_port) + 2}" + elif base_url: + self.base_url = f"{base_url}/api" # app forwards to api service (TODO) + else: + raise TypeError("Must specify either base_url or host + base_port") + + auth = None + if email and password: + auth = (email, sha256(password.encode()).hexdigest()) + elif license: + auth = ("cryosparc", sha256(license.encode()).hexdigest()) + # TODO: also load auth from config profile + else: + raise ValueError( + "CryoSPARC authentication not provided. " + "Please see documentation at https://tools.cryosparc.com for instructions." 
+ ) + + tools_major_minor_version = ".".join(__version__.split(".")[:2]) # e.g., 4.1.0 -> 4.1 try: - self.user_id = self.cli.get_id_by_email_password(email, password) # type: ignore - cs_version: str = self.cli.get_running_version() # type: ignore + self.api = APIClient(self.base_url, auth=auth, timeout=timeout) + assert self.user # trigger user profile fetch + cs_version = self.api.config.get_version() except Exception as e: - raise RuntimeError("Could not complete CryoSPARC authentication with given credentials") from e + raise RuntimeError( + f"Could not connect to CryoSPARC at {base_url} due to error:\n{e}\n" + "Please ensure your credentials are correct and that you are " + "connecting to a CryoSPARC version compatible with " + f"cryosparc-tools {tools_major_minor_version}. " + "Please see the documentation at https://tools.cryosparc.com for details." + ) from e if cs_version and VERSION_REGEX.match(cs_version): cs_major_minor_version = ".".join(cs_version[1:].split(".")[:2]) # e.g., v4.1.0 -> 4.1 - tools_major_minor_version = ".".join(__version__.split(".")[:2]) # e.g., 4.1.0 -> 4.1 tools_prerelease_url = "https://github.com/cryoem-uoft/cryosparc-tools/archive/refs/heads/develop.zip" if cs_major_minor_version != tools_major_minor_version: - warn( - f"CryoSPARC instance {host}:{base_port} with version {cs_version} " - f"may not be compatible with current cryosparc-tools version v{__version__}.\n\n" + warnings.warn( + f"CryoSPARC at {self.base_url} with version {cs_version} " + f"may not be compatible with current cryosparc-tools version {__version__}.\n\n" "To install a compatible version of cryosparc-tools:\n\n" f" pip install --force cryosparc-tools~={cs_major_minor_version}.0\n\n" "Or, if running a CryoSPARC pre-release or private beta:\n\n" @@ -192,6 +216,31 @@ def __init__( stacklevel=2, ) + @cached_property + def user(self) -> User: + """ + User account performing operations for this session. + """ + return self.api.users.me() + + @cached_property + def job_register(self) -> JobRegister: + """ + Information about jobs available on this instance. + """ + return self.api.job_register() + + def refresh(self): + """ + Reset cache and refresh instance details. + + Raises: + APIError: cannot be refreshed. + """ + clear_cached_property(self, "user") + clear_cached_property(self, "job_register") + assert self.user # ensure we can still fetch a user + def test_connection(self): """ Verify connection to CryoSPARC command services. 
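A minimal connection sketch for the new unified client, based on the constructor and properties added above (the URL and credentials are placeholders, not values from this change)::

    >>> from cryosparc.tools import CryoSPARC
    >>> cs = CryoSPARC(base_url="http://localhost:39000", email="ali@example.com", password="password123")
    >>> cs.test_connection()            # prints success/failure and returns a bool
    >>> print(len(cs.job_register.specs), "job types available")
    >>> cs.refresh()                    # clears the cached ``user`` and ``job_register`` properties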
@@ -199,28 +248,13 @@ def test_connection(self): Returns: bool: True if connection succeeded, False otherwise """ - if self.cli.test_connection(): # type: ignore - print(f"Connection succeeded to CryoSPARC command_core at {self.cli._url}") + if self.api.health() == "OK": + print(f"Connection succeeded to CryoSPARC API at {self.base_url}") + return True else: - print(f"Connection FAILED to CryoSPARC command_core at {self.cli._url}") + print(f"Connection FAILED to CryoSPARC API at {self.base_url}") return False - with make_request(self.vis, method="GET") as response: - if response.read(): - print(f"Connection succeeded to CryoSPARC command_vis at {self.vis._url}") - else: - print(f"Connection FAILED to CryoSPARC command_vis at {self.vis._url}") - return False - - with make_request(self.rtp, method="GET") as response: - if response.read(): - print(f"Connection succeeded to CryoSPARC command_rtp at {self.rtp._url}") - else: - print(f"Connection FAILED to CryoSPARC command_rtp at {self.rtp._url}") - return False - - return True - def get_lanes(self) -> List[SchedulerLane]: """ Get a list of available scheduler lanes. @@ -228,7 +262,7 @@ def get_lanes(self) -> List[SchedulerLane]: Returns: list[SchedulerLane]: Details about available lanes. """ - return self.cli.get_scheduler_lanes() # type: ignore + return self.api.resources.find_lanes() def get_targets(self, lane: Optional[str] = None) -> List[SchedulerTarget]: """ @@ -241,60 +275,63 @@ def get_targets(self, lane: Optional[str] = None) -> List[SchedulerTarget]: Returns: list[SchedulerTarget]: Details about available targets. """ - targets: List[SchedulerTarget] = self.cli.get_scheduler_targets() # type: ignore - if lane is not None: - targets = [t for t in targets if t["lane"] == lane] - return targets + return self.api.resources.find_targets(lane=lane) def get_job_sections(self) -> List[JobSection]: """ - Get a summary of job types available for this instance, organized by - category. + (Deprecated) Get a summary of job types available for this instance, + organized by category. Returns: list[JobSection]: List of job section dictionaries. Job types are listed in the ``"contains"`` key in each dictionary. """ - return self.cli.get_job_sections() # type: ignore - - def get_job_specs(self) -> List[JobSpecSection]: - """ - Get a detailed summary of job and their specification available on - this instance, organized by category. - - Returns: - - list[JobSpecSection]: List of job section dictionaries. Job specs - are listed in the ``"contains"`` key in each dictionary - """ - return self.cli.get_config_var("job_types_available") # type: ignore - - def print_job_types(self, section: Union[str, Container[str], None] = None, *, show_legacy: bool = False): + warnings.warn("Use job_register property instead", DeprecationWarning, stacklevel=2) + job_types_by_category = { + category: [spec.type for spec in self.job_register.specs if spec.category == category] + for category in get_args(Category) + } + return [ + {"name": category, "title": category.replace("_", " ").title(), "description": "", "contains": job_types} + for category, job_types in job_types_by_category.items() + ] + + def print_job_types( + self, + category: Union[Category, Container[Category], None] = None, + *, + show_legacy: bool = False, + ): """ Print a table of job types and their titles, organized by category. Args: - section (str | list[str], optional): Only show jobs from the given - section or list of sections. Defaults to None. 
+ category (Category | list[Category], optional): Only show jobs from + the given category or list of categories. Defaults to None. show_legacy (bool, optional): If True, also show legacy jobs. Defaults to False. """ - allowed_sections = {section} if isinstance(section, str) else section - sections = self.get_job_specs() - headings = ["Section", "Job", "Title"] + allowed_categories = {category} if isinstance(category, str) else category + register = self.job_register + headings = ["Category", "Job", "Title"] rows = [] - for sec in sections: - if allowed_sections is not None and sec["name"] not in allowed_sections: + prev_category = None + for job_spec in register.specs: + if allowed_categories is not None and job_spec.category not in allowed_categories: + continue + if job_spec.hidden or job_spec.stability == "obsolete": + continue + if not show_legacy and job_spec.stability == "legacy": continue - sec_name = sec["name"] - for job in sec["contains"]: - if job["hidden"] or job["develop_only"] or not show_legacy and "(LEGACY)" in job["title"]: - continue - rows.append([sec_name, job["name"], job["title"]]) - sec_name = "" + + category = job_spec.category + display_category = "" if category == prev_category else category + rows.append([display_category, job_spec.type, job_spec.title]) + prev_category = category + print_table(headings, rows) - def find_project(self, project_uid: str) -> Project: + def find_project(self, project_uid: str) -> ProjectController: """ Get a project by its unique ID. @@ -302,13 +339,11 @@ def find_project(self, project_uid: str) -> Project: project_uid (str): Project unique ID, e.g., "P3" Returns: - Project: project instance + ProjectController: project accessor object """ - project = Project(self, project_uid) - project.refresh() - return project + return ProjectController(self, project_uid) - def find_workspace(self, project_uid: str, workspace_uid: str) -> Workspace: + def find_workspace(self, project_uid: str, workspace_uid: str) -> WorkspaceController: """ Get a workspace accessor instance for the workspace in the given project with the given UID. Fails with an error if workspace does not exist. @@ -318,12 +353,11 @@ def find_workspace(self, project_uid: str, workspace_uid: str) -> Workspace: workspace_uid (str): Workspace unique ID, e.g., "W1" Returns: - Workspace: accessor instance + WorkspaceController: workspace accessor object """ - workspace = Workspace(self, project_uid, workspace_uid) - return workspace.refresh() + return WorkspaceController(self, (project_uid, workspace_uid)) - def find_job(self, project_uid: str, job_uid: str) -> Job: + def find_job(self, project_uid: str, job_uid: str) -> JobController: """ Get a job by its unique project and job ID. @@ -332,13 +366,11 @@ def find_job(self, project_uid: str, job_uid: str) -> Job: job_uid (str): job unique ID, e.g., "J42" Returns: - Job: job instance + JobController: job accessor object """ - job = Job(self, project_uid, job_uid) - job.refresh() - return job + return JobController(self, (project_uid, job_uid)) - def find_external_job(self, project_uid: str, job_uid: str) -> ExternalJob: + def find_external_job(self, project_uid: str, job_uid: str) -> ExternalJobController: """ Get the External job accessor instance for an External job in this project with the given UID. 
Fails if the job does not exist or is not an @@ -352,15 +384,11 @@ def find_external_job(self, project_uid: str, job_uid: str) -> ExternalJob: TypeError: If job is not an external job Returns: - ExternalJob: accessor instance + ExternalJobController: external job accessor object """ - job = ExternalJob(self, project_uid, job_uid) - job.refresh() - if job.doc["job_type"] != "snowflake": - raise TypeError(f"Job {project_uid}-{job_uid} is not an external job") - return job + return ExternalJobController(self, (project_uid, job_uid)) - def create_workspace(self, project_uid: str, title: str, desc: Optional[str] = None) -> Workspace: + def create_workspace(self, project_uid: str, title: str, desc: Optional[str] = None) -> WorkspaceController: """ Create a new empty workspace in the given project. @@ -370,12 +398,13 @@ def create_workspace(self, project_uid: str, title: str, desc: Optional[str] = N desc (str, optional): Markdown text description. Defaults to None. Returns: - Workspace: created workspace instance + WorkspaceController: created workspace accessor object + + Raises: + APIError: Workspace cannot be created. """ - workspace_uid: str = self.cli.create_empty_workspace( # type: ignore - project_uid=project_uid, created_by_user_id=self.user_id, title=title, desc=desc - ) - return self.find_workspace(project_uid, workspace_uid) + workspace = self.api.workspaces.create(project_uid, title=title, description=desc) + return WorkspaceController(self, workspace) def create_job( self, @@ -384,12 +413,12 @@ def create_job( type: str, connections: Dict[str, Union[Tuple[str, str], List[Tuple[str, str]]]] = {}, params: Dict[str, Any] = {}, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> Job: + title: str = "", + desc: str = "", + ) -> JobController: """ - Create a new job with the given type. Use `CryoSPARC.get_job_sections`_ - to query available job types on the connected CryoSPARC instance. + Create a new job with the given type. Use :py:attr:`job_register` + to find available job types on the connected CryoSPARC instance. Args: project_uid (str): Project UID to create job in, e.g., "P3" @@ -400,11 +429,14 @@ def create_job( value is a (job uid, output name) tuple. Defaults to {} params (dict[str, any], optional): Specify parameter values. Defaults to {}. - title (str, optional): Job title. Defaults to None. - desc (str, optional): Job markdown description. Defaults to None. + title (str, optional): Job title. Defaults to "". + desc (str, optional): Job markdown description. Defaults to "". Returns: - Job: created job instance. Raises error if job cannot be created. + JobController: created job accessor object. + + Raises: + APIError: Job cannot be created. Examples: @@ -422,49 +454,43 @@ def create_job( ... connections={"particles": ("J20", "particles_selected")} ... params={"abinit_K": 3} ... ) - - .. 
_CryoSPARC.get_job_sections: - #cryosparc.tools.CryoSPARC.get_job_sections """ - conn = {k: (v if isinstance(v, list) else [v]) for k, v in connections.items()} - conn = {k: [".".join(i) for i in v] for k, v in conn.items()} - job_uid: str = self.cli.make_job( # type: ignore - job_type=type, - project_uid=project_uid, - workspace_uid=workspace_uid, - user_id=self.user_id, - title=title, - desc=desc, - params=params, - input_group_connects=conn, - ) - return self.find_job(project_uid, job_uid) + job = self.api.jobs.create(project_uid, workspace_uid, params=params, type=type, title=title, description=desc) + for input_name, connection in connections.items(): + connection = [connection] if isinstance(connection, tuple) else connection + for source_job_uid, source_output_name in connection: + job = self.api.jobs.connect( + job.project_uid, + job.uid, + input_name, + source_job_uid=source_job_uid, + source_output_name=source_output_name, + ) + return JobController(self, job) def create_external_job( self, project_uid: str, workspace_uid: str, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> ExternalJob: + title: str = "", + desc: str = "", + ) -> ExternalJobController: """ Add a new External job to this project to save generated outputs to. - Args: - project_uid (str): Project UID to create in, e.g., "P3" - workspace_uid (str): Workspace UID to create job in, e.g., "W1" - title (str, optional): Title for external job (recommended). - Defaults to None. - desc (str, optional): Markdown description for external job. - Defaults to None. + Args: + project_uid (str): Project UID to create in, e.g., "P3" + workspace_uid (str): Workspace UID to create job in, e.g., "W1" + title (str, optional): Title for external job (recommended). + Defaults to "". + desc (str, optional): Markdown description for external job. + Defaults to "". - Returns: - ExternalJob: created external job instance + Returns: + ExternalJobController: created external job accessor object """ - job_uid: str = self.vis.create_external_job( # type: ignore - project_uid=project_uid, workspace_uid=workspace_uid, user=self.user_id, title=title, desc=desc - ) - return self.find_external_job(project_uid, job_uid) + job = self.api.jobs.create(project_uid, workspace_uid, type="snowflake", title=title, description=desc) + return ExternalJobController(self, job) def save_external_result( self, @@ -475,8 +501,8 @@ def save_external_result( name: Optional[str] = None, slots: Optional[List[SlotSpec]] = None, passthrough: Optional[Tuple[str, str]] = None, - title: Optional[str] = None, - desc: Optional[str] = None, + title: str = "", + desc: str = "", ) -> str: """ Save the given result dataset to the project. Specify at least the @@ -539,16 +565,14 @@ def save_external_result( passthrough (tuple[str, str], optional): Indicates that this output inherits slots from the specified output. e.g., ``("J1", "particles")``. Defaults to None. - title (str, optional): Human-readable title for this output. - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for this output. Defaults - to None. + to "". 
Raises: - CommandError: General CryoSPARC network access error such as + APIError: General CryoSPARC network access error such as timeout, URL or HTTP - InvalidSlotsError: slots argument is invalid Returns: str: UID of created job where this output was saved @@ -558,35 +582,47 @@ def save_external_result( prefixes = dataset.prefixes() if slots is None: slots = list(prefixes) - slot_names = {s if isinstance(s, str) else s["prefix"] for s in slots} - assert slot_names.intersection(prefixes) == slot_names, "Given dataset missing required slots" - - passthrough_str = ".".join(passthrough) if passthrough else None - try: - job_uid, output = self.vis.create_external_result( # type: ignore - project_uid=project_uid, - workspace_uid=workspace_uid, - type=type, + elif any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. Use 'name' instead.", DeprecationWarning, stacklevel=2) + + # Normalize slots to OutputSlot or strings + output_slots = [s if isinstance(s, str) else as_output_slot(s) for s in slots] + required_slot_names = {s if isinstance(s, str) else s.name for s in output_slots} + missing_slot_names = required_slot_names.difference(prefixes) + if missing_slot_names: + raise ValueError(f"Given dataset missing required slots: {', '.join(missing_slot_names)}") + + if not name: + name = type + if not title: + title = name.replace("_", " ").title() + + # Find the most recent workspace or create a new one if the project is empty + if workspace_uid is None: + # TODO: limit find to one workspace + workspaces = self.api.workspaces.find(project_uid=[project_uid], order=-1) + workspace = workspaces[0] if workspaces else self.api.workspaces.create(project_uid, title=title) + workspace_uid = workspace.uid + + job = self.api.jobs.create_external_result( + project_uid, + workspace_uid, + ExternalOutputSpec( name=name, - slots=slots, - passthrough=passthrough_str, - user=self.user_id, - title=title, - desc=desc, - ) - except CommandError as err: - if err.code == 422 and err.data and "slots" in err.data: - raise InvalidSlotsError("save_external_result", err.data["slots"]) from err - raise - - job = self.find_external_job(project_uid, job_uid) + spec=OutputSpec(type=type, title=title, description=desc, slots=output_slots), + connection=OutputRef(job_uid=passthrough[0], output=passthrough[1]) if passthrough else None, + ), + ) + job = ExternalJobController(self, job) with job.run(): - job.save_output(output, dataset) - + job.save_output(name, dataset) return job.uid def list_files( - self, project_uid: str, prefix: Union[str, PurePosixPath] = "", recursive: bool = False + self, + project_uid: str, + prefix: Union[str, PurePosixPath] = "", + recursive: bool = False, ) -> List[str]: """ Get a list of files inside the project directory. @@ -601,12 +637,9 @@ def list_files( Returns: list[str]: List of file paths relative to the project directory. """ - return self.vis.list_project_files( # type: ignore - project_uid=project_uid, - prefix=str(prefix), - recursive=recursive, - ) + return self.api.projects.ls(project_uid, path=str(prefix), recursive=recursive) + @contextmanager def download(self, project_uid: str, path: Union[str, PurePosixPath]): """ Open a file in the given project for reading. 
Use to get files from a @@ -631,8 +664,9 @@ def download(self, project_uid: str, path: Union[str, PurePosixPath]): """ if not path: raise ValueError("Download path cannot be empty") - data = {"project_uid": project_uid, "path": str(path)} - return make_json_request(self.vis, "/get_project_file", data=data) + stream = self.api.projects.download_file(project_uid, path=str(path)) + iterator = BinaryIteratorIO(stream.stream()) + yield iterator def download_file( self, @@ -655,16 +689,8 @@ def download_file( Returns: Path | IO: resulting target path or file handle. """ - if isinstance(target, (str, PurePath)): - target = Path(target) - if target.is_dir(): - target /= PurePath(path).name - with bopen(target, "wb") as f: - with self.download(project_uid, path) as response: - data = response.read(ONE_MIB) - while data: - f.write(data) - data = response.read(ONE_MIB) + stream = self.api.projects.download_file(project_uid, path=str(path)) + stream.save(target) return target def download_dataset(self, project_uid: str, path: Union[str, PurePosixPath]): @@ -679,26 +705,16 @@ def download_dataset(self, project_uid: str, path: Union[str, PurePosixPath]): Returns: Dataset: Loaded dataset instance """ - with self.download(project_uid, path) as response: - size = response.headers.get("Content-Length") - mime = response.headers.get("Content-Type") - if mime == "application/x-cryosparc-dataset": - # Stream format; can load directly without seek - return Dataset.load(response) - - # Numpy format, cannot load directly because requires seekable - if size and int(size) < ONE_MIB: - # Smaller than 1MiB, just read all into memory and load - return Dataset.load(BytesIO(response.read())) - - # Read into temporary file in 1MiB chunks. Load from that temporary file - with tempfile.TemporaryFile("w+b", suffix=".cs") as f: - data = response.read(ONE_MIB) - while data: - f.write(data) - data = response.read(ONE_MIB) - f.seek(0) - return Dataset.load(f) + stream = self.api.projects.download_file(project_uid, path=str(path)) + if stream.media_type == "application/x-cryosparc-dataset": + # Stream format; can load directly without seek + return Dataset.from_iterator(stream.stream()) + + # Numpy format, cannot load directly because requires seekable. Load from that temporary file + with tempfile.TemporaryFile("w+b", suffix=".cs") as f: + stream.save(f) + f.seek(0) + return Dataset.load(f) def download_mrc(self, project_uid: str, path: Union[str, PurePosixPath]): """ @@ -712,16 +728,13 @@ def download_mrc(self, project_uid: str, path: Union[str, PurePosixPath]): Returns: tuple[Header, NDArray]: MRC file header and data as a numpy array """ - with self.download(project_uid, path) as response: - with tempfile.TemporaryFile("w+b", suffix=".cs") as f: - data = response.read(ONE_MIB) - while data: - f.write(data) - data = response.read(ONE_MIB) - f.seek(0) - return mrc.read(f) # FIXME: Optimize file reading - - def list_assets(self, project_uid: str, job_uid: str) -> List[AssetDetails]: + stream = self.api.projects.download_file(project_uid, path=str(path)) + with tempfile.TemporaryFile("w+b", suffix=".mrc") as f: + stream.save(f) + f.seek(0) + return mrc.read(f) # FIXME: Optimize file reading + + def list_assets(self, project_uid: str, job_uid: str) -> List[GridFSFile]: """ Get a list of files available in the database for given job. Returns a list with details about the assets. 
Each entry is a dict with a ``_id`` @@ -733,9 +746,9 @@ def list_assets(self, project_uid: str, job_uid: str) -> List[AssetDetails]: job_uid (str): job unique ID, e.g., "J42" Returns: - list[AssetDetails]: Asset details + list[GridFSFile]: Asset details """ - return self.vis.list_job_files(project_uid=project_uid, job_uid=job_uid) # type: ignore + return self.api.assets.find(project_uid=project_uid, job_uid=job_uid) def download_asset(self, fileid: str, target: Union[str, PurePath, IO[bytes]]): """ @@ -743,34 +756,21 @@ def download_asset(self, fileid: str, target: Union[str, PurePath, IO[bytes]]): Args: fileid (str): GridFS file object ID - target (str | Path | IO): Local file path, directory path or - writeable file handle to write response data. + target (str | Path | IO): Local file path or writeable file handle + to write response data. Returns: - Path | IO: resulting target path or file handle. + str | Path | IO: resulting target path or file handle. """ - with make_json_request(self.vis, url="/get_job_file", data={"fileid": fileid}) as response: - if isinstance(target, (str, PurePath)): - target = Path(target) - if target.is_dir(): - # Try to get download filename and content type from - # headers. If cannot be determined, defaults to "file.dat" - content_type: str = response.headers.get_content_type() - attachment_filename: Optional[str] = response.headers.get_filename() - target /= attachment_filename or f"file.{ASSET_EXTENSIONS.get(content_type, 'dat')}" # type: ignore - with bopen(target, "wb") as f: - data = response.read(ONE_MIB) - while data: - f.write(data) - data = response.read(ONE_MIB) - - return target + stream = self.api.assets.download(fileid) + stream.save(target) + return target def upload( self, project_uid: str, target_path: Union[str, PurePosixPath], - source: Union[str, bytes, PurePath, IO], + source: Union[str, bytes, PurePath, IO, Stream], *, overwrite: bool = False, ): @@ -782,21 +782,16 @@ def upload( project_uid (str): Project unique ID, e.g., "P3" target_path (str | Path): Name or path of file to write in project directory. - source (str | bytes | Path | IO): Local path or file handle to + source (str | bytes | Path | IO | Stream): Local path or file handle to upload. May also specified as raw bytes. overwrite (bool, optional): If True, overwrite existing files. Defaults to False. """ - url = f"/projects/{project_uid}/files" - query: dict = {"path": target_path} - if overwrite: - query["overwrite"] = 1 - with open(source, "rb") if isinstance(source, (str, PurePath)) else noopcontext(source) as f: - with make_request(self.vis, url=url, query=query, data=f) as res: - assert res.status >= 200 and res.status < 300, ( - f"Could not upload project {project_uid} file {target_path}.\n" - f"Response from CryoSPARC ({res.status}): {res.read().decode()}" - ) + if isinstance(source, bytes): + source = BytesIO(source) + if not isinstance(source, Stream): + source = Stream.load(source) + self.api.projects.upload_file(project_uid, source, path=str(target_path), overwrite=overwrite) def upload_dataset( self, @@ -821,6 +816,9 @@ def upload_dataset( overwrite (bool, optional): If True, overwrite existing files. Defaults to False. """ + if format == CSDAT_FORMAT: + return self.upload(project_uid, target_path, Stream.from_iterator(dset.stream()), overwrite=overwrite) + if len(dset) < 100: # Probably small enough to upload from memory f = BytesIO() @@ -881,8 +879,8 @@ def mkdir( existing directories. Still raises if the target path is not a directory. Defaults to False. 
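        Examples:

            Create a nested directory inside a project directory (the project
            UID and path below are illustrative placeholders)::

                >>> cs.mkdir("P3", "external/masks", parents=True)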
""" - self.vis.project_mkdir( # type: ignore - project_uid=project_uid, + self.api.projects.mkdir( + project_uid, path=str(target_path), parents=parents, exist_ok=exist_ok, @@ -902,11 +900,7 @@ def cp(self, project_uid: str, source_path: Union[str, PurePosixPath], target_pa directory to copy into. If not specified, uses the same file name as the source. Defaults to "". """ - self.vis.project_cp( # type: ignore - project_uid=project_uid, - source_path=str(source_path), - target_path=str(target_path), - ) + self.api.projects.cp(project_uid, source=str(source_path), path=str(target_path)) def symlink( self, @@ -927,11 +921,7 @@ def symlink( directory. If not specified, creates link with the same file name as the source. Defaults to "". """ - self.vis.project_symlink( # type: ignore - project_uid=project_uid, - source_path=str(source_path), - target_path=str(target_path), - ) + self.api.projects.symlink(project_uid, source=str(source_path), path=str(target_path)) def get_import_signatures(abs_paths: Union[str, Iterable[str], "NDArray"]): diff --git a/cryosparc/util.py b/cryosparc/util.py index 941380e0..9a5eaa47 100644 --- a/cryosparc/util.py +++ b/cryosparc/util.py @@ -5,9 +5,7 @@ TYPE_CHECKING, Any, Callable, - ContextManager, Dict, - Generator, Generic, Iterator, List, @@ -26,7 +24,7 @@ if TYPE_CHECKING: from numpy.typing import NDArray # type: ignore -from .dtype import Shape +from .spec import Shape OpenTextMode = Literal["r", "w", "x", "a", "r+", "w+", "x+", "a+"] """ @@ -227,24 +225,6 @@ def bopen(file: Union[str, PurePath, IO[bytes]], mode: OpenBinaryMode = "rb"): yield file -@overload -def noopcontext() -> ContextManager[None]: ... -@overload -def noopcontext(x: T) -> ContextManager[T]: ... -@contextmanager -def noopcontext(x: Optional[T] = None) -> Generator[Optional[T], None, None]: - """ - Context manager that yields the given argument without modification. - - Args: - x (T, optional): Anything. Defaults to None. - - Yields: - T: the given argument - """ - yield x - - def padarray(arr: "NDArray", dim: Optional[int] = None, val: n.number = n.float32(0)): """ Pad the given 2D or 3D array so that the x and y dimensions are equal to the @@ -348,3 +328,12 @@ def print_table(headings: List[str], rows: List[List[str]]): print("=" * len(heading)) for row in rows: print(" | ".join(f"{v:{p}s}" for v, p in zip(row, pad))) + + +def clear_cached_property(obj: object, name: str): + """ + Clear object's @cached_property without accessing it when it's never been cached. + Object must have __dict__ key. + """ + if name in obj.__dict__: + delattr(obj, name) diff --git a/cryosparc/workspace.py b/cryosparc/workspace.py index f6a36c67..c392127c 100644 --- a/cryosparc/workspace.py +++ b/cryosparc/workspace.py @@ -1,50 +1,60 @@ +import warnings from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from .controller import Controller, as_output_slot from .dataset import Dataset -from .job import ExternalJob, Job +from .job import ExternalJobController, JobController +from .models.workspace import Workspace from .row import R -from .spec import Datatype, MongoController, SlotSpec, WorkspaceDocument +from .spec import Datatype, SlotSpec if TYPE_CHECKING: from .tools import CryoSPARC -class Workspace(MongoController[WorkspaceDocument]): +class WorkspaceController(Controller[Workspace]): """ Accessor class to a workspace in CryoSPARC with ability create jobs and save - results. Should be instantiated through `CryoSPARC.find_workspace`_ or - `Project.find_workspace`_. + results. 
Should be created with + :py:meth:`cs.find_workspace() <cryosparc.tools.CryoSPARC.find_workspace>` or + :py:meth:`project.find_workspace() <cryosparc.project.ProjectController.find_workspace>`. - Attributes: - uid (str): Workspace unique ID, e.g., "W42" - project_uid (str): Project unique ID, e.g., "P3" - doc (WorkspaceDocument): All workspace data from the CryoSPARC database. - Database contents may change over time, use the `refresh`_ method - to update. - - .. _CryoSPARC.find_workspace: - tools.html#cryosparc.tools.CryoSPARC.find_workspace + Arguments: + workspace (tuple[str, str] | Workspace): either _(Project UID, Workspace UID)_ + tuple or Workspace model, e.g. ``("P3", "W4")`` - .. _Project.find_workspace: - project.html#cryosparc.project.Project.find_workspace + Attributes: + model (Workspace): All workspace data from the CryoSPARC database. + Contents may change over time, use :py:meth:`refresh` to update. + """ - .. _refresh: - #cryosparc.workspace.Workspace.refresh + uid: str + """ + Workspace unique ID, e.g., "W42" + """ + project_uid: str + """ + Project unique ID, e.g., "P3" """ - def __init__(self, cs: "CryoSPARC", project_uid: str, uid: str) -> None: + def __init__(self, cs: "CryoSPARC", workspace: Union[Tuple[str, str], Workspace]) -> None: self.cs = cs - self.project_uid = project_uid - self.uid = uid + if isinstance(workspace, tuple): + self.project_uid, self.uid = workspace + self.refresh() + else: + self.project_uid = workspace.project_uid + self.uid = workspace.uid + self.model = workspace def refresh(self): """ Reload this workspace from the CryoSPARC database. Returns: - Workspace: self + WorkspaceController: self """ - self._doc = self.cs.cli.get_workspace(self.project_uid, self.uid) # type: ignore + self.model = self.cs.api.workspaces.find_one(self.project_uid, self.uid) return self def create_job( @@ -52,13 +62,13 @@ def create_job( type: str, connections: Dict[str, Union[Tuple[str, str], List[Tuple[str, str]]]] = {}, params: Dict[str, Any] = {}, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> Job: + title: str = "", + desc: str = "", + ) -> JobController: """ - Create a new job with the given type. Use the - `CryoSPARC.get_job_sections`_ method to query available job types on - the connected CryoSPARC instance. + Create a new job with the given type. Use + :py:attr:`cs.job_register <cryosparc.tools.CryoSPARC.job_register>` + to find available job types on the connected CryoSPARC instance. Args: project_uid (str): Project UID to create job in, e.g., "P3" type (str): Job type identifier, e.g., "homo_abinit" connections (dict[str, tuple[str, str] | list[tuple[str, str]]]): Initial input connections. Each key is an input name and each value is a (job uid, output name) tuple. Defaults to {} params (dict[str, any], optional): Specify parameter values. Defaults to {}. - title (str, optional): Job title. Defaults to None. - desc (str, optional): Job markdown description. Defaults to None. + title (str, optional): Job title. Defaults to "". + desc (str, optional): Job markdown description. Defaults to "". Returns: - Job: created job instance. Raises error if job cannot be created. + JobController: created job instance. Raises error if job cannot be created. Examples: Create an Import Movies job (type "import_movies") and set the voltage parameter: >>> import_job = workspace.create_job("W3", "import_movies") >>> import_job.set_param("psize_A", 1.096) Create a 3-class ab-initio job connected to existing particles: >>> abinit_job = workspace.create_job("homo_abinit", ... connections={"particles": ("J20", "particles_selected")}, ... params={"abinit_K": 3} ... ) - ..
_CryoSPARC.get_job_sections: - tools.html#cryosparc.tools.CryoSPARC.get_job_sections """ return self.cs.create_job( self.project_uid, self.uid, type, connections=connections, params=params, title=title, desc=desc @@ -102,21 +109,21 @@ def create_job( def create_external_job( self, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> ExternalJob: + title: str = "", + desc: str = "", + ) -> ExternalJobController: """ Add a new External job to this workspace to save generated outputs to. Args: workspace_uid (str): Workspace UID to create job in, e.g., "W1" title (str, optional): Title for external job (recommended). - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for external job. - Defaults to None. + Defaults to "". Returns: - ExternalJob: created external job instance + ExternalJobController: created external job instance """ return self.cs.create_external_job(self.project_uid, self.uid, title, desc) @@ -127,8 +134,8 @@ def save_external_result( name: Optional[str] = None, slots: Optional[List[SlotSpec]] = None, passthrough: Optional[Tuple[str, str]] = None, - title: Optional[str] = None, - desc: Optional[str] = None, + title: str = "", + desc: str = "", ) -> str: """ Save the given result dataset to a workspace. @@ -146,9 +153,9 @@ def save_external_result( inherits slots from the specified output. e.g., ``("J1", "particles")``. Defaults to None. title (str, optional): Human-readable title for this output. - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for this output. Defaults - to None. + to "". Returns: str: UID of created job where this output was saved. @@ -189,6 +196,10 @@ def save_external_result( ... ) "J45" """ + if slots and any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. Use 'name' instead.", DeprecationWarning, stacklevel=2) + # convert to prevent from warning again + slots = [as_output_slot(slot) for slot in slots] # type: ignore return self.cs.save_external_result( self.project_uid, self.uid, diff --git a/pyproject.toml b/pyproject.toml index 34b17970..4ca4dd5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,6 @@ dependencies = [ dev = [ "build", "cython", - "httpretty", "pre-commit", "pyright", "pytest-benchmark", diff --git a/tests/conftest.py b/tests/conftest.py index c0f45b34..796e9ef4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,17 +1,30 @@ -import json import shutil import urllib.request -from io import BytesIO +from datetime import datetime, timezone from pathlib import Path from time import time -from typing import Any, Dict +from unittest import mock -import httpretty import numpy as n import pytest -from cryosparc.dataset import CSDAT_FORMAT, Row +from cryosparc.api import APIClient from cryosparc.dataset import Dataset as BaseDataset +from cryosparc.dataset import Row +from cryosparc.models.job import Job +from cryosparc.models.job_spec import ( + Connection, + InputResult, + Inputs, + JobSpec, + Output, + OutputResult, + Outputs, + Params, +) +from cryosparc.models.project import Project +from cryosparc.models.user import Email, User +from cryosparc.project import ProjectController from cryosparc.tools import CryoSPARC from cryosparc.util import default_rng @@ -98,225 +111,6 @@ def shuffle(self): # fmt: on -@pytest.fixture -def mock_jsonrpc_procs_core() -> Dict[str, Any]: - """ - Dictionary of JSON RPC method names and their return values. Can override - existing values in subfixtures. 
- """ - return { - "hello_world": {"hello": "world"}, - "get_running_version": "develop", - "get_id_by_email_password": "6372a35e821ed2b71d9fe4e3", - "get_job": { - "uid": "J1", - "project_uid": "P1", - "job_type": "homo_abinit", - "title": "New Job", - "description": "", - "created_by_user_id": "6372a35e821ed2b71d9fe4e3", - "output_results": [ - { - "uid": "J1-R3", - "type": "particle.blob", - "group_name": "particles_class_0", - "name": "blob", - "title": "Particle data", - "description": "Particle raw data", - "min_fields": [ - ["path", "O"], - ["idx", "u4"], - ["shape", "2u4"], - ["psize_A", "f4"], - ["sign", "f4"], - ["import_sig", "u8"], - ], - "versions": [0, 100, 200, 300, 400, 500, 600, 700, 800, 863], - "metafiles": [ - "J1/J1_class_00_00000_particles.cs", - "J1/J1_class_00_00100_particles.cs", - "J1/J1_class_00_00200_particles.cs", - "J1/J1_class_00_00300_particles.cs", - "J1/J1_class_00_00400_particles.cs", - "J1/J1_class_00_00500_particles.cs", - "J1/J1_class_00_00600_particles.cs", - "J1/J1_class_00_00700_particles.cs", - "J1/J1_class_00_00800_particles.cs", - "J1/J1_class_00_final_particles.cs", - ], - "num_items": [90, 9090, 12421, 12421, 12421, 12421, 12421, 12421, 12421, 12421], - "passthrough": False, - }, - { - "uid": "J1-R4", - "type": "particle.ctf", - "group_name": "particles_class_0", - "name": "ctf", - "title": "Particle CTF parameters", - "description": "Particle CTF parameters", - "min_fields": [ - ["type", "O"], - ["exp_group_id", "u4"], - ["accel_kv", "f4"], - ["cs_mm", "f4"], - ["amp_contrast", "f4"], - ["df1_A", "f4"], - ["df2_A", "f4"], - ["df_angle_rad", "f4"], - ["phase_shift_rad", "f4"], - ["scale", "f4"], - ["scale_const", "f4"], - ["shift_A", "2f4"], - ["tilt_A", "2f4"], - ["trefoil_A", "2f4"], - ["tetra_A", "4f4"], - ["anisomag", "4f4"], - ["bfactor", "f4"], - ], - "versions": [0, 100, 200, 300, 400, 500, 600, 700, 800, 863], - "metafiles": [ - "J1/J1_class_00_00000_particles.cs", - "J1/J1_class_00_00100_particles.cs", - "J1/J1_class_00_00200_particles.cs", - "J1/J1_class_00_00300_particles.cs", - "J1/J1_class_00_00400_particles.cs", - "J1/J1_class_00_00500_particles.cs", - "J1/J1_class_00_00600_particles.cs", - "J1/J1_class_00_00700_particles.cs", - "J1/J1_class_00_00800_particles.cs", - "J1/J1_class_00_final_particles.cs", - ], - "num_items": [90, 9090, 12421, 12421, 12421, 12421, 12421, 12421, 12421, 12421], - "passthrough": False, - }, - { # Empty to test a partially incomplete job - "uid": "J1-R7", - "type": "particle.pick_stats", - "group_name": "particles_class_0", - "name": "pick_stats", - "title": "Passthrough pick_stats", - "description": "Passthrough from input particles.pick_stats (result_name)", - "min_fields": [["ncc_score", "f4"], ["power", "f4"], ["template_idx", "u4"], ["angle_rad", "f4"]], - "versions": [], - "metafiles": [], - "num_items": [], - "passthrough": True, - }, - { - "uid": "J1-R8", - "type": "particle.location", - "group_name": "particles_class_0", - "name": "location", - "title": "Passthrough location", - "description": "Passthrough from input particles.location (result_name)", - "min_fields": [ - ["micrograph_uid", "u8"], - ["exp_group_id", "u4"], - ["micrograph_path", "O"], - ["micrograph_shape", "2u4"], - ["center_x_frac", "f4"], - ["center_y_frac", "f4"], - ], - "versions": [0], - "metafiles": ["J1/J1_passthrough_particles_class_0.cs"], - "num_items": [12421], - "passthrough": True, - }, - { - "uid": "J1-R9", - "type": "volume.blob", - "group_name": "volume_class_0", - "name": "map", - "title": "Volume data", - 
"description": "Volume raw data", - "min_fields": [["path", "O"], ["shape", "3u4"], ["psize_A", "f4"]], - "versions": [0, 100, 200, 300, 400, 500, 600, 700, 800, 862], - "metafiles": [ - "J1/J1_class_00_00000_volume.cs", - "J1/J1_class_00_00100_volume.cs", - "J1/J1_class_00_00200_volume.cs", - "J1/J1_class_00_00300_volume.cs", - "J1/J1_class_00_00400_volume.cs", - "J1/J1_class_00_00500_volume.cs", - "J1/J1_class_00_00600_volume.cs", - "J1/J1_class_00_00700_volume.cs", - "J1/J1_class_00_00800_volume.cs", - "J1/J1_class_00_final_volume.cs", - ], - "num_items": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "passthrough": False, - }, - ], - }, - "get_project_dir_abs": "/projects/my-project", - "get_project": {"uid": "P1", "title": "My Project"}, - "make_job": "J1", - "set_cluster_job_custom_vars": None, - "enqueue_job": "queued", - "job_send_streamlog": None, - "job_connect_group": True, - "job_set_param": True, - } - - -@pytest.fixture -def request_callback_core(mock_jsonrpc_procs_core): - def request_callback_core_fn(request, uri, response_headers): - body = json.loads(request.body) - mock_jsonrpc_procs_core["system.describe"] = {"procs": [{"name": m} for m in mock_jsonrpc_procs_core]} - response_headers["content-type"] = "application/json" - return [200, response_headers, json.dumps({"result": mock_jsonrpc_procs_core[body["method"]]})] - - return request_callback_core_fn - - -@pytest.fixture -def mock_jsonrpc_procs_vis() -> Dict[str, Any]: - return { - "hello_world": {"hello": "world"}, - } - - -@pytest.fixture -def request_callback_vis(mock_jsonrpc_procs_vis): - def request_callback_vis_fn(request, uri, response_headers): - body = json.loads(request.body) - - mock_jsonrpc_procs_vis["system.describe"] = {"procs": [{"name": m} for m in mock_jsonrpc_procs_vis]} - response_headers["content-type"] = "application/json" - return [200, response_headers, json.dumps({"result": mock_jsonrpc_procs_vis[body["method"]]})] - - return request_callback_vis_fn - - -def request_callback_vis_get_project_file(request, uri, response_headers): - body = json.loads(request.body) - data = b"" - dset = None - if body["project_uid"] == "P1" and body["path"] == "J1/J1_class_00_final_particles.cs": - dset = T20S_PARTICLES - elif body["project_uid"] == "P1" and body["path"] == "J1/J1_passthrough_particles_class_0.cs": - dset = T20S_PARTICLES_PASSTHROUGH - else: - raise RuntimeError(f"Unimplemented get_project_file pytest fixture for request body {body}") - - if dset: - bio = BytesIO() - dset.save(bio, format=CSDAT_FORMAT) - bio.seek(0) - data = bio.read() - - return [200, response_headers, data] - - -def request_callback_rtp(request, uri, response_headers): - body = json.loads(request.body) - procs: Dict[str, Any] = {"hello_world": {"hello": "world"}} - procs["system.describe"] = {"procs": [{"name": m} for m in procs]} - response_headers["content-type"] = "application/json" - return [200, response_headers, json.dumps({"result": procs[body["method"]]})] - - @pytest.fixture(scope="session") def big_dset_path(): basename = "bench_big_dset" @@ -427,21 +221,174 @@ def t20s_particles_passthrough(): @pytest.fixture -def cs(request_callback_core, request_callback_vis): - httpretty.enable(verbose=False, allow_net_connect=False) - httpretty.register_uri(httpretty.POST, "http://localhost:39002/api", body=request_callback_core) # type: ignore - httpretty.register_uri(httpretty.POST, "http://localhost:39003/api", body=request_callback_vis) # type: ignore - httpretty.register_uri( - httpretty.POST, - "http://localhost:39003/get_project_file", - 
body=request_callback_vis_get_project_file, # type: ignore +def mock_user(): + return User( + _id="6372a35e821ed2b71d9fe4e3", + name="test", + first_name="Testy", + last_name="Tester", + emails=[Email(address="test@example.com", verified=True)], + roles={"__global_roles__": ["admin"]}, + register_token=None, + allowed_prefix_dir="/", + created_at=datetime(2017, 1, 1, tzinfo=timezone.utc), ) - httpretty.register_uri(httpretty.POST, "http://localhost:39005/api", body=request_callback_rtp) # type: ignore - yield CryoSPARC(license="00000000-0000-0000-0000-000000000000", email="test@structura.bio", password="password") - httpretty.disable() - httpretty.reset() @pytest.fixture -def project(cs: CryoSPARC): +def cs(mock_user, monkeypatch): + monkeypatch.setattr(APIClient, "__call__", mock.Mock(return_value=None)) + APIClient.health = mock.Mock(return_value="OK") + APIClient.users = mock.MagicMock() + APIClient.config = mock.MagicMock() + APIClient.projects = mock.MagicMock() + APIClient.workspaces = mock.MagicMock() + APIClient.jobs = mock.MagicMock() + APIClient.users.me.return_value = mock_user + APIClient.config.get_version.return_value = "develop" + return CryoSPARC("https://cryosparc.example.com", email="structura@example.com", password="password") + + +@pytest.fixture +def mock_project(mock_user): + return Project( + _id="67292e95282b26b45d0e8fae", + uid="P1", + title="Test Project", + project_dir="/home/cryosparc/projects", + owner_user_id=mock_user.id, + size_last_updated=datetime.now(timezone.utc), + ) + + +@pytest.fixture +def project(cs: CryoSPARC, mock_project): + APIClient.projects.find_one.return_value = mock_project # type: ignore return cs.find_project("P1") + + +@pytest.fixture +def mock_new_job(mock_user, mock_project): + return Job( + _id="67743226e66c192db762b689", + uid="J42", + project_uid=mock_project.uid, + workspace_uids=["W1"], + job_dir="J42", + status="building", + created_by_user_id=mock_user.id, + spec=JobSpec( + type="homo_abinit", + params=Params(), + inputs=Inputs({"particles": []}), + outputs=Outputs( + { + "particles_class_0": Output( + type="particle", + results=[ + OutputResult(name="blob", dtype="blob"), + OutputResult(name="ctf", dtype="ctf"), + OutputResult(name="alignments3D", dtype="alignments3D"), + ], + ), + "volume_class_0": Output(type="volume", results=[OutputResult(name="map", dtype="blob")]), + } + ), + ), + ) + + +@pytest.fixture +def mock_params(): + return Params(abinit_K=1, generate_intermediate_results=True, random_seed=2056920808) + + +@pytest.fixture +def mock_new_job_with_params(mock_new_job: Job, mock_params: Params): + job = mock_new_job.model_copy(deep=True) + job.spec.params = mock_params + return job + + +@pytest.fixture +def mock_new_job_with_connection(mock_new_job_with_params: Job): + job = mock_new_job_with_params.model_copy(deep=True) + input_particles = Connection( + type="particle", + job_uid="J41", + output="particles", + results=[ + InputResult(name="blob", dtype="blob", job_uid="J42", output="particles", result="blob"), + InputResult(name="ctf", dtype="ctf", job_uid="J42", output="particles", result="ctf"), + # passthrough: + InputResult(name=None, dtype="location", job_uid="J42", output="particles", result="location"), + ], + ) + passthrough_result = OutputResult(name="location", dtype="location", passthrough=True) + job.spec.inputs.root["particles"] = [input_particles] + job.spec.outputs.root["particles_class_0"].results.append(passthrough_result) + return job + + +@pytest.fixture +def mock_job(mock_new_job_with_connection: 
Job): # completed + job = mock_new_job_with_connection.model_copy(update={"status": "completed"}, deep=True) + # fmt: off + output_particles_class_0 = Output( + type="particle", + results=[ + OutputResult( + name="blob", + dtype="blob", + versions=[0, 100, 200, 300, 400, 863], + metafiles=["J42/class_00_00000_particles.cs", "J42/class_00_00100_particles.cs", "J42/class_00_00200_particles.cs", "J42/class_00_00300_particles.cs", "J42/class_00_00400_particles.cs", "J42/class_00_final_particles.cs"], + num_items=[90, 9090, 10000, 10000, 10000, 10000], + ), + OutputResult( + name="ctf", + dtype="ctf", + versions=[0, 100, 200, 300, 400, 863], + metafiles=["J42/class_00_00000_particles.cs", "J42/class_00_00100_particles.cs", "J42/class_00_00200_particles.cs", "J42/class_00_00300_particles.cs", "J42/class_00_00400_particles.cs", "J42/class_00_final_particles.cs"], + num_items=[90, 9090, 10000, 10000, 10000, 10000], + ), + OutputResult( + name="alignments3D", + dtype="alignments3D", + versions=[0, 100, 200, 300, 400, 863], + metafiles=["J42/class_00_00000_particles.cs", "J42/class_00_00100_particles.cs", "J42/class_00_00200_particles.cs", "J42/class_00_00300_particles.cs", "J42/class_00_00400_particles.cs", "J42/class_00_final_particles.cs"], + num_items=[90, 9090, 10000, 10000, 10000, 10000], + ), + OutputResult( + name="location", + dtype="location", + versions=[0], + metafiles=["J42/passthrough_particles_class_0.cs"], + num_items=[10000], + passthrough=True, + ), + ], + num_items=10000, + ) + output_volume_class_0 = Output( + type="volume", + results=[ + OutputResult( + name="map", + dtype="blob", + versions=[0, 100, 200, 300, 400, 862], + metafiles=["J42/class_00_00000_volume.cs", "J42/class_00_00100_volume.cs", "J42/class_00_00200_volume.cs", "J42/class_00_00300_volume.cs", "J42/class_00_00400_volume.cs", "J42/class_00_final_volume.cs"], + num_items=[1, 1, 1, 1, 1, 1], + ) + ], + num_items=1, + ) + # fmt: on + job.spec.outputs.root["particles_class_0"] = output_particles_class_0 + job.spec.outputs.root["volume_class_0"] = output_volume_class_0 + return job + + +@pytest.fixture +def job(cs: CryoSPARC, project: ProjectController, mock_job: Job): + APIClient.jobs.find_one.return_value = mock_job # type: ignore + return project.find_job("J42") diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 00000000..4ff07c11 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,5 @@ +from cryosparc.tools import CryoSPARC + + +def test_health(cs: CryoSPARC): + assert cs.api.health() == "OK" diff --git a/tests/test_command.py b/tests/test_command.py deleted file mode 100644 index 9cf2d55c..00000000 --- a/tests/test_command.py +++ /dev/null @@ -1,5 +0,0 @@ -from cryosparc.tools import CryoSPARC - - -def test_hello(cs: CryoSPARC): - assert cs.cli.hello_world() == {"hello": "world"} # type: ignore diff --git a/tests/test_job.py b/tests/test_job.py index 57a99073..eeecab6b 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -1,254 +1,170 @@ import sys +from unittest import mock -import httpretty import pytest -from cryosparc.dataset import Dataset -from cryosparc.job import ExternalJob, Job -from cryosparc.project import Project +from cryosparc.api import APIClient +from cryosparc.job import ExternalJobController, JobController +from cryosparc.models.job import Job +from cryosparc.models.job_spec import ( + JobSpec, + Output, + OutputResult, + OutputSlot, + OutputSpec, + Params, +) +from cryosparc.project import ProjectController from cryosparc.tools import CryoSPARC from .conftest import 
T20S_PARTICLES @pytest.fixture -def job(cs, project: Project): - return project.find_job("J1") +def mock_enqueue_endpoint(mock_job: Job): + assert isinstance(endpoint := APIClient.jobs.enqueue, mock.Mock) + endpoint.return_value = mock_job.model_copy(update={"status": "queued"}) + return endpoint -@pytest.fixture -def mock_external_job_doc(): - return { - "_id": "67292e95282b26b45d0e8fee", - "uid": "J2", - "uid_num": 2, - "project_uid": "P1", - "project_uid_num": 1, - "type": "snowflake", - "job_type": "snowflake", - "title": "Recenter Particles", - "description": "Enter a description.", - "status": "building", - "created_at": "Mon, 04 Nov 2024 20:29:09 GMT", - "created_by_user_id": "61f0383552d791f286b796ef", - "parents": [], - "children": [], - "input_slot_groups": [], - "output_result_groups": [], - "output_results": [], - "params_base": {}, - "params_spec": {}, - "params_secs": {}, - "workspace_uids": ["W1"], - } +def test_queue(job: JobController, mock_enqueue_endpoint: mock.Mock): + job.queue() + assert job.model.status == "queued" + mock_enqueue_endpoint.assert_called_once_with(job.project_uid, job.uid, lane=None, hostname=None, gpus=[]) -@pytest.fixture -def external_job( - mock_jsonrpc_procs_vis, - mock_jsonrpc_procs_core, - mock_external_job_doc, - cs: CryoSPARC, - project: Project, -): - mock_jsonrpc_procs_vis["create_external_job"] = "J2" - mock_jsonrpc_procs_core["get_job"] = mock_external_job_doc - cs.cli() - cs.vis() - return project.create_external_job("W1", title="Recenter Particles") +def test_queue_worker(job: JobController, mock_enqueue_endpoint: mock.Mock): + job.queue(lane="workers", hostname="worker1", gpus=[1]) + assert job.model.status == "queued" + mock_enqueue_endpoint.assert_called_once_with( + job.project_uid, job.uid, lane="workers", hostname="worker1", gpus=[1] + ) -def test_queue(job: Job): - job.queue() - queue_request = httpretty.latest_requests()[-3] - refresh_request = httpretty.latest_requests()[-1] - assert queue_request.parsed_body["method"] == "enqueue_job" - assert queue_request.parsed_body["params"] == { - "project_uid": job.project_uid, - "job_uid": job.uid, - "lane": None, - "user_id": job.cs.user_id, - "hostname": None, - "gpus": False, - } - assert refresh_request.parsed_body["method"] == "get_job" - - -def test_queue_worker(job: Job): - job.queue(lane="workers", hostname="worker1", gpus=[1]) - queue_request = httpretty.latest_requests()[-3] - refresh_request = httpretty.latest_requests()[-1] - assert queue_request.parsed_body["method"] == "enqueue_job" - assert queue_request.parsed_body["params"] == { - "project_uid": job.project_uid, - "job_uid": job.uid, - "lane": "workers", - "user_id": job.cs.user_id, - "hostname": "worker1", - "gpus": [1], - } - assert refresh_request.parsed_body["method"] == "get_job" - - -def test_queue_cluster(job: Job): +def test_queue_cluster(job: JobController, mock_enqueue_endpoint: mock.Mock): + assert isinstance(mock_vars_endpoint := APIClient.jobs.set_cluster_custom_vars, mock.Mock) vars = {"var1": 42, "var2": "test"} job.queue(lane="cluster", cluster_vars=vars) - vars_request = httpretty.latest_requests()[-5] - queue_request = httpretty.latest_requests()[-3] - refresh_request = httpretty.latest_requests()[-1] - assert vars_request.parsed_body["method"] == "set_cluster_job_custom_vars" - assert vars_request.parsed_body["params"] == { - "project_uid": job.project_uid, - "job_uid": job.uid, - "cluster_job_custom_vars": vars, - } - assert queue_request.parsed_body["method"] == "enqueue_job" - assert 
queue_request.parsed_body["params"] == { - "project_uid": job.project_uid, - "job_uid": job.uid, - "lane": "cluster", - "user_id": job.cs.user_id, - "hostname": None, - "gpus": False, - } - assert refresh_request.parsed_body["method"] == "get_job" - - -def test_load_output_all_slots(job: Job): - output = job.load_output("particles_class_0") - assert set(output.prefixes()) == {"location", "blob", "ctf"} - - -def test_load_output_some_missing_slots(job: Job): - with pytest.raises( - ValueError, - match=( - "Cannot load output particles_class_0 slot pick_stats because " - "output does not have an associated dataset file. " - ), - ): - job.load_output("particles_class_0", slots=["blob", "pick_stats"]) + assert job.model.status == "queued" + mock_vars_endpoint.assert_called_once_with(job.project_uid, job.uid, vars) + mock_enqueue_endpoint.assert_called_once_with(job.project_uid, job.uid, lane="cluster", hostname=None, gpus=[]) + + +def test_load_output_all_slots(job: JobController, t20s_particles, t20s_particles_passthrough): + assert isinstance(mock_load_output_endpoint := APIClient.jobs.load_output, mock.Mock) + mock_load_output_endpoint.return_value = t20s_particles.innerjoin(t20s_particles_passthrough) + particles = job.load_output("particles_class_0") + assert set(particles.prefixes()) == {"location", "blob", "ctf"} + mock_load_output_endpoint.assert_called_once_with( + job.project_uid, job.uid, "particles_class_0", slots="all", version="F" + ) -def test_load_output_some_slots(job: Job, t20s_particles, t20s_particles_passthrough): - particles = job.load_output("particles_class_0", slots=["location", "blob", "ctf"]) - assert particles == Dataset.innerjoin_many(t20s_particles, t20s_particles_passthrough) +def test_load_output_some_slots(job: JobController, t20s_particles, t20s_particles_passthrough): + assert isinstance(mock_load_output_endpoint := APIClient.jobs.load_output, mock.Mock) + mock_load_output_endpoint.return_value = t20s_particles.innerjoin(t20s_particles_passthrough) + slots = ["location", "blob", "ctf"] + particles = job.load_output("particles_class_0", slots=slots) + assert set(particles.prefixes()) == set(slots) + mock_load_output_endpoint.assert_called_once_with( + job.project_uid, job.uid, "particles_class_0", slots=slots, version="F" + ) + +def test_job_subprocess_io(job: JobController): + assert isinstance(mock_log_endpoint := APIClient.jobs.add_event_log, mock.Mock) -def test_job_subprocess_io(job: Job): job.subprocess( [sys.executable, "-c", 'import sys; print("hello"); print("error", file=sys.stderr); print("world")'] ) - request = httpretty.latest_requests()[-3] # last two requests are "subprocess completed" log lines - body = request.parsed_body - assert body["method"] == "job_send_streamlog" + assert len(mock_log_endpoint.mock_calls) == 7 # includes some prelude/divider calls + mock_log_endpoint.assert_has_calls( + [ + mock.call(job.project_uid, job.uid, "hello", type="text"), + mock.call(job.project_uid, job.uid, "error", type="text"), + mock.call(job.project_uid, job.uid, "world", type="text"), + ], + any_order=True, + ) + - # Lines may arrive out of order, either is okay - params = body["params"] - opt1 = {"project_uid": "P1", "job_uid": "J1", "message": "error", "error": False} - opt2 = {"project_uid": "P1", "job_uid": "J1", "message": "world", "error": False} - assert params == opt1 or params == opt2 +def test_create_external_job(cs: CryoSPARC, project: ProjectController, external_job: ExternalJobController): + assert project.uid == external_job.project_uid + 
assert isinstance(mock_create_endpoint := APIClient.jobs.create, mock.Mock) + mock_create_endpoint.assert_called_once_with( + project.uid, "W1", type="snowflake", title="Recenter Particles", description="" + ) -def test_create_external_job(cs: CryoSPARC, external_job: ExternalJob): - requests = httpretty.latest_requests() - create_external_job_request = requests[-3] - create_external_job_body = create_external_job_request.parsed_body - find_external_job_request = requests[-1] - find_external_job_body = find_external_job_request.parsed_body +@pytest.fixture +def mock_external_job(mock_user, mock_project): + return Job( + _id="67292e95282b26b45d0e8fee", + uid="J43", + project_uid=mock_project.uid, + workspace_uids=["W1"], + job_dir="J43", + title="Recenter Particles", + status="building", + created_by_user_id=mock_user.id, + spec=JobSpec(type="snowflake", params=Params()), + ) - assert create_external_job_body["method"] == "create_external_job" - assert create_external_job_body["params"] == { - "project_uid": "P1", - "workspace_uid": "W1", - "user": cs.user_id, - "title": "Recenter Particles", - "desc": None, - } - assert find_external_job_body["method"] == "get_job" - assert find_external_job_body["params"] == ["P1", "J2"] + +@pytest.fixture +def external_job(project: ProjectController, mock_external_job: Job): + APIClient.jobs.create.return_value = mock_external_job # type: ignore + return project.create_external_job("W1", title="Recenter Particles") @pytest.fixture -def external_job_output(mock_jsonrpc_procs_vis, mock_external_job_doc, cs: CryoSPARC, external_job: ExternalJob): - mock_external_job_doc["output_result_groups"] = [ - { - "uid": "J2-G1", - "type": "particle", - "name": "particles", - "title": "Particles", - "description": "", - "contains": [ - { - "uid": "J2-R1", - "type": "particle.blob", - "group_name": "particles", - "name": "blob", - "passthrough": False, - }, - { - "uid": "J2-R2", - "type": "particle.ctf", - "group_name": "particles", - "name": "ctf", - "passthrough": False, - }, - ], - "passthrough": False, - } - ] - mock_external_job_doc["output_results"] = [ - { - "uid": "J2-R1", - "type": "particle.blob", - "group_name": "particles", - "name": "blob", - "title": "", - "description": "", - "min_fields": [["path", "O"], ["idx", "u4"], ["shape", "2u4"], ["psize_A", "f4"], ["sign", "f4"]], - "versions": [0], - "metafiles": ["J2/particles.cs"], - "num_items": [10], - "passthrough": False, - }, - { - "uid": "J2-R2", - "type": "particle.ctf", - "group_name": "particles", - "name": "ctf", - "title": "", - "description": "", - "min_fields": [["type", "O"], ["exp_group_id", "u4"], ["accel_kv", "f4"], ["cs_mm", "f4"]], - "versions": [0], - "metafiles": ["J2/particles.cs"], - "num_items": [10], - "passthrough": False, - }, - ] - mock_jsonrpc_procs_vis["add_external_job_output"] = "particles" - httpretty.register_uri( - httpretty.POST, - "http://localhost:39003/external/projects/P1/jobs/J2/outputs/particles/dataset", - body='"particles"', +def external_job_with_added_output(external_job: ExternalJobController, mock_external_job: Job): + mock_external_job = mock_external_job.model_copy(deep=True) + mock_external_job.spec.outputs.root["particles"] = Output( + type="particle", + results=[ + OutputResult(name="blob", dtype="blob"), + OutputResult(name="ctf", dtype="ctf"), + ], ) - - cs.vis() + APIClient.jobs.add_output.return_value = mock_external_job # type: ignore external_job.add_output("particle", name="particles", slots=["blob", "ctf"]) - external_job.save_output("particles", 
T20S_PARTICLES) - return T20S_PARTICLES - + return external_job -def test_external_job_output(external_job_output): - requests = httpretty.latest_requests() - create_output_request = requests[-3] - find_external_job_request = requests[-1] - find_external_job_body = find_external_job_request.parsed_body - assert len(external_job_output) > 0 - assert create_output_request.url == "http://localhost:39003/external/projects/P1/jobs/J2/outputs/particles/dataset" - assert find_external_job_body["method"] == "get_job" - assert find_external_job_body["params"] == ["P1", "J2"] +@pytest.fixture +def mock_external_job_with_saved_output(external_job_with_added_output: ExternalJobController, mock_external_job: Job): + metafile = f"{mock_external_job.uid}/particles.cs" + mock_external_job = mock_external_job.model_copy(deep=True) + mock_external_job.spec.outputs.root["particles"] = Output( + type="particle", + results=[ + OutputResult(name="blob", dtype="blob", versions=[0], metafiles=[metafile], num_items=[10]), + OutputResult(name="ctf", dtype="ctf", versions=[0], metafiles=[metafile], num_items=[10]), + ], + ) + APIClient.jobs.save_output.return_value = mock_external_job # type: ignore + external_job_with_added_output.save_output("particles", T20S_PARTICLES) + return external_job_with_added_output + + +def test_external_job_output(mock_external_job_with_saved_output: ExternalJobController): + assert isinstance(mock_add_output_endpoint := APIClient.jobs.add_external_output, mock.Mock) + assert isinstance(mock_save_output_endpoint := APIClient.jobs.save_output, mock.Mock) + j = mock_external_job_with_saved_output + + mock_add_output_endpoint.assert_called_once_with( + j.project_uid, + j.uid, + "particles", + OutputSpec( + type="particle", + title="particles", + slots=[OutputSlot(name="blob", dtype="blob"), OutputSlot(name="ctf", dtype="ctf")], + ), + ) + mock_save_output_endpoint.assert_called_once_with(j.project_uid, j.uid, "particles", T20S_PARTICLES, version=0) def test_invalid_external_job_output(external_job): diff --git a/tests/test_project.py b/tests/test_project.py new file mode 100644 index 00000000..c54822e2 --- /dev/null +++ b/tests/test_project.py @@ -0,0 +1,2 @@ +def test_project(project, mock_project): + assert project.uid == mock_project.uid diff --git a/tests/test_tools.py b/tests/test_tools.py index e39589e2..4da8954f 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1,58 +1,50 @@ -import httpretty +from unittest import mock -from cryosparc.job import Job +from cryosparc.api import APIClient +from cryosparc.job import Job, JobController +from cryosparc.models.job_spec import Params from cryosparc.project import Project from cryosparc.tools import CryoSPARC -def test_create_job_basic(cs: CryoSPARC, project: Project): - job = cs.create_job(project.uid, "W1", "homo_abinit") - assert isinstance(job, Job) - assert job.uid == "J1" +def test_create_job_basic(cs: CryoSPARC, project: Project, mock_new_job: Job): + assert isinstance(mock_create_endpoint := APIClient.jobs.create, mock.Mock) + mock_create_endpoint.return_value = mock_new_job - latest_requests = httpretty.latest_requests() - create_job_request = latest_requests[-3] - get_job_request = latest_requests[-1] - assert create_job_request.parsed_body["method"] == "make_job" - assert create_job_request.parsed_body["params"] == { - "job_type": "homo_abinit", - "project_uid": project.uid, - "workspace_uid": "W1", - "user_id": cs.user_id, - "params": {}, - "input_group_connects": {}, - "title": None, - "desc": None, - } - assert 
get_job_request.parsed_body["method"] == "get_job" - assert get_job_request.parsed_body["params"] == ["P1", "J1"] + job = cs.create_job(project.uid, "W1", "homo_abinit") + assert isinstance(job, JobController) + assert job.uid == mock_new_job.uid + assert len(job.model.spec.params.model_dump(exclude_defaults=True, exclude_none=True)) == 0 + mock_create_endpoint.assert_called_once_with( + project.uid, "W1", type="homo_abinit", title="", description="", params={} + ) -def test_create_job_connect_params(cs: CryoSPARC, project: Project): +def test_create_job_connect_params( + cs: CryoSPARC, + project: Project, + mock_params: Params, + mock_new_job_with_params: Job, + mock_new_job_with_connection: Job, +): + assert isinstance(mock_create_endpoint := APIClient.jobs.create, mock.Mock) + assert isinstance(mock_connect_endpoint := APIClient.jobs.connect, mock.Mock) + mock_create_endpoint.return_value = mock_new_job_with_params + mock_connect_endpoint.return_value = mock_new_job_with_connection job = cs.create_job( project.uid, "W1", "homo_abinit", - connections={"particles": ("J2", "particles_selected")}, - params={"abinit_K": 3}, + connections={"particles": ("J41", "particles")}, + params=mock_params.model_dump(), + ) + assert isinstance(job, JobController) + assert job.uid == mock_new_job_with_connection.uid + assert job.model.spec.params == mock_params + assert len(job.model.spec.inputs.root["particles"]) == 1 + mock_create_endpoint.assert_called_once_with( + project.uid, "W1", type="homo_abinit", title="", description="", params=mock_params.model_dump() + ) + mock_connect_endpoint.assert_called_once_with( + project.uid, job.uid, "particles", source_job_uid="J41", source_output_name="particles" ) - assert isinstance(job, Job) - assert job.uid == "J1" - - latest_requests = httpretty.latest_requests() - create_job_request = latest_requests[-3] - get_job_request = latest_requests[-1] - - assert create_job_request.parsed_body["method"] == "make_job" - assert create_job_request.parsed_body["params"] == { - "job_type": "homo_abinit", - "project_uid": project.uid, - "workspace_uid": "W1", - "user_id": cs.user_id, - "params": {"abinit_K": 3}, - "input_group_connects": {"particles": ["J2.particles_selected"]}, - "title": None, - "desc": None, - } - assert get_job_request.parsed_body["method"] == "get_job" - assert get_job_request.parsed_body["params"] == ["P1", "J1"]
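
For readers tracing the updated tests above, the sketch below shows the end-to-end usage pattern they exercise against the new API-backed controllers. It is a minimal, illustrative example only: the instance URL, credentials, the "P1"/"W1"/"J41" UIDs and the "abinit_K" value are placeholders borrowed from the test fixtures above, not values required by the library.

from cryosparc.tools import CryoSPARC

# Connect with email/password against the unified API client.
cs = CryoSPARC("https://cryosparc.example.com", email="structura@example.com", password="password")

project = cs.find_project("P1")           # ProjectController
workspace = project.find_workspace("W1")  # WorkspaceController

# Per the tests above, create_job is backed by api.jobs.create plus one
# api.jobs.connect call for each entry in `connections`.
job = workspace.create_job(
    "homo_abinit",
    connections={"particles": ("J41", "particles")},
    params={"abinit_K": 3},
)
job.queue(lane="workers", hostname="worker1", gpus=[1])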