diff --git a/.github/scripts/s3_client.py b/.github/scripts/s3_client.py
new file mode 100644
index 0000000..5fb3106
--- /dev/null
+++ b/.github/scripts/s3_client.py
@@ -0,0 +1,243 @@
+import os
+import io
+from pathlib import Path
+from dataclasses import dataclass, field
+from datetime import timedelta
+from typing import Iterator
+import json
+
+from minio import Minio  # type: ignore
+# import requests  # type: ignore
+from loguru import logger  # type: ignore
+
+
+@dataclass
+class VersionStatus:
+    version: str
+    status: str
+    path: str
+
+
+@dataclass
+class Client:
+    host: str
+    bucket: str
+    prefix: str
+    access_key: str = field(repr=False)
+    secret_key: str = field(repr=False)
+    _client: Minio = field(init=False, repr=False)
+
+    def __post_init__(self):
+        self._client = Minio(
+            self.host,
+            access_key=self.access_key,
+            secret_key=self.secret_key,
+        )
+        found = self.bucket_exists(self.bucket)
+        if not found:
+            raise Exception(f"target bucket does not exist: {self.bucket}")
+        logger.debug("Created S3-Client: {}", self)
+
+    def bucket_exists(self, bucket):
+        return self._client.bucket_exists(bucket)
+
+    def put(
+            self,
+            path,
+            file_object,
+            length=-1,
+            content_type="application/octet-stream"):
+        # For unknown length (i.e. without reading the file into memory) pass `part_size`
+        part_size = 0
+        if length == -1:
+            part_size = 10*1024*1024
+        path = f"{self.prefix}/{path}"
+        self._client.put_object(
+            self.bucket,
+            path,
+            file_object,
+            length=length,
+            part_size=part_size,
+            # content_type="application/json",
+        )
+
+    def get_file_urls(
+            self,
+            path="",
+            exclude_files=("status.json",),
+            lifetime=timedelta(hours=1),
+    ) -> list[str]:
+        """Checks an S3 'folder' for its list of files"""
+        logger.debug("Getting file list using {}, at {}", self, path)
+        path = f"{self.prefix}/{path}"
+        objects = self._client.list_objects(
+            self.bucket,
+            prefix=path,
+            recursive=True)
+        file_urls: list[str] = []
+        for obj in objects:
+            if obj.is_dir:
+                continue
+            filename = Path(obj.object_name).name
+            if filename in exclude_files:
+                continue
+            # Option 1:
+            url = self._client.get_presigned_url(
+                "GET",
+                obj.bucket_name,
+                obj.object_name,
+                expires=lifetime,
+            )
+            file_urls.append(url)
+            # Option 2: Work with minio.datatypes.Object directly
+        return file_urls
+
+
+    def ls(self, path, only_folders=False, only_files=False) -> Iterator[str]:
+        """
+        List folder contents, non-recursive, ala `ls`
+        but no "." or ".."
+        """
+        # path = str(Path(self.prefix, path))
+        path = f"{self.prefix}/{path}"
+        logger.debug("Running ls at path: {}", path)
+        objects = self._client.list_objects(
+            self.bucket,
+            prefix=path,
+            recursive=False)
+        for obj in objects:
+            if only_files and obj.is_dir:
+                continue
+            if only_folders and not obj.is_dir:
+                continue
+            yield Path(obj.object_name).name
+
+
+    def load_file(self, path):
+        """Load file from S3"""
+        path = f"{self.prefix}/{path}"
+        try:
+            response = self._client.get_object(self.bucket, path)
+            content = response.read()
+        except Exception:
+            logger.critical("Failed to get object at path {}", path)
+            logger.critical("Using client: {}", self)
+            raise
+        try:
+            response.close()
+            response.release_conn()
+        except Exception:
+            pass
+        return content
+
+        # url = self.client.get_presigned_url(
+        #     "GET",
+        #     self.bucket,
+        #     str(Path(self.prefix, path)),
+        #     expires=timedelta(minutes=10),
+        # )
+        # response = requests.get(url)
+        # return response.content
+
+    def check_versions(self, model_name: str) -> Iterator[VersionStatus]:
+        """
+        Check the model repository for versions of model_name.
+
+        Yields a VersionStatus for every version folder found.
+        """
+        logger.debug("Checking versions for {}", model_name)
+        version_folders = self.ls(f"{model_name}/", only_folders=True)
+
+        # For each folder get the contents of status.json
+        for version in version_folders:
+            yield self.get_version_status(model_name, version)
+
+    def get_unpublished_version(self, model_name: str) -> str:
+        """Get the unpublished version"""
+        versions = list(self.check_versions(model_name))
+        if len(versions) == 0:
+            return "1"
+        unpublished = [version for version in versions if version.status == "staging"]
+        if len(unpublished) == 0:
+            # Only published versions exist, start a new one
+            return f"{len(versions) + 1}"
+        if len(unpublished) > 1:
+            raise ValueError("Oops! We seem to have > 1 staging versions!!")
+        return unpublished[0].version
+
+    def get_version_status(self, model_name: str, version: str) -> VersionStatus:
+        status = self.get_status(model_name, version)
+        status_str = status.get('status', 'status-field-unset')
+        version_path = f"{model_name}/{version}"
+        return VersionStatus(version, status_str, version_path)
+
+    def get_status(self, model_name: str, version: str) -> dict:
+        version_path = f"{model_name}/{version}"
+        logger.debug("model_name: {}, version: {}", model_name, version)
+        status_path = f"{version_path}/status.json"
+        logger.debug("Getting status using path {}", status_path)
+        status = self.load_file(status_path)
+        status = json.loads(status)
+        return status
+
+    def put_status(self, model_name: str, version: str, status: dict):
+        logger.debug("Updating status for {}-{}, with {}", model_name, version, status)
+        contents = json.dumps(status).encode()
+        file_object = io.BytesIO(contents)
+
+        self.put(
+            f"{model_name}/{version}/status.json",
+            file_object,
+            length=len(contents),
+            content_type="application/json",
+        )
+
+    def get_log(self, model_name: str, version: str) -> dict:
+        version_path = f"{model_name}/{version}"
+        logger.debug("model_name: {}, version: {}", model_name, version)
+        path = f"{version_path}/log.json"
+        logger.debug("Getting log using path {}", path)
+        log = self.load_file(path)
+        log = json.loads(log)
+        return log
+
+    def put_log(self, model_name: str, version: str, log: dict):
+        logger.debug("Updating log for {}-{}, with {}", model_name, version, log)
+        contents = json.dumps(log).encode()
+        file_object = io.BytesIO(contents)
+
+        self.put(
+            f"{model_name}/{version}/log.json",
+            file_object,
+            length=len(contents),
+            content_type="application/json",
+        )
+
+
+def create_client() -> Client:
+    """
+    Creates a Minio client using env settings
+    """
+    host = os.getenv("S3_HOST")
+    root_folder = os.getenv("S3_FOLDER")
+    bucket = os.getenv("S3_BUCKET")
+    access_key_id = os.getenv("S3_ACCESS_KEY_ID")
+    secret_access_key = os.getenv("S3_SECRET_ACCESS_KEY")
+
+    assert isinstance(host, str)
+    assert isinstance(bucket, str)
+    assert isinstance(root_folder, str)
+    assert isinstance(access_key_id, str)
+    assert isinstance(secret_access_key, str)
+
+    client = Client(
+        host=host,
+        bucket=bucket,
+        prefix=root_folder,
+        access_key=access_key_id,
+        secret_key=secret_access_key,
+    )
+    return client
+
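Not part of the patch: a minimal usage sketch of the new client. The endpoint, bucket, folder and model nickname below are placeholders; the `S3_*` variable names are the ones `create_client` actually reads.

```python
# Sketch only: stand up a Client from env settings and write/read a status file.
# All values are placeholders; in CI they come from repository vars/secrets.
import os

os.environ["S3_HOST"] = "s3.example.org"
os.environ["S3_BUCKET"] = "model-uploads"
os.environ["S3_FOLDER"] = "staging"
os.environ["S3_ACCESS_KEY_ID"] = "dummy-key"
os.environ["S3_SECRET_ACCESS_KEY"] = "dummy-secret"

from s3_client import create_client

client = create_client()
version = client.get_unpublished_version("my-model")   # "1" when no versions exist yet
client.put_status("my-model", version, {"status": "staging", "messages": []})
print(client.get_status("my-model", version))
```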
diff --git a/.github/scripts/test_dynamically.py b/.github/scripts/test_dynamically.py
new file mode 100644
index 0000000..ae7ca16
--- /dev/null
+++ b/.github/scripts/test_dynamically.py
@@ -0,0 +1,76 @@
+import traceback
+from functools import partialmethod
+from pathlib import Path
+from typing import Optional
+
+import typer
+from bioimageio.spec import load_raw_resource_description
+from bioimageio.spec.shared import yaml
+from tqdm import tqdm
+from update_log import add_log_entry
+
+tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)  # silence tqdm
+
+
+def test_summary_from_exception(name: str, exception: Exception):
+    return dict(
+        name=name,
+        status="failed",
+        error=str(exception),
+        traceback=traceback.format_tb(exception.__traceback__),
+    )
+
+
+def test_dynamically(
+    descr_id: str,
+    source: str,
+    weight_format: Optional[str] = typer.Argument(
+        ..., help="weight format to test model with."
+    ),
+    create_env_outcome: str = "success",
+):
+    if weight_format is None:
+        # no dynamic tests for non-model resources yet...
+        return
+
+    if create_env_outcome == "success":
+        try:
+            from bioimageio.core.resource_tests import test_resource
+        except Exception as e:
+            summaries = [test_summary_from_exception(
+                "import test_resource from test environment", e
+            )]
+        else:
+            try:
+                rdf = yaml.load(source)
+                test_kwargs = (
+                    rdf.get("config", {})
+                    .get("bioimageio", {})
+                    .get("test_kwargs", {})
+                    .get(weight_format, {})
+                )
+            except Exception as e:
+                summaries = [test_summary_from_exception("check for test kwargs", e)]
+            else:
+                try:
+                    rd = load_raw_resource_description(source)
+                    summaries = test_resource(
+                        rd, weight_format=weight_format, **test_kwargs
+                    )
+                except Exception as e:
+                    summaries = [test_summary_from_exception("call 'test_resource'", e)]
+
+    else:
+        env_path = Path(f"conda_env_{weight_format}.yaml")
+        if env_path.exists():
+            error = "Failed to install conda environment:\n" + env_path.read_text()
+        else:
+            error = f"Conda environment yaml file not found: {env_path}"
+
+        summaries = [dict(name="install test environment", status="failed", error=error)]
+
+    add_log_entry(descr_id, "validation_summaries", summaries)
+
+
+if __name__ == "__main__":
+    typer.run(test_dynamically)
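Not part of the patch: in CI this entry point is invoked through typer, but the same function can be driven directly from Python; a hedged sketch with placeholder arguments (it records its result via add_log_entry, so the S3_* environment must be configured as above).

```python
# Sketch only: "my-model" and the package URL are placeholders.
from test_dynamically import test_dynamically

test_dynamically(
    descr_id="my-model",
    source="https://example.org/model.zip",
    weight_format="torchscript",
    create_env_outcome="success",  # any other value records the env-install failure instead
)
```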
diff --git a/.github/scripts/unzip_model.py b/.github/scripts/unzip_model.py
index 0b0089d..c04a32e 100644
--- a/.github/scripts/unzip_model.py
+++ b/.github/scripts/unzip_model.py
@@ -1,14 +1,13 @@
 import argparse
 import io
-import os
 import traceback
 from typing import Optional
 import urllib.request
 import zipfile
 
-from minio import Minio  # type: ignore
 from update_status import update_status
+from s3_client import create_client
 
 
 def create_parser() -> argparse.ArgumentParser:
@@ -38,22 +37,27 @@ def main():
         update_status(model_name, {'status' : err_message})
         raise
 
+
 def unzip_from_url(model_name, model_zip_url):
     filename = "model.zip"
-    s3_host = os.getenv("S3_HOST")
-    s3_bucket = os.getenv("S3_BUCKET")
-    s3_root_folder = os.getenv("S3_FOLDER")
-    s3_access_key_id = os.getenv("S3_ACCESS_KEY_ID")
-    s3_secret_access_key = os.getenv("S3_SECRET_ACCESS_KEY")
-
-    client = Minio(
-        s3_host,
-        access_key=s3_access_key_id,
-        secret_key=s3_secret_access_key,
-    )
-    found = client.bucket_exists(s3_bucket)
-    if not found:
-        raise Exception("target bucket does not exist: {s3_bucket}")
+    client = create_client()
+
+    versions = list(client.check_versions(model_name))
+    if len(versions) == 0:
+        version = "1"
+    else:
+        # TODO handle if a staging version exists vs
+        # if only published versions exist
+        raise NotImplementedError("Updating/publishing new version not implemented")
+
+    # TODO: Need to make sure status is staging
+    status = client.get_status(model_name, version)
+    status_str = status.get("status", "missing-status")
+    if status_str != "staging":
+        raise ValueError(
+            f"Model {model_name} at version {version} has status: {status_str}")
 
     # Download the model zip file
     remotezip = urllib.request.urlopen(model_zip_url)
@@ -63,17 +67,11 @@ def unzip_from_url(model_name, model_zip_url):
     for filename in zipobj.namelist():
         # file_object = io.BytesIO(zipobj)
         file_object = zipobj.open(filename)
-        s3_path = f"{s3_root_folder}/{model_name}/{filename}"
+        path = f"{model_name}/{version}/{filename}"
 
-        # For unknown length (ie without reading file into mem) give `part_size`
-        client.put_object(
-            s3_bucket,
-            s3_path,
+        client.put(
+            path,
             file_object,
-            length=-1,
-            part_size=10*1024*1024,
-            # length=len(status_message),
-            # content_type="application/json",
         )
diff --git a/.github/scripts/update_log.py b/.github/scripts/update_log.py
new file mode 100644
index 0000000..ce307a3
--- /dev/null
+++ b/.github/scripts/update_log.py
@@ -0,0 +1,55 @@
+import argparse
+from typing import Optional
+import datetime
+from loguru import logger
+
+from s3_client import create_client
+
+
+def create_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("model_name", help="Model name")
+    parser.add_argument("category", help="Log category")
+    parser.add_argument("summary", help="Log summary")
+    parser.add_argument("--version", help="Version")
+    return parser
+
+
+def get_args(argv: Optional[list] = None):
+    """
+    Get command-line arguments
+    """
+    parser = create_parser()
+    return parser.parse_args(argv)
+
+
+def main():
+    args = get_args()
+    model_name = args.model_name
+    category = args.category
+    summary = args.summary
+    version = args.version
+    add_log_entry(model_name, category, summary, version=version)
+
+
+def add_log_entry(model_name, category, summary, version=None):
+    timenow = datetime.datetime.now().isoformat()
+    client = create_client()
+    logger.info("Updating log for {} with category {} and summary {}",
+                model_name,
+                category,
+                summary)
+
+    if version is None:
+        version = client.get_unpublished_version(model_name)
+        logger.info("Version detected: {}", version)
+    else:
+        logger.info("Version requested: {}", version)
+    log = client.get_log(model_name, version)
+
+    if category not in log:
+        log[category] = []
+    log[category].append({"timestamp": timenow, "log": summary})
+    client.put_log(model_name, version, log)
+
+
+if __name__ == "__main__":
+    main()
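Not part of the patch: after a couple of runs, the log.json that add_log_entry maintains per model version ends up roughly shaped like this (timestamps and summary names are illustrative; the two entry names shown come from test_dynamically.py).

```python
# Illustrative contents of <model_name>/<version>/log.json.
log = {
    "validation_summaries": [
        {"timestamp": "2024-01-01T12:00:00", "log": [{"name": "install test environment", "status": "passed"}]},
        {"timestamp": "2024-01-01T12:05:00", "log": [{"name": "call 'test_resource'", "status": "failed"}]},
    ],
}
```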
diff --git a/.github/scripts/update_status.py b/.github/scripts/update_status.py
index e7b8310..7180f72 100644
--- a/.github/scripts/update_status.py
+++ b/.github/scripts/update_status.py
@@ -1,19 +1,17 @@
-import os
-import io
 import argparse
 from typing import Optional
-import json
 import datetime
+from loguru import logger
 
-from minio import Minio  # type: ignore
-# from minio.error import S3Error
+from s3_client import create_client
 
 
 def create_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
     parser.add_argument("model_name", help="Model name")
     parser.add_argument("status", help="Status")
-    parser.add_argument("step", help="Step", nargs="?", default=0, type=int)
-    parser.add_argument("num_steps", help="Status", nargs="?", default=0, type=int)
+    parser.add_argument("--version", help="Version")
+    parser.add_argument("--step", help="Step", default=0, type=int)
+    parser.add_argument("--num_steps", help="Number of steps", default=0, type=int)
     return parser
 
 
@@ -28,63 +26,39 @@ def get_args(argv: Optional[list] = None):
 def main():
     args = get_args()
     model_name = args.model_name
+    version = args.version
     step = args.step
     num_steps = args.num_steps
     status = args.status
-    update_status(model_name, status, step, num_steps)
+    update_status(model_name, status, version=version, step=step, num_steps=num_steps)
 
 
-def update_status(model_name, status, step=None, num_steps=None):
+def update_status(model_name, status_text, version=None, step=None, num_steps=None):
     timenow = datetime.datetime.now().isoformat()
-    s3_host = os.getenv("S3_HOST")
-    s3_bucket = os.getenv("S3_BUCKET")
-    s3_root_folder = os.getenv("S3_FOLDER")
-    s3_access_key_id = os.getenv("S3_ACCESS_KEY_ID")
-    s3_secret_access_key = os.getenv("S3_SECRET_ACCESS_KEY")
-    filename = "status.json"
-
-    client = Minio(
-        s3_host,
-        access_key=s3_access_key_id,
-        secret_key=s3_secret_access_key,
-    )
-    s3_path = f"{s3_root_folder}/{model_name}/{filename}"
-
-    try:
-        response = client.get_object(s3_bucket, s3_path)
-        # Read data from response.
-        status_message = json.loads(response.read())
-    except Exception:
-        status_message = {}
-    try:
-        response.close()
-        response.release_conn()
-    except Exception:
-        pass
-
-    found = client.bucket_exists(s3_bucket)
-    if not found:
-        raise Exception("target bucket does not exist: {s3_bucket}")
-    if "messages" not in status_message:
-        status_message["messages"] = []
-
+    client = create_client()
+    logger.info("Updating status for {} with text {} [steps={}, num_steps={}]",
+                model_name,
+                status_text,
+                step,
+                num_steps)
+
+    if version is None:
+        version = client.get_unpublished_version(model_name)
+        logger.info("Version detected: {}", version)
+    else:
+        logger.info("Version requested: {}", version)
+    status = client.get_status(model_name, version)
+
+    if "messages" not in status:
+        status["messages"] = []
     if step is not None:
-        status_message["step"] = step
+        status["step"] = step
     if num_steps is not None:
-        status_message["num_steps"] = num_steps
-    status_message["last_message"] = status
-    status_message["messages"].append({"timestamp": timenow, "text": status})
-
-    status_message_str = json.dumps(status_message).encode()
-    status_file_object = io.BytesIO(status_message_str)
+        status["num_steps"] = num_steps
+    status["last_message"] = status_text
+    status["messages"].append({"timestamp": timenow, "text": status_text})
+    client.put_status(model_name, version, status)
 
-    client.put_object(
-        s3_bucket,
-        s3_path,
-        status_file_object,
-        length=len(status_message_str),
-        content_type="application/json",
-    )
 
 
 if __name__ == "__main__":
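Not part of the patch: the status.json that update_status maintains per model version looks roughly like this (values are illustrative).

```python
# Illustrative contents of <model_name>/<version>/status.json.
status = {
    "step": 2,
    "num_steps": 6,
    "last_message": "Starting validation",
    "messages": [
        {"timestamp": "2024-01-01T12:00:00", "text": "Unzipping package"},
        {"timestamp": "2024-01-01T12:01:00", "text": "Starting validation"},
    ],
}
```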
diff --git a/.github/scripts/upload_model_to_zenodo.py b/.github/scripts/upload_model_to_zenodo.py
index 686d1e3..eedede8 100644
--- a/.github/scripts/upload_model_to_zenodo.py
+++ b/.github/scripts/upload_model_to_zenodo.py
@@ -5,19 +5,18 @@
 from pathlib import Path
 from urllib.parse import urlparse, urljoin, quote_plus
 from typing import Optional
-from datetime import timedelta, datetime
-from dataclasses import dataclass, field
+from datetime import datetime
 import pprint
 
 from packaging.version import parse as parse_version
 import requests  # type: ignore
-from minio import Minio
 from loguru import logger  # type: ignore
 import spdx_license_list  # type: ignore
 import yaml  # type: ignore
 
 from update_status import update_status
+from s3_client import create_client
 
 spdx_licenses = [item.id for item in spdx_license_list.LICENSES.values()]
 
@@ -58,6 +57,7 @@ def assert_good_response(response, message, info=None):
 def create_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
     parser.add_argument("--model_name", help="Model name", required=True)
+    parser.add_argument("--version", help="Version", nargs="?", default=None)
     return parser
 
 
@@ -74,14 +74,19 @@ def main():
     headers = {"Content-Type": "application/json"}
     params = {'access_token': ACCESS_TOKEN}
 
-    s3_settings = S3Settings(
-        host=S3_HOST,
-        bucket=S3_BUCKET,
-        prefix=f'{S3_FOLDER}/{args.model_name}',
-        access_key=S3_ACCESS_KEY,
-        secret_key=S3_SECRET_KEY)
+    client = create_client()
+
+    if args.version is None:
+        version = client.get_unpublished_version(args.model_name)
+    else:
+        version = args.version
+
+    s3_path = Path(args.model_name, version)
+
     # List the files at the model URL
-    file_urls = get_file_urls(s3_settings)
+    file_urls = client.get_file_urls(path=s3_path)
     logger.info("Using file URLs:\n{}", '\n'.join((str(obj) for obj in file_urls)))
 
     # Create empty deposition
@@ -97,7 +102,7 @@ def main():
     deposition_info = response.json()
     bucket_url = deposition_info["links"]["bucket"]
 
-    rdf_text = load_file_from_S3(s3_settings, "rdf.yaml")
+    rdf_text = client.load_file(Path(s3_path, "rdf.yaml"))
     rdf = yaml.safe_load(rdf_text)
     if not isinstance(rdf, dict):
         raise Exception('Failed to load rdf.yaml from S3')
@@ -118,7 +123,7 @@ def main():
         # Get the file URL
         docstring = docstring.replace("./", "")
-        text = load_file_from_S3(s3_settings, docstring)
+        text = client.load_file(Path(s3_path, docstring))
         # Load markdown?
         docstring = text
@@ -170,57 +175,6 @@ def main():
 
 
-@dataclass
-class S3Settings:
-    host: str
-    bucket: str
-    prefix: str
-    access_key: str = field(repr=False)
-    secret_key: str = field(repr=False)
-
-
-def get_file_urls(s3_settings: S3Settings, exclude_files=("status.json")) -> list[str]:
-    """Checks an S3 'folder' for its list of files"""
-    logger.debug("Getting file list from {}", s3_settings)
-    client = Minio(
-        s3_settings.host,
-        access_key=s3_settings.access_key,
-        secret_key=s3_settings.secret_key,
-    )
-    objects = client.list_objects(s3_settings.bucket, prefix=s3_settings.prefix, recursive=True)
-    file_urls : list[str] = []
-    for obj in objects:
-        if obj.is_dir:
-            continue
-        filename = Path(obj.object_name).name
-        if filename in exclude_files:
-            continue
-        # Option 1:
-        url = client.get_presigned_url(
-            "GET",
-            obj.bucket_name,
-            obj.object_name,
-            expires=timedelta(hours=1),
-        )
-        file_urls.append(url)
-        # Option 2: Work with minio.datatypes.Object directly
-    return file_urls
-
-
-def load_file_from_S3(s3_settings: S3Settings, filename):
-    client = Minio(
-        s3_settings.host,
-        access_key=s3_settings.access_key,
-        secret_key=s3_settings.secret_key,
-    )
-    url = client.get_presigned_url(
-        "GET",
-        s3_settings.bucket,
-        str(Path(s3_settings.prefix, filename)),
-        expires=timedelta(minutes=10),
-    )
-    response = requests.get(url)
-    return response.content
 
 
 def put_file_from_url(file_url: str, destination_url: str, params: dict) -> dict:
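Not part of the patch: condensed, the version and file resolution now performed at the top of main() (placeholder model name).

```python
# Sketch of the resolution upload_model_to_zenodo.py performs before talking to Zenodo.
from pathlib import Path
from s3_client import create_client

client = create_client()
version = client.get_unpublished_version("my-model")    # or taken from --version
s3_path = Path("my-model", version)
file_urls = client.get_file_urls(path=s3_path)          # presigned GET URLs, status.json excluded
rdf_text = client.load_file(Path(s3_path, "rdf.yaml"))  # raw bytes of the resource description
```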
diff --git a/.github/scripts/validate_format.py b/.github/scripts/validate_format.py
new file mode 100644
index 0000000..4eb7b80
--- /dev/null
+++ b/.github/scripts/validate_format.py
@@ -0,0 +1,284 @@
+import json
+import os
+import uuid
+import warnings
+from functools import partialmethod
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+import requests
+import typer
+from bioimageio.spec import load_raw_resource_description, validate
+from bioimageio.spec.model.raw_nodes import Model, WeightsFormat
+from bioimageio.spec.rdf.raw_nodes import RDF_Base
+from bioimageio.spec.shared import yaml
+from bioimageio.spec.shared.raw_nodes import URI, Dependencies
+from marshmallow import missing
+from marshmallow.utils import _Missing
+from packaging.version import Version
+from tqdm import tqdm
+
+tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)  # silence tqdm
+
+
+from update_log import add_log_entry
+
+
+def set_multiple_gh_actions_outputs(outputs: Dict[str, Union[str, Any]]):
+    for name, out in outputs.items():
+        set_gh_actions_output(name, out)
+
+
+def set_gh_actions_output(name: str, output: Union[str, Any]):
+    """set output of a github actions workflow step calling this script"""
+    if isinstance(output, bool):
+        output = "yes" if output else "no"
+
+    if not isinstance(output, str):
+        output = json.dumps(output, sort_keys=True)
+
+    if "GITHUB_OUTPUT" not in os.environ:
+        print(output)
+        return
+
+    if "\n" in output:
+        with open(os.environ["GITHUB_OUTPUT"], "a") as fh:
+            delimiter = uuid.uuid1()
+            print(f"{name}<<{delimiter}", file=fh)
+            print(output, file=fh)
+            print(delimiter, file=fh)
+    else:
+        with open(os.environ["GITHUB_OUTPUT"], "a") as fh:
+            print(f"{name}={output}", file=fh)
+
+
+def get_base_env():
+    return {"channels": ["conda-forge"], "dependencies": ["bioimageio.core"]}
+
+
+def get_env_from_deps(deps: Dependencies):
+    conda_env = get_base_env()
+    try:
+        if deps.manager in ["conda", "pip"]:
+            if isinstance(deps.file, Path):
+                raise TypeError(
+                    f"File path for remote source? {deps.file} should be a url"
+                )
+            elif not isinstance(
+                deps.file, URI
+            ):  # pyright: ignore[reportUnnecessaryIsInstance]
+                raise TypeError(deps.file)
+
+            r = requests.get(str(deps.file))
+            r.raise_for_status()
+            dep_file_content = r.text
+            if deps.manager == "conda":
+                conda_env = yaml.load(dep_file_content)
+
+                # add bioimageio.core to dependencies
+                deps = conda_env.get("dependencies", [])
+                if not isinstance(deps, list):
+                    raise TypeError(
+                        f"expected dependencies in conda environment.yaml to be a list, but got: {deps}"
+                    )
+                if not any(
+                    isinstance(d, str) and d.startswith("bioimageio.core") for d in deps
+                ):
+                    conda_env["dependencies"] = deps + ["conda-forge::bioimageio.core"]
+            elif deps.manager == "pip":
+                pip_req = [
+                    d
+                    for d in dep_file_content.split("\n")
+                    if not d.strip().startswith("#")
+                ]
+                if not any(r.startswith("bioimageio.core") for r in pip_req):
+                    pip_req.append("bioimageio.core")
+
+                conda_env = dict(
+                    channels=["conda-forge"],
+                    dependencies=["python=3.9", "pip", {"pip": pip_req}],
+                )
+            else:
+                raise NotImplementedError(deps.manager)
+
+    except Exception as e:
+        warnings.warn(f"Failed to resolve dependencies: {e}")
+
+    return conda_env
+
+
+def get_version_range(v: Version) -> str:
+    return f"=={v.major}.{v.minor}.*"
+
+
+def get_default_env(
+    *,
+    opset_version: Optional[int] = None,
+    pytorch_version: Optional[Version] = None,
+    tensorflow_version: Optional[Version] = None,
+):
+    conda_env: Dict[str, Union[Any, List[Any]]] = get_base_env()
+    if opset_version is not None:
+        conda_env["dependencies"].append("onnxruntime")
+        # note: we should not need to worry about the opset version,
+        # see https://github.com/microsoft/onnxruntime/blob/master/docs/Versioning.md
+
+    if pytorch_version is not None:
+        conda_env["channels"].insert(0, "pytorch")
+        conda_env["dependencies"].extend(
+            [f"pytorch {get_version_range(pytorch_version)}", "cpuonly"]
+        )
+
+    if tensorflow_version is not None:
+        # tensorflow 1 is not available on conda, so we need to inject this as a pip dependency
+        if tensorflow_version.major == 1:
+            tensorflow_version = max(
+                tensorflow_version, Version("1.13")
+            )  # tf <1.13 not available anymore
+            assert opset_version is None
+            assert pytorch_version is None
+            conda_env["dependencies"] = [
+                "pip",
+                "python=3.7.*",
+            ]  # tf 1.15 not available for py>=3.8
+            # get bioimageio.core (and its dependencies) via pip as well to avoid conda/pip mix
+            # protobuf pin: tf 1 does not pin an upper limit for protobuf,
+            # but fails to load models saved with protobuf 3 when installing protobuf 4.
+            conda_env["dependencies"].append(
+                {
+                    "pip": [
+                        "bioimageio.core",
+                        f"tensorflow {get_version_range(tensorflow_version)}",
+                        "protobuf <4.0",
+                    ]
+                }
+            )
+        elif tensorflow_version.major == 2 and tensorflow_version.minor < 11:
+            # get older tf versions from defaults channel
+            conda_env = {
+                "channels": ["defaults"],
+                "dependencies": [
+                    "conda-forge::bioimageio.core",
+                    f"tensorflow {get_version_range(tensorflow_version)}",
+                ],
+            }
+        else:  # use conda-forge otherwise
+            conda_env["dependencies"].append(
+                f"tensorflow {get_version_range(tensorflow_version)}"
+            )
+
+    return conda_env
+
+
+def write_conda_env_file(
+    *, rd: Model, weight_format: WeightsFormat, path: Path, env_name: str
+):
+    assert isinstance(rd, Model)
+    given_versions: Dict[str, Union[_Missing, Version]] = {}
+    default_versions = dict(
+        pytorch_version=Version("1.10"),
+        tensorflow_version=Version("1.15"),
+        opset_version=15,
+    )
+    if weight_format in ["pytorch_state_dict", "torchscript"]:
+        given_versions["pytorch_version"] = rd.weights[weight_format].pytorch_version
+    elif weight_format in ["tensorflow_saved_model_bundle", "keras_hdf5"]:
+        given_versions["tensorflow_version"] = rd.weights[
+            weight_format
+        ].tensorflow_version
+    elif weight_format in ["onnx"]:
+        given_versions["opset_version"] = rd.weights[weight_format].opset_version
+    else:
+        raise NotImplementedError(weight_format)
+
+    deps = rd.weights[weight_format].dependencies
+    if deps is missing:
+        conda_env = get_default_env(
+            **{vn: v or default_versions[vn] for vn, v in given_versions.items()}
+        )
+    else:
+        if any(given_versions.values()):
+            warnings.warn(
+                f"Using specified dependencies; ignoring given versions: {given_versions}"
+            )
+
+        conda_env = get_env_from_deps(deps)
+
+    conda_env["name"] = env_name
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    yaml.dump(conda_env, path)
+
+
+def ensure_valid_conda_env_name(name: str) -> str:
+    for illegal in ("/", " ", ":", "#"):
+        name = name.replace(illegal, "")
+
+    return name or "empty"
+
+
+def prepare_dynamic_test_cases(descr_id: str, rd: RDF_Base) -> List[Dict[str, str]]:
+    validation_cases: List[Dict[str, str]] = []
+    # construct test cases based on resource type
+    if isinstance(rd, Model):
+        # generate validation cases per weight format
+        for wf in rd.weights:
+            # we skip the keras validation for now, see
+            # https://github.com/bioimage-io/collection-bioimage-io/issues/16
+            if wf in ("keras_hdf5", "tensorflow_js"):
+                warnings.warn(f"{wf} weights are currently not validated")
+                continue
+
+            env_name = ensure_valid_conda_env_name(descr_id)
+            write_conda_env_file(
+                rd=rd,
+                weight_format=wf,
+                path=Path(f"conda_env_{wf}.yaml"),
+                env_name=env_name,
+            )
+            validation_cases.append(
+                {
+                    "env_name": env_name,
+                    "weight_format": wf,
+                }
+            )
+    elif isinstance(rd, RDF_Base):  # pyright: ignore[reportUnnecessaryIsInstance]
+        pass
+    else:
+        raise TypeError(rd)
+
+    return validation_cases
+
+
+def validate_format(descr_id: str, source: str):
+    dynamic_test_cases: List[Dict[str, str]] = []
+
+    summaries = [validate(source)]
+
+    if summaries[0]["status"] == "passed":
+        # validate rdf using the latest format version
+        latest_static_summary = validate(source, update_format=True)
+        if latest_static_summary["status"] == "passed":
+            rd = load_raw_resource_description(source, update_to_format="latest")
+            assert isinstance(rd, RDF_Base)
+            dynamic_test_cases += prepare_dynamic_test_cases(descr_id, rd)
+
+        if "name" not in latest_static_summary:
+            latest_static_summary["name"] = (
+                "bioimageio.spec static validation with auto-conversion to latest format"
+            )
+        summaries.append(latest_static_summary)
+
+    add_log_entry(descr_id, "validation_summaries", summaries)
+
+    set_multiple_gh_actions_outputs(
+        dict(
+            has_dynamic_test_cases=bool(dynamic_test_cases),
+            dynamic_test_cases={"include": dynamic_test_cases},
+        )
+    )
+
+
+if __name__ == "__main__":
+    typer.run(validate_format)
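Not part of the patch: validate_format hands its results to the workflow via GITHUB_OUTPUT. For a model with, say, torchscript and onnx weights, the step outputs would look roughly like this (booleans are serialized as "yes"/"no", dicts as JSON).

```python
# Illustrative step outputs produced by set_multiple_gh_actions_outputs().
outputs = {
    "has_dynamic_test_cases": "yes",
    "dynamic_test_cases": {
        "include": [
            {"env_name": "my-model", "weight_format": "torchscript"},
            {"env_name": "my-model", "weight_format": "onnx"},
        ]
    },
}
# The test job consumes this via: matrix: ${{ fromJson(needs.validate.outputs.dynamic_test_cases) }}
```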
diff --git a/.github/workflows/ci_runner.yaml b/.github/workflows/ci_runner.yaml
index e52a50e..72c8578 100644
--- a/.github/workflows/ci_runner.yaml
+++ b/.github/workflows/ci_runner.yaml
@@ -3,66 +3,91 @@ name: CI Runner
 on:
   workflow_dispatch:
     inputs:
-       model_nickname:
+      model_nickname:
         description: 'Nickname of the model - to be used to access the model data on S3'
         required: true
         type: string
       model_zip_url:
         description: 'Presigned url for the model zip-file'
         required: true
-        type: true
+        type: string
+
+
+env:
+  S3_HOST: ${{vars.S3_HOST}}
+  S3_BUCKET: ${{vars.S3_BUCKET}}
+  S3_FOLDER: ${{vars.S3_FOLDER}}
+  S3_ACCESS_KEY_ID: ${{secrets.S3_ACCESS_KEY_ID}}
+  S3_SECRET_ACCESS_KEY: ${{secrets.S3_SECRET_ACCESS_KEY}}
 
 jobs:
-  test-model:
+  validate:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-    env:
-      S3_HOST: ${{vars.S3_HOST}}
-      S3_BUCKET: ${{vars.S3_BUCKET}}
-      S3_FOLDER: ${{vars.S3_FOLDER}}
-      S3_ACCESS_KEY_ID: ${{secrets.S3_ACCESS_KEY_ID}}
-      S3_SECRET_ACCESS_KEY: ${{secrets.S3_SECRET_ACCESS_KEY}}
-      ZENODO_URL: ${{vars.ZENODO_URL}}
-      ZENODO_API_ACCESS_TOKEN: ${{secrets.ZENODO_API_ACCESS_TOKEN}}
+    outputs:
+      dynamic_test_cases: ${{ steps.validate.outputs.dynamic_test_cases }}
+      has_dynamic_test_cases: ${{ steps.validate.outputs.has_dynamic_test_cases }}
     steps:
-    - uses: actions/checkout@v4
+      - uses: actions/checkout@v4
+      - name: Install workflow script dependencies
+        run: |
+          echo "Installing workflow script dependencies"
+          python -m pip install --upgrade pip
+          python -m pip install "minio==7.2.3" "loguru==0.7.2" "ruamel.yaml==0.18.5" "bioimageio.spec==0.4.9.post5" "typer"
+      - name: Unzip model file
+        run: |
+          python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Unzipping package" --step 1 --num_steps 6
+          python .github/scripts/unzip_model.py "${{inputs.model_nickname}}" "${{inputs.model_zip_url}}"
 
-    - name: Install workflow script dependencies
-      run: |
-        echo "Installing workflow script dependencies"
-        python -m pip install --upgrade pip
-        python -m pip install "minio==7.2.3" "loguru==0.7.2" "packaging==23.2" "spdx-license-list==3.22" "PyYAML==6.0.1"
-    - name: Unzip model file
-      run: |
-        python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Unzipping model-file of ${{inputs.model_nickname}}" "1" "6"
-        python .github/scripts/unzip_model.py "${{inputs.model_nickname}}" "${{inputs.model_zip_url}}"
-    - name: Install dependencies
-      run: |
-        python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Installing dependencies of ${{inputs.model_nickname}}" "2" "6"
-        echo "Installing dependencies"
-        python -m pip install --upgrade pip
-        python -m pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+      - name: Validate format
+        id: validate
+        run: |
+          python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Starting validation" --step 2 --num_steps 6
+          python .github/scripts/validate_format.py "${{ inputs.model_nickname }}" "${{inputs.model_zip_url}}"
+      - run: |
+          python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Starting additional tests" --step 3 --num_steps 6
+        if: steps.validate.outputs.has_dynamic_test_cases == 'yes'
+      - run: |
+          python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Validation done" --step 3 --num_steps 6
+        if: steps.validate.outputs.has_dynamic_test_cases == 'no'
 
-    #- name: Additional Steps
-      #run: |
-        #python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Running additional steps of ${{inputs.model_nickname}}" "3" "8"
-        #echo "Running additional steps..."
-        #sleep 30
-        #echo "Done"
-        #python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Done running additional steps of ${{inputs.model_nickname}}" "4" "8"
-    - name: Main testing
-      run: |
-        python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "WARNING: Skipping testing ${{inputs.model_nickname}}" "3" "6"
-        #python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "WARNING: Skipping Running main testing of ${{inputs.model_nickname}}" "5" "8"
-        sleep 5
-        python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Testing complete of ${{inputs.model_nickname}}" "4" "6"
+  test:
+    needs: validate
+    if: needs.validate.outputs.has_dynamic_test_cases == 'yes'
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(needs.validate.outputs.dynamic_test_cases) }}  # [{env_name: ..., weight_format: ...}, ...]
+    steps:
+      - uses: actions/checkout@v4
+      - name: install validation dependencies
+        id: create_env
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          cache-downloads: true
+          environment-name: ${{ matrix.env_name }}
+          environment-file: conda_env_${{ matrix.weight_format }}.yaml
+          create-args: >-  # script dependencies
+            typer
+            conda-forge::bioimageio.spec
+        continue-on-error: true  # we inspect this step's outcome in test_dynamically.py
+        timeout-minutes: 60
+      - name: install minimal script dependencies if val env failed
+        if: ${{ steps.create_env.outcome != 'success' }}
+        run: pip install typer bioimageio.spec
+      - name: dynamic validation
+        shell: bash -l {0}
+        run: python .github/scripts/test_dynamically.py "${{ inputs.model_nickname }}" "${{inputs.model_zip_url}}" ${{ matrix.weight_format }} --create-env-outcome ${{ steps.create_env.outcome }}
+        timeout-minutes: 60
 
-    - name: Publish within to Zenodo
-      run: |
-        python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Running publishing of ${{inputs.model_nickname}} to Zenodo" "5" "6"
-        python .github/scripts/upload_model_to_zenodo.py --model_name "${{inputs.model_nickname}}"
-        python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Publishing complete" "6" "6"
+  conclude:
+    needs: test
+    if: always()  # run even if test job fails
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - run: |
+          python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Awaiting review" --step 4 --num_steps 6
diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
new file mode 100644
index 0000000..2530135
--- /dev/null
+++ b/.github/workflows/publish.yaml
@@ -0,0 +1,38 @@
+name: publish
+
+on:
+  workflow_dispatch:
+    inputs:
+      model_nickname:
+        description: 'Nickname of the model - to be used to access the model data on S3'
+        required: true
+        type: string
+
+
+env:
+  S3_HOST: ${{vars.S3_HOST}}
+  S3_BUCKET: ${{vars.S3_BUCKET}}
+  S3_FOLDER: ${{vars.S3_FOLDER}}
+  S3_ACCESS_KEY_ID: ${{secrets.S3_ACCESS_KEY_ID}}
+  S3_SECRET_ACCESS_KEY: ${{secrets.S3_SECRET_ACCESS_KEY}}
+  ZENODO_URL: ${{vars.ZENODO_URL}}
+  ZENODO_API_ACCESS_TOKEN: ${{secrets.ZENODO_API_ACCESS_TOKEN}}
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install workflow script dependencies
+        run: |
+          echo "Installing workflow script dependencies"
+          python -m pip install --upgrade pip
+          python -m pip install "minio==7.2.3" "loguru==0.7.2" "packaging==23.2" "spdx-license-list==3.22" "ruamel.yaml==0.18.5" "PyYAML==6.0.1" "requests" "typer"
+      - name: Publish to Zenodo
+        run: |
+          python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Publishing to Zenodo" --step 5 --num_steps 6
+          python .github/scripts/upload_model_to_zenodo.py --model_name "${{inputs.model_nickname}}"
+          python .github/scripts/update_status.py "${{ inputs.model_nickname }}" "Publishing complete" --step 6 --num_steps 6