From 7c629ad70a0d758e1814d586f124d45aeda70ddc Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 28 Feb 2024 14:12:47 +0100 Subject: [PATCH 1/6] log gh outputs --- backoffice/utils/_gh.py | 40 +++++++++++++++++++++++++++++++++++ backoffice/validate_format.py | 34 ++--------------------------- 2 files changed, 42 insertions(+), 32 deletions(-) create mode 100644 backoffice/utils/_gh.py diff --git a/backoffice/utils/_gh.py b/backoffice/utils/_gh.py new file mode 100644 index 00000000..9369b74a --- /dev/null +++ b/backoffice/utils/_gh.py @@ -0,0 +1,40 @@ +import json +import os +import uuid +from io import TextIOWrapper +from typing import Any, Union + +from loguru import logger + + +def _set_gh_actions_output_impl(msg: Union[str, uuid.UUID], fh: TextIOWrapper): + logger.info("GH actions output: {}", msg) + print(msg, file=fh) + + +def set_gh_actions_output(name: str, output: Union[str, Any]): + """set output of a github actions workflow step calling this script""" + if isinstance(output, bool): + output = "yes" if output else "no" + + if not isinstance(output, str): + output = json.dumps(output, sort_keys=True) + + if "GITHUB_OUTPUT" not in os.environ: + logger.error("GITHUB_OUTPUT env var not defined; output would be: {}", output) + return + + if "\n" in output: + with open(os.environ["GITHUB_OUTPUT"], "a") as fh: + delimiter = uuid.uuid1() + _set_gh_actions_output_impl(f"{name}<<{delimiter}", fh) + _set_gh_actions_output_impl(output, fh) + _set_gh_actions_output_impl(delimiter, fh) + else: + with open(os.environ["GITHUB_OUTPUT"], "a") as fh: + _set_gh_actions_output_impl(f"{name}={output}", fh) + + +def set_multiple_gh_actions_outputs(outputs: dict[str, Union[str, Any]]): + for name, out in outputs.items(): + set_gh_actions_output(name, out) diff --git a/backoffice/validate_format.py b/backoffice/validate_format.py index 4085b070..90429dc6 100644 --- a/backoffice/validate_format.py +++ b/backoffice/validate_format.py @@ -1,9 +1,6 @@ -import json -import os -import uuid import warnings from pathlib import Path -from typing import Any, Literal, Optional, TypedDict, Union, cast +from typing import Literal, Optional, TypedDict, Union, cast import pooch from bioimageio.spec import InvalidDescr, ResourceDescr, load_description @@ -13,6 +10,7 @@ from ruyaml import YAML from typing_extensions import assert_never +from backoffice.utils._gh import set_multiple_gh_actions_outputs from backoffice.utils.remote_resource import StagedVersion yaml = YAML(typ="safe") @@ -29,34 +27,6 @@ ] -def set_multiple_gh_actions_outputs(outputs: dict[str, Union[str, Any]]): - for name, out in outputs.items(): - set_gh_actions_output(name, out) - - -def set_gh_actions_output(name: str, output: Union[str, Any]): - """set output of a github actions workflow step calling this script""" - if isinstance(output, bool): - output = "yes" if output else "no" - - if not isinstance(output, str): - output = json.dumps(output, sort_keys=True) - - if "GITHUB_OUTPUT" not in os.environ: - print(output) - return - - if "\n" in output: - with open(os.environ["GITHUB_OUTPUT"], "a") as fh: - delimiter = uuid.uuid1() - print(f"{name}<<{delimiter}", file=fh) - print(output, file=fh) - print(delimiter, file=fh) - else: - with open(os.environ["GITHUB_OUTPUT"], "a") as fh: - print(f"{name}={output}", file=fh) - - class PipDeps(TypedDict): pip: list[str] From 2f415b3653947a2649753c19b8a3e8b6aeb3f3e2 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 28 Feb 2024 23:54:26 +0100 Subject: [PATCH 2/6] respect and test sem_ver --- .gitignore | 1 + backoffice/utils/remote_resource.py | 47 ++++++++++++++++++++++------- backoffice/utils/s3_structure.py | 11 ++++++- backoffice/validate_format.py | 19 ++++++++++++ setup.py | 4 +-- 5 files changed, 68 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 009f1d37..55e686b7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ .env +*.egg-info/ docs/ diff --git a/backoffice/utils/remote_resource.py b/backoffice/utils/remote_resource.py index af2b9e62..594ad99c 100644 --- a/backoffice/utils/remote_resource.py +++ b/backoffice/utils/remote_resource.py @@ -14,7 +14,15 @@ from typing_extensions import assert_never from .s3_client import Client -from .s3_structure import Details, Log, LogCategory, Status, StatusName +from .s3_structure import ( + Details, + Log, + LogCategory, + Status, + StatusName, + VersionDetails, + Versions, +) yaml = YAML(typ="safe") @@ -29,6 +37,19 @@ class RemoteResource: id: str """resource identifier""" + @property + def versions_path(self) -> str: + return f"{self.id}/versions.json" + + def get_published_versions(self) -> Versions: + versions_data = self.client.load_file(self.versions_path) + if versions_data is None: + versions: Versions = {} + else: + versions = json.loads(versions_data) + assert isinstance(versions, dict) + return versions + def _get_latest_stage_nr(self) -> Optional[int]: staged = list(map(int, self.client.ls(f"{self.id}/staged/", only_folders=True))) if not staged: @@ -226,32 +247,36 @@ def await_review(self): def publish(self) -> PublishedVersion: """publish this staged version candidate as the next resource version""" # get next version and update versions.json - versions_path = f"{self.id}/versions.json" - versions_data = self.client.load_file(versions_path) - if versions_data is None: - versions: dict[str, Any] = {} + versions = self.get_published_versions() + if not versions: next_version = 1 else: - versions = json.loads(versions_data) next_version = max(map(int, versions)) + 1 logger.debug("Publishing {} as version {}", self.folder, next_version) assert next_version not in versions, (next_version, versions) - versions[str(next_version)] = {} + # load rdf + staged_rdf_path = f"{self.folder}files/rdf.yaml" + rdf_data = self.client.load_file(staged_rdf_path) + rdf = yaml.load(rdf_data) + + sem_ver = rdf.get("sem_ver") + if sem_ver is not None and sem_ver in {v["sem_ver"] for v in versions.values()}: + raise RuntimeError(f"Trying to publish {sem_ver} again!") + + versions[next_version] = VersionDetails(sem_ver=sem_ver) + updated_versions_data = json.dumps(versions).encode() self.client.put( - versions_path, + self.versions_path, io.BytesIO(updated_versions_data), length=len(updated_versions_data), ) ret = PublishedVersion(client=self.client, id=self.id, version=next_version) # move rdf.yaml and set version in it - staged_rdf_path = f"{self.folder}files/rdf.yaml" - rdf_data = self.client.load_file(staged_rdf_path) - rdf = yaml.load(rdf_data) rdf["version"] = ret.version stream = io.StringIO() yaml.dump(rdf, stream) diff --git a/backoffice/utils/s3_structure.py b/backoffice/utils/s3_structure.py index cc1d1cd4..0d33672a 100644 --- a/backoffice/utils/s3_structure.py +++ b/backoffice/utils/s3_structure.py @@ -1,10 +1,19 @@ """ Descriptions of +- `/versions.json` `Versions` - `//log.json` → `Log` - `//details.json` → `Details` """ -from typing import Any, Literal, TypedDict +from typing import Any, Literal, Optional, TypedDict + + +class VersionDetails(TypedDict): + sem_ver: Optional[str] + + +Versions = dict[int, VersionDetails] +"""info about published resource versions at `/versions.json`""" LogCategory = Literal[ "bioimageio.spec", "bioimageio.core", "ilastik", "deepimagej", "icy", "biapy" diff --git a/backoffice/validate_format.py b/backoffice/validate_format.py index 90429dc6..87593950 100644 --- a/backoffice/validate_format.py +++ b/backoffice/validate_format.py @@ -6,6 +6,7 @@ from bioimageio.spec import InvalidDescr, ResourceDescr, load_description from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import WeightsFormat +from bioimageio.spec.summary import ErrorEntry, ValidationDetail from packaging.version import Version from ruyaml import YAML from typing_extensions import assert_never @@ -232,6 +233,24 @@ def validate_format(staged: StagedVersion): rd = rd_latest rd.validation_summary.status = "passed" # passed in 'discover' mode + if not isinstance(rd, InvalidDescr) and rd.sem_ver is not None: + published = staged.get_published_versions() + if rd.sem_ver in {v["sem_ver"] for v in published.values()}: + error = ErrorEntry( + loc=("sem_ver",), + msg=f"Trying to publish semantic version {rd.sem_ver} again!", + type="error", + ) + else: + error = None + + rd.validation_summary.add_detail( + ValidationDetail( + name="Enforce that RDF has unpublished semantic version (field `sem_ver`)", + status="passed" if error is None else "failed", + errors=[error], + ) + ) summary = rd.validation_summary.model_dump(mode="json") staged.add_log_entry("bioimageio.spec", summary) diff --git a/setup.py b/setup.py index 3a385f70..11bf74f8 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ ], packages=find_packages(exclude=["tests"]), install_requires=[ - "bioimageio.core @ git+https://github.com/bioimage-io/core-bioimage-io-python@3a7875b5debc2d52b2fc87f6579afe217e1c7280", # TODO: change to released version - "bioimageio.spec @ git+https://github.com/bioimage-io/spec-bioimage-io@539a09d0a35144a5928f8a58433c76ff1f2c3bcb", # TODO: change to released version + "bioimageio.core @ git+https://github.com/bioimage-io/core-bioimage-io-python@40aae1005e6caea4a3ad6104f54deba8582d0f87", # TODO: change to released version + "bioimageio.spec @ git+https://github.com/bioimage-io/spec-bioimage-io@b9714ba8d8214c67e9377a8cd24f6ba65ed1b981", # TODO: change to released version "fire", "loguru", "minio==7.2.3", From 5aefd541357bfac063ed2e02a49e392e8f47ba27 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 29 Feb 2024 00:07:46 +0100 Subject: [PATCH 3/6] fix error entries --- backoffice/validate_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backoffice/validate_format.py b/backoffice/validate_format.py index 87593950..b11da623 100644 --- a/backoffice/validate_format.py +++ b/backoffice/validate_format.py @@ -248,7 +248,7 @@ def validate_format(staged: StagedVersion): ValidationDetail( name="Enforce that RDF has unpublished semantic version (field `sem_ver`)", status="passed" if error is None else "failed", - errors=[error], + errors=[] if error is None else [error], ) ) From bbc0e8278520428c1eb78ae4a90abb7802ff116d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 29 Feb 2024 00:08:57 +0100 Subject: [PATCH 4/6] fix sem_ver comparison --- backoffice/validate_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backoffice/validate_format.py b/backoffice/validate_format.py index b11da623..a3e686ee 100644 --- a/backoffice/validate_format.py +++ b/backoffice/validate_format.py @@ -235,7 +235,7 @@ def validate_format(staged: StagedVersion): rd.validation_summary.status = "passed" # passed in 'discover' mode if not isinstance(rd, InvalidDescr) and rd.sem_ver is not None: published = staged.get_published_versions() - if rd.sem_ver in {v["sem_ver"] for v in published.values()}: + if str(rd.sem_ver) in {v["sem_ver"] for v in published.values()}: error = ErrorEntry( loc=("sem_ver",), msg=f"Trying to publish semantic version {rd.sem_ver} again!", From 04e4cf1f679908a9e8c9226975d01a3aafd49865 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 29 Feb 2024 00:23:36 +0100 Subject: [PATCH 5/6] log json uploads --- backoffice/utils/s3_client.py | 7 ++++++- setup.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/backoffice/utils/s3_client.py b/backoffice/utils/s3_client.py index 7ece42b5..e29ff501 100644 --- a/backoffice/utils/s3_client.py +++ b/backoffice/utils/s3_client.py @@ -74,8 +74,13 @@ def put( def put_json(self, path: str, json_value: Any): """convenience method to upload a json file from a json serializable value""" - data = json.dumps(json_value).encode() + data_str = json.dumps(json_value) + data = data_str.encode() self.put(path, io.BytesIO(data), length=len(data)) + data_log = data_str[:1000] + if len(data_log) < len(data_str): + data_log += "..." + logger.debug("Uploaded {}", data_log) def get_file_urls( self, diff --git a/setup.py b/setup.py index 11bf74f8..c6cb11e3 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ "bioimageio.spec @ git+https://github.com/bioimage-io/spec-bioimage-io@b9714ba8d8214c67e9377a8cd24f6ba65ed1b981", # TODO: change to released version "fire", "loguru", - "minio==7.2.3", + "minio==7.2.4", "ruyaml", "tqdm", ], From 2914f06e8465b6938723de75afb698cce97f34bd Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 29 Feb 2024 11:06:35 +0100 Subject: [PATCH 6/6] clean up version_nr vs sem_ver --- backoffice/utils/remote_resource.py | 4 ++-- backoffice/utils/s3_structure.py | 5 ++++- backoffice/validate_format.py | 10 +++++----- setup.py | 4 ++-- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/backoffice/utils/remote_resource.py b/backoffice/utils/remote_resource.py index 594ad99c..cd162989 100644 --- a/backoffice/utils/remote_resource.py +++ b/backoffice/utils/remote_resource.py @@ -253,7 +253,7 @@ def publish(self) -> PublishedVersion: else: next_version = max(map(int, versions)) + 1 - logger.debug("Publishing {} as version {}", self.folder, next_version) + logger.debug("Publishing {} as version nr {}", self.folder, next_version) assert next_version not in versions, (next_version, versions) @@ -262,7 +262,7 @@ def publish(self) -> PublishedVersion: rdf_data = self.client.load_file(staged_rdf_path) rdf = yaml.load(rdf_data) - sem_ver = rdf.get("sem_ver") + sem_ver = rdf.get("version") if sem_ver is not None and sem_ver in {v["sem_ver"] for v in versions.values()}: raise RuntimeError(f"Trying to publish {sem_ver} again!") diff --git a/backoffice/utils/s3_structure.py b/backoffice/utils/s3_structure.py index 0d33672a..358c22fd 100644 --- a/backoffice/utils/s3_structure.py +++ b/backoffice/utils/s3_structure.py @@ -12,7 +12,10 @@ class VersionDetails(TypedDict): sem_ver: Optional[str] -Versions = dict[int, VersionDetails] +VersionNr = int +"""the n-th published version""" + +Versions = dict[VersionNr, VersionDetails] """info about published resource versions at `/versions.json`""" LogCategory = Literal[ diff --git a/backoffice/validate_format.py b/backoffice/validate_format.py index a3e686ee..92758497 100644 --- a/backoffice/validate_format.py +++ b/backoffice/validate_format.py @@ -233,12 +233,12 @@ def validate_format(staged: StagedVersion): rd = rd_latest rd.validation_summary.status = "passed" # passed in 'discover' mode - if not isinstance(rd, InvalidDescr) and rd.sem_ver is not None: + if not isinstance(rd, InvalidDescr) and rd.version is not None: published = staged.get_published_versions() - if str(rd.sem_ver) in {v["sem_ver"] for v in published.values()}: + if str(rd.version) in {v["sem_ver"] for v in published.values()}: error = ErrorEntry( - loc=("sem_ver",), - msg=f"Trying to publish semantic version {rd.sem_ver} again!", + loc=("version",), + msg=f"Trying to publish version {rd.version} again!", type="error", ) else: @@ -246,7 +246,7 @@ def validate_format(staged: StagedVersion): rd.validation_summary.add_detail( ValidationDetail( - name="Enforce that RDF has unpublished semantic version (field `sem_ver`)", + name="Enforce that RDF has unpublished semantic `version`", status="passed" if error is None else "failed", errors=[] if error is None else [error], ) diff --git a/setup.py b/setup.py index c6cb11e3..9355b89a 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ ], packages=find_packages(exclude=["tests"]), install_requires=[ - "bioimageio.core @ git+https://github.com/bioimage-io/core-bioimage-io-python@40aae1005e6caea4a3ad6104f54deba8582d0f87", # TODO: change to released version - "bioimageio.spec @ git+https://github.com/bioimage-io/spec-bioimage-io@b9714ba8d8214c67e9377a8cd24f6ba65ed1b981", # TODO: change to released version + "bioimageio.core @ git+https://github.com/bioimage-io/core-bioimage-io-python@569666b426cb089503f2ee3bb5651e124d8740e8", # TODO: change to released version + "bioimageio.spec @ git+https://github.com/bioimage-io/spec-bioimage-io@06e6b0f77c696e7c5192fa1340482f97b2df98fc", # TODO: change to released version "fire", "loguru", "minio==7.2.4",