diff --git a/.github/workflows/build_backoffice.yaml b/.github/workflows/build_backoffice.yaml index 0fd3e379..610e1340 100644 --- a/.github/workflows/build_backoffice.yaml +++ b/.github/workflows/build_backoffice.yaml @@ -69,6 +69,17 @@ jobs: S3_FOLDER: ${{vars.S3_TEST_FOLDER}}/ci # testing! secrets: inherit + + test-generate-collection-staged-json: + needs: test-publish + uses: bioimage-io/collection/.github/workflows/generate_collection_json_call.yaml@main + with: + mode: 'staged' + S3_HOST: ${{vars.S3_HOST}} + S3_BUCKET: ${{vars.S3_TEST_BUCKET}} # testing! + S3_FOLDER: ${{vars.S3_TEST_FOLDER}}/ci # testing! + secrets: inherit + test-backup: needs: test-generate-collection-json uses: bioimage-io/collection/.github/workflows/backup_call.yaml@main diff --git a/.github/workflows/generate_collection_json.yaml b/.github/workflows/generate_collection_json.yaml index 0d517ea2..a8fd11ee 100644 --- a/.github/workflows/generate_collection_json.yaml +++ b/.github/workflows/generate_collection_json.yaml @@ -3,6 +3,12 @@ run-name: generate ${{vars.S3_BUCKET}}/${{vars.S3_FOLDER}}/collection.json on: workflow_dispatch: + inputs: + mode: + description: "determines which collection file to generate: 'published' (default) or 'staged'" + required: false + default: published + type: string push: branches: [main] @@ -19,6 +25,7 @@ jobs: call: uses: bioimage-io/collection/.github/workflows/generate_collection_json_call.yaml@main with: + mode: ${{inputs == null && 'published' || inputs.mode}} S3_HOST: ${{vars.S3_HOST}} S3_BUCKET: ${{vars.S3_BUCKET}} S3_FOLDER: ${{vars.S3_FOLDER}} diff --git a/.github/workflows/generate_collection_json_call.yaml b/.github/workflows/generate_collection_json_call.yaml index cefb4eab..3108a816 100644 --- a/.github/workflows/generate_collection_json_call.yaml +++ b/.github/workflows/generate_collection_json_call.yaml @@ -3,6 +3,11 @@ name: generate collection.json call on: workflow_call: inputs: + mode: + description: "determines which collection file to generate: 'published' (default) or 'staged'" + required: false + default: published + type: string S3_HOST: required: true type: string @@ -33,4 +38,4 @@ jobs: python-version: "3.12" cache: "pip" # caching pip dependencies - run: pip install . - - run: backoffice generate_collection_json + - run: backoffice generate_collection_json --mode ${{inputs.mode}} diff --git a/bioimageio_collection_backoffice/_backoffice.py b/bioimageio_collection_backoffice/_backoffice.py index fe74a946..a5a0423d 100644 --- a/bioimageio_collection_backoffice/_backoffice.py +++ b/bioimageio_collection_backoffice/_backoffice.py @@ -50,6 +50,7 @@ def stage(self, resource_id: str, package_url: str): resource = ResourceConcept(self.client, resource_id) staged = resource.stage_new_version(package_url) set_gh_actions_outputs(version=staged.version) + self.generate_collection_json(mode="staged") def validate_format(self, resource_id: str, version: str): """validate a resource version's bioimageio.yaml""" @@ -131,7 +132,7 @@ def publish(self, resource_id: str, version: str, reviewer: str): rv.lock_publish() published: PublishedVersion = rv.publish(reviewer) assert isinstance(published, PublishedVersion) - self.generate_collection_json() + self.generate_collection_json(mode="published") notify_uploader( rv, "was published! 🎉", @@ -144,12 +145,18 @@ def publish(self, resource_id: str, version: str, reviewer: str): def backup(self, destination: ZenodoHost): """backup the whole collection (to zenodo.org)""" _ = backup(self.client, destination) + self.generate_collection_json(mode="published") + self.generate_collection_json(mode="staged") def generate_collection_json( - self, collection_template: Path = Path("collection_template.json") + self, + collection_template: Path = Path("collection_template.json"), + mode: Literal["published", "staged"] = "published", ): """generate the collection.json file --- a summary of the whole collection""" - generate_collection_json(self.client, collection_template=collection_template) + generate_collection_json( + self.client, collection_template=collection_template, mode=mode + ) def forward_emails_to_chat(self): logger.error("disabled") diff --git a/bioimageio_collection_backoffice/generate_collection_json.py b/bioimageio_collection_backoffice/generate_collection_json.py index 8db07df8..a6e2f978 100644 --- a/bioimageio_collection_backoffice/generate_collection_json.py +++ b/bioimageio_collection_backoffice/generate_collection_json.py @@ -11,52 +11,59 @@ from bioimageio.spec.utils import download from loguru import logger from ruyaml import YAML +from typing_extensions import Literal, assert_never from .remote_collection import RemoteCollection -from .remote_resource import PublishedVersion +from .remote_resource import PublishedVersion, StagedVersion from .s3_client import Client yaml = YAML(typ="safe") -COLLECTION_JSON_S3_PATH = "collection.json" - def generate_collection_json( client: Client, collection_template: Path = Path("collection_template.json"), + mode: Literal["published", "staged"] = "published", ) -> None: """generate a json file with an overview of all published resources""" - logger.info("generating {}", COLLECTION_JSON_S3_PATH) + output_file_name: str = ( + "collection.json" if mode == "published" else f"collection_{mode}.json" + ) + logger.info("generating {}", output_file_name) remote_collection = RemoteCollection(client=client) with collection_template.open() as f: collection = json.load(f) collection["config"]["url_root"] = client.get_file_url("").strip("/") - for p in remote_collection.get_all_published_versions(): - collection["collection"].append(create_entry(p)) - + if mode == "published": + for rv in remote_collection.get_all_published_versions(): + collection["collection"].append(create_entry(rv)) + elif mode == "staged": + for rv in remote_collection.get_all_staged_versions(): + collection["collection"].append(create_entry(rv)) + else: + assert_never(mode) coll_descr = build_description( collection, context=ValidationContext(perform_io_checks=False) ) if not isinstance(coll_descr, CollectionDescr): logger.error(coll_descr.validation_summary.format()) - client.put_json(COLLECTION_JSON_S3_PATH, collection) + client.put_json(output_file_name, collection) def create_entry( - p: PublishedVersion, + rv: Union[PublishedVersion, StagedVersion], ) -> Dict[str, Any]: with ValidationContext(perform_io_checks=False): - rdf_url = HttpUrl(p.rdf_url) + rdf_url = HttpUrl(rv.rdf_url) rdf_path = download(rdf_url).path rdf = yaml.load(rdf_path) entry = { - k: rdf[k] + k: rdf.get(k, f"unknown {k}") for k in ( - "authors", "description", "id_emoji", "id", @@ -65,6 +72,8 @@ def create_entry( "type", ) } + entry["authors"] = rdf.get("authors", []) + try: thumbnails = rdf["config"]["bioimageio"]["thumbnails"] except KeyError: @@ -85,7 +94,7 @@ def maybe_swap_with_thumbnail( if isinstance(src, str): clean_name = Path(src).name # remove any leading './' if clean_name in thumbnails: - return p.get_file_url(thumbnails[clean_name]) + return rv.get_file_url(thumbnails[clean_name]) else: return src @@ -101,14 +110,16 @@ def maybe_swap_with_thumbnail( if "icon" in rdf: entry["icon"] = maybe_swap_with_thumbnail(rdf["icon"]) - entry["created"] = p.info.timestamp.isoformat() + entry["created"] = rv.info.timestamp.isoformat() entry["download_count"] = "?" entry["nickname"] = entry["id"] entry["nickname_icon"] = entry["id_emoji"] - entry["entry_source"] = p.rdf_url + entry["entry_source"] = rv.rdf_url entry["entry_sha256"] = get_sha256(rdf_path) entry["rdf_source"] = entry["entry_source"] - entry["version_number"] = p.number - entry["versions"] = list(p.concept.versions.published) - entry["staged_versions"] = [f"staged/{s}" for s in p.concept.versions.staged] + entry["version_number"] = rv.number + entry["versions"] = list(rv.concept.versions.published) + entry["staged_versions"] = [f"staged/{s}" for s in rv.concept.versions.staged] + entry["doi"] = rv.doi if isinstance(rv, PublishedVersion) else None + entry["concept_doi"] = rv.concept.doi return entry