diff --git a/.github/scripts/s3_client.py b/.github/scripts/s3_client.py
index 0b37751..27ea444 100644
--- a/.github/scripts/s3_client.py
+++ b/.github/scripts/s3_client.py
@@ -4,11 +4,12 @@
 from dataclasses import dataclass, field
 from datetime import timedelta
 from pathlib import Path
-from typing import Iterator
+from typing import Iterator, Optional

 # import requests  # type: ignore
 from loguru import logger  # type: ignore
 from minio import Minio  # type: ignore
+import minio.error


 @dataclass
@@ -38,12 +39,12 @@ def __post_init__(self):
             raise Exception("target bucket does not exist: {self.bucket}")
         logger.debug("Created S3-Client: {}", self)

-    def bucket_exists(self, bucket):
+    def bucket_exists(self, bucket) -> bool:
         return self._client.bucket_exists(bucket)

     def put(
         self, path, file_object, length=-1, content_type="application/octet-stream"
-    ):
+    ) -> None:
         # For unknown length (ie without reading file into mem) give `part_size`
         part_size = 0
         if length == -1:
@@ -102,7 +103,7 @@ def ls(self, path, only_folders=False, only_files=False) -> Iterator[str]:
                 continue
             yield Path(obj.object_name).name

-    def load_file(self, path) -> str:
+    def load_file(self, path) -> bytes:
         """Load file from S3"""
         path = f"{self.prefix}/{path}"
         try:
@@ -166,11 +167,11 @@ def get_status(self, resource_path: str, version: str) -> dict:
         logger.debug("resource_path: {}, version: {}", resource_path, version)
         status_path = f"{version_path}/status.json"
         logger.debug("Getting status using path {}", status_path)
-        status = self.load_file(status_path)
-        status = json.loads(status)
+        status_str = self.load_file(status_path)
+        status = json.loads(status_str)
         return status

-    def put_status(self, resource_path: str, version: str, status: dict):
+    def put_status(self, resource_path: str, version: str, status: dict) -> None:
         logger.debug(
             "Updating status for {}-{}, with {}", resource_path, version, status
         )
@@ -189,11 +190,15 @@ def get_log(self, resource_path: str, version: str) -> dict:
         logger.debug("resource_path: {}, version: {}", resource_path, version)
         path = f"{version_path}/log.json"
         logger.debug("Getting log using path {}", path)
-        log = self.load_file(path)
-        log = json.loads(log)
+        try:
+            log_str = self.load_file(path)
+            log = json.loads(log_str)
+        except minio.error.S3Error:
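+            # A missing log file just means there are no log entries yet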
+            log = {}
         return log

-    def put_log(self, resource_path: str, version: str, log: dict):
+    def put_log(self, resource_path: str, version: str, log: dict) -> None:
         logger.debug("Updating log for {}-{}, with {}", resource_path, version, log)
         contents = json.dumps(log).encode()
         file_object = io.BytesIO(contents)
@@ -205,6 +209,11 @@
             content_type="application/json",
         )

+    def get_url_for_file(self, resource_path: str, filename: str, version: Optional[str] = None) -> str:
+        if version is None:
+            resource_path, version = version_from_resource_path_or_s3(resource_path, self)
+        return f"https://{self.host}/{self.bucket}/{self.prefix}/{resource_path}/{version}/files/{filename}"
+

 def create_client() -> Client:
     """
@@ -230,3 +239,21 @@ def create_client() -> Client:
         secret_key=secret_access_key,
     )
     return client
+
+
+def version_from_resource_path_or_s3(resource_path, client: Optional[Client] = None) -> tuple[str, str]:
+    """
+    Extract the version from resource_path if present;
+    otherwise try to determine it from the model folder on S3.
+    """
+    parts = resource_path.split("/")
+    if len(parts) == 2:
+        resource_path = parts[0]
+        version = parts[1]
+        logger.info("Version: {}", version)
+    else:
+        if client is None:
+            client = create_client()
+        version = client.get_unpublished_version(resource_path)
+        logger.info("Version detected: {}", version)
+    return resource_path, version
diff --git a/.github/scripts/update_log.py b/.github/scripts/update_log.py
index 12e95c4..66f6cb0 100644
--- a/.github/scripts/update_log.py
+++ b/.github/scripts/update_log.py
@@ -3,15 +3,14 @@
 from typing import Optional

 from loguru import logger
-from s3_client import create_client
+from s3_client import create_client, version_from_resource_path_or_s3


 def create_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
-    parser.add_argument("resource_path", help="Model name")
+    parser.add_argument("resource_path", help="Resource name")
     parser.add_argument("category", help="Log category")
     parser.add_argument("summary", help="Log summary")
-    parser.add_argument("--version", help="Version")
     return parser
@@ -28,11 +27,10 @@ def main():
     resource_path = args.resource_path
     category = args.category
     summary = args.summary
-    version = args.version
-    add_log_entry(resource_path, category, summary, version=version)
+    add_log_entry(resource_path, category, summary)


-def add_log_entry(resource_path, category, summary, version=None):
+def add_log_entry(resource_path, category, summary):
     timenow = datetime.datetime.now().isoformat()
     client = create_client()
     logger.info(
@@ -42,11 +40,8 @@ def add_log_entry(resource_path, category, summary):
         summary,
     )

-    if version is None:
-        version = client.get_unpublished_version(resource_path)
-        logger.info("Version detected: {}", version)
-    else:
-        logger.info("Version requested: {}", version)
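+    # A "name/version" path resolves directly; a bare name asks S3 for the unpublished version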
+    resource_path, version = version_from_resource_path_or_s3(resource_path)

     log = client.get_log(resource_path, version)
     if category not in log:
diff --git a/.github/scripts/update_status.py b/.github/scripts/update_status.py
index ad1c2d8..9b06bcd 100644
--- a/.github/scripts/update_status.py
+++ b/.github/scripts/update_status.py
@@ -3,14 +3,12 @@
 from typing import Optional

 from loguru import logger
-from s3_client import create_client
-
+from s3_client import create_client, version_from_resource_path_or_s3

 def create_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
     parser.add_argument("resource_path", help="Model name")
     parser.add_argument("status", help="Status")
-    parser.add_argument("--version", help="Version")
     parser.add_argument("--step", help="Step", default=0, type=int)
     parser.add_argument("--num_steps", help="Status", default=0, type=int)
     return parser
@@ -27,14 +25,13 @@ def get_args(argv: Optional[list] = None):
 def main():
     args = get_args()
     resource_path = args.resource_path
-    version = args.version
     step = args.step
     num_steps = args.num_steps
     status = args.status
-    update_status(resource_path, status, version=version, step=step, num_steps=num_steps)
+    update_status(resource_path, status, step=step, num_steps=num_steps)


-def update_status(resource_path: str, status_text: str, version: Optional[str] = None, step: Optional[int], num_steps: int = 6):
+def update_status(resource_path: str, status_text: str, step: Optional[int] = None, num_steps: int = 6):
     assert step is None or step <= num_steps
     timenow = datetime.datetime.now().isoformat()
     client = create_client()
@@ -46,11 +43,7 @@ def update_status(resource_path: str, status_text: str, version: Optional[str]
         num_steps,
     )

-    if version is None:
-        version = client.get_unpublished_version(resource_path)
-        logger.info("Version detected: {}", version)
-    else:
-        logger.info("Version requested: {}", version)
+    resource_path, version = version_from_resource_path_or_s3(resource_path, client)

     status = client.get_status(resource_path, version)
     if "messages" not in status:
diff --git a/.github/scripts/update_status.sh b/.github/scripts/update_status.sh
deleted file mode 100644
index dcc8505..0000000
--- a/.github/scripts/update_status.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/sh
-# Updated to use S3 creds:
-# S3_HOST
-# S3_BUCKET
-# S3_FOLDER
-# S3_ACCESS_KEY_ID
-# S3_SECRET_ACCESS_KEY
-# First arg is now resource_path
-
-FILENAME=status.json
-
-resource_path=$1
-STATUS=$2
-
-if [ -z "$resource_path" ]; then
-    printf '%s\n' "resource_path is unset or empty" >&2;
-    exit 1
-fi
-if [ -z "$S3_HOST" ]; then
-    printf '%s\n' "S3_HOST is unset or empty" >&2;
-    exit 1
-fi
-if [ -z "$S3_BUCKET" ]; then
-    printf '%s\n' "S3_BUCKET is unset or empty" >&2;
-    exit 1
-fi
-if [ -z "$S3_FOLDER" ]; then
-    printf '%s\n' "S3_FOLDER is unset or empty" >&2;
-    exit 1
-fi
-if [ -z "$S3_ACCESS_KEY_ID" ]; then
-    printf '%s\n' "S3_ACCESS_KEY_ID is unset or empty" >&2;
-    exit 1
-fi
-if [ -z "$S3_SECRET_ACCESS_KEY" ]; then
-    printf '%s\n' "S3_SECRET_ACCESS_KEY is unset or empty" >&2;
-    exit 1
-fi
-
-
-#curl -X PUT -H 'Content-Type: application/json' -d '{"status": "'"$2"'"}' "$1"
-
-RESOURCE="/${S3_BUCKET}/${S3_FOLDER}/${resource_path}/${FILENAME}"
-CONTENT_TYPE="application/json"
-DATE=`date -R`
-_SIGNATURE="PUT\n\n${CONTENT_TYPE}\n${DATE}\n${RESOURCE}"
-SIGNATURE=`echo -en ${_SIGNATURE} | openssl sha1 -hmac ${S3_SECRET_ACCESS_KEY} -binary | base64`
-
-curl -X PUT -d '{"status": "'"$STATUS"'"}' \
-    -H "Host: ${S3_HOST}" \
-    -H "Date: ${DATE}" \
-    -H "Content-Type: ${CONTENT_TYPE}" \
-    -H "Authorization: AWS ${S3_ACCESS_KEY_ID}:${SIGNATURE}" \
-    https://${S3_HOST}${RESOURCE}
-
diff --git a/.github/scripts/upload_model_to_zenodo.py b/.github/scripts/upload_model_to_zenodo.py
index aa0ddaf..ce1b24d 100644
--- a/.github/scripts/upload_model_to_zenodo.py
+++ b/.github/scripts/upload_model_to_zenodo.py
@@ -14,7 +14,7 @@
 from loguru import logger  # type: ignore
 from packaging.version import parse as parse_version
 from ruyaml import YAML  # type: ignore
-from s3_client import create_client
+from s3_client import create_client, version_from_resource_path_or_s3
 from update_status import update_status

 yaml = YAML(typ="safe")
@@ -57,8 +57,7 @@ def assert_good_response(response, message, info=None):
 def create_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
-    parser.add_argument("--resource_path", help="Model name", required=True)
-    parser.add_argument("--version", help="Version", nargs="?", default=None)
+    parser.add_argument("resource_path", help="Resource path")
     return parser
@@ -76,12 +75,9 @@ def main():
     params = {"access_token": ACCESS_TOKEN}

     client = create_client()
+    resource_path, version = version_from_resource_path_or_s3(args.resource_path, client)

-    # TODO: GET THE CURRENT VERSION
-    if args.version is None:
-        version = client.get_unpublished_version(args.resource_path)
-
-    s3_path = f"{args.resource_path}/{version}/files"
+    s3_path = f"{resource_path}/{version}/files"

     # List the files at the model URL
     file_urls = client.get_file_urls(path=s3_path)
diff --git a/.github/scripts/validate_format.py b/.github/scripts/validate_format.py
index 4eb7b80..b5c0333 100644
--- a/.github/scripts/validate_format.py
+++ b/.github/scripts/validate_format.py
@@ -6,22 +6,23 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union

-import requests
-import typer
-from bioimageio.spec import load_raw_resource_description, validate
-from bioimageio.spec.model.raw_nodes import Model, WeightsFormat
-from bioimageio.spec.rdf.raw_nodes import RDF_Base
-from bioimageio.spec.shared import yaml
-from bioimageio.spec.shared.raw_nodes import URI, Dependencies
+import requests  # type: ignore
+import typer  # type: ignore
+from bioimageio.spec import load_raw_resource_description, validate  # type: ignore
+from bioimageio.spec.model.raw_nodes import Model, WeightsFormat  # type: ignore
+from bioimageio.spec.rdf.raw_nodes import RDF_Base  # type: ignore
+from bioimageio.spec.shared import yaml  # type: ignore
+from bioimageio.spec.shared.raw_nodes import URI, Dependencies  # type: ignore
 from marshmallow import missing
 from marshmallow.utils import _Missing
 from packaging.version import Version
-from tqdm import tqdm
+from tqdm import tqdm  # type: ignore
+from update_log import add_log_entry
+from s3_client import create_client, version_from_resource_path_or_s3

 tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)  # silence tqdm

-from update_log import add_log_entry


 def set_multiple_gh_actions_outputs(outputs: Dict[str, Union[str, Any]]):
@@ -250,7 +251,13 @@ def prepare_dynamic_test_cases(descr_id: str, rd: RDF_Base) -> List[Dict[str, st
     return validation_cases


-def validate_format(descr_id: str, source: str):
+def validate_format(descr_id: str):
+
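+    # The rdf.yaml location is now derived from the resource path via S3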
+    client = create_client()
+    resource_path, version = version_from_resource_path_or_s3(descr_id, client)
+    source = client.get_url_for_file(resource_path, "rdf.yaml", version=version)
+
     dynamic_test_cases: List[Dict[str, str]] = []
     summaries = [validate(source)]
diff --git a/.github/workflows/ci_runner.yaml b/.github/workflows/ci_runner.yaml
index 6a768dd..479d526 100644
--- a/.github/workflows/ci_runner.yaml
+++ b/.github/workflows/ci_runner.yaml
@@ -45,7 +45,7 @@ jobs:
         id: validate
         run: |
           python .github/scripts/update_status.py "${{ inputs.resource_path }}" "Starting validation" "2"
-          python .github/scripts/validate_format.py "${{ inputs.resource_path }}" "${{env.S3_HOST}}/${{env.S3_BUCKET}}/${{env.S3_FOLDER}}/${{inputs.resource_path}}/files/rdf.yaml"
+          python .github/scripts/validate_format.py "${{ inputs.resource_path }}"
       - run: |
           python .github/scripts/update_status.py "${{ inputs.resource_path }}" "Starting additional tests" "3"
         if: steps.validate.outputs.has_dynamic_test_cases == 'yes'
@@ -80,7 +80,7 @@
         run: pip install typer bioimageio.spec
       - name: dynamic validation
         shell: bash -l {0}
-        run: python scripts/test_dynamically.py "${{env.S3_HOST}}/${{env.S3_BUCKET}}/${{env.S3_FOLDER}}/${{inputs.resource_path}}/files/rdf.yaml" ${{ matrix.weight_format }} --create-env-outcome ${{ steps.create_env.outcome }} --${{ contains(inputs.deploy_to, 'gh-pages') && 'no-ignore' || 'ignore' }}-rdf-source-field-in-validation
+        run: python scripts/test_dynamically.py "https://${{env.S3_HOST}}/${{env.S3_BUCKET}}/${{env.S3_FOLDER}}/${{inputs.resource_path}}/files/rdf.yaml" ${{ matrix.weight_format }} --create-env-outcome ${{ steps.create_env.outcome }} --${{ contains(inputs.deploy_to, 'gh-pages') && 'no-ignore' || 'ignore' }}-rdf-source-field-in-validation
         timeout-minutes: 60

 conclude:
diff --git a/.gitignore b/.gitignore
index 6f9de21..01e5200 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,4 @@ dist/
 coverage/
 tests/__snapshots__/
 functions_off/
-.local/
+.local/.env
diff --git a/.local/test_logging.sh b/.local/test_logging.sh
new file mode 100644
index 0000000..eaa2db1
--- /dev/null
+++ b/.local/test_logging.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+SCRIPT_DIR="$( dirname -- "$( readlink -f -- "$0"; )"; )"
+
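+# .env (kept out of git) must provide S3_HOST, S3_BUCKET, S3_FOLDER, S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY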
+set -o allexport
+. "$SCRIPT_DIR/.env"
+set +o allexport
+
+DATE=$( date )
+resource_name="willing-pig"
+echo "Testing update_log on $resource_name"
+python "$SCRIPT_DIR/../.github/scripts/update_log.py" "$resource_name" "ci_testing" "Adding a log at $DATE"
+
+
diff --git a/.local/test_status_update.sh b/.local/test_status_update.sh
new file mode 100644
index 0000000..8bbfa53
--- /dev/null
+++ b/.local/test_status_update.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+SCRIPT_DIR="$( dirname -- "$( readlink -f -- "$0"; )"; )"
+
+set -o allexport
+. "$SCRIPT_DIR/.env"
+set +o allexport
+
+DATE=$( date )
+resource_name="willing-pig"
+echo "Testing update_status on $resource_name"
+python "$SCRIPT_DIR/../.github/scripts/update_status.py" "$resource_name" "Setting random status from testing at $DATE"
+
+
diff --git a/.local/test_validate_format.sh b/.local/test_validate_format.sh
new file mode 100644
index 0000000..978c4d0
--- /dev/null
+++ b/.local/test_validate_format.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+SCRIPT_DIR="$( dirname -- "$( readlink -f -- "$0"; )"; )"
+
+set -o allexport
+. "$SCRIPT_DIR/.env"
+set +o allexport
+
+DATE=$( date )
+resource_path="willing-pig"
+echo "Testing validate_format on $resource_path"
+python "$SCRIPT_DIR/../.github/scripts/validate_format.py" "$resource_path"
+
+
diff --git a/.local/test_zenodo_upload.sh b/.local/test_zenodo_upload.sh
new file mode 100644
index 0000000..d082b97
--- /dev/null
+++ b/.local/test_zenodo_upload.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+SCRIPT_DIR="$( dirname -- "$( readlink -f -- "$0"; )"; )"
+
+set -o allexport
+. "$SCRIPT_DIR/.env"
+set +o allexport
+
+python "$SCRIPT_DIR/../.github/scripts/upload_model_to_zenodo.py" "willing-pig"