Merge pull request #39 from bioimage-io/add_validation
Add validation in CI
FynnBe authored Feb 7, 2024
2 parents e8aa4bc + acedb46 commit 8a35a9d
Showing 9 changed files with 836 additions and 189 deletions.
243 changes: 243 additions & 0 deletions .github/scripts/s3_client.py
@@ -0,0 +1,243 @@
import os
import io
from pathlib import Path
from dataclasses import dataclass, field
from datetime import timedelta
from typing import Iterator
import json

from minio import Minio # type: ignore
# import requests # type: ignore
from loguru import logger # type: ignore


@dataclass
class VersionStatus:
    version: str
    status: str
    path: str


@dataclass
class Client:
    host: str
    bucket: str
    prefix: str
    access_key: str = field(repr=False)
    secret_key: str = field(repr=False)
    _client: Minio = field(init=False, repr=False)

    def __post_init__(self):
        self._client = Minio(
            self.host,
            access_key=self.access_key,
            secret_key=self.secret_key,
        )
        found = self.bucket_exists(self.bucket)
        if not found:
            raise Exception(f"target bucket does not exist: {self.bucket}")
        logger.debug("Created S3-Client: {}", self)

    def bucket_exists(self, bucket):
        return self._client.bucket_exists(bucket)

    def put(
        self,
        path,
        file_object,
        length=-1,
        content_type="application/octet-stream",
    ):
        # For unknown length (i.e. without reading the file into memory),
        # minio requires a non-zero `part_size`
        part_size = 0
        if length == -1:
            part_size = 10 * 1024 * 1024
        path = f"{self.prefix}/{path}"
        self._client.put_object(
            self.bucket,
            path,
            file_object,
            length=length,
            part_size=part_size,
            content_type=content_type,
        )

    def get_file_urls(
        self,
        path="",
        exclude_files=("status.json",),
        lifetime=timedelta(hours=1),
    ) -> list[str]:
        """Checks an S3 'folder' for its list of files"""
        logger.debug("Getting file list using {}, at {}", self, path)
        path = f"{self.prefix}/{path}"
        objects = self._client.list_objects(
            self.bucket,
            prefix=path,
            recursive=True,
        )
        file_urls: list[str] = []
        for obj in objects:
            if obj.is_dir:
                continue
            filename = Path(obj.object_name).name
            if filename in exclude_files:
                continue
            # Option 1:
            url = self._client.get_presigned_url(
                "GET",
                obj.bucket_name,
                obj.object_name,
                expires=lifetime,
            )
            file_urls.append(url)
            # Option 2: work with minio.datatypes.Object directly
        return file_urls


    def ls(self, path, only_folders=False, only_files=False) -> Iterator[str]:
        """
        List folder contents, non-recursive, a la `ls`,
        but without "." or ".."
        """
        # path = str(Path(self.prefix, path))
        path = f"{self.prefix}/{path}"
        logger.debug("Running ls at path: {}", path)
        objects = self._client.list_objects(
            self.bucket,
            prefix=path,
            recursive=False,
        )
        for obj in objects:
            if only_files and obj.is_dir:
                continue
            if only_folders and not obj.is_dir:
                continue
            yield Path(obj.object_name).name


    def load_file(self, path):
        """Load file from S3"""
        path = f"{self.prefix}/{path}"
        try:
            response = self._client.get_object(self.bucket, path)
            content = response.read()
        except Exception:
            logger.critical("Failed to get object at path {}", path)
            logger.critical("Using client: {}", self)
            raise
        try:
            response.close()
            response.release_conn()
        except Exception:
            pass
        return content

        # Alternative: fetch via a presigned URL
        # url = self._client.get_presigned_url(
        #     "GET",
        #     self.bucket,
        #     str(Path(self.prefix, path)),
        #     expires=timedelta(minutes=10),
        # )
        # response = requests.get(url)
        # return response.content

    def check_versions(self, model_name: str) -> Iterator[VersionStatus]:
        """
        Check the model repository for versions of model_name.
        Yields a VersionStatus for each version folder found.
        """
        logger.debug("Checking versions for {}", model_name)
        version_folders = self.ls(f"{model_name}/", only_folders=True)

        # For each version folder, read the contents of its status.json
        for version in version_folders:
            yield self.get_version_status(model_name, version)

    def get_unpublished_version(self, model_name: str) -> str:
        """Get the unpublished version"""
        versions = list(self.check_versions(model_name))
        if len(versions) == 0:
            return "1"
        unpublished = [version for version in versions if version.status == "staging"]
        if len(unpublished) == 0:
            # Only published versions exist, start a new one
            return f"{len(versions) + 1}"
        if len(unpublished) > 1:
            raise ValueError("Oops! We seem to have more than one staging version!")
        return unpublished[0].version

    def get_version_status(self, model_name: str, version: str) -> VersionStatus:
        status = self.get_status(model_name, version)
        status_str = status.get("status", "status-field-unset")
        version_path = f"{model_name}/{version}"
        return VersionStatus(version, status_str, version_path)

    def get_status(self, model_name: str, version: str) -> dict:
        version_path = f"{model_name}/{version}"
        logger.debug("model_name: {}, version: {}", model_name, version)
        status_path = f"{version_path}/status.json"
        logger.debug("Getting status using path {}", status_path)
        status = self.load_file(status_path)
        status = json.loads(status)
        return status

    def put_status(self, model_name: str, version: str, status: dict):
        logger.debug("Updating status for {}-{}, with {}", model_name, version, status)
        contents = json.dumps(status).encode()
        file_object = io.BytesIO(contents)

        self.put(
            f"{model_name}/{version}/status.json",
            file_object,
            length=len(contents),
            content_type="application/json",
        )

    def get_log(self, model_name: str, version: str) -> dict:
        version_path = f"{model_name}/{version}"
        logger.debug("model_name: {}, version: {}", model_name, version)
        path = f"{version_path}/log.json"
        logger.debug("Getting log using path {}", path)
        log = self.load_file(path)
        log = json.loads(log)
        return log

    def put_log(self, model_name: str, version: str, log: dict):
        logger.debug("Updating log for {}-{}, with {}", model_name, version, log)
        contents = json.dumps(log).encode()
        file_object = io.BytesIO(contents)

        self.put(
            f"{model_name}/{version}/log.json",
            file_object,
            length=len(contents),
            content_type="application/json",
        )


def create_client() -> Client:
    """
    Creates a Minio client using env settings
    """
    host = os.getenv("S3_HOST")
    root_folder = os.getenv("S3_FOLDER")
    bucket = os.getenv("S3_BUCKET")
    access_key_id = os.getenv("S3_ACCESS_KEY_ID")
    secret_access_key = os.getenv("S3_SECRET_ACCESS_KEY")

    assert isinstance(host, str)
    assert isinstance(bucket, str)
    assert isinstance(root_folder, str)
    assert isinstance(access_key_id, str)
    assert isinstance(secret_access_key, str)

    client = Client(
        host=host,
        bucket=bucket,
        prefix=root_folder,
        access_key=access_key_id,
        secret_key=secret_access_key,
    )
    return client
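
For orientation, a minimal usage sketch of the client defined above, assuming the S3_* environment variables are set as CI would set them. The host, bucket, and model names below are placeholders, not values from this commit:

# Hypothetical sketch, not part of this commit.
import os

os.environ.setdefault("S3_HOST", "minio.example.org")      # placeholder
os.environ.setdefault("S3_BUCKET", "example-bucket")       # placeholder
os.environ.setdefault("S3_FOLDER", "model-repo")           # placeholder prefix
os.environ.setdefault("S3_ACCESS_KEY_ID", "...")           # injected via CI secrets
os.environ.setdefault("S3_SECRET_ACCESS_KEY", "...")       # injected via CI secrets

from s3_client import create_client

client = create_client()
version = client.get_unpublished_version("example-model")  # "1" for a brand-new model
client.put_status("example-model", version, {"status": "staging"})
print(client.get_status("example-model", version))         # -> {'status': 'staging'}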


76 changes: 76 additions & 0 deletions .github/scripts/test_dynamically.py
@@ -0,0 +1,76 @@
import traceback
from functools import partialmethod
from pathlib import Path
from typing import Optional

import typer
from bioimageio.spec import load_raw_resource_description
from bioimageio.spec.shared import yaml
from tqdm import tqdm
from update_log import add_log_entry

tqdm.__init__ = partialmethod(tqdm.__init__, disable=True) # silence tqdm


def test_summary_from_exception(name: str, exception: Exception):
    return dict(
        name=name,
        status="failed",
        error=str(exception),
        traceback=traceback.format_tb(exception.__traceback__),
    )


def test_dynamically(
    descr_id: str,
    source: str,
    weight_format: Optional[str] = typer.Argument(
        ..., help="weight format to test model with."
    ),
    create_env_outcome: str = "success",
):
    if weight_format is None:
        # no dynamic tests for non-model resources yet...
        return

    if create_env_outcome == "success":
        try:
            from bioimageio.core.resource_tests import test_resource
        except Exception as e:
            summaries = [
                test_summary_from_exception(
                    "import test_resource from test environment", e
                )
            ]
        else:
            try:
                rdf = yaml.load(source)
                test_kwargs = (
                    rdf.get("config", {})
                    .get("bioimageio", {})
                    .get("test_kwargs", {})
                    .get(weight_format, {})
                )
            except Exception as e:
                summaries = [test_summary_from_exception("check for test kwargs", e)]
            else:
                try:
                    rd = load_raw_resource_description(source)
                    summaries = test_resource(
                        rd, weight_format=weight_format, **test_kwargs
                    )
                except Exception as e:
                    summaries = [test_summary_from_exception("call 'test_resource'", e)]

    else:
        env_path = Path(f"conda_env_{weight_format}.yaml")
        if env_path.exists():
            error = "Failed to install conda environment:\n" + env_path.read_text()
        else:
            error = f"Conda environment yaml file not found: {env_path}"

        summaries = [dict(name="install test environment", status="failed", error=error)]

    add_log_entry(descr_id, "validation_summaries", summaries)


if __name__ == "__main__":
    typer.run(test_dynamically)
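
The script is a typer CLI, so CI would invoke it with positional arguments. A rough sketch with placeholder values (the resource id, URL, and weight format are illustrative, not taken from this commit):

# Hypothetical invocation sketch, not part of this commit:
#   python .github/scripts/test_dynamically.py <descr_id> <source> <weight_format>
# Equivalent direct call with placeholder values:
from test_dynamically import test_dynamically

test_dynamically(
    descr_id="example-model",               # placeholder resource id
    source="https://example.org/rdf.yaml",  # placeholder RDF location
    weight_format="torchscript",            # one weight format per CI job
    create_env_outcome="success",           # anything else records an env-setup failure
)

Note that every outcome except the early return for non-model resources is recorded via add_log_entry, so the validation summary always lands in the resource's log.
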
48 changes: 23 additions & 25 deletions .github/scripts/unzip_model.py
@@ -1,14 +1,13 @@
 import argparse
 import io
 import os
 import traceback
 from typing import Optional
 import urllib.request
 import zipfile

-from minio import Minio  # type: ignore
-
 from update_status import update_status
+from s3_client import create_client


 def create_parser() -> argparse.ArgumentParser:
@@ -38,22 +37,27 @@ def main():
         update_status(model_name, {'status' : err_message})
         raise


 def unzip_from_url(model_name, model_zip_url):
     filename = "model.zip"
-    s3_host = os.getenv("S3_HOST")
-    s3_bucket = os.getenv("S3_BUCKET")
-    s3_root_folder = os.getenv("S3_FOLDER")
-    s3_access_key_id = os.getenv("S3_ACCESS_KEY_ID")
-    s3_secret_access_key = os.getenv("S3_SECRET_ACCESS_KEY")
-
-    client = Minio(
-        s3_host,
-        access_key=s3_access_key_id,
-        secret_key=s3_secret_access_key,
-    )
-    found = client.bucket_exists(s3_bucket)
-    if not found:
-        raise Exception("target bucket does not exist: {s3_bucket}")
+    client = create_client()
+
+    versions = list(client.check_versions(model_name))
+    if len(versions) == 0:
+        version = "1"
+    else:
+        # TODO handle the case where a staging version exists vs
+        # only published versions existing
+        raise NotImplementedError("Updating/publishing a new version is not implemented")
+
+    # Make sure the status of this version is "staging"
+    status = client.get_status(model_name, version)
+    status_str = status.get("status", "missing-status")
+    if status_str != "staging":
+        raise ValueError(
+            f"Model {model_name} at version {version} has status: {status_str}"
+        )

     # Download the model zip file
     remotezip = urllib.request.urlopen(model_zip_url)
@@ -63,17 +67,11 @@ def unzip_from_url(model_name, model_zip_url):
     for filename in zipobj.namelist():
         # file_object = io.BytesIO(zipobj)
         file_object = zipobj.open(filename)
-        s3_path = f"{s3_root_folder}/{model_name}/{filename}"
+        path = f"{model_name}/{version}/{filename}"

-        # For unknown length (ie without reading file into mem) give `part_size`
-        client.put_object(
-            s3_bucket,
-            s3_path,
+        client.put(
+            path,
             file_object,
             length=-1,
-            part_size=10*1024*1024,
-            # length=len(status_message),
-            # content_type="application/json",
         )


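Finally, a brief sketch of the version bookkeeping that unzip_from_url now delegates to the shared client. The model name and version below are placeholders, not part of this commit:

# Hypothetical sketch, not part of this commit.
from s3_client import create_client

client = create_client()

# check_versions yields one VersionStatus(version, status, path) per version folder
for vs in client.check_versions("example-model"):
    print(vs.version, vs.status, vs.path)

# Uploads only proceed while a version's status.json reports "staging"
status = client.get_status("example-model", "1")
assert status.get("status") == "staging", "refusing to touch a non-staging version"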
