Skip to content

Commit

Permalink
Merge pull request #88 from bioimage-io/fix_nickname_refs
Browse files: browse the repository at this point in the history
allow referencing legacy models by their nickname
  • Loading branch information
FynnBe authored Jul 24, 2024
2 parents 8d09c72 + 870e7d5 commit e0aa433
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 44 deletions.
93 changes: 49 additions & 44 deletions bioimageio_collection_backoffice/remote_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,10 +787,10 @@ def unpack(self, package_url: str):
break

# set matching id_emoji
rdf["id_emoji"] = self.collection.config.id_parts.get_icon(self.id)
rdf["id_emoji"] = self.collection.config.id_parts.get_icon(self.concept_id)
if rdf["id_emoji"] is None:
self.add_log_entry(
LogEntry(message=f"error: Failed to get icon for {self.id}")
LogEntry(message=f"error: Failed to get icon for {self.concept_id}")
)

if "id" not in rdf:
Expand Down Expand Up @@ -1088,54 +1088,45 @@ def create_collection_entries(
if not versions:
return [], {}

# create an explicit entry only for the latest version
# (all versions are referenced under `versions`)
latest_record_version = versions[0]

with ValidationContext(perform_io_checks=False):
rdf_url = HttpUrl(latest_record_version.rdf_url)

root_url = str(rdf_url.parent)
assert root_url == ((root := latest_record_version.get_file_url("").strip("/"))), (
root_url,
root,
)
parsed_root = urlsplit(root_url)
rdf = latest_record_version.get_rdf()
rdf: Optional[Dict[str, Any]] = None
record_version: Optional[Union[Record, RecordDraft]] = None
concept: Optional[str] = None
id_info: Optional[IdInfo] = None

id_map: Dict[str, IdInfo] = {}
version_infos: List[VersionInfo] = []
for record_version in versions:
for record_version in versions[::-1]: # process oldest to newest
rdf_version_data = record_version.client.load_file(record_version.rdf_path)
if rdf_version_data is None:
logger.error("failed to load {}", latest_record_version.rdf_path)
logger.error("failed to load {}", record_version.rdf_path)
continue

id_info = IdInfo(
source=record_version.rdf_url,
sha256=hashlib.sha256(rdf_version_data).hexdigest(),
)
id_map[record_version.id] = id_info

if record_version.concept_doi is not None:
id_map[record_version.concept_doi] = id_info
id_map[record_version.concept_id] = id_info

if record_version.doi is not None:
id_map[record_version.doi] = id_info

rdf_version = record_version.get_rdf()
if (version_id := rdf_version["id"]) is not None and version_id not in id_map:
if record_version.concept_doi is not None:
id_map[record_version.concept_doi] = id_info

rdf = record_version.get_rdf()
if (version_id := rdf["id"]) is not None and version_id not in id_map:
id_map[version_id] = id_info

if rdf_version["id"].startswith("10.5281/zenodo."):
if rdf["id"].startswith("10.5281/zenodo."):
# legacy models
concept_end = rdf_version["id"].rfind("/")
concept = rdf_version["id"][:concept_end]
concept_end = rdf["id"].rfind("/")
concept = rdf["id"][:concept_end]
else:
concept = rdf_version["id"]
concept = rdf["id"]

if concept not in id_map:
id_map[concept] = id_info
assert concept is not None
id_map[concept] = id_info

version_infos.append(
VersionInfo(
Expand Down Expand Up @@ -1175,23 +1166,23 @@ def create_collection_entries(
test_summary, f"{record_version.folder}test_summary.yaml"
)

assert rdf is not None
assert record_version is not None
assert concept is not None
assert id_info is not None

# create an explicit entry only for the latest version
# (all versions are referenced under `versions`)
# upload 'versions.json' summary
if isinstance(latest_record_version, Record):
if isinstance(record_version, Record):
versions_info = VersionsInfo(
concept_doi=latest_record_version.concept_doi, versions=version_infos
concept_doi=record_version.concept_doi, versions=version_infos[::-1]
)
latest_record_version.concept.client.put_json(
f"{latest_record_version.concept.folder}versions.json",
record_version.concept.client.put_json(
f"{record_version.concept.folder}versions.json",
versions_info.model_dump(mode="json"),
)

if rdf["id"].startswith("10.5281/zenodo."):
# legacy models
concept_end = rdf["id"].rfind("/")
concept = rdf["id"][:concept_end]
else:
concept = rdf["id"]

try:
# legacy nickname
nickname = str(rdf["config"]["bioimageio"]["nickname"])
Expand All @@ -1204,14 +1195,17 @@ def create_collection_entries(
if nickname == concept:
nickname = None

if nickname is not None:
id_map[nickname] = id_info

legacy_download_count = LEGACY_DOWNLOAD_COUNTS.get(nickname or concept, 0)

# TODO: read new download count
download_count = "?" if legacy_download_count == 0 else legacy_download_count

# ingest compatibility reports
links = set(rdf.get("links", []))
compat_reports = latest_record_version.get_all_compatibility_reports()
compat_reports = record_version.get_all_compatibility_reports()

for r in compat_reports:
if r.status == "passed":
Expand All @@ -1226,19 +1220,30 @@ def create_collection_entries(
if not isinstance(thumbnails, dict):
thumbnails = {}

# get parsed root
with ValidationContext(perform_io_checks=False):
rdf_url = HttpUrl(record_version.rdf_url)

root_url = str(rdf_url.parent)
assert root_url == ((root := record_version.get_file_url("").strip("/"))), (
root_url,
root,
)
parsed_root = urlsplit(root_url)

return [
CollectionEntry(
authors=rdf.get("authors", []),
badges=resolve_relative_path(
maybe_swap_with_thumbnail(rdf.get("badges", []), thumbnails),
parsed_root,
),
concept_doi=latest_record_version.concept_doi,
concept_doi=record_version.concept_doi,
covers=resolve_relative_path(
maybe_swap_with_thumbnail(rdf.get("covers", []), thumbnails),
parsed_root,
),
created=latest_record_version.info.created,
created=record_version.info.created,
description=rdf["description"],
download_count=download_count,
download_url=rdf["download_url"] if "download_url" in rdf else None,
Expand All @@ -1251,7 +1256,7 @@ def create_collection_entries(
name=rdf["name"],
nickname_icon=nickname_icon,
nickname=nickname,
rdf_source=AnyUrl(latest_record_version.rdf_url),
rdf_source=AnyUrl(record_version.rdf_url),
root_url=root_url,
tags=rdf.get("tags", []),
training_data=rdf["training_data"] if "training_data" in rdf else None,
Expand Down
3 changes: 3 additions & 0 deletions tests/test_collection_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ def test_collection_config():

config = CollectionConfig.load()
assert config

# test id parts
assert "🐼" == config.id_parts.get_icon("philosophical-panda")

0 comments on commit e0aa433

Please sign in to comment.