From 2fb9725c1c870a64d4387633c607c7864f296742 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Tue, 30 Jul 2024 12:21:16 +0000 Subject: [PATCH] add delete dataset --- src/anemoi/registry/commands/datasets.py | 8 +++-- src/anemoi/registry/commands/list.py | 39 +++++++++++++++++++++++- src/anemoi/registry/entry/dataset.py | 19 +++++++++++- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/anemoi/registry/commands/datasets.py b/src/anemoi/registry/commands/datasets.py index 1749eb2..dea69b0 100644 --- a/src/anemoi/registry/commands/datasets.py +++ b/src/anemoi/registry/commands/datasets.py @@ -39,7 +39,6 @@ def add_arguments(self, command_parser): action="store_true", ) command_parser.add_argument("--url", help="Print the URL of the dataset.", action="store_true") - # command_parser.add_argument("--delete", help=f"Delete the dataset from the catalogue and from any other location", action="store_true") command_parser.add_argument("--set-status", help="Set the status to the dataset.", metavar="STATUS") command_parser.add_argument( "--set-recipe", help="Set the recipe file to [re-]build the dataset.", metavar="FILE" @@ -64,6 +63,11 @@ def add_arguments(self, command_parser): ) command_parser.add_argument("--remove-location", help="Platform name to remove.", metavar="PLATFORM") + command_parser.add_argument( + "--DELETE", + help="Delete the dataset when removing a location. Requires --remove-location.", + action="store_true", + ) def _run(self, entry, args): if entry is None: @@ -92,7 +96,7 @@ def _run(self, entry, args): self.process_task(entry, args, "register") self.process_task(entry, args, "set_recipe") self.process_task(entry, args, "set_status") - self.process_task(entry, args, "remove_location") + self.process_task(entry, args, "remove_location", delete=args.DELETE) if args.add_local: entry.add_location(args.add_local, path=args.NAME_OR_PATH) diff --git a/src/anemoi/registry/commands/list.py b/src/anemoi/registry/commands/list.py index 935741b..8f02a90 100644 --- a/src/anemoi/registry/commands/list.py +++ b/src/anemoi/registry/commands/list.py @@ -43,14 +43,17 @@ def add_arguments(self, command_parser): experiment.add_argument( "filter", nargs="*", help="Filter experiments with a list of key=value.", metavar="key=value" ) + experiment.add_argument("--json", help="Output as JSON", action="store_true") checkpoint = sub_parser.add_parser("weights", help="List weights in the catalogue.") checkpoint.add_argument( "filter", nargs="*", help="Filter experiments with a list of key=value.", metavar="key=value" ) + checkpoint.add_argument("--json", help="Output as JSON", action="store_true") dataset = sub_parser.add_parser("datasets", help="List datasets in the catalogue.") dataset.add_argument("filter", nargs="*", help="Filter datasets with a list of key=value.", metavar="key=value") + dataset.add_argument("--json", help="Output as JSON", action="store_true") # tasks = sub_parser.add_parser("tasks") # tasks.add_argument("filter", nargs="*") @@ -67,7 +70,41 @@ def _run_default(self, args): collection = args.subcommand request = list_to_dict(args.filter) payload = RestItemList(collection).get(params=request) - print(json_pretty_dump(payload)) + if args.json: + print(json_pretty_dump(payload)) + else: + for v in payload: + print(v["name"]) + + def run_datasets(self, args): + collection = args.subcommand + request = list_to_dict(args.filter) + payload = RestItemList(collection).get(params=request) + if args.json: + print(json_pretty_dump(payload)) + else: + for v in payload: + print(v["name"]) + + def run_weights(self, args): + collection = args.subcommand + request = list_to_dict(args.filter) + payload = RestItemList(collection).get(params=request) + if args.json: + print(json_pretty_dump(payload)) + else: + for v in payload: + print(v["uuid"]) + + def run_experiments(self, args): + collection = args.subcommand + request = list_to_dict(args.filter) + payload = RestItemList(collection).get(params=request) + if args.json: + print(json_pretty_dump(payload)) + else: + for v in payload: + print(v["expver"]) def run_tasks(self, args): collection = "tasks" diff --git a/src/anemoi/registry/entry/dataset.py b/src/anemoi/registry/entry/dataset.py index 6ca9d8c..ae4b7e3 100644 --- a/src/anemoi/registry/entry/dataset.py +++ b/src/anemoi/registry/entry/dataset.py @@ -53,9 +53,26 @@ def add_location(self, platform, path): self.rest_item.patch([{"op": "add", "path": f"/locations/{platform}", "value": {"path": path}}]) return path - def remove_location(self, platform): + def remove_location(self, platform, *, delete): + if delete: + self.delete(platform) self.rest_item.patch([{"op": "remove", "path": f"/locations/{platform}"}]) + def delete(self, platform): + if not config().get("allow_delete"): + raise ValueError("Delete not allowed by configuration") + + path = self.record.get("locations", {}).get(platform, {}).get("path") + if path is None: + LOG.warning(f"Nothing to delete for {self.key} on platform {platform}") + return + if path.startswith("s3://"): + from anemoi.utils.s3 import delete + + return delete(path + "/") + else: + LOG.warning(f"Location is not an s3 path: {path}. Delete not implemented.") + def upload(self, source, target, platform="unknown", resume=True): LOG.info(f"Uploading from {source} to {target} ") assert target.startswith("s3://"), target