From 7db8b499708112ea3f250c1bbf82e920c269d8d3 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 20 Sep 2023 08:53:23 +0100 Subject: [PATCH 1/5] Don't walk() into zarray directories --- src/omero_mkngff/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index e95a93e..ef952fe 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -277,9 +277,12 @@ def walk(self, path: Path) -> Generator[Tuple[Path, str, str], None, None]: if not p.is_dir(): yield (p.parent, p.name, "application/octet-stream") else: - if (p / ".zarray").exists() or (p / ".zgroup").exists(): + is_array = (p / ".zarray").exists() + if is_array or (p / ".zgroup").exists(): yield (p.parent, p.name, "Directory") - yield from self.walk(p) + # Don't try to walk zarray - will only contain chunks! + if not is_array: + yield from self.walk(p) else: # Chunk directory continue From 0e4dca393d6821c1b78d4fd0bac35e7d99abe078 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 20 Sep 2023 11:31:03 +0100 Subject: [PATCH 2/5] create_symlink() ignores existing links --- src/omero_mkngff/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index ef952fe..215f018 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -257,7 +257,7 @@ def create_symlink(self, symlink_repo, prefix, symlink_path, symlink_target): prefix_dir = os.path.join(symlink_repo, prefix) self.ctx.err(f"Checking for prefix_dir {prefix_dir}") if not os.path.exists(prefix_dir): - self.ctx.die(402, f"Fileset dir does not exist: {prefix_dir}") + self.ctx.die(402, f"Fileset dir does not exist: {prefix_dir}") symlink_container = f"{symlink_path.parent}" if symlink_container.startswith("/"): symlink_container = symlink_container[1:] # remove "/" from start @@ -270,7 +270,9 @@ def create_symlink(self, symlink_repo, prefix, symlink_path, symlink_target): self.ctx.err( f"Creating symlink {symlink_source} -> {symlink_target}" ) - os.symlink(symlink_target, symlink_source, target_is_directory) + # ignore if symlink exists + if not os.path.exists(symlink_source): + os.symlink(symlink_target, symlink_source, target_is_directory) def walk(self, path: Path) -> Generator[Tuple[Path, str, str], None, None]: for p in path.iterdir(): From a2d0aeeb5195e7374c7cb48e5d989d813a05f982 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 21 Sep 2023 10:30:18 +0100 Subject: [PATCH 3/5] skip sql if symlink_dir exists. Create symlink_dir last --- src/omero_mkngff/__init__.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index 215f018..9040480 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -174,11 +174,7 @@ def setup(self, args: Namespace) -> None: def sql(self, args: Namespace) -> None: prefix = self.get_prefix(args) - - prefix_path, prefix_name = prefix.rsplit("/", 1) - self.ctx.err( - f"Found prefix {prefix_path} // {prefix_name} for fileset {args.fileset_id}" - ) + self.ctx.err(f"Found prefix {prefix} for fileset {args.fileset_id}") symlink_path = Path(args.symlink_target) @@ -186,9 +182,12 @@ def sql(self, args: Namespace) -> None: self.ctx.die(401, f"Symlink target does not exist: {args.symlink_target}") return - # create *_SUFFIX/path/to/zarr directory containing symlink to data + # If symlink dir exists, we assume that this fileset has been processed -> skip... if args.symlink_repo: - self.create_symlink(args.symlink_repo, prefix, symlink_path, args.symlink_target) + symlink_dir = self.get_symlink_dir(args.symlink_repo, prefix, symlink_path) + if os.path.exists(symlink_dir): + self.ctx.err(f"Symlink dir exists at {symlink_dir} - skipping sql output") + return rows = [] # Need a file to set path/name on pixels table BioFormats uses for setId() @@ -198,7 +197,7 @@ def sql(self, args: Namespace) -> None: row_path = str(row_path).replace(f"{symlink_path.parent}", "") if str(row_path).startswith("/"): row_path = str(row_path)[1:] # remove "/" from start - row_full_path = f"{prefix_path}/{prefix_name}_{SUFFIX}/{row_path}" + row_full_path = f"{prefix}_{SUFFIX}/{row_path}" # pick the first .zattrs file we find, then update to ome.xml if we find it if setid_target is None and row_name == ".zattrs" or row_name == "METADATA.ome.xml": setid_target = [row_full_path, row_name] @@ -218,13 +217,17 @@ def sql(self, args: Namespace) -> None: self.ctx.out( TEMPLATE.format( OLD_FILESET=args.fileset_id, - PREFIX=f"{prefix_path}/{prefix_name}_{SUFFIX}/", + PREFIX=f"{prefix}_{SUFFIX}/", ROWS=",\n".join(rows), REPO=self.get_uuid(args), UUID=args.secret, ) ) + # Finally create *_SUFFIX/ directory containing symlink to data + if args.symlink_repo: + self.create_symlink(args.symlink_repo, prefix, symlink_path, args.symlink_target) + def symlink(self, args: Namespace) -> None: prefix = self.get_prefix(args) symlink_path = Path(args.symlink_target) @@ -252,8 +255,7 @@ def get_prefix(self, args): return prefix - def create_symlink(self, symlink_repo, prefix, symlink_path, symlink_target): - + def get_symlink_dir(self, symlink_repo, prefix, symlink_path): prefix_dir = os.path.join(symlink_repo, prefix) self.ctx.err(f"Checking for prefix_dir {prefix_dir}") if not os.path.exists(prefix_dir): @@ -262,6 +264,10 @@ def create_symlink(self, symlink_repo, prefix, symlink_path, symlink_target): if symlink_container.startswith("/"): symlink_container = symlink_container[1:] # remove "/" from start symlink_dir = f"{prefix_dir}_{SUFFIX}" + return symlink_dir + + def create_symlink(self, symlink_repo, prefix, symlink_path, symlink_target): + symlink_dir = self.get_symlink_dir(symlink_repo, prefix, symlink_path) self.ctx.err(f"Creating dir at {symlink_dir}") os.makedirs(symlink_dir, exist_ok=True) From cac303d3c1bdab030ee286533b94fa744461d726 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 22 Sep 2023 12:04:23 +0100 Subject: [PATCH 4/5] Fix failure to return .zarray files --- src/omero_mkngff/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index 9040480..533dbb9 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -174,7 +174,7 @@ def setup(self, args: Namespace) -> None: def sql(self, args: Namespace) -> None: prefix = self.get_prefix(args) - self.ctx.err(f"Found prefix {prefix} for fileset {args.fileset_id}") + self.ctx.err(f"Found prefix {prefix} for fileset: {args.fileset_id}") symlink_path = Path(args.symlink_target) @@ -288,11 +288,13 @@ def walk(self, path: Path) -> Generator[Tuple[Path, str, str], None, None]: is_array = (p / ".zarray").exists() if is_array or (p / ".zgroup").exists(): yield (p.parent, p.name, "Directory") - # Don't try to walk zarray - will only contain chunks! - if not is_array: + # If array, don't recursively check sub-dirs + if is_array: + yield (p, ".zarray", "application/octet-stream") + else: yield from self.walk(p) else: - # Chunk directory + # Non-zarr directory continue def get_uuid(self, args: Namespace) -> str: From 08db883c54410265783d5f5a4cf5f6b31d2dd5e3 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 22 Sep 2023 14:09:00 +0100 Subject: [PATCH 5/5] default secret is SECRETUUID --- src/omero_mkngff/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index 533dbb9..a8385f7 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -147,7 +147,7 @@ def _configure(self, parser: Parser) -> None: sql = sub.add_parser("sql", help="generate SQL statement") sql.add_argument( - "--secret", help="DB UUID for protecting SQL statements", default="TBD" + "--secret", help="DB UUID for protecting SQL statements", default="SECRETUUID" ) sql.add_argument("--zarr_name", help="Nicer name for zarr directory if desired") sql.add_argument( @@ -174,7 +174,7 @@ def setup(self, args: Namespace) -> None: def sql(self, args: Namespace) -> None: prefix = self.get_prefix(args) - self.ctx.err(f"Found prefix {prefix} for fileset: {args.fileset_id}") + self.ctx.err(f"Found prefix: {prefix} for fileset: {args.fileset_id}") symlink_path = Path(args.symlink_target)