Skip to content

Commit

Permalink
Merge pull request #11 from will-moore/dont_walk_arrays
Browse files (browse the repository at this point in the history)
Don't walk arrays
  • Loading branch information
joshmoore authored Sep 25, 2023
2 parents 15612c4 + 9e9349b commit 90c4dde
Showing 1 changed file with 30 additions and 17 deletions.
47 changes: 30 additions & 17 deletions src/omero_mkngff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def _configure(self, parser: Parser) -> None:

sql = sub.add_parser("sql", help="generate SQL statement")
sql.add_argument(
"--secret", help="DB UUID for protecting SQL statements", default="TBD"
"--secret", help="DB UUID for protecting SQL statements", default="SECRETUUID"
)
sql.add_argument("--zarr_name", help="Nicer name for zarr directory if desired")
sql.add_argument(
Expand All @@ -174,21 +174,20 @@ def setup(self, args: Namespace) -> None:

def sql(self, args: Namespace) -> None:
prefix = self.get_prefix(args)

prefix_path, prefix_name = prefix.rsplit("/", 1)
self.ctx.err(
f"Found prefix {prefix_path} // {prefix_name} for fileset {args.fileset_id}"
)
self.ctx.err(f"Found prefix: {prefix} for fileset: {args.fileset_id}")

symlink_path = Path(args.symlink_target)

if not symlink_path.exists():
self.ctx.die(401, f"Symlink target does not exist: {args.symlink_target}")
return

# create *_SUFFIX/path/to/zarr directory containing symlink to data
# If symlink dir exists, we assume that this fileset has been processed -> skip...
if args.symlink_repo:
self.create_symlink(args.symlink_repo, prefix, symlink_path, args.symlink_target)
symlink_dir = self.get_symlink_dir(args.symlink_repo, prefix, symlink_path)
if os.path.exists(symlink_dir):
self.ctx.err(f"Symlink dir exists at {symlink_dir} - skipping sql output")
return

rows = []
# Need a file to set path/name on pixels table BioFormats uses for setId()
Expand All @@ -198,7 +197,7 @@ def sql(self, args: Namespace) -> None:
row_path = str(row_path).replace(f"{symlink_path.parent}", "")
if str(row_path).startswith("/"):
row_path = str(row_path)[1:] # remove "/" from start
row_full_path = f"{prefix_path}/{prefix_name}_{SUFFIX}/{row_path}"
row_full_path = f"{prefix}_{SUFFIX}/{row_path}"
# pick the first .zattrs file we find, then update to ome.xml if we find it
if setid_target is None and row_name == ".zattrs" or row_name == "METADATA.ome.xml":
setid_target = [row_full_path, row_name]
Expand All @@ -218,13 +217,17 @@ def sql(self, args: Namespace) -> None:
self.ctx.out(
TEMPLATE.format(
OLD_FILESET=args.fileset_id,
PREFIX=f"{prefix_path}/{prefix_name}_{SUFFIX}/",
PREFIX=f"{prefix}_{SUFFIX}/",
ROWS=",\n".join(rows),
REPO=self.get_uuid(args),
UUID=args.secret,
)
)

# Finally create *_SUFFIX/ directory containing symlink to data
if args.symlink_repo:
self.create_symlink(args.symlink_repo, prefix, symlink_path, args.symlink_target)

def symlink(self, args: Namespace) -> None:
prefix = self.get_prefix(args)
symlink_path = Path(args.symlink_target)
Expand Down Expand Up @@ -252,16 +255,19 @@ def get_prefix(self, args):

return prefix

def create_symlink(self, symlink_repo, prefix, symlink_path, symlink_target):

def get_symlink_dir(self, symlink_repo, prefix, symlink_path):
prefix_dir = os.path.join(symlink_repo, prefix)
self.ctx.err(f"Checking for prefix_dir {prefix_dir}")
if not os.path.exists(prefix_dir):
self.ctx.die(402, f"Fileset dir does not exist: {prefix_dir}")
self.ctx.die(402, f"Fileset dir does not exist: {prefix_dir}")
symlink_container = f"{symlink_path.parent}"
if symlink_container.startswith("/"):
symlink_container = symlink_container[1:] # remove "/" from start
symlink_dir = f"{prefix_dir}_{SUFFIX}"
return symlink_dir

def create_symlink(self, symlink_repo, prefix, symlink_path, symlink_target):
symlink_dir = self.get_symlink_dir(symlink_repo, prefix, symlink_path)
self.ctx.err(f"Creating dir at {symlink_dir}")
os.makedirs(symlink_dir, exist_ok=True)

Expand All @@ -270,18 +276,25 @@ def create_symlink(self, symlink_repo, prefix, symlink_path, symlink_target):
self.ctx.err(
f"Creating symlink {symlink_source} -> {symlink_target}"
)
os.symlink(symlink_target, symlink_source, target_is_directory)
# ignore if symlink exists
if not os.path.exists(symlink_source):
os.symlink(symlink_target, symlink_source, target_is_directory)

def walk(self, path: Path) -> Generator[Tuple[Path, str, str], None, None]:
for p in path.iterdir():
if not p.is_dir():
yield (p.parent, p.name, "application/octet-stream")
else:
if (p / ".zarray").exists() or (p / ".zgroup").exists():
is_array = (p / ".zarray").exists()
if is_array or (p / ".zgroup").exists():
yield (p.parent, p.name, "Directory")
yield from self.walk(p)
# If array, don't recursively check sub-dirs
if is_array:
yield (p, ".zarray", "application/octet-stream")
else:
yield from self.walk(p)
else:
# Chunk directory
# Non-zarr directory
continue

def get_uuid(self, args: Namespace) -> str:
Expand Down

0 comments on commit 90c4dde

Please sign in to comment.