Skip to content

Commit

Permalink
Format: Black
Browse files Browse the repository at this point in the history
  • Loading branch information
yuriyzubov committed Jan 16, 2025
1 parent b660f44 commit 641abdf
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 122 deletions.
87 changes: 45 additions & 42 deletions src/tiff_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,79 +9,82 @@
from glob import glob
from tiff_volume import TiffVolume


class TiffStack(TiffVolume):

def __init__(self,
src_path: str,
axes : list[str],
scale : list[float],
translation : list[float],
units : list[str]):
def __init__(
self,
src_path: str,
axes: list[str],
scale: list[float],
translation: list[float],
units: list[str],
):
"""Construct all the necessary attributes for the proper conversion of tiff to OME-NGFF Zarr.
Args:
input_filepath (str): path to source tiff file.
"""
self.src_path = src_path
self.stack_list = natsorted(glob(os.path.join(src_path, '*.tif*')))
probe_image_store = imread(os.path.join(src_path, self.stack_list[0]), aszarr=True)
self.stack_list = natsorted(glob(os.path.join(src_path, "*.tif*")))
probe_image_store = imread(
os.path.join(src_path, self.stack_list[0]), aszarr=True
)
probe_image_arr = da.from_zarr(probe_image_store)

self.dtype = probe_image_arr.dtype
self.shape = [len(self.stack_list)] + list(probe_image_arr.shape)
#metadata
self.shape = [len(self.stack_list)] + list(probe_image_arr.shape)

# metadata
self.zarr_metadata = {
"axes": axes,
"translation": translation,
"scale": scale,
"units": units,
}

def write_tile_slab_to_zarr(self,
chunk_num : int,
zarray : zarr.Array,
src_volume : list):


def write_tile_slab_to_zarr(
self, chunk_num: int, zarray: zarr.Array, src_volume: list
):

# check if the slab is at the array boundary or not
if chunk_num + zarray.chunks[0] > zarray.shape[0]:
slab_thickness = zarray.shape[0] - chunk_num
slab_thickness = zarray.shape[0] - chunk_num
else:
slab_thickness = zarray.chunks[0]
slab_shape = [slab_thickness] + list(zarray.shape[-2:])

slab_shape = [slab_thickness] + list(zarray.shape[-2:])
np_slab = np.empty(slab_shape, zarray.dtype)



# combine tiles into a slab with thickness equal to the chunk size in z direction
for slab_index in np.arange(chunk_num, chunk_num+slab_thickness, 1):
for slab_index in np.arange(chunk_num, chunk_num + slab_thickness, 1):
try:
image_tile = imread(src_volume[slab_index])
except:
print(f'Tiff tile with index {slab_index} is not present in tiff stack.')
print(
f"Tiff tile with index {slab_index} is not present in tiff stack."
)
np_slab[slab_index - chunk_num, :, :] = image_tile

# write a tiff stack slab into a zarr array
zarray[chunk_num : chunk_num+ zarray.chunks[0], :, :] = np_slab



#parallel writing of tiff stack into zarr array
def write_to_zarr(self,
zarray : zarr.Array,
client : Client
):
# write a tiff stack slab into a zarr array
zarray[chunk_num : chunk_num + zarray.chunks[0], :, :] = np_slab

# parallel writing of tiff stack into zarr array
def write_to_zarr(self, zarray: zarr.Array, client: Client):
chunks_list = np.arange(0, zarray.shape[0], zarray.chunks[0])
print(chunks_list)

start = time.time()
fut = client.map(lambda v: self.write_tile_slab_to_zarr(v, zarray, self.stack_list), chunks_list)
print(f'Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s')

fut = client.map(
lambda v: self.write_tile_slab_to_zarr(v, zarray, self.stack_list),
chunks_list,
)
print(
f"Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s"
)

# wait for all the futures to complete
result = wait(fut)
print(f'Completed {len(chunks_list)} tasks in {time.time() - start}s')

return 0

print(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")

return 0
88 changes: 57 additions & 31 deletions src/tiff_to_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,31 @@


@click.command()
@click.option('--src','-s',type=click.Path(exists = True),help='Input tiff file location, or tiff stack directory path.')
@click.option('--dest','-s',type=click.STRING,help='Output .zarr file path.')
@click.option('--num_workers','-w',default=100,type=click.INT,help = "Number of dask workers")
@click.option('--cluster', '-c', default='' ,type=click.STRING, help="Which instance of dask client to use. Local client - 'local', cluster 'lsf'")
@click.option('--zarr_chunks', '-zc', nargs=3, default=(64, 128, 128), type=click.INT, help='Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)')
@click.option(
"--src",
"-s",
type=click.Path(exists=True),
help="Input tiff file location, or tiff stack directory path.",
)
@click.option("--dest", "-s", type=click.STRING, help="Output .zarr file path.")
@click.option(
"--num_workers", "-w", default=100, type=click.INT, help="Number of dask workers"
)
@click.option(
"--cluster",
"-c",
default="",
type=click.STRING,
help="Which instance of dask client to use. Local client - 'local', cluster 'lsf'",
)
@click.option(
"--zarr_chunks",
"-zc",
nargs=3,
default=(64, 128, 128),
type=click.INT,
help="Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)",
)
@click.option(
"--axes",
"-a",
Expand Down Expand Up @@ -49,12 +69,12 @@
help="Metadata unit names. Order matters. \n Example: -t nanometer nanometer nanometer",
)
def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, units):

# create a dask client to submit tasks
if cluster == '':
print('Did not specify which instance of the dask client to use!')
if cluster == "":
print("Did not specify which instance of the dask client to use!")
sys.exit(0)
elif cluster == 'lsf':
elif cluster == "lsf":
num_cores = 1
cluster = LSFCluster(
cores=num_cores,
Expand All @@ -63,39 +83,45 @@ def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale,
ncpus=num_cores,
mem=15 * num_cores,
walltime="48:00",
local_directory = "/scratch/$USER/"
)
elif cluster == 'local':
cluster = LocalCluster()
local_directory="/scratch/$USER/",
)

elif cluster == "local":
cluster = LocalCluster()

client = Client(cluster)
with open(os.path.join(os.getcwd(), "dask_dashboard_link" + ".txt"), "w") as text_file:
with open(
os.path.join(os.getcwd(), "dask_dashboard_link" + ".txt"), "w"
) as text_file:
text_file.write(str(client.dashboard_link))
print(client.dashboard_link)

if os.path.isdir(src):
tiff_volume = TiffStack(src, axes, scale, translation, units)
elif src.endswith('.tif') or src.endswith('.tiff'):
tiff_volume = TiffVolume(src, axes, scale, translation, units)
tiff_volume = TiffStack(src, axes, scale, translation, units)
elif src.endswith(".tif") or src.endswith(".tiff"):
tiff_volume = TiffVolume(src, axes, scale, translation, units)

print(tiff_volume.shape)

z_store = zarr.NestedDirectoryStore(dest)
z_root = zarr.open(store=z_store, mode = 'a')
z_arr = z_root.require_dataset(name = 's0',
shape = tiff_volume.shape,
dtype = tiff_volume.dtype,
chunks = zarr_chunks,
compressor = Zstd(level=6))

#write in parallel to zarr using dask
z_root = zarr.open(store=z_store, mode="a")
z_arr = z_root.require_dataset(
name="s0",
shape=tiff_volume.shape,
dtype=tiff_volume.dtype,
chunks=zarr_chunks,
compressor=Zstd(level=6),
)

# write in parallel to zarr using dask
start_time = time.time()
client.cluster.scale(num_workers)
tiff_volume.write_to_zarr(z_arr, client)
client.cluster.scale(0)
print(time.time() - start_time)
#populate zarr metadata
# populate zarr metadata
tiff_volume.populate_zarr_attrs(z_root)
if __name__ == '__main__':


if __name__ == "__main__":
cli()
Loading

0 comments on commit 641abdf

Please sign in to comment.