diff --git a/src/conversion_lib.py b/src/conversion_lib.py deleted file mode 100644 index 109f562..0000000 --- a/src/conversion_lib.py +++ /dev/null @@ -1,172 +0,0 @@ -"""This module contains the Conversion class used to convert tiff file to OME-NGFF Zarr (contains chunked array).""" - -import dask.array as da -import numpy as np -from tifffile import imwrite, TiffFile -import zarr -import sys -import logging -from numcodecs import Zstd -from typing import List - - -class Conversion: - """A class used to convert tiff file to zarr array with properly formatted OME-NGFF metadata.""" - - def __init__( - self, - input_filepath: str, - output_filepath: str, - axes: List[str], - translation: List[float], - scale: List[float], - units: List[str], - ): - """Construct all the necessary attributes for the proper conversion of tiff to OME-NGFF Zarr. - - Args: - input_filepath (str): path to source tiff file - output_filepath (str): path to the output zarr file. - axes (List[str]): list of axes to store in metadata. Order matters. - translation (List[float]): list of coordinates where the top left corner of the output zarr array should be located when displayed in neuroglancer. Order matters - scale (List[float]): physical size of the voxel (in units). Order matters. - units (List[str]): physical dimension units that define in which units the scale attribute is measured. Order matters. - """ - self.input_filepath = input_filepath - self.output_filepath = output_filepath - self.zarr_metadata = { - "axes": axes, - "translation": translation, - "scale": scale, - "units": units, - } - - def read_tiff(self): - """Read tiff file and store array, axes and metadata in a dictionary. - - Returns: - [numpy.array, [str], dict]: returns tiff image as numpy array object, axis naming and order, and imagej style metadata. - """ - try: - with TiffFile(self.input_filepath) as tiff: - volume_numpy = tiff.asarray() - # volume_dask = da.from_array(tiff.asarray(), chunks=chunks) - axes = tiff.series[0].axes - imagej_metadata = tiff.imagej_metadata - except IOError as e: - logging.error( - "Failed to open {0}. Error reason: {1}".format(self.input_filepath, e) - ) - sys.exit(1) - return [volume_numpy, axes, imagej_metadata] - - def dask_to_zarray(self, tiff_data: List, chunks: List): - """Store dask array in a zarr file. - - Args: - tiff_data (List): a list containing tiff image as numpy array object, axis naming and order, and imagej style metadata. - chunks (List): what chunk size to use for output zarr array - """ - # create root group - root = zarr.group( - store=zarr.NestedDirectoryStore(self.output_filepath), overwrite=True - ) - # create zarr array - # zarr_data = root.create_dataset('data', shape=tiff_data[0].shape, chunks=chunks, dtype=tiff_data[0].dtype) - dask_arr = da.from_array(tiff_data[0], chunks=chunks) - zarr_data = zarr.create( - store=zarr.NestedDirectoryStore(self.output_filepath), - path="s0", - shape=dask_arr.shape, - chunks=chunks, - dtype=dask_arr.dtype, - compressor=Zstd(level=6), - ) - - # store .tiff data in a .zarr file - da.store(dask_arr, zarr_data) - - # add metadata to zarr .attrs. - self.populate_zarr_attrs( - root, tiff_data[1], self.zarr_metadata, zarr_data.name.lstrip("/") - ) - - def numpy_to_zarray(self, tiff_data, chunks): - """Store numpy array in a zarr file. - - Args: - tiff_data (List): a list containing tiff image as numpy array object, axis naming and order, and imagej style metadata. - chunks (List): what chunk size to use for output zarr array - """ - # create root group - root = zarr.group( - store=zarr.NestedDirectoryStore(self.output_filepath), overwrite=True - ) - # create zarr array - zarr_data = zarr.create( - store=zarr.NestedDirectoryStore(self.output_filepath), - path="s0", - shape=tiff_data[0].shape, - chunks=chunks, - dtype=tiff_data[0].dtype, - ) - zarr_data[:] = tiff_data[0] - - self.populate_zarr_attrs( - root, tiff_data[1], self.zarr_metadata, zarr_data.name.lstrip("/") - ) - - def populate_zarr_attrs(self, root, axes, zarr_metadata, data_address): - """Add selected tiff metadata to zarr attributes file (.zattrs). - - Args: - root (zarr.Group): root group of the output zarr array - axes (List): axes naming order (z,y,x or x,y,z) - zarr_metadata (): combined zarr metadata from input translation, scale, axes names and units - data_address (str): path to array - """ - tiff_axes = [*axes] - - # json template for a multiscale structure - multscale_dict = { - "multiscales": [ - { - "axes": [], - "coordinateTransformations": [ - {"scale": [1.0, 1.0, 1.0], "type": "scale"} - ], - "datasets": [ - { - "coordinateTransformations": [ - {"scale": [], "type": "scale"}, - {"translation": [], "type": "translation"}, - ], - "path": "unknown", - } - ], - "name": "unknown", - "version": "0.4", - } - ] - } - - # write metadata info into a multiscale scheme - for axis, scale, offset, unit in zip( - zarr_metadata["axes"], - zarr_metadata["scale"], - zarr_metadata["translation"], - zarr_metadata["units"], - ): - multscale_dict["multiscales"][0]["axes"].append( - {"name": axis, "type": "space", "unit": unit} - ) - multscale_dict["multiscales"][0]["datasets"][0][ - "coordinateTransformations" - ][0]["scale"].append(scale) - multscale_dict["multiscales"][0]["datasets"][0][ - "coordinateTransformations" - ][1]["translation"].append(offset) - multscale_dict["multiscales"][0]["datasets"][0]["path"] = data_address ## - - # add multiscale template to .attrs - root.attrs["multiscales"] = multscale_dict["multiscales"] diff --git a/src/n5_attrs_template.json b/src/n5_attrs_template.json deleted file mode 100644 index 010ff36..0000000 --- a/src/n5_attrs_template.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "axes": ["x", "y", "z"], - "multiscales": [{"datasets": [], "name": "em/fibsem-uint8"}], - "pixelResolution": {"dimensions": [0.0, 0.0, 0.0], "unit": "nm"}, - "ordering": "C", - "scales": [], - "units": ["nm", "nm", "nm"], - "translate": [0, 0, 0] -} \ No newline at end of file diff --git a/src/tif_to_zarr.py b/src/tif_to_zarr.py deleted file mode 100644 index c3464e6..0000000 --- a/src/tif_to_zarr.py +++ /dev/null @@ -1,71 +0,0 @@ -"""This module contains all necessary code to run tiff to zarr conversion script in a command line interface mode, using Click python library.""" - -import conversion_lib as conv -import click -import numpy as np - - -@click.command() -@click.option( - "--src", "-s", type=click.Path(exists=True), help="Input .tiff file location." -) -@click.option("--dest", "-d", type=click.Path(), help="Output .tiff file location.") -@click.option( - "--axes", - "-a", - nargs=3, - default=("z", "y", "x"), - type=str, - help="Metadata axis names. Order matters. \n Example: -a z y x", -) -@click.option( - "--translation", - "-t", - nargs=3, - default=(0.0, 0.0, 0.0), - type=float, - help="Metadata translation(offset) value. Order matters. \n Example: -t 1.0 2.0 3.0", -) -@click.option( - "--scale", - "-s", - nargs=3, - default=(1.0, 1.0, 1.0), - type=float, - help="Metadata scale value. Order matters. \n Example: -s 1.0 2.0 3.0", -) -@click.option( - "--units", - "-u", - nargs=3, - default=("nm", "nm", "nm"), - type=str, - help="Metadata unit names. Order matters. \n Example: -t nanometer nanometer nanometer", -) -def tif_to_zarr(src, dest, axes, translation, scale, units): - """Accept input parameters from click python cli and convert input tiff to ome-ngff zarr. - - Args: - src (str): path to source tiff file - dest (str): path to the output zarr file. - axes (List[str]): list of axes to store in metadata. Order matters. - translation (List[float]): list of coordinates where the top left corner of the output zarr array should be located when displayed in neuroglancer. Order matters - scale (List[float]): physical size of the voxel (in units). Order matters. - units (List[str]): physical dimension units that define in which units the scale attribute is measured. Order matters. - """ - # load tiff data - c = conv.Conversion(src, dest, axes, translation, scale, units) - tiff_data = c.read_tiff() - # transpose tiff array - # tiff_data_mod = [np.transpose(tiff_data[0]), tiff_data[1]] - # flip values along y axis - tiff_data_mod = [tiff_data[0], tiff_data[1]] - # tiff_data_mod = [np.flip(tiff_data[0], axis=1), tiff_data[1]] - print(tiff_data_mod[0].shape) - # store tiff data in a .zarr file - chunks = (64, 64, 64) - c.dask_to_zarray(tiff_data_mod, chunks) - - -if __name__ == "__main__": - tif_to_zarr() diff --git a/src/tiff_stack.py b/src/tiff_stack.py index 88d126b..a77a64a 100644 --- a/src/tiff_stack.py +++ b/src/tiff_stack.py @@ -7,28 +7,37 @@ import dask.array as da from natsort import natsorted from glob import glob +from tiff_volume import TiffVolume -class TiffStack(): +class TiffStack(TiffVolume): def __init__(self, - src_path: str): + src_path: str, + axes : list[str], + scale : list[float], + translation : list[float], + units : list[str]): """Construct all the necessary attributes for the proper conversion of tiff to OME-NGFF Zarr. Args: input_filepath (str): path to source tiff file. """ - self.input_filepath = src_path - - + self.src_path = src_path self.stack_list = natsorted(glob(os.path.join(src_path, '*.tif*'))) probe_image_store = imread(os.path.join(src_path, self.stack_list[0]), aszarr=True) probe_image_arr = da.from_zarr(probe_image_store) self.dtype = probe_image_arr.dtype self.shape = [len(self.stack_list)] + list(probe_image_arr.shape) - - - + + #metadata + self.zarr_metadata = { + "axes": axes, + "translation": translation, + "scale": scale, + "units": units, + } + def write_tile_slab_to_zarr(self, chunk_num : int, zarray : zarr.Array, @@ -52,16 +61,16 @@ def write_tile_slab_to_zarr(self, print(f'Tiff tile with index {slab_index} is not present in tiff stack.') np_slab[slab_index - chunk_num, :, :] = image_tile - # write a tiff stack slab into zarr array + # write a tiff stack slab into a zarr array zarray[chunk_num : chunk_num+ zarray.chunks[0], :, :] = np_slab #parallel writing of tiff stack into zarr array def write_to_zarr(self, - zarray : zarr.Group, - client : Client - ): + zarray : zarr.Array, + client : Client + ): chunks_list = np.arange(0, zarray.shape[0], zarray.chunks[0]) print(chunks_list) diff --git a/src/tiff_stack_to_zarr.py b/src/tiff_stack_to_zarr.py deleted file mode 100644 index 3688647..0000000 --- a/src/tiff_stack_to_zarr.py +++ /dev/null @@ -1,91 +0,0 @@ -from tifffile import imread, imwrite -import zarr -import multiprocessing as mp -import numpy as np -from numcodecs import Zstd -from writes import write_tiles_strobbing -import os -from natsort import natsorted -import click -import sys -from dask_jobqueue import LSFCluster -from dask.distributed import Client, LocalCluster -import multiprocessing as mp -import time -from glob import glob -import dask.array as da - - -@click.command() -@click.option('--src','-s',type=click.Path(exists = True),help='Input tiff file location, or tiff stack directory path.') -@click.option('--dest','-s',type=click.STRING,help='Output .zarr file path.') -@click.option('--num_workers','-w',default=100,type=click.INT,help = "Number of dask workers") -@click.option('--cluster', '-c', default='' ,type=click.STRING, help="Which instance of dask client to use. Local client - 'local', cluster 'lsf'") -@click.option('--zarr_chunks', '-zc', nargs=3, default=(64, 128, 128), type=click.INT, help='Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)') -def cli(src, dest, num_workers, cluster, zarr_chunks): - - # create a dask client to submit tasks - if cluster == '': - print('Did not specify which instance of the dask client to use!') - sys.exit(0) - elif cluster == 'lsf': - num_cores = 1 - cluster = LSFCluster( - cores=num_cores, - processes=num_cores, - memory=f"{15 * num_cores}GB", - ncpus=num_cores, - mem=15 * num_cores, - walltime="48:00", - local_directory = "/scratch/$USER/" - ) - - elif cluster == 'local': - cluster = LocalCluster() - - client = Client(cluster) - with open(os.path.join(os.getcwd(), "dask_dashboard_link" + ".txt"), "w") as text_file: - text_file.write(str(client.dashboard_link)) - print(client.dashboard_link) - - if os.path.isdir(src): - tiff_type = 'stack' - elif src.endswith('.tif') or src.endswith('.tiff'): - tiff_type = 'volume' - - if tiff_type == 'stack': - - src_volume = natsorted(glob(os.path.join(src, '*.tif*'))) - probe_image_store = imread(os.path.join(src, src_volume[0]), aszarr=True) - probe_image_arr = da.from_zarr(probe_image_store) - - tiff_3d_shape = [len(src_volume)] + list(probe_image_arr.shape) - tiff_3d_dtype = probe_image_arr.dtype - - elif tiff_type == 'volume': - src_volume = src - tiff_3d_store = imread(os.path.join(src), aszarr=True) - tiff_volume = da.from_zarr(tiff_3d_store) - print(type(tiff_volume)) - - tiff_3d_shape = tiff_volume.shape - tiff_3d_dtype = tiff_volume.dtype - - z_store = zarr.NestedDirectoryStore(dest) - z_root = zarr.open(store=z_store, mode = 'a') - z_arr = z_root.require_dataset(name = 's0', - shape = tiff_3d_shape, - dtype = tiff_3d_dtype, - chunks = zarr_chunks, - compressor = Zstd(level=6)) - - - start_time = time.time() - - client.cluster.scale(num_workers) - write_tiles_strobbing(z_arr, src_volume, client) - client.cluster.scale(0) - - print(time.time() - start_time) -if __name__ == '__main__': - cli() diff --git a/src/tiff_to_zarr.py b/src/tiff_to_zarr.py index 02478b2..2d2ac81 100644 --- a/src/tiff_to_zarr.py +++ b/src/tiff_to_zarr.py @@ -44,7 +44,7 @@ "--units", "-u", nargs=3, - default=("nm", "nm", "nm"), + default=("nanometer", "nanometer", "nanometer"), type=str, help="Metadata unit names. Order matters. \n Example: -t nanometer nanometer nanometer", ) @@ -75,10 +75,12 @@ def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, print(client.dashboard_link) if os.path.isdir(src): - tiff_volume = TiffStack(src) + tiff_volume = TiffStack(src, axes, scale, translation, units) elif src.endswith('.tif') or src.endswith('.tiff'): - tiff_volume = TiffVolume(src) + tiff_volume = TiffVolume(src, axes, scale, translation, units) + print(tiff_volume.shape) + z_store = zarr.NestedDirectoryStore(dest) z_root = zarr.open(store=z_store, mode = 'a') z_arr = z_root.require_dataset(name = 's0', @@ -93,5 +95,7 @@ def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, tiff_volume.write_to_zarr(z_arr, client) client.cluster.scale(0) print(time.time() - start_time) + #populate zarr metadata + tiff_volume.populate_zarr_attrs(z_root) if __name__ == '__main__': cli() diff --git a/src/tiff_volume.py b/src/tiff_volume.py index 1b397ed..50aef55 100644 --- a/src/tiff_volume.py +++ b/src/tiff_volume.py @@ -10,7 +10,11 @@ class TiffVolume(): def __init__(self, - src_path: str): + src_path: str, + axes : list[str], + scale : list[float], + translation : list[float], + units : list[str]): """Construct all the necessary attributes for the proper conversion of tiff to OME-NGFF Zarr. Args: @@ -24,7 +28,16 @@ def __init__(self, self.shape = self.zarr_arr.shape self.dtype = self.zarr_arr.dtype - + + #metadata + self.zarr_metadata = { + "axes": axes, + "translation": translation, + "scale": scale, + "units": units, + } + + # multiprocess writing tiff stack into zarr array def write_to_zarr(self, zarray : zarr.Group, @@ -44,6 +57,59 @@ def write_to_zarr(self, print(f'Completed {len(chunks_list)} tasks in {time.time() - start}s') return 0 + + def populate_zarr_attrs(self, root): + """Add selected tiff metadata to zarr attributes file (.zattrs). + + Args: + root (zarr.Group): root group of the output zarr array + zarr_metadata (): combined zarr metadata from input translation, scale, axes names and units + data_address (str): path to array + """ + # json template for a multiscale structure + multscale_dict = { + "multiscales": [ + { + "axes": [], + "coordinateTransformations": [ + {"scale": [1.0, 1.0, 1.0], "type": "scale"} + ], + "datasets": [ + { + "coordinateTransformations": [ + {"scale": [], "type": "scale"}, + {"translation": [], "type": "translation"}, + ], + "path": "unknown", + } + ], + "name": ("/" if root.path=="" else root.path), + "version": "0.4", + } + ] + } + + # write metadata info into a multiscale scheme + for axis, scale, offset, unit in zip( + self.zarr_metadata["axes"], + self.zarr_metadata["scale"], + self.zarr_metadata["translation"], + self.zarr_metadata["units"], + ): + multscale_dict["multiscales"][0]["axes"].append( + {"name": axis, "type": "space", "unit": unit} + ) + multscale_dict["multiscales"][0]["datasets"][0][ + "coordinateTransformations" + ][0]["scale"].append(scale) + multscale_dict["multiscales"][0]["datasets"][0][ + "coordinateTransformations" + ][1]["translation"].append(offset) + multscale_dict["multiscales"][0]["datasets"][0]["path"] = list(root.array_keys())[0] + + # add multiscale template to .attrs + root.attrs["multiscales"] = multscale_dict["multiscales"] + def write_volume_slab_to_zarr( chunk_num : int, @@ -64,4 +130,5 @@ def write_volume_slab_to_zarr( np_slab[0 : zarray.chunks[0], :, :] = tiff_slab # write a tiff stack slab into zarr array - zarray[chunk_num : chunk_num+ zarray.chunks[0], :, :] = np_slab \ No newline at end of file + zarray[chunk_num : chunk_num+ zarray.chunks[0], :, :] = np_slab + diff --git a/src/writes.py b/src/writes.py deleted file mode 100644 index 03bb4af..0000000 --- a/src/writes.py +++ /dev/null @@ -1,62 +0,0 @@ -from tifffile import imread -import numpy as np -import zarr -import os -from dask.distributed import Client, wait -import time -import dask.array as da - - -def write_tileslab_to_zarr( - chunk_num : int, - zarray : zarr.Array, - src_volume : list | str): - - # check if the slab is at the array boundary or not - if chunk_num + zarray.chunks[0] > zarray.shape[0]: - slab_thickness = zarray.shape[0] - chunk_num - else: - slab_thickness = zarray.chunks[0] - - slab_shape = [slab_thickness] + list(zarray.shape[-2:]) - np_slab = np.empty(slab_shape, zarray.dtype) - - if isinstance(src_volume, list): # for a tiff stack - # combine tiles into a slab with thickness equal to the chunk size in z direction - for slab_index in np.arange(chunk_num, chunk_num+slab_thickness, 1): - try: - print(slab_index) - image_tile = imread(src_volume[slab_index]) - except: - print(f'Tiff tile with index {slab_index} is not present in tiff stack.') - np_slab[slab_index - chunk_num, :, :] = image_tile - - elif isinstance(src_volume, str): # for a 3D tiff file - print(np_slab.shape) - tiff_slab = imread(src_volume, key=range(chunk_num, chunk_num + slab_thickness, 1)) - print(tiff_slab.shape) - np_slab[0 : zarray.chunks[0], :, :] = tiff_slab - - # write a tiff stack slab into zarr array - zarray[chunk_num : chunk_num+ zarray.chunks[0], :, :] = np_slab - - - -# multiprocess writing tiff stack into zarr array -def write_tiles_strobbing(zarray : zarr.Group, - src_volume : list | str, - client : Client - ): - chunks_list = np.arange(0, zarray.shape[0], zarray.chunks[0]) - print(chunks_list) - - start = time.time() - fut = client.map(lambda v: write_tileslab_to_zarr(v, zarray, src_volume), chunks_list) - print(f'Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s') - - # wait for all the futures to complete - result = wait(fut) - print(f'Completed {len(chunks_list)} tasks in {time.time() - start}s') - - return 0 -