From 4ddf6d9313777850a224da4cd83db1b12a12661c Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Sun, 8 Sep 2024 16:24:36 +0200 Subject: [PATCH 1/5] adjust for crc2 dataset --- src/spatialdata_io/readers/_utils/_utils.py | 4 +- src/spatialdata_io/readers/visium_hd.py | 52 +++++++++++++++------ 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py index 63f5bdf2..4623fced 100644 --- a/src/spatialdata_io/readers/_utils/_utils.py +++ b/src/spatialdata_io/readers/_utils/_utils.py @@ -18,8 +18,8 @@ NDArrayA = NDArray[Any] except (ImportError, TypeError): - NDArray = np.ndarray # type: ignore[misc] - NDArrayA = np.ndarray # type: ignore[misc] + NDArray = np.ndarray + NDArrayA = np.ndarray def _read_counts( diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py index f147a33a..15216ad2 100644 --- a/src/spatialdata_io/readers/visium_hd.py +++ b/src/spatialdata_io/readers/visium_hd.py @@ -92,9 +92,7 @@ def visium_hd( if dataset_id is None: dataset_id = _infer_dataset_id(path) - filename_prefix = f"{dataset_id}_" - else: - filename_prefix = "" + filename_prefix = f"{dataset_id}_" def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None) -> None: _load_image( @@ -266,15 +264,24 @@ def _get_bins(path: Path) -> list[str]: fullres_image_filenames = [ f for f in os.listdir(path_fullres) if os.path.isfile(os.path.join(path_fullres, f)) ] - if len(fullres_image_filenames) > 1: - warnings.warn( - f"Multiple files found in {path_fullres}, using the first one: {fullres_image_filenames[0]}. Please" - " specify the path to the full resolution image manually using the `fullres_image_file` argument.", - UserWarning, - stacklevel=2, - ) - fullres_image_filename = fullres_image_filenames[0] - fullres_image_file = path_fullres / fullres_image_filename + fullres_image_paths = [path_fullres / image_filename for image_filename in fullres_image_filenames] + elif list((path_fullres := (path / f"{filename_prefix}tissue_image")).parent.glob(f"{path_fullres.name}.*")): + fullres_image_paths = list(path_fullres.parent.glob(f"{path_fullres.name}.*")) + if len(fullres_image_paths) > 1: + warnings.warn( + f"Multiple files found in {path_fullres}, using the first one: {fullres_image_paths[0].stem}. Please" + " specify the path to the full resolution image manually using the `fullres_image_file` argument.", + UserWarning, + stacklevel=2, + ) + if len(fullres_image_paths) == 0: + warnings.warn( + "No full resolution image found. If incorrect, please specify the path in the " + "`fullres_image_file` parameter when calling the `visium_hd` reader function.", + UserWarning, + stacklevel=2, + ) + fullres_image_file = fullres_image_paths[0] if len(fullres_image_paths) > 0 else None if fullres_image_file is not None: load_image( @@ -284,8 +291,13 @@ def _get_bins(path: Path) -> list[str]: ) # hires image + hires_image_path = path / VisiumHDKeys.IMAGE_HIRES_FILE load_image( - path=path / VisiumHDKeys.IMAGE_HIRES_FILE, + path=( + hires_image_path + if hires_image_path.exists() + else path / f"{filename_prefix}spatial" / VisiumHDKeys.IMAGE_HIRES_FILE + ), suffix="_hires_image", ) set_transformation( @@ -295,8 +307,13 @@ def _get_bins(path: Path) -> list[str]: ) # lowres image + lowres_image_path = path / VisiumHDKeys.IMAGE_LOWRES_FILE load_image( - path=path / VisiumHDKeys.IMAGE_LOWRES_FILE, + path=( + lowres_image_path + if lowres_image_path.exists() + else path / f"{filename_prefix}spatial" / VisiumHDKeys.IMAGE_LOWRES_FILE + ), suffix="_lowres_image", ) set_transformation( @@ -306,9 +323,14 @@ def _get_bins(path: Path) -> list[str]: ) # cytassist image + cytassist_path = path / VisiumHDKeys.IMAGE_CYTASSIST if load_all_images: load_image( - path=path / VisiumHDKeys.IMAGE_CYTASSIST, + path=( + cytassist_path + if cytassist_path.exists() + else path / f"{filename_prefix}spatial" / VisiumHDKeys.IMAGE_CYTASSIST + ), suffix="_cytassist_image", ) image = images[dataset_id + "_cytassist_image"] From ebc339676c1f9febea6c8e875b27cc17751066b1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 14:28:25 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/readers/_utils/_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py index 709008d0..4623fced 100644 --- a/src/spatialdata_io/readers/_utils/_utils.py +++ b/src/spatialdata_io/readers/_utils/_utils.py @@ -22,7 +22,6 @@ NDArrayA = np.ndarray - def _read_counts( path: str | Path, counts_file: str, From 1dc9d2781378f2647d62415525a59fc45c2a4722 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Sun, 8 Sep 2024 17:28:53 +0200 Subject: [PATCH 3/5] remove spatial from keys --- src/spatialdata_io/_constants/_constants.py | 6 +++--- src/spatialdata_io/readers/visium_hd.py | 13 +++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index e4f77d5f..9ec43fa9 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -353,9 +353,9 @@ class VisiumHDKeys(ModeEnum): TISSUE_POSITIONS_FILE = "tissue_positions.parquet" # images - IMAGE_HIRES_FILE = "spatial/tissue_hires_image.png" - IMAGE_LOWRES_FILE = "spatial/tissue_lowres_image.png" - IMAGE_CYTASSIST = "spatial/cytassist_image.tiff" + IMAGE_HIRES_FILE = "tissue_hires_image.png" + IMAGE_LOWRES_FILE = "tissue_lowres_image.png" + IMAGE_CYTASSIST = "cytassist_image.tiff" # scalefactors SCALEFACTORS_FILE = "scalefactors_json.json" diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py index 15216ad2..5a434cf8 100644 --- a/src/spatialdata_io/readers/visium_hd.py +++ b/src/spatialdata_io/readers/visium_hd.py @@ -86,6 +86,7 @@ def visium_hd( SpatialData object for the Visium HD data. """ path = Path(path) + all_files = [file for file in path.rglob("*") if file.is_file()] tables = {} shapes = {} images: dict[str, Any] = {} @@ -291,13 +292,9 @@ def _get_bins(path: Path) -> list[str]: ) # hires image - hires_image_path = path / VisiumHDKeys.IMAGE_HIRES_FILE + hires_image_path = [path for path in all_files if VisiumHDKeys.IMAGE_HIRES_FILE in str(path)][0] load_image( - path=( - hires_image_path - if hires_image_path.exists() - else path / f"{filename_prefix}spatial" / VisiumHDKeys.IMAGE_HIRES_FILE - ), + path=hires_image_path, suffix="_hires_image", ) set_transformation( @@ -307,7 +304,7 @@ def _get_bins(path: Path) -> list[str]: ) # lowres image - lowres_image_path = path / VisiumHDKeys.IMAGE_LOWRES_FILE + lowres_image_path = [path for path in all_files if VisiumHDKeys.IMAGE_LOWRES_FILE in str(path)][0] load_image( path=( lowres_image_path @@ -323,7 +320,7 @@ def _get_bins(path: Path) -> list[str]: ) # cytassist image - cytassist_path = path / VisiumHDKeys.IMAGE_CYTASSIST + cytassist_path = [path for path in all_files if VisiumHDKeys.IMAGE_CYTASSIST in str(path)][0] if load_all_images: load_image( path=( From 76408909ff17860b64bdb62f3d6d3e7b1a528894 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Sun, 8 Sep 2024 19:05:53 +0200 Subject: [PATCH 4/5] raise error when image not found --- src/spatialdata_io/readers/visium_hd.py | 26 ++++++++++++------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py index 5a434cf8..6af8bc89 100644 --- a/src/spatialdata_io/readers/visium_hd.py +++ b/src/spatialdata_io/readers/visium_hd.py @@ -292,9 +292,11 @@ def _get_bins(path: Path) -> list[str]: ) # hires image - hires_image_path = [path for path in all_files if VisiumHDKeys.IMAGE_HIRES_FILE in str(path)][0] + hires_image_path = [path for path in all_files if VisiumHDKeys.IMAGE_HIRES_FILE in str(path)] + if len(hires_image_path) == 0: + raise OSError(f"No image path found containing the hires image: {VisiumHDKeys.IMAGE_HIRES_FILE}") load_image( - path=hires_image_path, + path=hires_image_path[0], suffix="_hires_image", ) set_transformation( @@ -304,13 +306,11 @@ def _get_bins(path: Path) -> list[str]: ) # lowres image - lowres_image_path = [path for path in all_files if VisiumHDKeys.IMAGE_LOWRES_FILE in str(path)][0] + lowres_image_path = [path for path in all_files if VisiumHDKeys.IMAGE_LOWRES_FILE in str(path)] + if len(lowres_image_path) == 0: + raise OSError(f"No image path found containing the lowres image: {VisiumHDKeys.IMAGE_LOWRES_FILE}") load_image( - path=( - lowres_image_path - if lowres_image_path.exists() - else path / f"{filename_prefix}spatial" / VisiumHDKeys.IMAGE_LOWRES_FILE - ), + path=lowres_image_path[0], suffix="_lowres_image", ) set_transformation( @@ -320,14 +320,12 @@ def _get_bins(path: Path) -> list[str]: ) # cytassist image - cytassist_path = [path for path in all_files if VisiumHDKeys.IMAGE_CYTASSIST in str(path)][0] + cytassist_path = [path for path in all_files if VisiumHDKeys.IMAGE_CYTASSIST in str(path)] + if len(cytassist_path) == 0: + raise OSError(f"No image path found containing the cytassist image: {VisiumHDKeys.IMAGE_CYTASSIST}") if load_all_images: load_image( - path=( - cytassist_path - if cytassist_path.exists() - else path / f"{filename_prefix}spatial" / VisiumHDKeys.IMAGE_CYTASSIST - ), + path=cytassist_path[0], suffix="_cytassist_image", ) image = images[dataset_id + "_cytassist_image"] From fa77f6a94fe7ef018781417ed0154742a1513f7e Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Mon, 9 Sep 2024 11:35:57 +0200 Subject: [PATCH 5/5] check shapes --- src/spatialdata_io/readers/visium_hd.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py index 6af8bc89..e79596bb 100644 --- a/src/spatialdata_io/readers/visium_hd.py +++ b/src/spatialdata_io/readers/visium_hd.py @@ -126,8 +126,12 @@ def _get_bins(path: Path) -> list[str]: ] ) - if VisiumHDKeys.BINNED_OUTPUTS in os.listdir(path): - path_bins = path / VisiumHDKeys.BINNED_OUTPUTS + all_path_bins = [path_bin for path_bin in all_files if VisiumHDKeys.BINNED_OUTPUTS in str(path_bin)] + if len(all_path_bins) != 0: + path_bins_parts = all_path_bins[ + -1 + ].parts # just choosing last one here as users might have tar file which would be first + path_bins = Path(*path_bins_parts[: path_bins_parts.index(VisiumHDKeys.BINNED_OUTPUTS) + 1]) else: path_bins = path all_bin_sizes = _get_bins(path_bins)