Add pad if needed3d (#2196)

* Empty-Commit * Added PadIfNeeded * Fix * Fixes in some tests * Tests pass * Added PadIfNeeded3D and ToTensor3D * Sourcery fixes * Sourcery fixes --------- Co-authored-by: Vladimir Iglovikov <[email protected]>
albumentations-team · Dec 13, 2024 · 6a0ad9b · 6a0ad9b
1 parent 852dfef
commit 6a0ad9b
Show file tree

Hide file tree

Showing 28 changed files with 1,103 additions and 177 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -40,7 +40,7 @@ repos:
     - id: check-docstrings
       name: Check Docstrings for '---' sequences
       entry: python tools/check_docstrings.py
-      language: system
+      language: python
       types: [python]
   - repo: local
     hooks:

diff --git a/README.md b/README.md
@@ -297,6 +297,14 @@ Spatial-level transforms will simultaneously change both an input image as well
 | [VerticalFlip](https://explore.albumentations.ai/transform/VerticalFlip)                         | ✓     | ✓    | ✓      | ✓         |
 | [XYMasking](https://explore.albumentations.ai/transform/XYMasking)                               | ✓     | ✓    | ✓      | ✓         |
 
+### 3D transforms
+
+3D transforms operate on volumetric data and can modify both the input volume and the associated 3D mask.
+
+| Transform                                                                  | Image | Mask |
+| -------------------------------------------------------------------------- | :---: | :--: |
+| [PadIfNeeded3D](https://explore.albumentations.ai/transform/PadIfNeeded3D) | ✓     | ✓    |
+
 ## A few more examples of **augmentations**
 
 ### Semantic segmentation on the Inria dataset

diff --git a/albumentations/augmentations/__init__.py b/albumentations/augmentations/__init__.py
@@ -21,4 +21,5 @@
 from .text.functional import *
 from .text.transforms import *
 from .transforms import *
+from .transforms3d.transforms import *
 from .utils import *
diff --git a/albumentations/augmentations/dropout/coarse_dropout.py b/albumentations/augmentations/dropout/coarse_dropout.py
@@ -40,7 +40,7 @@ class CoarseDropout(BaseDropout):
             - 'inpaint_telea': uses OpenCV Telea inpainting method
             - 'inpaint_ns': uses OpenCV Navier-Stokes inpainting method
             Default: 0
-        mask_fill_value (ColorType | None): Fill value for dropout regions in the mask.
+        fill_mask (ColorType | None): Fill value for dropout regions in the mask.
             If None, mask regions corresponding to image dropouts are unchanged. Default: None
         p (float): Probability of applying the transform. Default: 0.5
 

diff --git a/albumentations/augmentations/geometric/functional.py b/albumentations/augmentations/geometric/functional.py
@@ -2888,7 +2888,7 @@ def bboxes_piecewise_affine(
     return bboxes
 
 
-def _get_dimension_padding(
+def get_dimension_padding(
     current_size: int,
     min_size: int | None,
     divisor: int | None,
@@ -2940,12 +2940,12 @@ def get_padding_params(
     """
     rows, cols = image_shape[:2]
 
-    h_pad_top, h_pad_bottom = _get_dimension_padding(
+    h_pad_top, h_pad_bottom = get_dimension_padding(
         rows,
         min_height,
         pad_height_divisor,
     )
-    w_pad_left, w_pad_right = _get_dimension_padding(cols, min_width, pad_width_divisor)
+    w_pad_left, w_pad_right = get_dimension_padding(cols, min_width, pad_width_divisor)
 
     return h_pad_top, h_pad_bottom, w_pad_left, w_pad_right
 

diff --git a/albumentations/augmentations/transforms3d/__init__.py b/albumentations/augmentations/transforms3d/__init__.py
@@ -0,0 +1,2 @@
+from .functional import *
+from .transforms import *
diff --git a/albumentations/augmentations/transforms3d/functional.py b/albumentations/augmentations/transforms3d/functional.py
@@ -0,0 +1,86 @@
+import random
+from typing import Literal
+
+import numpy as np
+
+from albumentations.core.types import NUM_VOLUME_DIMENSIONS, ColorType
+
+
+def adjust_padding_by_position3d(
+    paddings: list[tuple[int, int]],  # [(front, back), (top, bottom), (left, right)]
+    position: Literal["center", "random"],
+    py_random: random.Random,
+) -> tuple[int, int, int, int, int, int]:
+    """Adjust padding values based on desired position for 3D data.
+
+    Args:
+        paddings: Three (before, after) padding pairs, ordered depth, height, width:
+            [(d_front, d_back), (h_top, h_bottom), (w_left, w_right)].
+        position: Position of the volume after padding. 'center' returns the pairs unchanged;
+            any other value is handled as 'random'.
+        py_random: Random number generator used to split the padding in 'random' mode.
+
+    Returns:
+        tuple[int, int, int, int, int, int]: Final padding values
+            (d_front, d_back, h_top, h_bottom, w_left, w_right). For every axis the two
+            returned values add up to the sum of the corresponding input pair.
+    """
+    if position == "center":
+        return (
+            paddings[0][0],  # d_front
+            paddings[0][1],  # d_back
+            paddings[1][0],  # h_top
+            paddings[1][1],  # h_bottom
+            paddings[2][0],  # w_left
+            paddings[2][1],  # w_right
+        )
+
+    # For random position, redistribute the total padding of each dimension.
+    d_pad = sum(paddings[0])
+    h_pad = sum(paddings[1])
+    w_pad = sum(paddings[2])
+
+    # Draw exactly once per axis and derive the other side from it. Drawing the two sides
+    # independently would make their sum differ from the required total, so the padded
+    # volume could miss the requested minimum size or divisibility.
+    d_front = py_random.randint(0, d_pad)
+    h_top = py_random.randint(0, h_pad)
+    w_left = py_random.randint(0, w_pad)
+
+    return (
+        d_front,
+        d_pad - d_front,  # d_back
+        h_top,
+        h_pad - h_top,  # h_bottom
+        w_left,
+        w_pad - w_left,  # w_right
+    )
+
+
+def pad_3d_with_params(
+    img: np.ndarray,
+    padding: tuple[int, int, int, int, int, int],  # (d_front, d_back, h_top, h_bottom, w_left, w_right)
+    value: ColorType,
+) -> np.ndarray:
+    """Apply constant-value padding to the three leading axes of a volume.
+
+    Args:
+        img: Input volume; the first three axes are treated as (depth, height, width).
+            An optional trailing channel axis is left unpadded.
+        padding: Six padding amounts in the order
+            (d_front, d_back, h_top, h_bottom, w_left, w_right).
+        value: Constant passed to `np.pad` as `constant_values`.
+
+    Returns:
+        The padded array, or `img` itself (not a copy) when every padding amount is zero.
+    """
+    # Unpack first so that a padding of the wrong length fails immediately.
+    d_front, d_back, h_top, h_bottom, w_left, w_right = padding
+
+    # Nothing to do: hand back the input object untouched.
+    if all(amount == 0 for amount in (d_front, d_back, h_top, h_bottom, w_left, w_right)):
+        return img
+
+    spatial_pad = [
+        (d_front, d_back),  # depth
+        (h_top, h_bottom),  # height
+        (w_left, w_right),  # width
+    ]
+
+    # Arrays whose rank equals NUM_VOLUME_DIMENSIONS carry a channel axis that must not be padded.
+    # NOTE(review): the constant is defined in albumentations.core.types; presumably 4 - confirm.
+    channel_pad = [(0, 0)] if img.ndim == NUM_VOLUME_DIMENSIONS else []
+
+    return np.pad(
+        img,
+        pad_width=spatial_pad + channel_pad,
+        mode="constant",
+        constant_values=value,
+    )
diff --git a/albumentations/augmentations/transforms3d/transforms.py b/albumentations/augmentations/transforms3d/transforms.py
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+from typing import Annotated, Any, Literal, cast
+
+import numpy as np
+from pydantic import AfterValidator, model_validator
+from typing_extensions import Self
+
+from albumentations.augmentations.geometric import functional as fgeometric
+from albumentations.augmentations.transforms3d import functional as f3d
+from albumentations.core.pydantic import check_range_bounds_3d
+from albumentations.core.transforms_interface import Transform3D
+from albumentations.core.types import ColorType, Targets
+
+__all__ = ["PadIfNeeded3D"]
+
+
+class PadIfNeeded3D(Transform3D):
+    """Pad a 3D volume so that every axis satisfies a minimum size and/or a divisibility constraint.
+
+    The required padding is computed per axis (depth, height, width) from the shape of the
+    `images` target and then applied identically to `images` and `masks`.
+
+    Args:
+        min_zyx (tuple[int, int, int] | None): Minimum desired size as (depth, height, width).
+            May be omitted only if pad_divisor_zyx is given.
+        pad_divisor_zyx (tuple[int, int, int] | None): Per-axis divisors in the format
+            (depth_div, height_div, width_div). May be omitted only if min_zyx is given.
+        position (Literal["center", "random"]): Where the original volume ends up inside the
+            padded one. Default: 'center'.
+        fill (ColorType): Constant used for the padded voxels of images. Default: 0
+        fill_mask (ColorType): Constant used for the padded voxels of masks. Default: 0
+        p (float): Probability of applying the transform. Default: 1.0
+
+    Targets:
+        images, masks
+
+    Image types:
+        uint8, float32
+
+    Note:
+        - At least one of min_zyx or pad_divisor_zyx must be set; otherwise validation fails.
+        - If both are set, both values are forwarded for every axis to
+          `fgeometric.get_dimension_padding`, which decides how they are combined.
+        - The same padding is used for all targets, so images and masks stay aligned.
+        - Input volumes can be either 3D arrays (depth, height, width) or
+          4D arrays (depth, height, width, channels).
+        - Padding always uses the constant values given by fill / fill_mask.
+
+    Example:
+        >>> import albumentations as A
+        >>> # Enforce a minimum size
+        >>> transform = A.Compose([
+        ...     A.PadIfNeeded3D(
+        ...         min_zyx=(64, 128, 128),  # Minimum size for each dimension
+        ...         fill=0,  # Fill value for images
+        ...         fill_mask=0,  # Fill value for masks
+        ...     ),
+        ... ])
+        >>> # Enforce divisibility
+        >>> transform = A.Compose([
+        ...     A.PadIfNeeded3D(
+        ...         pad_divisor_zyx=(16, 16, 16),  # Make dimensions divisible by 16
+        ...         fill=0,
+        ...     ),
+        ... ])
+        >>> transformed = transform(images=volume, masks=masks)
+        >>> padded_volume = transformed['images']
+        >>> padded_masks = transformed['masks']
+    """
+
+    _targets = (Targets.IMAGE, Targets.MASK)
+
+    class InitSchema(Transform3D.InitSchema):
+        # Sizes may be zero or larger; divisors must be at least one.
+        min_zyx: Annotated[tuple[int, int, int] | None, AfterValidator(check_range_bounds_3d(0, None))]
+        pad_divisor_zyx: Annotated[tuple[int, int, int] | None, AfterValidator(check_range_bounds_3d(1, None))]
+        position: Literal["center", "random"]
+        fill: ColorType
+        fill_mask: ColorType
+
+        @model_validator(mode="after")
+        def validate_params(self) -> Self:
+            # A transform with neither constraint would have nothing to compute.
+            if self.min_zyx is not None or self.pad_divisor_zyx is not None:
+                return self
+            msg = "At least one of min_zyx or pad_divisor_zyx must be set"
+            raise ValueError(msg)
+
+    def __init__(
+        self,
+        min_zyx: tuple[int, int, int] | None = None,
+        pad_divisor_zyx: tuple[int, int, int] | None = None,
+        position: Literal["center", "random"] = "center",
+        fill: ColorType = 0,
+        fill_mask: ColorType = 0,
+        p: float = 1.0,
+        always_apply: bool | None = None,
+    ):
+        super().__init__(p=p, always_apply=always_apply)
+        # Size constraints
+        self.min_zyx = min_zyx
+        self.pad_divisor_zyx = pad_divisor_zyx
+        # Placement and fill values
+        self.position = position
+        self.fill = fill
+        self.fill_mask = fill_mask
+
+    def get_params_dependent_on_data(
+        self,
+        params: dict[str, Any],
+        data: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Compute the six padding amounts from the shape of `data["images"]`."""
+        depth, height, width = data["images"].shape[:3]
+
+        # One (before, after) pair per axis, in depth / height / width order.
+        per_axis = []
+        for axis, current in enumerate((depth, height, width)):
+            minimum = self.min_zyx[axis] if self.min_zyx else None
+            divisor = self.pad_divisor_zyx[axis] if self.pad_divisor_zyx else None
+            per_axis.append(
+                fgeometric.get_dimension_padding(
+                    current_size=current,
+                    min_size=minimum,
+                    divisor=divisor,
+                ),
+            )
+
+        # (d_front, d_back, h_top, h_bottom, w_left, w_right)
+        final_padding = f3d.adjust_padding_by_position3d(
+            paddings=per_axis,
+            position=self.position,
+            py_random=self.py_random,
+        )
+        return {"padding": final_padding}
+
+    def apply_to_images(
+        self,
+        images: np.ndarray,
+        padding: tuple[int, int, int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        """Pad the image volume with `self.fill`; return it unchanged when no padding is required."""
+        needs_padding = padding != (0, 0, 0, 0, 0, 0)
+        if not needs_padding:
+            return images
+        fill_value = cast(ColorType, self.fill)
+        return f3d.pad_3d_with_params(img=images, padding=padding, value=fill_value)
+
+    def apply_to_masks(
+        self,
+        masks: np.ndarray,
+        padding: tuple[int, int, int, int, int, int],
+        **params: Any,
+    ) -> np.ndarray:
+        """Pad the mask volume with `self.fill_mask`; return it unchanged when no padding is required."""
+        needs_padding = padding != (0, 0, 0, 0, 0, 0)
+        if not needs_padding:
+            return masks
+        fill_value = cast(ColorType, self.fill_mask)
+        return f3d.pad_3d_with_params(img=masks, padding=padding, value=fill_value)
+
+    def get_transform_init_args_names(self) -> tuple[str, ...]:
+        """Return the names of the constructor arguments, in declaration order."""
+        return ("min_zyx", "pad_divisor_zyx", "position", "fill", "fill_mask")
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .functional import *
		from .transforms import *