Skip to content

Commit

Permalink
Add pad if needed3d (#2196)
Browse files Browse the repository at this point in the history
* Empty-Commit

* Added PadIfNeeded

* Fix

* Fixes in some tests

* Tests pass

* Added PadIfNeeded3D and ToTensor3D

* Sourcery fixes

* Sourcery fixes

---------

Co-authored-by: Vladimir Iglovikov <[email protected]>
  • Loading branch information
ternaus and Vladimir Iglovikov authored Dec 13, 2024
1 parent 852dfef commit 6a0ad9b
Show file tree
Hide file tree
Showing 28 changed files with 1,103 additions and 177 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ repos:
- id: check-docstrings
name: Check Docstrings for '---' sequences
entry: python tools/check_docstrings.py
language: system
language: python
types: [python]
- repo: local
hooks:
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,14 @@ Spatial-level transforms will simultaneously change both an input image as well
| [VerticalFlip](https://explore.albumentations.ai/transform/VerticalFlip) |||||
| [XYMasking](https://explore.albumentations.ai/transform/XYMasking) |||||

### 3D transforms

3D transforms operate on volumetric data and can modify both the input volume and the associated 3D mask.

| Transform | Image | Mask |
| -------------------------------------------------------------------------- | :---: | :--: |
| [PadIfNeeded3D](https://explore.albumentations.ai/transform/PadIfNeeded3D) | ✓     | ✓    |

## A few more examples of **augmentations**

### Semantic segmentation on the Inria dataset
Expand Down
1 change: 1 addition & 0 deletions albumentations/augmentations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@
from .text.functional import *
from .text.transforms import *
from .transforms import *
from .transforms3d.transforms import *
from .utils import *
2 changes: 1 addition & 1 deletion albumentations/augmentations/dropout/coarse_dropout.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class CoarseDropout(BaseDropout):
- 'inpaint_telea': uses OpenCV Telea inpainting method
- 'inpaint_ns': uses OpenCV Navier-Stokes inpainting method
Default: 0
mask_fill_value (ColorType | None): Fill value for dropout regions in the mask.
fill_mask (ColorType | None): Fill value for dropout regions in the mask.
If None, mask regions corresponding to image dropouts are unchanged. Default: None
p (float): Probability of applying the transform. Default: 0.5
Expand Down
6 changes: 3 additions & 3 deletions albumentations/augmentations/geometric/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -2888,7 +2888,7 @@ def bboxes_piecewise_affine(
return bboxes


def _get_dimension_padding(
def get_dimension_padding(
current_size: int,
min_size: int | None,
divisor: int | None,
Expand Down Expand Up @@ -2940,12 +2940,12 @@ def get_padding_params(
"""
rows, cols = image_shape[:2]

h_pad_top, h_pad_bottom = _get_dimension_padding(
h_pad_top, h_pad_bottom = get_dimension_padding(
rows,
min_height,
pad_height_divisor,
)
w_pad_left, w_pad_right = _get_dimension_padding(cols, min_width, pad_width_divisor)
w_pad_left, w_pad_right = get_dimension_padding(cols, min_width, pad_width_divisor)

return h_pad_top, h_pad_bottom, w_pad_left, w_pad_right

Expand Down
2 changes: 2 additions & 0 deletions albumentations/augmentations/transforms3d/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .functional import *
from .transforms import *
86 changes: 86 additions & 0 deletions albumentations/augmentations/transforms3d/functional.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import random
from typing import Literal

import numpy as np

from albumentations.core.types import NUM_VOLUME_DIMENSIONS, ColorType


def adjust_padding_by_position3d(
    paddings: list[tuple[int, int]],  # [(front, back), (top, bottom), (left, right)]
    position: Literal["center", "random"],
    py_random: random.Random,
) -> tuple[int, int, int, int, int, int]:
    """Adjust padding values based on desired position for 3D data.

    Args:
        paddings: Padding pairs for each axis, ordered as
            [(d_front, d_back), (h_top, h_bottom), (w_left, w_right)].
        position: Position of the volume after padding. 'center' keeps the given
            pairs as they are; any other value redistributes each axis' total
            padding at random between its two sides.
        py_random: Random number generator used for the 'random' position.

    Returns:
        tuple[int, int, int, int, int, int]: Final padding values
            (d_front, d_back, h_top, h_bottom, w_left, w_right). For every axis the
            two returned values always add up to the sum of the input pair, so the
            padded size is the same regardless of position.
    """
    if position == "center":
        return (
            paddings[0][0],  # d_front
            paddings[0][1],  # d_back
            paddings[1][0],  # h_top
            paddings[1][1],  # h_bottom
            paddings[2][0],  # w_left
            paddings[2][1],  # w_right
        )

    # Total amount of padding each axis needs; only its split between the two
    # sides is randomized.
    d_total = sum(paddings[0])
    h_total = sum(paddings[1])
    w_total = sum(paddings[2])

    # Draw the leading pad ONCE per axis and derive the trailing pad from it.
    # Two independent draws (one per side) would not add up to the required
    # total, so the padded volume could miss the requested size/divisibility.
    d_front = py_random.randint(0, d_total)
    h_top = py_random.randint(0, h_total)
    w_left = py_random.randint(0, w_total)

    return (
        d_front,
        d_total - d_front,  # d_back
        h_top,
        h_total - h_top,  # h_bottom
        w_left,
        w_total - w_left,  # w_right
    )


def pad_3d_with_params(
    img: np.ndarray,
    padding: tuple[int, int, int, int, int, int],  # (d_front, d_back, h_top, h_bottom, w_left, w_right)
    value: ColorType,
) -> np.ndarray:
    """Apply constant-value padding to a volume along depth, height and width.

    Args:
        img: Input volume, shaped (depth, height, width) or (depth, height, width, channels).
        padding: Amount to add on each side, ordered as
            (d_front, d_back, h_top, h_bottom, w_left, w_right).
        value: Constant written into the newly added border; forwarded to
            ``np.pad`` as ``constant_values``.

    Returns:
        The padded array, with the same number of dimensions as the input.
        When every padding amount is zero the input object itself is returned.
    """
    d_front, d_back, h_top, h_bottom, w_left, w_right = padding

    # One (before, after) pair per spatial axis, in (depth, height, width) order.
    spatial_pads = [(d_front, d_back), (h_top, h_bottom), (w_left, w_right)]

    # Nothing to add on any side: hand the input back untouched.
    if all(before == 0 and after == 0 for before, after in spatial_pads):
        return img

    # np.pad expects one pair per array axis, so a volume with a trailing
    # channel axis (ndim == NUM_VOLUME_DIMENSIONS, i.e. the 4D case) gets an
    # extra (0, 0) entry that leaves the channels unpadded.
    channel_pads = [(0, 0)] if img.ndim == NUM_VOLUME_DIMENSIONS else []

    return np.pad(
        img,
        pad_width=spatial_pads + channel_pads,
        mode="constant",
        constant_values=value,
    )
163 changes: 163 additions & 0 deletions albumentations/augmentations/transforms3d/transforms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from __future__ import annotations

from typing import Annotated, Any, Literal, cast

import numpy as np
from pydantic import AfterValidator, model_validator
from typing_extensions import Self

from albumentations.augmentations.geometric import functional as fgeometric
from albumentations.augmentations.transforms3d import functional as f3d
from albumentations.core.pydantic import check_range_bounds_3d
from albumentations.core.transforms_interface import Transform3D
from albumentations.core.types import ColorType, Targets

__all__ = ["PadIfNeeded3D"]


class PadIfNeeded3D(Transform3D):
    """Pads the sides of a 3D volume if its dimensions are less than specified minimum dimensions.

    If the pad_divisor_zyx is specified, the function additionally ensures that the volume
    dimensions are divisible by these values.

    Args:
        min_zyx (tuple[int, int, int] | None): Minimum desired size as (depth, height, width).
            Ensures volume dimensions are at least these values.
            If not specified, pad_divisor_zyx must be provided.
        pad_divisor_zyx (tuple[int, int, int] | None): If set, pads each dimension to make it
            divisible by corresponding value in format (depth_div, height_div, width_div).
            If not specified, min_zyx must be provided.
        position (Literal["center", "random"]): Position where the volume is to be placed after padding.
            Default is 'center'.
        fill (ColorType): Value to fill the border voxels for images. Default: 0
        fill_mask (ColorType): Value to fill the border voxels for masks. Default: 0
        p (float): Probability of applying the transform. Default: 1.0

    Targets:
        images, masks

    Image types:
        uint8, float32

    Note:
        - At least one of min_zyx or pad_divisor_zyx must be set; a ValueError is raised
          at construction time when both are None.
        - When both are given, both are forwarded to get_dimension_padding for every axis;
          how they are combined is decided by that helper.
        - The padding is computed once from the shape of the `images` input and the same
          padding is applied to all targets (images and masks).
        - Input volumes can be either 3D arrays (depth, height, width) or
          4D arrays (depth, height, width, channels).
        - Padding is always applied using constant values specified by fill/fill_mask.

    Example:
        >>> import albumentations as A
        >>> transform = A.Compose([
        ...     A.PadIfNeeded3D(
        ...         min_zyx=(64, 128, 128),  # Minimum size for each dimension
        ...         fill=0,  # Fill value for images
        ...         fill_mask=0,  # Fill value for masks
        ...     ),
        ... ])
        >>> # For divisible dimensions
        >>> transform = A.Compose([
        ...     A.PadIfNeeded3D(
        ...         pad_divisor_zyx=(16, 16, 16),  # Make dimensions divisible by 16
        ...         fill=0,
        ...     ),
        ... ])
        >>> # The volume is passed under the `images` key, which is the key this
        >>> # transform reads its input shape from.
        >>> transformed = transform(images=volume, masks=masks)
        >>> padded_volume = transformed['images']
        >>> padded_masks = transformed['masks']
    """

    _targets = (Targets.IMAGE, Targets.MASK)

    class InitSchema(Transform3D.InitSchema):
        # Each size must be >= 0 and each divisor >= 1 (bounds passed to check_range_bounds_3d).
        min_zyx: Annotated[tuple[int, int, int] | None, AfterValidator(check_range_bounds_3d(0, None))]
        pad_divisor_zyx: Annotated[tuple[int, int, int] | None, AfterValidator(check_range_bounds_3d(1, None))]
        position: Literal["center", "random"]
        fill: ColorType
        fill_mask: ColorType

        @model_validator(mode="after")
        def validate_params(self) -> Self:
            # Without either constraint there would be nothing to pad towards.
            if self.min_zyx is None and self.pad_divisor_zyx is None:
                msg = "At least one of min_zyx or pad_divisor_zyx must be set"
                raise ValueError(msg)
            return self

    def __init__(
        self,
        min_zyx: tuple[int, int, int] | None = None,
        pad_divisor_zyx: tuple[int, int, int] | None = None,
        position: Literal["center", "random"] = "center",
        fill: ColorType = 0,
        fill_mask: ColorType = 0,
        p: float = 1.0,
        always_apply: bool | None = None,
    ):
        super().__init__(p=p, always_apply=always_apply)
        self.min_zyx = min_zyx
        self.pad_divisor_zyx = pad_divisor_zyx
        self.position = position
        self.fill = fill
        self.fill_mask = fill_mask

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Compute the six padding amounts from the shape of ``data["images"]``.

        Returns:
            dict with a single key ``"padding"`` holding
            (d_front, d_back, h_top, h_bottom, w_left, w_right).
        """
        # Only the first three axes are spatial; a trailing channel axis is ignored here.
        depth, height, width = data["images"].shape[:3]
        sizes = (depth, height, width)

        # One (before, after) pair per axis, in (depth, height, width) order.
        paddings = [
            fgeometric.get_dimension_padding(
                current_size=size,
                min_size=self.min_zyx[i] if self.min_zyx else None,
                divisor=self.pad_divisor_zyx[i] if self.pad_divisor_zyx else None,
            )
            for i, size in enumerate(sizes)
        ]

        padding = f3d.adjust_padding_by_position3d(
            paddings=paddings,
            position=self.position,
            py_random=self.py_random,
        )

        return {"padding": padding}  # (d_front, d_back, h_top, h_bottom, w_left, w_right)

    def apply_to_images(
        self,
        images: np.ndarray,
        padding: tuple[int, int, int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Pad the image volume with ``self.fill``; return it unchanged when no padding is needed."""
        if padding == (0, 0, 0, 0, 0, 0):
            return images
        return f3d.pad_3d_with_params(
            img=images,
            padding=padding,  # (d_front, d_back, h_top, h_bottom, w_left, w_right)
            value=cast(ColorType, self.fill),
        )

    def apply_to_masks(
        self,
        masks: np.ndarray,
        padding: tuple[int, int, int, int, int, int],
        **params: Any,
    ) -> np.ndarray:
        """Pad the mask volume with ``self.fill_mask``; return it unchanged when no padding is needed."""
        if padding == (0, 0, 0, 0, 0, 0):
            return masks
        return f3d.pad_3d_with_params(
            img=masks,
            padding=padding,  # (d_front, d_back, h_top, h_bottom, w_left, w_right)
            value=cast(ColorType, self.fill_mask),
        )

    def get_transform_init_args_names(self) -> tuple[str, ...]:
        """Return the names of the constructor arguments specific to this transform."""
        return (
            "min_zyx",
            "pad_divisor_zyx",
            "position",
            "fill",
            "fill_mask",
        )
Loading

0 comments on commit 6a0ad9b

Please sign in to comment.