albumentations-team · ternaus · Nov 19, 2024 · Oct 2, 2024 · Nov 19, 2024 · Nov 19, 2024
diff --git a/albumentations/augmentations/geometric/functional.py b/albumentations/augmentations/geometric/functional.py
@@ -3064,3 +3064,67 @@
 
     # Combine nonlinear and affine transformations
     return kernel_matrix @ nonlinear_weights + affine_terms @ affine_weights
+
+
+def get_camera_matrix_distortion_maps(
+    image_shape: tuple[int, int],
+    cx: float,
+    cy: float,
+    k: float,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Build remap coordinate maps from OpenCV's camera-matrix distortion model.
+
+    Args:
+        image_shape: Image shape; only the first two entries (height, width) are read.
+        cx: x-coordinate of the distortion center, stored in the camera matrix.
+        cy: y-coordinate of the distortion center, stored in the camera matrix.
+        k: Value placed in the first two distortion coefficients; the other three stay zero.
+
+    Returns:
+        The (map_x, map_y) pair produced by cv2.initUndistortRectifyMap for a (width, height) output.
+    """
+    rows, cols = image_shape[:2]
+    # The diagonal of the camera matrix is filled with the image width and height.
+    intrinsics = np.array([[cols, 0, cx], [0, rows, cy], [0, 0, 1]], dtype=np.float32)
+    dist_coeffs = np.array([k, k, 0, 0, 0], dtype=np.float32)
+    output_size = (cols, rows)
+    return cv2.initUndistortRectifyMap(intrinsics, dist_coeffs, None, None, output_size, cv2.CV_32FC1)
+
+
+def get_fisheye_distortion_maps(
+    image_shape: tuple[int, int],
+    cx: float,
+    cy: float,
+    k: float,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Generate remap coordinate maps using a radial fisheye model.
+
+    Each output pixel at radius r from (cx, cy) samples the source at radius
+    r * (1 + k * (r / r_max)**2), where r_max is the distance from the center to
+    the farthest corner of the image rectangle. Normalizing by r_max keeps k
+    unitless, so one coefficient gives a comparable effect at any resolution.
+
+    Args:
+        image_shape: Image shape; only the first two entries (height, width) are read.
+        cx: x-coordinate of distortion center, in pixels.
+        cy: y-coordinate of distortion center, in pixels.
+        k: Distortion coefficient; 0 yields the identity mapping.
+
+    Returns:
+        tuple of float32 arrays of shape (height, width):
+        - map_x: Source x-coordinate for every output pixel
+        - map_y: Source y-coordinate for every output pixel
+    """
+    height, width = image_shape[:2]
+    # Pixel offsets from the distortion center
+    y, x = np.mgrid[:height, :width].astype(np.float32)
+    x -= cx
+    y -= cy
+
+    # Squared distance from the center to the farthest corner of the image rectangle
+    max_r2 = max(cx, width - cx) ** 2 + max(cy, height - cy) ** 2
+    # Scaling the offsets equals the polar round trip (r, theta) without any trig
+    scale = 1 + k * (x * x + y * y) / max_r2
+    map_x = (cx + x * scale).astype(np.float32, copy=False)
+    map_y = (cy + y * scale).astype(np.float32, copy=False)
+    return map_x, map_y
diff --git a/albumentations/augmentations/geometric/transforms.py b/albumentations/augmentations/geometric/transforms.py
@@ -1402,30 +1402,49 @@
 class OpticalDistortion(BaseDistortion):
     """Apply optical distortion to images, masks, bounding boxes, and keypoints.
 
-    This transformation simulates lens distortion effects by warping the image using
-    a camera matrix and distortion coefficients. It's particularly useful for
-    augmenting data in computer vision tasks where camera lens effects are relevant.
+    Supports two distortion models:
+    1. Camera matrix model (original):
+       Uses OpenCV's camera calibration model with k1=k2=k distortion coefficients
+
+    2. Fisheye model:
+       Direct radial distortion: r_dist = r * (1 + k * r²), where k is the sampled distortion coefficient
 
     Args:
-        distort_limit (float or tuple of float): Range of distortion coefficient.
-            If distort_limit is a single float, the range will be (-distort_limit, distort_limit).
-            Default: (-0.05, 0.05).
-        shift_limit (float or tuple of float): Range of shifts for the image center.
-            If shift_limit is a single float, the range will be (-shift_limit, shift_limit).
-            Default: (-0.05, 0.05).
+        distort_limit (float | tuple[float, float]): Range of distortion coefficient.
+            For camera model: recommended range (-0.05, 0.05)
+            For fisheye model: recommended range (-0.3, 0.3)
+            Default: (-0.05, 0.05)
+
+        shift_limit (float | tuple[float, float]): Range of relative shifts for the image center.
+            Values are multiplied by image dimensions to get absolute shift in pixels:
+            - dx = shift_x * image_width
+            - dy = shift_y * image_height
+            If shift_limit is a single float value, the range will be (-shift_limit, shift_limit).
+            Default: (-0.05, 0.05)
+
+        mode (Literal['camera', 'fisheye']): Distortion model to use:
+            - 'camera': Original camera matrix model
+            - 'fisheye': Fisheye lens model
+            Default: 'camera'
+
         interpolation (OpenCV flag): Interpolation method used for image transformation.
             Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC,
             cv2.INTER_AREA, cv2.INTER_LANCZOS4. Default: cv2.INTER_LINEAR.
+
         border_mode (OpenCV flag): Border mode used for handling pixels outside the image.
             Should be one of: cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT,
             cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101. Default: cv2.BORDER_REFLECT_101.
+
         value (int, float, list of int, list of float): Padding value if border_mode
             is cv2.BORDER_CONSTANT. Default: None.
+
         mask_value (int, float, list of int, list of float): Padding value for mask
             if border_mode is cv2.BORDER_CONSTANT. Default: None.
+
         mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
             Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
             Default: cv2.INTER_NEAREST.
+
         p (float): Probability of applying the transform. Default: 0.5.
 
     Targets:
@@ -1439,6 +1458,8 @@
         - The distortion coefficient (k) is randomly sampled from the distort_limit range.
         - The image center is shifted by dx and dy, randomly sampled from the shift_limit range.
         - Bounding boxes and keypoints are transformed along with the image to maintain consistency.
+        - Fisheye model directly applies radial distortion
+        - Both models use shift_limit to control distortion center
 
     Example:
         >>> import albumentations as A
@@ -1455,6 +1476,7 @@
     class InitSchema(BaseDistortion.InitSchema):
         distort_limit: SymmetricRangeType
         shift_limit: SymmetricRangeType
+        mode: Literal["camera", "fisheye"]
 
     def __init__(
         self,
@@ -1465,6 +1487,7 @@
         value: ColorType | None = None,
         mask_value: ColorType | None = None,
         mask_interpolation: int = cv2.INTER_NEAREST,
+        mode: Literal["camera", "fisheye"] = "camera",
         p: float = 0.5,
         always_apply: bool | None = None,
     ):
@@ -1478,28 +1501,31 @@
         )
         self.shift_limit = cast(tuple[float, float], shift_limit)
         self.distort_limit = cast(tuple[float, float], distort_limit)
+        self.mode = mode
 
     def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
-        height, width = params["shape"][:2]
-
-        fx = width
-        fy = height
+        image_shape = params["shape"][:2]
+        height, width = image_shape
 
+        # Sample one coefficient k from the configured distort_limit range
         k = self.py_random.uniform(*self.distort_limit)
-        dx = round(self.py_random.uniform(*self.shift_limit))
-        dy = round(self.py_random.uniform(*self.shift_limit))
 
+        # shift_limit is relative: scale by image size, then round to whole pixels
+        dx = round(self.py_random.uniform(*self.shift_limit) * width)
+        dy = round(self.py_random.uniform(*self.shift_limit) * height)
         cx = width * 0.5 + dx
         cy = height * 0.5 + dy
 
-        camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
-        distortion = np.array([k, k, 0, 0, 0], dtype=np.float32)
-        map_x, map_y = cv2.initUndistortRectifyMap(camera_matrix, distortion, None, None, (width, height), cv2.CV_32FC1)
+        # Build the remap coordinate maps with the model selected by self.mode
+        if self.mode == "camera":
+            map_x, map_y = fgeometric.get_camera_matrix_distortion_maps(image_shape, cx, cy, k)
+        else:  # fisheye
+            map_x, map_y = fgeometric.get_fisheye_distortion_maps(image_shape, cx, cy, k)
 
         return {"map_x": map_x, "map_y": map_y}
 
     def get_transform_init_args_names(self) -> tuple[str, ...]:
-        return (*super().get_transform_init_args_names(), "distort_limit", "shift_limit")
+        return ("distort_limit", "shift_limit", "mode", *super().get_transform_init_args_names())
 
 
 class GridDistortion(BaseDistortion):