Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fisheye #2157

Merged
merged 3 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions albumentations/augmentations/geometric/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -3064,3 +3064,67 @@

# Combine nonlinear and affine transformations
return kernel_matrix @ nonlinear_weights + affine_terms @ affine_weights


def get_camera_matrix_distortion_maps(
    image_shape: tuple[int, int],
    cx: float,
    cy: float,
    k: float,
) -> tuple[np.ndarray, np.ndarray]:
    """Build remap grids for the camera-matrix distortion model.

    The camera matrix uses the image width and height as focal lengths and
    ``(cx, cy)`` as the principal point. The same coefficient ``k`` is used
    for the first two radial distortion terms; the remaining three
    distortion coefficients are zero.

    Args:
        image_shape: Image shape; only the first two entries (height, width) are read.
        cx: x-coordinate of the distortion center.
        cy: y-coordinate of the distortion center.
        k: Distortion coefficient.

    Returns:
        tuple of:
        - map_x: float32 x-coordinate map produced by ``cv2.initUndistortRectifyMap``
        - map_y: float32 y-coordinate map produced by ``cv2.initUndistortRectifyMap``
    """
    rows, cols = image_shape[:2]

    # 3x3 intrinsics: focal lengths on the diagonal, principal point in the last column.
    intrinsics = np.eye(3, dtype=np.float32)
    intrinsics[0, 0] = cols
    intrinsics[1, 1] = rows
    intrinsics[0, 2] = cx
    intrinsics[1, 2] = cy

    # Five-element coefficient vector with only the first two entries set to k.
    dist_coeffs = np.zeros(5, dtype=np.float32)
    dist_coeffs[:2] = k

    output_size = (cols, rows)  # OpenCV expects (width, height)
    return cv2.initUndistortRectifyMap(intrinsics, dist_coeffs, None, None, output_size, cv2.CV_32FC1)


def get_fisheye_distortion_maps(
    image_shape: tuple[int, int],
    cx: float,
    cy: float,
    k: float,
) -> tuple[np.ndarray, np.ndarray]:
    """Generate sampling maps using a radial fisheye model.

    Every pixel offset from the distortion center is scaled by
    ``1 + k * (r / r_max) ** 2``, where ``r`` is the pixel's distance to the
    center and ``r_max`` is the distance from the center to the farthest
    pixel of the image. Normalizing by ``r_max`` keeps the strength of the
    effect independent of image resolution: the farthest pixel is always
    moved by exactly the factor ``1 + k``.

    Args:
        image_shape: Image shape; only the first two entries (height, width) are read.
        cx: x-coordinate of the distortion center, in pixels.
        cy: y-coordinate of the distortion center, in pixels.
        k: Distortion coefficient applied to the squared normalized radius.

    Returns:
        tuple of:
        - map_x: float32 array of shape (height, width) with absolute x sampling coordinates
        - map_y: float32 array of shape (height, width) with absolute y sampling coordinates
    """
    height, width = image_shape[:2]
    # Plain Python floats keep the array arithmetic below in float32.
    cx, cy, k = float(cx), float(cy), float(k)

    # Pixel coordinates relative to the distortion center.
    y, x = np.mgrid[:height, :width].astype(np.float32)
    x = x - cx
    y = y - cy

    # Squared distance from the center to the farthest pixel of the image.
    max_dx = max(abs(cx), abs(width - 1 - cx))
    max_dy = max(abs(cy), abs(height - 1 - cy))
    max_radius_sq = max_dx * max_dx + max_dy * max_dy

    r_sq = x * x + y * y
    if max_radius_sq > 0:
        scale = 1 + k * (r_sq / max_radius_sq)
    else:
        # Single pixel located exactly at the center: nothing to distort.
        scale = np.ones_like(r_sq)

    # Scaling the Cartesian offsets directly is equivalent to scaling the
    # radius in polar form and converting back, without the trigonometry.
    map_x = cx + x * scale
    map_y = cy + y * scale

    return map_x.astype(np.float32, copy=False), map_y.astype(np.float32, copy=False)

Check warning on line 3130 in albumentations/augmentations/geometric/functional.py

View check run for this annotation

Codecov / codecov/patch

albumentations/augmentations/geometric/functional.py#L3130

Added line #L3130 was not covered by tests
64 changes: 45 additions & 19 deletions albumentations/augmentations/geometric/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,30 +1402,49 @@
class OpticalDistortion(BaseDistortion):
"""Apply optical distortion to images, masks, bounding boxes, and keypoints.

This transformation simulates lens distortion effects by warping the image using
a camera matrix and distortion coefficients. It's particularly useful for
augmenting data in computer vision tasks where camera lens effects are relevant.
Supports two distortion models:
1. Camera matrix model (original):
Uses OpenCV's camera calibration model with k1=k2=k distortion coefficients

2. Fisheye model:
Direct radial distortion: r_dist = r * (1 + gamma * r²)

Args:
distort_limit (float or tuple of float): Range of distortion coefficient.
If distort_limit is a single float, the range will be (-distort_limit, distort_limit).
Default: (-0.05, 0.05).
shift_limit (float or tuple of float): Range of shifts for the image center.
If shift_limit is a single float, the range will be (-shift_limit, shift_limit).
Default: (-0.05, 0.05).
distort_limit (float | tuple[float, float]): Range of distortion coefficient.
For camera model: recommended range (-0.05, 0.05)
For fisheye model: recommended range (-0.3, 0.3)
Default: (-0.05, 0.05)

shift_limit (float | tuple[float, float]): Range of relative shifts for the image center.
Values are multiplied by image dimensions to get absolute shift in pixels:
- dx = shift_x * image_width
- dy = shift_y * image_height
If shift_limit is a single float value, the range will be (-shift_limit, shift_limit).
Default: (-0.05, 0.05)

mode (Literal['camera', 'fisheye']): Distortion model to use:
- 'camera': Original camera matrix model
- 'fisheye': Fisheye lens model
Default: 'camera'

interpolation (OpenCV flag): Interpolation method used for image transformation.
Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC,
cv2.INTER_AREA, cv2.INTER_LANCZOS4. Default: cv2.INTER_LINEAR.

border_mode (OpenCV flag): Border mode used for handling pixels outside the image.
Should be one of: cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT,
cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101. Default: cv2.BORDER_REFLECT_101.

value (int, float, list of int, list of float): Padding value if border_mode
is cv2.BORDER_CONSTANT. Default: None.

mask_value (int, float, list of int, list of float): Padding value for mask
if border_mode is cv2.BORDER_CONSTANT. Default: None.

mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
Default: cv2.INTER_NEAREST.

p (float): Probability of applying the transform. Default: 0.5.

Targets:
Expand All @@ -1439,6 +1458,8 @@
- The distortion coefficient (k) is randomly sampled from the distort_limit range.
- The image center is shifted by dx and dy, randomly sampled from the shift_limit range.
- Bounding boxes and keypoints are transformed along with the image to maintain consistency.
- Fisheye model directly applies radial distortion
- Both models use shift_limit to control distortion center

Example:
>>> import albumentations as A
Expand All @@ -1455,6 +1476,7 @@
class InitSchema(BaseDistortion.InitSchema):
distort_limit: SymmetricRangeType
shift_limit: SymmetricRangeType
mode: Literal["camera", "fisheye"]

def __init__(
self,
Expand All @@ -1465,6 +1487,7 @@
value: ColorType | None = None,
mask_value: ColorType | None = None,
mask_interpolation: int = cv2.INTER_NEAREST,
mode: Literal["camera", "fisheye"] = "camera",
p: float = 0.5,
always_apply: bool | None = None,
):
Expand All @@ -1478,28 +1501,31 @@
)
self.shift_limit = cast(tuple[float, float], shift_limit)
self.distort_limit = cast(tuple[float, float], distort_limit)
self.mode = mode

def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
height, width = params["shape"][:2]

fx = width
fy = height
image_shape = params["shape"][:2]
height, width = image_shape

# Get distortion coefficient
k = self.py_random.uniform(*self.distort_limit)
dx = round(self.py_random.uniform(*self.shift_limit))
dy = round(self.py_random.uniform(*self.shift_limit))

# Calculate center shift
dx = round(self.py_random.uniform(*self.shift_limit) * width)
dy = round(self.py_random.uniform(*self.shift_limit) * height)
cx = width * 0.5 + dx
cy = height * 0.5 + dy

camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
distortion = np.array([k, k, 0, 0, 0], dtype=np.float32)
map_x, map_y = cv2.initUndistortRectifyMap(camera_matrix, distortion, None, None, (width, height), cv2.CV_32FC1)
# Get distortion maps based on mode
if self.mode == "camera":
map_x, map_y = fgeometric.get_camera_matrix_distortion_maps(image_shape, cx, cy, k)
else: # fisheye
map_x, map_y = fgeometric.get_fisheye_distortion_maps(image_shape, cx, cy, k)

Check warning on line 1523 in albumentations/augmentations/geometric/transforms.py

View check run for this annotation

Codecov / codecov/patch

albumentations/augmentations/geometric/transforms.py#L1523

Added line #L1523 was not covered by tests

return {"map_x": map_x, "map_y": map_y}

def get_transform_init_args_names(self) -> tuple[str, ...]:
return (*super().get_transform_init_args_names(), "distort_limit", "shift_limit")
return ("distort_limit", "shift_limit", "mode", *super().get_transform_init_args_names())


class GridDistortion(BaseDistortion):
Expand Down