Add hflip and vflip (#30)

* Added docstring to from_float * Added hflip * Added benchmark * Clearer benchmark * cleanup
albumentations-team · Sep 18, 2024 · 5f9611f · 5f9611f
1 parent bbe7076
commit 5f9611f
Show file tree

Hide file tree

Showing 19 changed files with 527 additions and 18 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -53,13 +53,13 @@ repos:
   #   hooks:
   #     - id: markdownlint
   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "2.2.3"
+    rev: "2.2.4"
     hooks:
       - id: pyproject-fmt
         additional_dependencies: ["tomli"]
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.6.4
+    rev: v0.6.5
     hooks:
       # Run the linter.
       - id: ruff

diff --git a/albucore/__init__.py b/albucore/__init__.py
@@ -1,3 +1,4 @@
-__version__ = "0.0.15"
+__version__ = "0.0.16"
 
 from .functions import *
+from .utils import *
diff --git a/albucore/functions.py b/albucore/functions.py
@@ -13,6 +13,7 @@
     ValueType,
     clip,
     clipped,
+    contiguous,
     convert_value,
     get_max_value,
     get_num_channels,
@@ -574,16 +575,16 @@ def to_float(img: np.ndarray, max_value: float | None = None) -> np.ndarray:
     return to_float_numpy(img, max_value)
 
 
-def from_float_numpy(img: np.ndarray, dtype: np.dtype, max_value: float | None = None) -> np.ndarray:
+def from_float_numpy(img: np.ndarray, target_dtype: np.dtype, max_value: float | None = None) -> np.ndarray:
     if max_value is None:
-        max_value = get_max_value(dtype)
-    return clip(np.rint(img * max_value), dtype)
+        max_value = get_max_value(target_dtype)
+    return clip(np.rint(img * max_value), target_dtype)
 
 
 @preserve_channel_dim
-def from_float_opencv(img: np.ndarray, dtype: np.dtype, max_value: float | None = None) -> np.ndarray:
+def from_float_opencv(img: np.ndarray, target_dtype: np.dtype, max_value: float | None = None) -> np.ndarray:
     if max_value is None:
-        max_value = get_max_value(dtype)
+        max_value = get_max_value(target_dtype)
 
     img_float = img.astype(np.float32)
 
@@ -592,14 +593,62 @@ def from_float_opencv(img: np.ndarray, dtype: np.dtype, max_value: float | None
     if num_channels > MAX_OPENCV_WORKING_CHANNELS:
         # For images with more than 4 channels, create a full-sized multiplier
         max_value_array = np.full_like(img_float, max_value)
-        return clip(np.rint(cv2.multiply(img_float, max_value_array)), dtype)
+        return clip(np.rint(cv2.multiply(img_float, max_value_array)), target_dtype)
 
     # For images with 4 or fewer channels, use scalar multiplication
-    return clip(np.rint(img * max_value), dtype)
+    return clip(np.rint(img * max_value), target_dtype)
 
 
-def from_float(img: np.ndarray, dtype: np.dtype, max_value: float | None = None) -> np.ndarray:
+def from_float(img: np.ndarray, target_dtype: np.dtype, max_value: float | None = None) -> np.ndarray:
+    """Convert a floating-point image to the specified target data type.
+
+    This function converts an input floating-point image to the specified target data type,
+    scaling the values appropriately based on the max_value parameter or the maximum value
+    of the target data type.
+
+    Args:
+        img (np.ndarray): Input floating-point image array.
+        target_dtype (np.dtype): Target numpy data type for the output image.
+        max_value (float | None, optional): Maximum value to use for scaling. If None,
+            the maximum value of the target data type will be used. Defaults to None.
+
+    Returns:
+        np.ndarray: Image converted to the target data type.
+
+    Notes:
+        - If the input image is of type float32, the function uses OpenCV for faster processing.
+        - For other input types, it falls back to a numpy-based implementation.
+        - The function clips values to ensure they fit within the range of the target data type.
+    """
     if img.dtype == np.float32:
-        return from_float_opencv(img, dtype, max_value)
+        return from_float_opencv(img, target_dtype, max_value)
+
+    return from_float_numpy(img, target_dtype, max_value)
+
+
+@contiguous
+def hflip_numpy(img: np.ndarray) -> np.ndarray:
+    return img[:, ::-1, ...]
+
+
+@preserve_channel_dim
+def hflip_cv2(img: np.ndarray) -> np.ndarray:
+    return cv2.flip(img, 1)
+
+
+def hflip(img: np.ndarray) -> np.ndarray:
+    return hflip_cv2(img)
+
+
+@preserve_channel_dim
+def vflip_cv2(img: np.ndarray) -> np.ndarray:
+    return cv2.flip(img, 0)
+
+
+@contiguous
+def vflip_numpy(img: np.ndarray) -> np.ndarray:
+    return img[::-1, ...]
+
 
-    return from_float_numpy(img, dtype, max_value)
+def vflip(img: np.ndarray) -> np.ndarray:
+    return vflip_cv2(img)
diff --git a/benchmark.sh b/benchmark.sh
@@ -13,13 +13,20 @@ data_dir="$1"
 channels=(1 3 5)
 
 # Define the array of image types
-types=("uint8" "float32")
+types=("float32" "uint8")
 
 # Loop over each channel
 for ch in "${channels[@]}"; do
     # Nested loop over each image type
     for type in "${types[@]}"; do
         # Command to run your program, using the provided data directory
-        python -m benchmark.benchmark --num_channels $ch --img_type $type --markdown -n 2000 --show-std -r 5 -d "$data_dir"
+        python -m benchmark.benchmark \
+            --num_channels $ch \
+            --img_type $type \
+            --markdown \
+            -n 1000 \
+            --show-std \
+            -r 10 \
+            -d "$data_dir"
     done
 done
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
@@ -91,7 +91,7 @@ def __str__(self) -> str:
         return self.__class__.__name__
 
     def albucore(self, img: np.ndarray) -> np.ndarray:
-        return self.albucore_transform(img)
+        return clip(self.albucore_transform(img), img.dtype)
 
     def opencv(self, img: np.ndarray) -> np.ndarray:
         return clip(self.opencv_transform(img), img.dtype)
@@ -510,6 +510,40 @@ def torchvision_transform(self, img: torch.Tensor) -> torch.Tensor:
         return (img * MAX_VALUES_BY_DTYPE[self.dtype]).to(torch.uint8)
 
 
+class HorizontalFlip(BenchmarkTest):
+    def __init__(self, num_channels: int) -> None:
+        super().__init__(num_channels)
+
+    def albucore_transform(self, img: np.ndarray) -> np.ndarray:
+        return albucore.hflip(img)
+
+    def numpy_transform(self, img: np.ndarray) -> np.ndarray:
+        return albucore.hflip_numpy(img)
+
+    def opencv_transform(self, img: np.ndarray) -> np.ndarray:
+        return albucore.hflip_cv2(img)
+
+    def torchvision_transform(self, img: torch.Tensor) -> torch.Tensor:
+        return torchf.hflip(img)
+
+
+class VerticalFlip(BenchmarkTest):
+    def __init__(self, num_channels: int) -> None:
+        super().__init__(num_channels)
+
+    def albucore_transform(self, img: np.ndarray) -> np.ndarray:
+        return albucore.vflip(img)
+
+    def numpy_transform(self, img: np.ndarray) -> np.ndarray:
+        return albucore.vflip_numpy(img)
+
+    def opencv_transform(self, img: np.ndarray) -> np.ndarray:
+        return albucore.vflip_cv2(img)
+
+    def torchvision_transform(self, img: torch.Tensor) -> torch.Tensor:
+        return torchf.vflip(img)
+
+
 def get_images_from_dir(data_dir: Path, num_images: int, num_channels: int, dtype: str) -> list[np.ndarray]:
     image_paths = list(data_dir.expanduser().absolute().glob("*.*"))[:num_images]
     images = []
@@ -664,6 +698,8 @@ def main() -> None:
         MultiplyAdd,
         ToFloat,
         FromFloat,
+        HorizontalFlip,
+        VerticalFlip,
     ]
 
     args = parse_args()

diff --git a/benchmark/results/float32_1/HorizontalFlip.md b/benchmark/results/float32_1/HorizontalFlip.md
@@ -0,0 +1,27 @@
+# Benchmark Results: HorizontalFlip
+
+Number of images: 1000
+
+## CPU Information
+
+- CPU: Apple M1 Pro
+- Frequency: Current: 3228.00 MHz, Min: 600.00 MHz, Max: 3228.00 MHz
+- Physical cores: 10
+- Total cores: 10
+
+## Package Versions
+
+| Python                                   | albucore   | opencv-python-headless   | numpy   | torchvision   |
+|:-----------------------------------------|:-----------|:-------------------------|:--------|:--------------|
+| 3.8.19 (default, Mar 20 2024, 15:27:52)  | 0.0.14     | 4.9.0.80                 | 1.24.4  | 0.19.1        |
+| [Clang 14.0.6 ]                          |            |                          |         |               |
+
+## Performance (images/second)
+
+Raw data:
+                  albucore  lut      opencv       numpy torchvision
+HorizontalFlip  7655 ± 491  nan  4126 ± 239  3476 ± 119  5027 ± 318
+
+|                | albucore   |   lut | opencv     | numpy      | torchvision   |
+|:---------------|:-----------|------:|:-----------|:-----------|:--------------|
+| HorizontalFlip | 7655 ± 491 |   nan | 4126 ± 239 | 3476 ± 119 | 5027 ± 318    |
diff --git a/benchmark/results/float32_1/VerticalFlip.md b/benchmark/results/float32_1/VerticalFlip.md
@@ -0,0 +1,27 @@
+# Benchmark Results: VerticalFlip
+
+Number of images: 1000
+
+## CPU Information
+
+- CPU: Apple M1 Pro
+- Frequency: Current: 3228.00 MHz, Min: 600.00 MHz, Max: 3228.00 MHz
+- Physical cores: 10
+- Total cores: 10
+
+## Package Versions
+
+| Python                                   | albucore   | opencv-python-headless   | numpy   | torchvision   |
+|:-----------------------------------------|:-----------|:-------------------------|:--------|:--------------|
+| 3.8.19 (default, Mar 20 2024, 15:27:52)  | 0.0.14     | 4.9.0.80                 | 1.24.4  | 0.19.1        |
+| [Clang 14.0.6 ]                          |            |                          |         |               |
+
+## Performance (images/second)
+
+Raw data:
+                albucore  lut      opencv       numpy  torchvision
+VerticalFlip  2602 ± 600  nan  1800 ± 527  2276 ± 884  3585 ± 1422
+
+|              | albucore   |   lut | opencv     | numpy      | torchvision   |
+|:-------------|:-----------|------:|:-----------|:-----------|:--------------|
+| VerticalFlip | 2602 ± 600 |   nan | 1800 ± 527 | 2276 ± 884 | 3585 ± 1422   |
diff --git a/benchmark/results/float32_3/HorizontalFlip.md b/benchmark/results/float32_3/HorizontalFlip.md
@@ -0,0 +1,27 @@
+# Benchmark Results: HorizontalFlip
+
+Number of images: 1000
+
+## CPU Information
+
+- CPU: Apple M1 Pro
+- Frequency: Current: 3228.00 MHz, Min: 600.00 MHz, Max: 3228.00 MHz
+- Physical cores: 10
+- Total cores: 10
+
+## Package Versions
+
+| Python                                   | albucore   | opencv-python-headless   | numpy   | torchvision   |
+|:-----------------------------------------|:-----------|:-------------------------|:--------|:--------------|
+| 3.8.19 (default, Mar 20 2024, 15:27:52)  | 0.0.14     | 4.9.0.80                 | 1.24.4  | 0.19.1        |
+| [Clang 14.0.6 ]                          |            |                          |         |               |
+
+## Performance (images/second)
+
+Raw data:
+                 albucore  lut     opencv     numpy torchvision
+HorizontalFlip  885 ± 424  nan  777 ± 249  253 ± 32   337 ± 104
+
+|                | albucore   |   lut | opencv    | numpy    | torchvision   |
+|:---------------|:-----------|------:|:----------|:---------|:--------------|
+| HorizontalFlip | 885 ± 424  |   nan | 777 ± 249 | 253 ± 32 | 337 ± 104     |
diff --git a/benchmark/results/float32_3/VerticalFlip.md b/benchmark/results/float32_3/VerticalFlip.md
@@ -0,0 +1,27 @@
+# Benchmark Results: VerticalFlip
+
+Number of images: 1000
+
+## CPU Information
+
+- CPU: Apple M1 Pro
+- Frequency: Current: 3228.00 MHz, Min: 600.00 MHz, Max: 3228.00 MHz
+- Physical cores: 10
+- Total cores: 10
+
+## Package Versions
+
+| Python                                   | albucore   | opencv-python-headless   | numpy   | torchvision   |
+|:-----------------------------------------|:-----------|:-------------------------|:--------|:--------------|
+| 3.8.19 (default, Mar 20 2024, 15:27:52)  | 0.0.14     | 4.9.0.80                 | 1.24.4  | 0.19.1        |
+| [Clang 14.0.6 ]                          |            |                          |         |               |
+
+## Performance (images/second)
+
+Raw data:
+              albucore  lut    opencv     numpy torchvision
+VerticalFlip  432 ± 24  nan  364 ± 35  419 ± 56   480 ± 195
+
+|              | albucore   |   lut | opencv   | numpy    | torchvision   |
+|:-------------|:-----------|------:|:---------|:---------|:--------------|
+| VerticalFlip | 432 ± 24   |   nan | 364 ± 35 | 419 ± 56 | 480 ± 195     |
diff --git a/benchmark/results/float32_5/HorizontalFlip.md b/benchmark/results/float32_5/HorizontalFlip.md
@@ -0,0 +1,27 @@
+# Benchmark Results: HorizontalFlip
+
+Number of images: 1000
+
+## CPU Information
+
+- CPU: Apple M1 Pro
+- Frequency: Current: 3228.00 MHz, Min: 600.00 MHz, Max: 3228.00 MHz
+- Physical cores: 10
+- Total cores: 10
+
+## Package Versions
+
+| Python                                   | albucore   | opencv-python-headless   | numpy   | torchvision   |
+|:-----------------------------------------|:-----------|:-------------------------|:--------|:--------------|
+| 3.8.19 (default, Mar 20 2024, 15:27:52)  | 0.0.14     | 4.9.0.80                 | 1.24.4  | 0.19.1        |
+| [Clang 14.0.6 ]                          |            |                          |         |               |
+
+## Performance (images/second)
+
+Raw data:
+               albucore  lut    opencv     numpy torchvision
+HorizontalFlip  52 ± 15  nan  179 ± 20  136 ± 40      35 ± 3
+
+|                | albucore   |   lut | opencv   | numpy    | torchvision   |
+|:---------------|:-----------|------:|:---------|:---------|:--------------|
+| HorizontalFlip | 52 ± 15    |   nan | 179 ± 20 | 136 ± 40 | 35 ± 3        |
diff --git a/benchmark/results/float32_5/VerticalFlip.md b/benchmark/results/float32_5/VerticalFlip.md
@@ -0,0 +1,27 @@
+# Benchmark Results: VerticalFlip
+
+Number of images: 1000
+
+## CPU Information
+
+- CPU: Apple M1 Pro
+- Frequency: Current: 3228.00 MHz, Min: 600.00 MHz, Max: 3228.00 MHz
+- Physical cores: 10
+- Total cores: 10
+
+## Package Versions
+
+| Python                                   | albucore   | opencv-python-headless   | numpy   | torchvision   |
+|:-----------------------------------------|:-----------|:-------------------------|:--------|:--------------|
+| 3.8.19 (default, Mar 20 2024, 15:27:52)  | 0.0.14     | 4.9.0.80                 | 1.24.4  | 0.19.1        |
+| [Clang 14.0.6 ]                          |            |                          |         |               |
+
+## Performance (images/second)
+
+Raw data:
+              albucore  lut    opencv     numpy torchvision
+VerticalFlip  161 ± 56  nan  227 ± 11  226 ± 20      44 ± 8
+
+|              | albucore   |   lut | opencv   | numpy    | torchvision   |
+|:-------------|:-----------|------:|:---------|:---------|:--------------|
+| VerticalFlip | 161 ± 56   |   nan | 227 ± 11 | 226 ± 20 | 44 ± 8        |
diff --git a/benchmark/results/uint8_1/HorizontalFlip.md b/benchmark/results/uint8_1/HorizontalFlip.md
@@ -0,0 +1,27 @@
+# Benchmark Results: HorizontalFlip
+
+Number of images: 1000
+
+## CPU Information
+
+- CPU: Apple M1 Pro
+- Frequency: Current: 3228.00 MHz, Min: 600.00 MHz, Max: 3228.00 MHz
+- Physical cores: 10
+- Total cores: 10
+
+## Package Versions
+
+| Python                                   | albucore   | opencv-python-headless   | numpy   | torchvision   |
+|:-----------------------------------------|:-----------|:-------------------------|:--------|:--------------|
+| 3.8.19 (default, Mar 20 2024, 15:27:52)  | 0.0.14     | 4.9.0.80                 | 1.24.4  | 0.19.1        |
+| [Clang 14.0.6 ]                          |            |                          |         |               |
+
+## Performance (images/second)
+
+Raw data:
+                    albucore  lut        opencv       numpy torchvision
+HorizontalFlip  25404 ± 3064  nan  13977 ± 1410  5912 ± 312  7336 ± 366
+
+|                | albucore     |   lut | opencv       | numpy      | torchvision   |
+|:---------------|:-------------|------:|:-------------|:-----------|:--------------|
+| HorizontalFlip | 25404 ± 3064 |   nan | 13977 ± 1410 | 5912 ± 312 | 7336 ± 366    |