From ff7d9045d513f83b2445488c96dc8053b5a935cf Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Mon, 12 Aug 2024 11:07:32 +0800 Subject: [PATCH 1/3] Add ToDType and fix seed bugs in RandAugment and TrivialAugmentWide --- keras_aug/_src/backend/image.py | 4 +- .../_src/layers/base/vision_random_layer.py | 14 ++- keras_aug/_src/layers/vision/rand_augment.py | 2 +- keras_aug/_src/layers/vision/to_dtype.py | 52 +++++++++++ keras_aug/_src/layers/vision/to_dtype_test.py | 93 +++++++++++++++++++ .../_src/layers/vision/trivial_augment.py | 6 +- keras_aug/_src/ops/image.py | 6 +- keras_aug/_src/utils/test_utils.py | 2 + keras_aug/layers/vision/__init__.py | 1 + 9 files changed, 173 insertions(+), 7 deletions(-) create mode 100644 keras_aug/_src/layers/vision/to_dtype.py create mode 100644 keras_aug/_src/layers/vision/to_dtype_test.py diff --git a/keras_aug/_src/backend/image.py b/keras_aug/_src/backend/image.py index 3ffab8b..b7a34b1 100644 --- a/keras_aug/_src/backend/image.py +++ b/keras_aug/_src/backend/image.py @@ -10,7 +10,7 @@ class ImageBackend(DynamicBackend): def __init__(self, name=None): super().__init__(name=name) - def transform_dtype(self, images, from_dtype, to_dtype): + def transform_dtype(self, images, from_dtype, to_dtype, scale=True): # Ref: torchvision.transforms.v2.ToDtype ops = self.backend from_dtype = backend.standardize_dtype(from_dtype) @@ -18,6 +18,8 @@ def transform_dtype(self, images, from_dtype, to_dtype): if from_dtype == to_dtype: return images + if scale is False: + return ops.cast(images, to_dtype) is_float_input = backend.is_float_dtype(from_dtype) is_float_output = backend.is_float_dtype(to_dtype) diff --git a/keras_aug/_src/layers/base/vision_random_layer.py b/keras_aug/_src/layers/base/vision_random_layer.py index 8f0f5f0..34d3e4c 100644 --- a/keras_aug/_src/layers/base/vision_random_layer.py +++ b/keras_aug/_src/layers/base/vision_random_layer.py @@ -99,6 +99,7 @@ def __init__(self, 
has_generator=True, seed=None, **kwargs): self._convert_input_args = False self._allow_non_tensor_positional_args = True self.autocast = False + self._transform_dtype_scale = True @property def image_dtype(self): @@ -122,6 +123,14 @@ def backend(self): def random_generator(self): return self._random_generator.random_generator + @property + def transform_dtype_scale(self): + return self._transform_dtype_scale + + @transform_dtype_scale.setter + def transform_dtype_scale(self, value): + self._transform_dtype_scale = bool(value) + def get_params( self, batch_size, @@ -389,7 +398,10 @@ def _cast_inputs(self, inputs): if self.IMAGES in inputs: inputs[self.IMAGES] = ops.convert_to_tensor(inputs[self.IMAGES]) inputs[self.IMAGES] = self.image_backend.transform_dtype( - inputs[self.IMAGES], inputs[self.IMAGES].dtype, self.image_dtype + inputs[self.IMAGES], + inputs[self.IMAGES].dtype, + self.image_dtype, + scale=self.transform_dtype_scale, ) if self.LABELS in inputs: inputs[self.LABELS] = ops.convert_to_tensor(inputs[self.LABELS]) diff --git a/keras_aug/_src/layers/vision/rand_augment.py b/keras_aug/_src/layers/vision/rand_augment.py index 3373d90..2ab930f 100644 --- a/keras_aug/_src/layers/vision/rand_augment.py +++ b/keras_aug/_src/layers/vision/rand_augment.py @@ -141,7 +141,7 @@ def get_params(self, batch_size, images=None, **kwargs): ops.numpy.log(fn_idx_p), self.num_ops, seed=random_generator ) fn_idx = fn_idx[0] - signed_p = ops.random.uniform([batch_size]) > 0.5 + signed_p = ops.random.uniform([batch_size], seed=random_generator) > 0.5 signed = ops.cast(ops.numpy.where(signed_p, 1.0, -1.0), dtype="float32") return dict( p=p, # shape: (batch_size,) diff --git a/keras_aug/_src/layers/vision/to_dtype.py b/keras_aug/_src/layers/vision/to_dtype.py new file mode 100644 index 0000000..62bd89a --- /dev/null +++ b/keras_aug/_src/layers/vision/to_dtype.py @@ -0,0 +1,52 @@ +import keras +from keras import backend + +from keras_aug._src.keras_aug_export import keras_aug_export 
+from keras_aug._src.layers.base.vision_random_layer import VisionRandomLayer + + +@keras_aug_export(parent_path=["keras_aug.layers.vision"]) +@keras.saving.register_keras_serializable(package="keras_aug") +class ToDType(VisionRandomLayer): + """Converts the input to a specific dtype, optionally scaling the values. + + If + + Args: + to_dtype: A string specifying the target dtype. + scale: Whether to scale the values. Defaults to `False`. + """ + + def __init__(self, to_dtype, scale=False, **kwargs): + to_dtype = backend.standardize_dtype(to_dtype) + self.scale = bool(scale) + if "dtype" in kwargs: + kwargs.pop("dtype") + super().__init__(has_generator=False, dtype=to_dtype, **kwargs) + self.to_dtype = to_dtype + self.transform_dtype_scale = self.scale + + def compute_output_shape(self, input_shape): + return input_shape + + def augment_images(self, images, transformations, **kwargs): + return images + + def augment_labels(self, labels, transformations, **kwargs): + return labels + + def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): + return bounding_boxes + + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): + return segmentation_masks + + def augment_keypoints(self, keypoints, transformations, **kwargs): + return keypoints + + def get_config(self): + config = super().get_config() + config.update({"to_dtype": self.to_dtype, "scale": self.scale}) + return config diff --git a/keras_aug/_src/layers/vision/to_dtype_test.py b/keras_aug/_src/layers/vision/to_dtype_test.py new file mode 100644 index 0000000..4c79ac6 --- /dev/null +++ b/keras_aug/_src/layers/vision/to_dtype_test.py @@ -0,0 +1,93 @@ +import keras +import numpy as np +from absl.testing import parameterized +from keras import backend +from keras.src import testing +from keras.src.testing.test_utils import named_product + +from keras_aug._src.layers.vision.to_dtype import ToDType +from keras_aug._src.utils.test_utils import get_images + + 
+class ToDTypeTest(testing.TestCase, parameterized.TestCase): + def setUp(self): + # Defaults to channels_last + self.data_format = backend.image_data_format() + backend.set_image_data_format("channels_last") + return super().setUp() + + def tearDown(self) -> None: + backend.set_image_data_format(self.data_format) + return super().tearDown() + + @parameterized.named_parameters( + named_product( + from_dtype=["uint8", "float16", "float32"], + to_dtype=["uint8", "float16", "float32"], + scale=[True, False], + ) + ) + def test_correctness(self, from_dtype, to_dtype, scale): + import torch + import torchvision.transforms.v2.functional as TF + from keras.src.backend.torch import to_torch_dtype + + # Test channels_last + x = get_images(from_dtype, "channels_last") + layer = ToDType(to_dtype, scale) + y = layer(x) + + ref_y = TF.to_dtype( + torch.tensor(np.transpose(x, [0, 3, 1, 2])), + dtype=to_torch_dtype(to_dtype), + scale=scale, + ) + ref_y = np.transpose(ref_y.cpu().numpy(), [0, 2, 3, 1]) + self.assertDType(y, to_dtype) + self.assertAllClose(y, ref_y) + + def test_shape(self): + # Test dynamic shape + x = keras.KerasTensor((None, None, None, 3)) + y = ToDType("float32", scale=True)(x) + self.assertEqual(y.shape, (None, None, None, 3)) + backend.set_image_data_format("channels_first") + x = keras.KerasTensor((None, 3, None, None)) + y = ToDType("float32", scale=True)(x) + self.assertEqual(y.shape, (None, 3, None, None)) + + # Test static shape + backend.set_image_data_format("channels_last") + x = keras.KerasTensor((None, 32, 32, 3)) + y = ToDType("float32", scale=True)(x) + self.assertEqual(y.shape, (None, 32, 32, 3)) + backend.set_image_data_format("channels_first") + x = keras.KerasTensor((None, 3, 32, 32)) + y = ToDType("float32", scale=True)(x) + self.assertEqual(y.shape, (None, 3, 32, 32)) + + def test_model(self): + layer = ToDType("float32", scale=True) + inputs = keras.layers.Input(shape=[None, None, 5]) + outputs = layer(inputs) + model = 
keras.models.Model(inputs, outputs) + self.assertEqual(model.output_shape, (None, None, None, 5)) + + def test_config(self): + x = get_images("float32", "channels_last") + layer = ToDType("float32", scale=True) + y = layer(x) + + layer = ToDType.from_config(layer.get_config()) + y2 = layer(x) + self.assertAllClose(y, y2) + + def test_tf_data_compatibility(self): + import tensorflow as tf + + layer = ToDType("float32", scale=True) + x = get_images("float32", "channels_last") + ds = tf.data.Dataset.from_tensor_slices(x).batch(2).map(layer) + for output in ds.take(1): + self.assertIsInstance(output, tf.Tensor) + self.assertEqual(output.shape, (2, 32, 32, 3)) diff --git a/keras_aug/_src/layers/vision/trivial_augment.py b/keras_aug/_src/layers/vision/trivial_augment.py index 4c688c9..c129928 100644 --- a/keras_aug/_src/layers/vision/trivial_augment.py +++ b/keras_aug/_src/layers/vision/trivial_augment.py @@ -119,13 +119,15 @@ def get_params(self, batch_size, images=None, **kwargs): random_generator = self.random_generator p = ops.random.uniform([batch_size], seed=random_generator) - magnitude = ops.random.randint([batch_size], 0, self.num_magnitude_bins) + magnitude = ops.random.randint( + [batch_size], 0, self.num_magnitude_bins, seed=random_generator + ) fn_idx_p = ops.convert_to_tensor([self.fn_idx_p]) fn_idx = ops.random.categorical( ops.numpy.log(fn_idx_p), 1, seed=random_generator ) fn_idx = fn_idx[0] - signed_p = ops.random.uniform([batch_size]) > 0.5 + signed_p = ops.random.uniform([batch_size], seed=random_generator) > 0.5 signed = ops.cast(ops.numpy.where(signed_p, 1.0, -1.0), dtype="float32") return dict( p=p, # shape: (batch_size,) diff --git a/keras_aug/_src/ops/image.py b/keras_aug/_src/ops/image.py index 13fb57e..47dc23c 100644 --- a/keras_aug/_src/ops/image.py +++ b/keras_aug/_src/ops/image.py @@ -5,9 +5,11 @@ @keras_aug_export(parent_path=["keras_aug.ops.image"]) -def transform_dtype(images, from_dtype, to_dtype): +def transform_dtype(images, 
from_dtype, to_dtype, scale=True): backend = "tensorflow" if in_tf_graph() else None - return ImageBackend(backend).transform_dtype(images, from_dtype, to_dtype) + return ImageBackend(backend).transform_dtype( + images, from_dtype, to_dtype, scale=scale + ) @keras_aug_export(parent_path=["keras_aug.ops.image"]) diff --git a/keras_aug/_src/utils/test_utils.py b/keras_aug/_src/utils/test_utils.py index 2d2dd57..614e5f3 100644 --- a/keras_aug/_src/utils/test_utils.py +++ b/keras_aug/_src/utils/test_utils.py @@ -9,6 +9,8 @@ def get_images(dtype, data_format="channels_first", size=(32, 32)): x = np.random.uniform(0, 1, (2, 3, *size)).astype(dtype) elif dtype == "bfloat16": x = np.random.uniform(0, 1, (2, 3, *size)).astype(dtype) + elif dtype == "float16": + x = np.random.uniform(0, 1, (2, 3, *size)).astype(dtype) elif dtype == "uint8": x = np.random.uniform(0, 255, (2, 3, *size)).astype(dtype) elif dtype == "int8": diff --git a/keras_aug/layers/vision/__init__.py b/keras_aug/layers/vision/__init__.py index da063c2..21283d7 100644 --- a/keras_aug/layers/vision/__init__.py +++ b/keras_aug/layers/vision/__init__.py @@ -35,4 +35,5 @@ from keras_aug._src.layers.vision.random_solarize import RandomSolarize from keras_aug._src.layers.vision.rescale import Rescale from keras_aug._src.layers.vision.resize import Resize +from keras_aug._src.layers.vision.to_dtype import ToDType from keras_aug._src.layers.vision.trivial_augment import TrivialAugmentWide From a0f05791cee17c162dce9905efbb557cfbca1cbe Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Mon, 12 Aug 2024 11:08:07 +0800 Subject: [PATCH 2/3] Update version --- keras_aug/__init__.py | 2 +- keras_aug/_src/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_aug/__init__.py b/keras_aug/__init__.py index 93896ec..79e6274 100644 --- a/keras_aug/__init__.py +++ b/keras_aug/__init__.py @@ -9,4 +9,4 @@ from keras_aug import visualization from 
keras_aug._src.version import version -__version__ = "1.0.1" +__version__ = "1.1.0" diff --git a/keras_aug/_src/version.py b/keras_aug/_src/version.py index 492743b..2526df4 100644 --- a/keras_aug/_src/version.py +++ b/keras_aug/_src/version.py @@ -1,6 +1,6 @@ from keras_aug._src.keras_aug_export import keras_aug_export -__version__ = "1.0.1" +__version__ = "1.1.0" @keras_aug_export("keras_aug") From 95f4d304d194082c2a412b16c4626ab0366e462f Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Mon, 12 Aug 2024 11:39:50 +0800 Subject: [PATCH 3/3] Loosen dtype constraint --- keras_aug/_src/backend/image.py | 23 ++++++++++++++++--- .../_src/layers/base/vision_random_layer.py | 9 +++++--- keras_aug/_src/layers/vision/to_dtype.py | 6 ++++- keras_aug/_src/layers/vision/to_dtype_test.py | 13 +++++++++-- keras_aug/_src/utils/test_utils.py | 6 +++++ 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/keras_aug/_src/backend/image.py b/keras_aug/_src/backend/image.py index b7a34b1..3184a4e 100644 --- a/keras_aug/_src/backend/image.py +++ b/keras_aug/_src/backend/image.py @@ -53,13 +53,30 @@ def transform_dtype(self, images, from_dtype, to_dtype, scale=True): num_bits_input = self._num_bits_of_dtype(from_dtype) num_bits_output = self._num_bits_of_dtype(to_dtype) + def right_shift(inputs, bits): + if self.name == "tensorflow": + import tensorflow as tf + + return tf.bitwise.right_shift(inputs, bits) + else: + return inputs >> bits + + def left_shift(inputs, bits): + if self.name == "tensorflow": + import tensorflow as tf + + return tf.bitwise.left_shift(inputs, bits) + else: + return inputs << bits + if num_bits_input > num_bits_output: return ops.cast( - images >> (num_bits_input - num_bits_output), to_dtype + right_shift(images, (num_bits_input - num_bits_output)), + to_dtype, ) else: - return ops.cast(images, to_dtype) << ( - num_bits_output - num_bits_input + return left_shift( + ops.cast(images, to_dtype), 
num_bits_output - num_bits_input ) def crop(self, images, top, left, height, width, data_format=None): diff --git a/keras_aug/_src/layers/base/vision_random_layer.py b/keras_aug/_src/layers/base/vision_random_layer.py index 34d3e4c..6946c9f 100644 --- a/keras_aug/_src/layers/base/vision_random_layer.py +++ b/keras_aug/_src/layers/base/vision_random_layer.py @@ -74,14 +74,17 @@ class VisionRandomLayer(keras.Layer): IS_DICT = "is_dict" BATCHED = "batched" + SUPPORTED_INT_DTYPES = ("uint8", "int16", "int32") + def __init__(self, has_generator=True, seed=None, **kwargs): super().__init__(**kwargs) # Check dtype if not backend.is_float_dtype(self.compute_dtype): - if self.compute_dtype != "uint8": + if self.compute_dtype not in self.SUPPORTED_INT_DTYPES: raise ValueError( - "Only floating and 'uint8' are supported for compute dtype." - f" Received: compute_dtype={self.compute_dtype}" + f"Only floating and {self.SUPPORTED_INT_DTYPES} are " + "supported for compute dtype. " + f"Received: compute_dtype={self.compute_dtype}" ) self._backend = DynamicBackend(backend.backend()) diff --git a/keras_aug/_src/layers/vision/to_dtype.py b/keras_aug/_src/layers/vision/to_dtype.py index 62bd89a..a4fe50e 100644 --- a/keras_aug/_src/layers/vision/to_dtype.py +++ b/keras_aug/_src/layers/vision/to_dtype.py @@ -10,7 +10,11 @@ class ToDType(VisionRandomLayer): """Converts the input to a specific dtype, optionally scaling the values. - If + If `scale` is `True`, the value range will be changed as follows: + - `"uint8"`: `[0, 255]` + - `"int16"`: `[-32768, 32767]` + - `"int32"`: `[-2147483648, 2147483647]` + - float: `[0.0, 1.0]` Args: to_dtype: A string specifying the target dtype. 
diff --git a/keras_aug/_src/layers/vision/to_dtype_test.py b/keras_aug/_src/layers/vision/to_dtype_test.py index 4c79ac6..d706737 100644 --- a/keras_aug/_src/layers/vision/to_dtype_test.py +++ b/keras_aug/_src/layers/vision/to_dtype_test.py @@ -22,8 +22,8 @@ def tearDown(self) -> None: @parameterized.named_parameters( named_product( - from_dtype=["uint8", "float16", "float32"], - to_dtype=["uint8", "float16", "float32"], + from_dtype=["uint8", "int16", "int32", "bfloat16", "float32"], + to_dtype=["uint8", "int16", "bfloat16", "float32"], scale=[True, False], ) ) @@ -37,13 +37,22 @@ def test_correctness(self, from_dtype, to_dtype, scale): layer = ToDType(to_dtype, scale) y = layer(x) + if from_dtype == "bfloat16": + x = x.astype("float32") ref_y = TF.to_dtype( torch.tensor(np.transpose(x, [0, 3, 1, 2])), dtype=to_torch_dtype(to_dtype), scale=scale, ) + + if to_dtype == "bfloat16": + y = keras.ops.cast(y, "float32") + ref_y = ref_y.to(torch.float32) + to_dtype = "float32" ref_y = np.transpose(ref_y.cpu().numpy(), [0, 2, 3, 1]) self.assertDType(y, to_dtype) + if from_dtype == "bfloat16" and to_dtype in ("uint8", "int16"): + return self.assertAllClose(y, ref_y) def test_shape(self): diff --git a/keras_aug/_src/utils/test_utils.py b/keras_aug/_src/utils/test_utils.py index 614e5f3..6dc42db 100644 --- a/keras_aug/_src/utils/test_utils.py +++ b/keras_aug/_src/utils/test_utils.py @@ -15,6 +15,12 @@ def get_images(dtype, data_format="channels_first", size=(32, 32)): x = np.random.uniform(0, 255, (2, 3, *size)).astype(dtype) elif dtype == "int8": x = np.random.uniform(-128, 127, (2, 3, *size)).astype(dtype) + elif dtype == "int16": + x = np.random.uniform(-32768, 32767, (2, 3, *size)).astype(dtype) + elif dtype == "int32": + x = np.random.uniform(-2147483648, 2147483647, (2, 3, *size)).astype( + dtype + ) if data_format == "channels_last": x = np.transpose(x, [0, 2, 3, 1]) return x