From 4ba24e8c1b477c701f20838229474a57d01a2416 Mon Sep 17 00:00:00 2001
From: Andrew Montanez <andrew@sdv.dev>
Date: Wed, 14 Aug 2024 14:54:49 -0500
Subject: [PATCH 1/8] =?UTF-8?q?Bump=20version:=201.12.3=20=E2=86=92=201.12?=
 =?UTF-8?q?.4.dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml  | 2 +-
 rdt/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 728ea2e0..8b9a9cb4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -137,7 +137,7 @@ collect_ignore = ['pyproject.toml']
 exclude_lines = ['NotImplementedError()']
 
 [tool.bumpversion]
-current_version = "1.12.3"
+current_version = "1.12.4.dev0"
 parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
 serialize = [
     '{major}.{minor}.{patch}.{release}{candidate}',
diff --git a/rdt/__init__.py b/rdt/__init__.py
index dbaaf673..a6b78122 100644
--- a/rdt/__init__.py
+++ b/rdt/__init__.py
@@ -4,7 +4,7 @@
 
 __author__ = 'DataCebo, Inc.'
 __email__ = 'info@sdv.dev'
-__version__ = '1.12.3'
+__version__ = '1.12.4.dev0'
 
 
 import sys

From 23df922e7538ff82fb253434bbb19ca501f3e28a Mon Sep 17 00:00:00 2001
From: SDV Team <98988753+sdv-team@users.noreply.github.com>
Date: Wed, 14 Aug 2024 16:39:10 -0400
Subject: [PATCH 2/8] Latest Code Analysis (#868)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 static_code_analysis.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/static_code_analysis.txt b/static_code_analysis.txt
index acc2df0d..af2bb1bb 100644
--- a/static_code_analysis.txt
+++ b/static_code_analysis.txt
@@ -1,10 +1,10 @@
-Run started:2024-07-09 19:56:52.363070
+Run started:2024-08-14 20:11:54.714181
 
 Test results:
 	No issues identified.
 
 Code scanned:
-	Total lines of code: 5539
+	Total lines of code: 5530
 	Total lines skipped (#nosec): 0
 	Total potential issues skipped due to specifically being disabled (e.g., #nosec BXXX): 0
 

From 8cccc819ab554d9ec6870b96dba195159c2a16be Mon Sep 17 00:00:00 2001
From: SDV Team <98988753+sdv-team@users.noreply.github.com>
Date: Tue, 20 Aug 2024 12:19:47 -0400
Subject: [PATCH 3/8] Automated Latest Dependency Updates (#870)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 latest_requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/latest_requirements.txt b/latest_requirements.txt
index 0d039b1a..4d27b67b 100644
--- a/latest_requirements.txt
+++ b/latest_requirements.txt
@@ -1,4 +1,4 @@
-Faker==26.3.0
+Faker==27.0.0
 copulas==0.11.0
 numpy==2.0.1
 pandas==2.2.2

From 2fab0919dd8864a9becad9c9fe8c2e5e2fff8b22 Mon Sep 17 00:00:00 2001
From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com>
Date: Fri, 23 Aug 2024 08:23:38 +0200
Subject: [PATCH 4/8] Make create_anonymized_columns work with multi columns
 transformer (#872)

---
 rdt/hyper_transformer.py             |  17 +++-
 tests/unit/test_hyper_transformer.py | 112 +++++++++++++++++++++++++++
 2 files changed, 128 insertions(+), 1 deletion(-)

diff --git a/rdt/hyper_transformer.py b/rdt/hyper_transformer.py
index 799a05a4..1ff2e8d9 100644
--- a/rdt/hyper_transformer.py
+++ b/rdt/hyper_transformer.py
@@ -871,8 +871,23 @@ def create_anonymized_columns(self, num_rows, column_names):
                 'list of valid column names.'
             )
 
+        columns_to_generate = set()
+        for column in column_names:
+            if column not in self._multi_column_fields:
+                columns_to_generate.add(column)
+                continue
+
+            multi_columns = self._multi_column_fields[column]
+            if any(col not in column_names for col in multi_columns):
+                raise InvalidConfigError(
+                    f"Column '{column}' is part of a multi-column field. You must include all "
+                    'columns inside the multi-column field to generate the anonymized columns.'
+                )
+
+            columns_to_generate.add(multi_columns)
+
         transformers = []
-        for column_name in column_names:
+        for column_name in sorted(columns_to_generate, key=str):  # keys mix str and tuple
             transformer = self.field_transformers.get(column_name)
             if not transformer.is_generator():
                 raise TransformerProcessingError(
diff --git a/tests/unit/test_hyper_transformer.py b/tests/unit/test_hyper_transformer.py
index 6260ecc9..dbdc2e80 100644
--- a/tests/unit/test_hyper_transformer.py
+++ b/tests/unit/test_hyper_transformer.py
@@ -1478,6 +1478,7 @@ def test_create_anonymized_columns(self):
         instance._modified_config = False
         instance._subset.return_value = False
         instance.random_state = {}
+        instance._multi_column_fields = {}
 
         random_element = AnonymizedFaker(
             function_name='random_element', function_kwargs={'elements': ['a']}
@@ -1622,6 +1623,7 @@ def test_create_anonymized_columns_invalid_transformers(self):
         instance._fitted = True
         instance._modified_config = False
         instance._subset.return_value = False
+        instance._multi_column_fields = {}
 
         instance.field_transformers = {
             'datetime': FloatFormatter(),
@@ -1641,6 +1643,116 @@ def test_create_anonymized_columns_invalid_transformers(self):
                 column_names=['datetime', 'random_element'],
             )
 
+    def test_create_anonymized_columns_multi_column_transformer(self):
+        """Test ``create_anonymized_columns`` with a multi-column transformer."""
+
+        class GeneratorTransformer(BaseMultiColumnTransformer):
+            IS_GENERATOR = True
+
+            def __init__(self):
+                super().__init__()
+                self.output_properties = {}
+
+            def _fit(self, data):
+                self.columns = list(data.columns)
+
+            def _transform(self, data):
+                return pd.DataFrame()
+
+            def _get_prefix(self):
+                return
+
+            def _reverse_transform(self, data):
+                num_rows = data.shape[0]
+                for column in self.columns:
+                    data[column] = np.arange(num_rows)
+
+                return data
+
+        # Setup
+        instance = HyperTransformer()
+        instance._multi_column_fields = {
+            'col1': ('col1', 'col2'),
+            'col2': ('col1', 'col2'),
+        }
+        generator = GeneratorTransformer()
+        instance.field_transformers = {
+            ('col1', 'col2'): generator,
+        }
+        instance.field_sdtypes = {
+            'col1': 'numerical',
+            'col2': 'numerical',
+        }
+        instance.fit(pd.DataFrame({'col1': [1, 2, 3], 'col2': [1, 2, 3]}))
+
+        # Run
+        output = instance.create_anonymized_columns(num_rows=5, column_names=['col1', 'col2'])
+
+        # Assert
+        expected_output = pd.DataFrame({
+            'col1': [0, 1, 2, 3, 4],
+            'col2': [0, 1, 2, 3, 4],
+        })
+        pd.testing.assert_frame_equal(output, expected_output, check_dtype=False)
+
+    def test_create_anonymized_columns_multi_column_transformer_error(self):
+        """Test ``create_anonymized_columns`` raises error with multi-column transformer.
+
+        Test that:
+            - An error occurs when some columns in the column_name list are part of a multi-column
+            transformer, but not all the required columns of the multi-column
+            transformer are present.
+            - An error is raised when a multi-column transformer is not a generator.
+        """
+
+        class MultiColumnTransformer(BaseMultiColumnTransformer):
+            IS_GENERATOR = False
+
+            def __init__(self):
+                super().__init__()
+                self.output_properties = {}
+
+            def _fit(self, data):
+                self.columns = list(data.columns)
+
+            def _transform(self, data):
+                return pd.DataFrame()
+
+            def _get_prefix(self):
+                return
+
+        # Setup
+        instance = HyperTransformer()
+        instance._multi_column_fields = {
+            'col1': ('col1', 'col2'),
+            'col2': ('col1', 'col2'),
+        }
+        not_generator = MultiColumnTransformer()
+        instance.field_transformers = {
+            ('col1', 'col2'): not_generator,
+        }
+        instance.field_sdtypes = {
+            'col1': 'numerical',
+            'col2': 'numerical',
+        }
+        instance.fit(pd.DataFrame({'col1': [1, 2, 3], 'col2': [1, 2, 3]}))
+
+        # Run and Assert
+        error_msg_not_all_multi_column = re.escape(
+            "Column 'col1' is part of a multi-column field. You must include all "
+            'columns inside the multi-column field to generate the anonymized columns.'
+        )
+        with pytest.raises(InvalidConfigError, match=error_msg_not_all_multi_column):
+            instance.create_anonymized_columns(num_rows=5, column_names=['col1'])
+
+        error_msg_not_generator = re.escape(
+            "Column '('col1', 'col2')' cannot be anonymized. All columns must be assigned to "
+            "'AnonymizedFaker', 'RegexGenerator' or other ``generator``. Use "
+            "'get_config()' to see the current transformer assignments."
+        )
+        with pytest.raises(TransformerProcessingError, match=error_msg_not_generator):
+            instance.create_anonymized_columns(num_rows=5, column_names=['col1', 'col2'])
+
     def test_reverse_transform(self):
         """Test the ``reverse_transform`` method.
 

From bb3262e2fa88264dbc8b4d9f497dddd87adee0f2 Mon Sep 17 00:00:00 2001
From: SDV Team <98988753+sdv-team@users.noreply.github.com>
Date: Mon, 26 Aug 2024 09:01:25 -0400
Subject: [PATCH 5/8] Automated Latest Dependency Updates (#873)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 latest_requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/latest_requirements.txt b/latest_requirements.txt
index 4d27b67b..be8e161e 100644
--- a/latest_requirements.txt
+++ b/latest_requirements.txt
@@ -1,5 +1,5 @@
-Faker==27.0.0
-copulas==0.11.0
+Faker==28.0.0
+copulas==0.11.1
 numpy==2.0.1
 pandas==2.2.2
 scikit-learn==1.5.1

From 50cb70724dfd6e28c39ae473a5dd72cfd3d08786 Mon Sep 17 00:00:00 2001
From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com>
Date: Wed, 28 Aug 2024 08:53:50 +0200
Subject: [PATCH 6/8] FloatFormatter does not round the data correctly for
 integer columns when using _set_fitted_parameters (#875)

---
 rdt/transformers/numerical.py                 |  3 ++-
 .../transformers/test_numerical.py            | 20 +++++++++++++++++++
 tests/unit/transformers/test_numerical.py     |  1 +
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/rdt/transformers/numerical.py b/rdt/transformers/numerical.py
index 64a5967f..1425670b 100644
--- a/rdt/transformers/numerical.py
+++ b/rdt/transformers/numerical.py
@@ -236,8 +236,9 @@ def _set_fitted_parameters(
             self._min_value = min(min_max_values)
             self._max_value = max(min_max_values)
 
-        if rounding_digits:
+        if rounding_digits is not None:
             self._rounding_digits = rounding_digits
+            self.learn_rounding_scheme = True
 
         if self.null_transformer.models_missing_values():
             self.output_columns.append(column_name + '.is_null')
diff --git a/tests/integration/transformers/test_numerical.py b/tests/integration/transformers/test_numerical.py
index 056cb94a..93a78ab6 100644
--- a/tests/integration/transformers/test_numerical.py
+++ b/tests/integration/transformers/test_numerical.py
@@ -287,6 +287,26 @@ def test__support__nullable_numerical_pandas_dtypes(self):
                 reverse_transformed[column].round(expected_rounding_digits[column]),
             )
 
+    def test__set_fitted_parameter_rounding_to_integer(self):
+        """Test the ``_set_fitted_parameters`` method with rounding_digits set to 0."""
+        # Setup
+        data = pd.DataFrame({
+            'col 1': 100 * np.random.random(10),
+        })
+        transformer = FloatFormatter()
+
+        # Run
+        transformer._set_fitted_parameters(
+            column_name='col 1',
+            null_transformer=NullTransformer(),
+            rounding_digits=0,
+            dtype='float',
+        )
+        reverse_transformed_data = transformer.reverse_transform(data)
+
+        # Assert
+        pd.testing.assert_frame_equal(reverse_transformed_data, data.round(0))
+
 
 class TestGaussianNormalizer:
     def test_stats(self):
diff --git a/tests/unit/transformers/test_numerical.py b/tests/unit/transformers/test_numerical.py
index 08a950ad..0af6b449 100644
--- a/tests/unit/transformers/test_numerical.py
+++ b/tests/unit/transformers/test_numerical.py
@@ -748,6 +748,7 @@ def test__set_fitted_parameters(self):
         assert transformer._max_value == 100.0
         assert transformer._rounding_digits == rounding_digits
         assert transformer._dtype == dtype
+        assert transformer.learn_rounding_scheme is True
 
     def test__set_fitted_parameters_from_column(self):
         """Test ``_set_fitted_parameters`` sets the required parameters for transformer."""

From b16c978972a1cc4b36bc0b0740c961c711934d85 Mon Sep 17 00:00:00 2001
From: SDV Team <98988753+sdv-team@users.noreply.github.com>
Date: Tue, 3 Sep 2024 11:25:51 -0400
Subject: [PATCH 7/8] Automated Latest Dependency Updates (#876)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 latest_requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/latest_requirements.txt b/latest_requirements.txt
index be8e161e..ca9c1343 100644
--- a/latest_requirements.txt
+++ b/latest_requirements.txt
@@ -1,6 +1,6 @@
-Faker==28.0.0
+Faker==28.1.0
 copulas==0.11.1
-numpy==2.0.1
+numpy==2.0.2
 pandas==2.2.2
 scikit-learn==1.5.1
 scipy==1.13.1

From e6e1cfd6c1352ebdc41e7b29d0bb066bb9e9cb04 Mon Sep 17 00:00:00 2001
From: Andrew Montanez <andrew@sdv.dev>
Date: Thu, 5 Sep 2024 13:18:27 -0500
Subject: [PATCH 8/8] =?UTF-8?q?Bump=20version:=201.12.4.dev0=20=E2=86=92?=
 =?UTF-8?q?=201.12.4.dev1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml  | 2 +-
 rdt/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8b9a9cb4..1bf22f4b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -137,7 +137,7 @@ collect_ignore = ['pyproject.toml']
 exclude_lines = ['NotImplementedError()']
 
 [tool.bumpversion]
-current_version = "1.12.4.dev0"
+current_version = "1.12.4.dev1"
 parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
 serialize = [
     '{major}.{minor}.{patch}.{release}{candidate}',
diff --git a/rdt/__init__.py b/rdt/__init__.py
index a6b78122..bbf0c0c8 100644
--- a/rdt/__init__.py
+++ b/rdt/__init__.py
@@ -4,7 +4,7 @@
 
 __author__ = 'DataCebo, Inc.'
 __email__ = 'info@sdv.dev'
-__version__ = '1.12.4.dev0'
+__version__ = '1.12.4.dev1'
 
 
 import sys