Skip to content

Commit

Permalink
convert warning to info message
Browse files Browse the repository at this point in the history
  • Loading branch information
R-Palazzo committed Jan 17, 2024
1 parent 853a413 commit 1a18939
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 5 deletions.
12 changes: 7 additions & 5 deletions rdt/transformers/text.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Transformers for text data."""
import warnings
import logging

import numpy as np
import pandas as pd
Expand All @@ -8,6 +8,8 @@
from rdt.transformers.base import BaseTransformer
from rdt.transformers.utils import strings_from_regex

LOGGER = logging.getLogger(__name__)


class IDGenerator(BaseTransformer):
"""Generate an ID column.
Expand Down Expand Up @@ -161,10 +163,10 @@ def _reverse_transform(self, data):
f"Please use a different regex for column ('{self.get_input_column()}')."
)

warnings.warn(
f"The data has {sample_size} rows but the regex for '{self.get_input_column()}' "
f'can only create {self.generator_size} unique values. Some values in '
f"'{self.get_input_column()}' may be repeated."
LOGGER.info(
"The data has %s rows but the regex for '%s' can only create %s unique values."
" Some values in '%s' may be repeated.",
sample_size, self.get_input_column(), self.generator_size, self.get_input_column()
)

remaining = self.generator_size - self.generated
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/transformers/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,3 +485,31 @@ def test__reverse_transform_enforce_uniqueness_not_enough_remaining(self):
)
with pytest.raises(TransformerProcessingError, match=error_msg):
instance._reverse_transform(columns_data)

@patch('rdt.transformers.text.LOGGER')
def test__reverse_transform_info_message(self, mock_logger):
    """Test that ``_reverse_transform`` logs an info message.

    Validate that the ``_reverse_transform`` method logs an info message when
    ``enforce_uniqueness`` is ``False`` and the ``instance.data_length`` is bigger than
    ``instance.generator_size``.

    Setup:
        - A ``RegexGenerator`` with ``enforce_uniqueness=False`` whose ``data_length`` (6)
          is bigger than its ``generator_size`` (5).
        - The module-level ``LOGGER`` of ``rdt.transformers.text`` is patched.

    Side Effects:
        - ``LOGGER.info`` is called exactly once with the lazy ``%s`` format string and
          its arguments passed separately (not pre-formatted).
    """
    # Setup
    instance = RegexGenerator('[A-Z]', enforce_uniqueness=False)
    instance.data_length = 6
    instance.generator_size = 5
    instance.generated = 0
    instance.columns = ['a']
    # Pass an explicit dtype: a bare ``pd.Series()`` emits a DeprecationWarning on
    # pandas < 2.0 and its default dtype differs between pandas versions.
    columns_data = pd.Series(dtype=object)

    # Run
    instance._reverse_transform(columns_data)

    # Assert
    expected_format = (
        "The data has %s rows but the regex for '%s' can only create %s unique values. Some "
        "values in '%s' may be repeated."
    )
    expected_args = (6, 'a', 5, 'a')

    mock_logger.info.assert_called_once_with(expected_format, *expected_args)

0 comments on commit 1a18939

Please sign in to comment.