Skip to content

Commit

Permalink
Update logic
Browse files (browse the repository at this point in the history)
  • Loading branch information
fealho committed Jan 11, 2024
1 parent de011e2 commit d7f7302
Showing 1 changed file with 16 additions and 37 deletions.
53 changes: 16 additions & 37 deletions rdt/transformers/text.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -154,47 +154,26 @@ def _reverse_transform(self, data):
else:
sample_size = self.data_length

if sample_size > self.generator_size:
if self.enforce_uniqueness:
raise TransformerProcessingError(
f'The regex is not able to generate {sample_size} unique values. '
f"Please use a different regex for column ('{self.get_input_column()}')."
)

warnings.warn(
f"The data has {sample_size} rows but the regex for '{self.get_input_column()}' "
f'can only create {self.generator_size} unique values. Some values in '
f"'{self.get_input_column()}' may be repeated."
)

remaining = self.generator_size - self.generated
if sample_size > self.generator_size - self.generated:
if self.enforce_uniqueness:
raise TransformerProcessingError(
f'The regex generator is not able to generate {sample_size} new unique '
f'values (only {remaining} unique value left). Please use '
"'reset_randomization' in order to restart the generator."
warnings.warn(
f"The regex for '{self.get_input_column()}' can only generate {sample_size} "
"unique values. Additional values may not exactly follow the provided regex."
)
else:
warnings.warn(
f"The data has {sample_size} rows but the regex for '{self.get_input_column()}' "
f'can only create {self.generator_size} unique values. Some values in '
f"'{self.get_input_column()}' may be repeated."
)

self.reset_randomization()
remaining = self.generator_size

if remaining >= sample_size:
reverse_transformed = np.array([
next(self.generator)
for _ in range(sample_size)
], dtype=object)

self.generated += sample_size
generated_values = list(self.generator)
reverse_transformed = []
while len(reverse_transformed) < sample_size:
remaining_samples = sample_size - len(reverse_transformed)
reverse_transformed.extend(generated_values[:remaining_samples])
self.generated += remaining_samples

else:
self.generated = self.generator_size
generated_values = list(self.generator)
reverse_transformed = []
while len(reverse_transformed) < sample_size:
remaining_samples = sample_size - len(reverse_transformed)
reverse_transformed.extend(generated_values[:remaining_samples])

reverse_transformed = np.array(reverse_transformed, dtype=object)
reverse_transformed = np.array(reverse_transformed, dtype=object)

return reverse_transformed

0 comments on commit d7f7302

Please sign in to comment.