generated from ydataai/opensource-template
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: validate and process provided anonym inputs (#122)
* fix: validate and process provided anonym inputs - Validate anonymizer inputs and process in a format that is supported by integration and backend * fix(linting): code formatting * fix: typo for aux function - typo - reserved python word * fix: linter issues * fix(linting): code formatting --------- Co-authored-by: Azory YData Bot <[email protected]>
- Loading branch information
1 parent
d775d68
commit 70bcc82
Showing
3 changed files
with
60 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
""" | ||
Validate and process the payload for the synthesizers anonymizer | ||
""" | ||
|
||
from ydata.datascience.common import AnonymizerType | ||
|
||
|
||
def build_and_validate_anonimization(anonimyze: dict, cols: list) -> dict:
    """Validate an anonymization config and normalise it to the nested format.

    Each entry of ``anonimyze`` maps a column name to either:

    * a dict holding a ``'type'`` key plus any extra arguments, or
    * a single value naming the anonymization method. A value that
      ``AnonymizerType.get_anonymizer_type`` does not resolve (returns
      ``None``) is treated as a regex pattern.

    Both shapes may be mixed in the same configuration; every entry is
    handled according to its own shape.

    Args:
        anonimyze: Anonymization configuration keyed by column name.
        cols: Column names available in the dataset.

    Returns:
        A new dict mapping each configured column to
        ``{'type': <anonymizer type value>, ...}``. The input dict and its
        nested dicts are left untouched.

    Raises:
        ValueError: If a configured key is not in ``cols``, if a dict entry
            has no ``'type'`` key, or if a dict entry's ``'type'`` is not
            resolved by ``AnonymizerType.get_anonymizer_type``.
    """
    if any(column not in cols for column in anonimyze):
        # AnonymizationConfigurationError
        raise ValueError(
            'The keys in your configuration must exactly match the column names in the provided dataset. Please check and update your inputs to ensure they align.')

    config = {}
    for column, spec in anonimyze.items():
        if isinstance(spec, dict):
            if 'type' not in spec:
                raise ValueError(
                    "The provided configuration is not correct. Make sure that your "
                    "anonymization config follow one of the following formats:\n"
                    "    {\n"
                    "        'col_name': {'type': 'anonymization_method', kwargs**}\n"
                    "    } or\n"
                    "    {\n"
                    "        'col_name': 'anonymization_method'\n"
                    "    }\n")

            anon_type = AnonymizerType.get_anonymizer_type(spec['type'])
            if anon_type is None:
                # Fail with a clear message instead of AttributeError on None.value.
                raise ValueError(
                    f"Unknown anonymization type {spec['type']!r} for column {column!r}.")

            # Build a copy so the caller's configuration is never mutated.
            config[column] = {**spec, 'type': anon_type.value}
        else:
            anon_type = AnonymizerType.get_anonymizer_type(spec)
            if anon_type is None:
                # Not a known anonymizer name: interpret the value as a regex.
                config[column] = {
                    'type': AnonymizerType.REGEX.value, 'regex': spec}
            else:
                config[column] = {'type': anon_type.value}

    return config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters