-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Issue sdv 1439 move anonymization (#729)
- Loading branch information
1 parent
2ee0077
commit 98c50d0
Showing
4 changed files
with
275 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
"""Anonymization module for the RDT PII Transformer.""" | ||
|
||
import inspect | ||
import warnings | ||
from functools import lru_cache | ||
|
||
from faker import Faker | ||
from faker.config import AVAILABLE_LOCALES | ||
|
||
from rdt.transformers import AnonymizedFaker | ||
|
||
# Predefined ``sdtype`` names mapped to the Faker provider and function used to anonymize them.
# Each row below is ``(sdtype, provider_name, function_name)``.
SDTYPE_ANONYMIZERS = {
    sdtype: {'provider_name': provider_name, 'function_name': function_name}
    for sdtype, provider_name, function_name in (
        ('address', 'address', 'address'),
        ('email', 'internet', 'email'),
        ('ipv4_address', 'internet', 'ipv4'),
        ('ipv6_address', 'internet', 'ipv6'),
        ('mac_address', 'internet', 'mac_address'),
        ('name', 'person', 'name'),
        ('phone_number', 'phone_number', 'phone_number'),
        ('ssn', 'ssn', 'ssn'),
        ('user_agent_string', 'user_agent', 'user_agent'),
    )
}
|
||
|
||
@lru_cache()
def get_faker_instance():
    """Create a ``faker.Faker`` built from ``AVAILABLE_LOCALES`` and return it.

    The function takes no arguments and is wrapped in ``lru_cache``, so the instance is
    built on the first call and the cached object is returned on later calls.
    """
    all_locales_faker = Faker(AVAILABLE_LOCALES)
    return all_locales_faker
|
||
|
||
def is_faker_function(function_name):
    """Tell whether ``function_name`` is an attribute of the shared ``Faker`` instance.

    Args:
        function_name (str):
            String representing predefined ``sdtype`` or a ``faker`` function.

    Returns:
        True if the ``function_name`` is known to ``Faker``, otherwise False.
    """
    try:
        # Warnings raised from the ``faker`` module during the lookup are silenced.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', module='faker')
            getattr(get_faker_instance(), function_name)
    except AttributeError:
        is_known = False
    else:
        is_known = True

    return is_known
|
||
|
||
def _detect_provider_name(function_name, locales=None):
    """Derive the provider name from the module that defines a ``Faker`` function.

    Args:
        function_name (str):
            Name of the attribute to look up on a ``Faker`` instance.
        locales (list or None):
            Value forwarded to ``Faker`` as its ``locale`` argument. Defaults to ``None``.

    Returns:
        str:
            ``'BaseProvider'`` when the defining module's dotted name has exactly two
            components; otherwise every component after the first two, joined with dots.
    """
    faker_function = getattr(Faker(locale=locales), function_name)
    module_parts = inspect.getmodule(faker_function).__name__.split('.')
    if len(module_parts) != 2:
        return '.'.join(module_parts[2:])

    return 'BaseProvider'
|
||
|
||
def get_anonymized_transformer(function_name, transformer_kwargs=None):
    """Get an instance with an ``AnonymizedFaker`` for the given ``function_name``.

    If ``function_name`` is a key of ``SDTYPE_ANONYMIZERS``, the predefined ``provider_name``
    and ``function_name`` are used. Otherwise the provider is detected from ``Faker`` and
    ``function_name`` is used as given. In both cases these two values override any
    ``provider_name`` / ``function_name`` entries present in ``transformer_kwargs``.

    Args:
        function_name (str):
            String representing predefined ``sdtype`` or a ``faker`` function.
        transformer_kwargs (dict):
            Keyword args to pass into AnonymizedFaker transformer. Optional. The dictionary
            passed in is not modified.

    Returns:
        AnonymizedFaker:
            The transformer built from the resolved keyword arguments.
    """
    # Work on a shallow copy: updating the caller's dictionary in place would leak the
    # resolved ``provider_name`` / ``function_name`` into any later reuse of that dictionary.
    transformer_kwargs = dict(transformer_kwargs) if transformer_kwargs else {}

    if function_name in SDTYPE_ANONYMIZERS:
        transformer_kwargs.update(SDTYPE_ANONYMIZERS[function_name])
        return AnonymizedFaker(**transformer_kwargs)

    # Not a predefined sdtype: look the provider up using the requested locales, if any.
    locales = transformer_kwargs.get('locales')
    provider_name = _detect_provider_name(function_name, locales=locales)
    transformer_kwargs.update({
        'function_name': function_name,
        'provider_name': provider_name
    })

    return AnonymizedFaker(**transformer_kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from faker import Faker | ||
|
||
from rdt.transformers.pii.anonymization import is_faker_function | ||
|
||
|
||
def test_is_faker_function():
    """Check that ``is_faker_function`` returns True for the ``address`` function."""
    # Run / Assert
    assert is_faker_function('address') is True
|
||
|
||
def test_is_faker_function_non_default_locale():
    """Check that a function missing from a default ``Faker()`` is still reported as valid."""
    # Setup
    locale_specific_name = 'postcode_in_province'

    # Run
    is_valid = is_faker_function(locale_specific_name)

    # Assert
    assert is_valid is True
    default_faker = Faker()
    assert not hasattr(default_faker, locale_specific_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
from unittest.mock import Mock, patch | ||
|
||
from rdt.transformers.pii.anonymization import ( | ||
_detect_provider_name, get_anonymized_transformer, get_faker_instance, is_faker_function) | ||
|
||
|
||
class TestAnonimization:
    """Unit tests for the helpers in ``rdt.transformers.pii.anonymization``."""

    def test__detect_provider_name(self):
        """Test the ``_detect_provider_name`` method.

        Test that the function returns an expected provider name from the ``faker.Faker``
        instance. If this is from the ``BaseProvider`` it should also return that name.

        Input:
            - Faker function name.

        Output:
            - The faker provider name for that function.
        """
        # Run
        email_provider = _detect_provider_name('email')
        lexify_provider = _detect_provider_name('lexify')
        state_provider = _detect_provider_name('state')

        # Assert
        assert email_provider == 'internet'
        assert lexify_provider == 'BaseProvider'
        assert state_provider == 'address.en_US'

    @patch('rdt.transformers.pii.anonymization.AnonymizedFaker')
    def test_get_anonymized_transformer_with_existing_sdtype(self, mock_anonymized_faker):
        """Test the ``get_anonymized_transformer`` method.

        Test that when calling with an existing ``sdtype`` / ``function_name`` from the
        ``SDTYPE_ANONYMIZERS`` dictionary, their ``provider_name`` and ``function_name`` are
        being used by default, and also other ``kwargs`` and provided locales are being passed
        to the ``AnonymizedFaker``.

        Input:
            - ``function_name`` from the ``SDTYPE_ANONYMIZERS``.
            - ``function_kwargs`` additional keyword arguments for that set of arguments.

        Mock:
            - Mock ``AnonymizedFaker`` and assert that has been called with the expected
              arguments.

        Output:
            - The return value must be the instance of ``AnonymizedFaker``.
        """
        # Run
        output = get_anonymized_transformer('email', transformer_kwargs={
            'function_kwargs': {'domain': '@gmail.com'}, 'locales': ['en_CA', 'fr_CA']
        })

        # Assert
        assert output == mock_anonymized_faker.return_value
        mock_anonymized_faker.assert_called_once_with(
            provider_name='internet',
            function_name='email',
            function_kwargs={'domain': '@gmail.com'},
            locales=['en_CA', 'fr_CA']
        )

    @patch('rdt.transformers.pii.anonymization.AnonymizedFaker')
    def test_get_anonymized_transformer_with_custom_sdtype(self, mock_anonymized_faker):
        """Test the ``get_anonymized_transformer`` method.

        Test that when calling with a custom ``sdtype`` / ``function_name`` that does not
        belong to the ``SDTYPE_ANONYMIZERS`` dictionary, the ``provider_name`` is being found
        automatically and other ``kwargs`` and provided locales are being passed to the
        ``AnonymizedFaker``.

        Input:
            - ``function_name`` color.
            - ``function_kwargs`` a dictionary with ``'hue': 'red'``.

        Mock:
            - Mock ``AnonymizedFaker`` and assert that has been called with the expected
              arguments.

        Output:
            - The return value must be the instance of ``AnonymizedFaker``.
        """
        # Run
        output = get_anonymized_transformer('color', transformer_kwargs={
            'function_kwargs': {'hue': 'red'}, 'locales': ['en_CA', 'fr_CA']
        })

        # Assert
        assert output == mock_anonymized_faker.return_value
        mock_anonymized_faker.assert_called_once_with(
            provider_name='color',
            function_name='color',
            function_kwargs={'hue': 'red'},
            locales=['en_CA', 'fr_CA']
        )

    @patch('rdt.transformers.pii.anonymization.get_faker_instance')
    def test_is_faker_function(self, mock_get_faker_instance):
        """Test that the method returns True if the ``function_name`` is a valid faker function.

        ``is_faker_function`` obtains its ``Faker`` object through ``get_faker_instance``, so
        that function is mocked to return an object that has the ``function_name`` attribute.
        Patching ``Faker`` itself would not be reliable because ``get_faker_instance`` is
        wrapped in ``lru_cache``.
        """
        # Setup
        mock_get_faker_instance.return_value = Mock(spec=['address'])

        # Run
        result = is_faker_function('address')

        # Assert
        assert result is True
        mock_get_faker_instance.assert_called_once()

    @patch('rdt.transformers.pii.anonymization.get_faker_instance')
    def test_is_faker_function_error(self, mock_get_faker_instance):
        """Test that the method returns False if ``function_name`` is not a valid faker function.

        If the ``function_name`` is not an attribute of the object returned by
        ``get_faker_instance`` then we should return False. This test mocks
        ``get_faker_instance`` to return an object without that attribute.
        """
        # Setup
        mock_get_faker_instance.return_value = Mock(spec=[])

        # Run
        result = is_faker_function('blah')

        # Assert
        assert result is False
        mock_get_faker_instance.assert_called_once()

    @patch('rdt.transformers.pii.anonymization.Faker')
    def test_get_faker_instance(self, mock_faker):
        """Test that ``get_faker_instance`` returns the same object."""
        # Setup
        # Start from an empty cache so the mocked ``Faker`` is really the one being called.
        get_faker_instance.cache_clear()

        # Run
        try:
            first_instance = get_faker_instance()
            second_instance = get_faker_instance()
        finally:
            # Never leave the mocked instance in the cache for other tests to pick up.
            get_faker_instance.cache_clear()

        # Assert
        assert first_instance is second_instance
        assert first_instance is mock_faker.return_value
        mock_faker.assert_called_once()