Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove tokens and access keys from test data and docstrings #18

Merged
merged 1 commit into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions pebblo/entity_classifier/entity_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,12 @@ def presidio_entity_classifier_and_anonymizer(
Example:

input_text = " My SSN is 222-85-4836.
ITIN number 993-77 0690
And AWS Access Key is: AKIAQIPT4PDORIRTV6PH."
ITIN number 993-77 0690"
response:
entities = {'aws-access-key': 1, 'us-itin': 1, 'us-ssn': 1}
total_count = 3
entities = {'us-itin': 1, 'us-ssn': 1}
total_count = 2
anonymized_text = "My SSN is <US_SSN>.
ITIN number <US_ITIN>
And AWS Access Key is: <AWS_ACCESS_KEY>."
ITIN number <US_ITIN>"
"""
entities = {}
total_count = 0
Expand Down
3 changes: 0 additions & 3 deletions tests/entity_classifier/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Sachin's SSN is 222-85-4836.
ITIN number 993-77 0690
His AWS Access Key is: AKIAQIPT4PDORIRTV6PH.
And Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf
"""

input_text2 = """
Expand All @@ -19,11 +18,9 @@
Azure client secret : c4cb6f91-15a7-4e6d-a824-abcdef012345.
AWS Access Key is: AKIAQIPT4PDORIRTV6PH
AWS Secret Key is : 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
Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf
Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx
Slack Token is: xoxp-7676545380258-uygh
Azure Client Secret - c4cb6f91-15a7-4e6d-a824-abcdef012345
Slack Token - xoxb-3204014939555-4519358291237-TTIf0243T8YFSAGEVr1wBrWE
Google API key- KLzaSyB_tWrbmfWx8g2bzL7Vhq7znuTUn0JPKmY"
My IP Address - 10.55.60.61
"""
Expand Down
110 changes: 4 additions & 106 deletions tests/entity_classifier/test_entity_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def mocked_entity_classifier_response(mocker):

anonymize_response1: Tuple[list, str] = (
[
TestAnonymizerResult("GITHUB_TOKEN"),
TestAnonymizerResult("AWS_ACCESS_KEY"),
TestAnonymizerResult("US_ITIN"),
TestAnonymizerResult("US_SSN"),
Expand All @@ -53,9 +52,6 @@ def mocked_entity_classifier_response(mocker):
)
anonymize_response2: Tuple[list, str] = (
[
TestAnonymizerResult("SLACK_TOKEN"),
TestAnonymizerResult("SLACK_TOKEN"),
TestAnonymizerResult("GITHUB_TOKEN"),
TestAnonymizerResult("AWS_SECRET_KEY"),
TestAnonymizerResult("AWS_ACCESS_KEY"),
TestAnonymizerResult("US_ITIN"),
Expand Down Expand Up @@ -87,11 +83,6 @@ def mocked_entity_classifier_response(mocker):
"location": "77_97",
"confidence_score": 0.8,
},
{
"entity_type": "GITHUB_TOKEN",
"location": "120_210",
"confidence_score": 0.8,
},
]
analyzed_entities_response2: List[dict] = [
{"entity_type": "US_SSN", "location": "17_25", "confidence_score": 0.85},
Expand All @@ -101,11 +92,6 @@ def mocked_entity_classifier_response(mocker):
"location": "72_88",
"confidence_score": 0.8,
},
{
"entity_type": "GITHUB_TOKEN",
"location": "111_125",
"confidence_score": 0.8,
},
]
analyzed_entities_response3: List[dict] = [
{
Expand All @@ -130,21 +116,6 @@ def mocked_entity_classifier_response(mocker):
"location": "1587_1628",
"confidence_score": 0.8,
},
{
"entity_type": "GITHUB_TOKEN",
"location": "1646_1736",
"confidence_score": 0.8,
},
{
"entity_type": "SLACK_TOKEN",
"location": "1812_1835",
"confidence_score": 0.8,
},
{
"entity_type": "SLACK_TOKEN",
"location": "1911_1968",
"confidence_score": 0.8,
},
{"entity_type": "IP_ADDRESS", "location": "1339_1355", "confidence_score": 0.8},
]
analyzed_entities_response4: List[dict] = [
Expand All @@ -170,21 +141,6 @@ def mocked_entity_classifier_response(mocker):
"location": "1559_1575",
"confidence_score": 0.8,
},
{
"entity_type": "GITHUB_TOKEN",
"location": "1593_1607",
"confidence_score": 0.8,
},
{
"entity_type": "SLACK_TOKEN",
"location": "1683_1696",
"confidence_score": 0.8,
},
{
"entity_type": "SLACK_TOKEN",
"location": "1772_1785",
"confidence_score": 0.8,
},
{"entity_type": "IP_ADDRESS", "location": "1339_1355", "confidence_score": 0.8},
]
analyzed_entities_negative_response1: List = []
Expand Down Expand Up @@ -230,12 +186,11 @@ def test_presidio_entity_classifier_and_anonymizer(
entity_details,
) = entity_classifier.presidio_entity_classifier_and_anonymizer(input_text1)
assert entities == {
"github-token": 1,
"aws-access-key": 1,
"us-itin": 1,
"us-ssn": 1,
}
assert total_count == 4
assert total_count == 3
assert anonymized_text == input_text1
assert entity_details == {
"us-ssn": [
Expand All @@ -259,13 +214,6 @@ def test_presidio_entity_classifier_and_anonymizer(
"entity_group": "secrets_and_tokens",
}
],
"github-token": [
{
"location": "120_210",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
}
],
}

(
Expand All @@ -275,12 +223,11 @@ def test_presidio_entity_classifier_and_anonymizer(
entity_details,
) = entity_classifier.presidio_entity_classifier_and_anonymizer(input_text1, True)
assert entities == {
"github-token": 1,
"aws-access-key": 1,
"us-itin": 1,
"us-ssn": 1,
}
assert total_count == 4
assert total_count == 3
assert anonymized_text == mock_input_text1_anonymize_snippet_true
assert entity_details == {
"us-ssn": [
Expand All @@ -304,13 +251,6 @@ def test_presidio_entity_classifier_and_anonymizer(
"entity_group": "secrets_and_tokens",
}
],
"github-token": [
{
"location": "111_125",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
}
],
}

(
Expand All @@ -320,8 +260,6 @@ def test_presidio_entity_classifier_and_anonymizer(
entity_details,
) = entity_classifier.presidio_entity_classifier_and_anonymizer(input_text2)
assert entities == {
"slack-token": 2,
"github-token": 1,
"aws-access-key": 1,
"aws-secret-key": 1,
"us-itin": 1,
Expand All @@ -330,7 +268,7 @@ def test_presidio_entity_classifier_and_anonymizer(
"us-ssn": 1,
"ip-address": 1,
}
assert total_count == 10
assert total_count == 7
assert anonymized_text == input_text2
assert entity_details == {
"credit-card-number": [
Expand Down Expand Up @@ -375,25 +313,6 @@ def test_presidio_entity_classifier_and_anonymizer(
"entity_group": "secrets_and_tokens",
}
],
"github-token": [
{
"location": "1646_1736",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
}
],
"slack-token": [
{
"location": "1812_1835",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
},
{
"location": "1911_1968",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
},
],
"ip-address": [
{
"location": "1339_1355",
Expand All @@ -412,8 +331,6 @@ def test_presidio_entity_classifier_and_anonymizer(
input_text2, anonymize_snippets=True
)
assert entities == {
"slack-token": 2,
"github-token": 1,
"aws-access-key": 1,
"aws-secret-key": 1,
"us-itin": 1,
Expand All @@ -422,7 +339,7 @@ def test_presidio_entity_classifier_and_anonymizer(
"us-ssn": 1,
"ip-address": 1,
}
assert total_count == 10
assert total_count == 7
assert anonymized_text == mock_input_text2_anonymize_snippet_true
assert entity_details == {
"credit-card-number": [
Expand Down Expand Up @@ -467,25 +384,6 @@ def test_presidio_entity_classifier_and_anonymizer(
"entity_group": "secrets_and_tokens",
}
],
"github-token": [
{
"location": "1593_1607",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
}
],
"slack-token": [
{
"location": "1683_1696",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
},
{
"location": "1772_1785",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
},
],
"ip-address": [
{
"location": "1339_1355",
Expand Down