Skip to content

Commit

Permalink
[CAI-249] No masked reference links (#1229)
Browse files Browse the repository at this point in the history
* Update the mask_pii method to mask only the response content and not the reference links

* Update presidio to accept the allow list from params.yaml

* Update params.yaml
  • Loading branch information
mdciri authored Nov 5, 2024
1 parent 344fdbc commit 00d6250
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 11 deletions.
32 changes: 32 additions & 0 deletions apps/chatbot/config/params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,35 @@ config_presidio:
- ORGANIZATION
- ORG
default_score: 0.8
allow_list:
- Discovery
- discovery
- Rif
- SEND
- send
- Send
- GPD
- gpd
- Gpd
- STATO
- stato
- PagoPA
- pagoPA
- pagopa
- Pagopa
- Firma con IO
- IO
- io
- Io
- PDND
- pdnd
- Pdnd
- IDPay
- idpay
- IDpay
- Idpay
- IdPay
- id-pay
- ID-Pay
- id-Pay
- ID-pay
6 changes: 5 additions & 1 deletion apps/chatbot/src/modules/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ def _unmask_reference(self, response_str: str, nodes) -> str:
def mask_pii(self, message: str) -> str:
if USE_PRESIDIO:
try:
return self.pii.mask_pii(message)
split_message = message.split("Rif:")
masked_message = self.pii.mask_pii(split_message[0])
if len(split_message)>1:
masked_message = masked_message + "Rif:" + split_message[1]
return masked_message
except Exception as e:
logging.warning(f"[chatbot.py - mask_pii] exception in mask_pii: {e}")
else:
Expand Down
14 changes: 4 additions & 10 deletions apps/chatbot/src/modules/presidio.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@
"IT_PHYSICAL_ADDRESS"
]

ALLOW_LIST = [
"Discovery", "discovery", "pagoPA", "PagoPA", "pagopa", "Rif"
]


class EntityTypeCountAnonymizer(Operator):
"""
Expand Down Expand Up @@ -90,7 +86,7 @@ class PresidioPII():

def __init__(
self,
config: Union[Path, str] | dict,
config: dict,
entity_mapping: Dict[str, Dict] = {},
mapping: Dict[str, str] = {},
entities: List[str] | None = None,
Expand All @@ -103,12 +99,10 @@ def __init__(
self.entities = entities if entities else GLOBAL_ENTITIES
self.analyzer_threshold = analyzer_threshold

if isinstance(self.config, (Path, str)):
self.provider = NlpEngineProvider(conf_file=self.config)
elif isinstance(self.config, dict):
if isinstance(self.config, dict):
self.provider = NlpEngineProvider(nlp_configuration=self.config)
else:
raise ValueError("Error! config should be a path or a dictionary.")
raise ValueError("Error! config should be a dictionary.")
nlp_engine = self.provider.create_engine()
self.nlp_engine = nlp_engine
self.analyzer = AnalyzerEngine(
Expand Down Expand Up @@ -157,7 +151,7 @@ def detect_pii(self, text: str) -> List[RecognizerResult]:
text=text,
language=lang,
entities=self.entities + IT_ENTITIES if lang == "it" else self.entities,
allow_list=ALLOW_LIST
allow_list=self.config["allow_list"]
)

return results
Expand Down

0 comments on commit 00d6250

Please sign in to comment.