diff --git a/apps/chatbot/config/params.yaml b/apps/chatbot/config/params.yaml index 15c43866d..0dddd7daf 100644 --- a/apps/chatbot/config/params.yaml +++ b/apps/chatbot/config/params.yaml @@ -52,3 +52,35 @@ config_presidio: - ORGANIZATION - ORG default_score: 0.8 + allow_list: + - Discovery + - discovery + - Rif + - SEND + - send + - Send + - GPD + - gpd + - Gpd + - STATO + - stato + - PagoPA + - pagoPA + - pagopa + - Pagopa + - Firma con IO + - IO + - io + - Io + - PDND + - pdnd + - Pdnd + - IDPay + - idpay + - IDpay + - Idpay + - IdPay + - id-pay + - ID-Pay + - id-Pay + - ID-pay \ No newline at end of file diff --git a/apps/chatbot/src/modules/chatbot.py b/apps/chatbot/src/modules/chatbot.py index f9ffa44ba..4b715b7e9 100644 --- a/apps/chatbot/src/modules/chatbot.py +++ b/apps/chatbot/src/modules/chatbot.py @@ -127,7 +127,11 @@ def _unmask_reference(self, response_str: str, nodes) -> str: def mask_pii(self, message: str) -> str: if USE_PRESIDIO: try: - return self.pii.mask_pii(message) + split_message = message.split("Rif:") + masked_message = self.pii.mask_pii(split_message[0]) + if len(split_message)>1: + masked_message = masked_message + "Rif:" + split_message[1] + return masked_message except Exception as e: logging.warning(f"[chatbot.py - mask_pii] exception in mask_pii: {e}") else: diff --git a/apps/chatbot/src/modules/presidio.py b/apps/chatbot/src/modules/presidio.py index e839958f9..ef0122d88 100644 --- a/apps/chatbot/src/modules/presidio.py +++ b/apps/chatbot/src/modules/presidio.py @@ -35,10 +35,6 @@ "IT_PHYSICAL_ADDRESS" ] -ALLOW_LIST = [ - "Discovery", "discovery", "pagoPA", "PagoPA", "pagopa", "Rif" -] - class EntityTypeCountAnonymizer(Operator): """ @@ -90,7 +86,7 @@ class PresidioPII(): def __init__( self, - config: Union[Path, str] | dict, + config: dict, entity_mapping: Dict[str, Dict] = {}, mapping: Dict[str, str] = {}, entities: List[str] | None = None, @@ -103,12 +99,10 @@ def __init__( self.entities = entities if entities else GLOBAL_ENTITIES self.analyzer_threshold = analyzer_threshold - if isinstance(self.config, (Path, str)): - self.provider = NlpEngineProvider(conf_file=self.config) - elif isinstance(self.config, dict): + if isinstance(self.config, dict): self.provider = NlpEngineProvider(nlp_configuration=self.config) else: - raise ValueError("Error! config should be a path or a dictionary.") + raise ValueError("Error! config should be a dictionary.") nlp_engine = self.provider.create_engine() self.nlp_engine = nlp_engine self.analyzer = AnalyzerEngine( @@ -157,7 +151,7 @@ def detect_pii(self, text: str) -> List[RecognizerResult]: text=text, language=lang, entities=self.entities + IT_ENTITIES if lang == "it" else self.entities, - allow_list=ALLOW_LIST + allow_list=self.config["allow_list"] ) return results