Skip to content

Commit

Permalink
Update entities_checker_util.py
Browse files: browse the repository at this point in the history
  • Loading branch information
jkshj21 authored Oct 13, 2023
1 parent 43c2986 commit 6fa28c0
Showing 1 changed file with 5 additions and 8 deletions.
13 changes: 5 additions & 8 deletions src/dfcx_scrapi/tools/entities_checker_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _get_entity_type_by_parameter_id(parameters, parameter_id):

return entity_type

def _get_tags_in_intents(self) -> pd.DataFrame:
def get_tags_in_intents(self) -> pd.DataFrame:
"""Get all the tag_texts that are referenced to the specific parameter id & entity type id in the training phrases in the intents
Returns:
Expand Down Expand Up @@ -102,7 +102,7 @@ def _get_tags_in_intents(self) -> pd.DataFrame:

return df

def _get_entity_types_df(self) -> pd.DataFrame:
def get_entity_types_df(self) -> pd.DataFrame:
"""Get all the entity types and store all the entity values and synonyms in one row
Returns:
Expand Down Expand Up @@ -179,7 +179,6 @@ def _unpack_nested_entity_types(self, df, target_kind_type):
else:
is_nested_entity_type = False
break

if new_entity_values and is_nested_entity_type:
df.loc[idx, 'entity_values'] = new_entity_values
df.loc[idx, 'synonyms'] = new_synonyms
Expand Down Expand Up @@ -207,8 +206,8 @@ def generate_hidden_synonym_tags(self) -> pd.DataFrame:
synonyms
is_hidden
"""
tags_intents = self._get_tags_in_intents()
entity_types_mapper = self._get_entity_types_df()
tags_intents = self.get_tags_in_intents()
entity_types_mapper = self.get_entity_types_df()
entity_types_mapper = self._unpack_nested_entity_types(entity_types_mapper, 'KIND_MAP')
hidden_entities = pd.merge(tags_intents, entity_types_mapper, on = 'entity_type_id')
hidden_entities = hidden_entities.drop(hidden_entities[~hidden_entities.kind.str.contains('KIND_MAP')].index)
Expand All @@ -218,13 +217,11 @@ def generate_hidden_synonym_tags(self) -> pd.DataFrame:
for idx, row in hidden_entities.iterrows():
synonyms = row['synonyms']
tag_text = row['tag_text']

for synonym in synonyms:
synonym = synonym.lower()
tag_text = tag_text.lower()
if [sub_synonym for sub_synonym in synonym if sub_synonym.isalnum()] == [sub_tag_text for sub_tag_text in tag_text if sub_tag_text.isalnum()]:
hidden_entities.loc[idx, 'is_hidden'] = 'NO'

if pd.isna(hidden_entities.loc[idx, 'is_hidden']):
hidden_entities.loc[idx, 'is_hidden'] = 'YES'

Expand All @@ -248,7 +245,7 @@ def generate_hidden_regex_tags(self) -> pd.DataFrame:
is_hidden
"""
tags_intents = self._get_tags_in_intents()
entity_types_mapper = self._get_entity_types_df()
entity_types_mapper = self.get_entity_types_df()
entity_types_mapper = self._unpack_nested_entity_types(entity_types_mapper, 'KIND_REGEX')
hidden_entities = pd.merge(tags_intents, entity_types_mapper, on = 'entity_type_id')
hidden_entities = hidden_entities.drop(hidden_entities[~hidden_entities.kind.str.contains('KIND_REGEX')].index)
Expand Down

0 comments on commit 6fa28c0

Please sign in to comment.