Skip to content

Commit

Permalink
Update entities_checker_util.py
Browse files Browse the repository at this point in the history
final version - tested thoroughly
  • Loading branch information
jkshj21 authored Oct 13, 2023
1 parent 6fa28c0 commit dfda1ea
Showing 1 changed file with 72 additions and 45 deletions.
117 changes: 72 additions & 45 deletions src/dfcx_scrapi/tools/entities_checker_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
datefmt="%Y-%m-%d %H:%M:%S",
)

class ParametersCheckerUtil(scrapi_base.ScrapiBase):
class EntitiesCheckerUtil(scrapi_base.ScrapiBase):
"""Utility class for checking DFCX Agent's parameters."""

def __init__(
Expand All @@ -34,10 +34,18 @@ def __init__(
creds=creds,
scope=scope,
)
self.agent_id = agent_id
if creds_path:
self.creds_path = creds_path
self.agent_id = agent_id

if creds_path:
self.creds_path = creds_path

self._intents = Intents(agent_id=self.agent_id, creds_path=self.creds_path)
self._entity_types = EntityTypes(agent_id=self.agent_id, creds_path=self.creds_path)
self.intents_df = pd.DataFrame()
self.entity_types_df = pd.DataFrame()
self._intents_list = []
self._entity_types_list = []

@staticmethod
def _get_entity_type_by_parameter_id(parameters, parameter_id):
"""static method that returns the entity type that is paired with the given parameter id"""
Expand All @@ -50,20 +58,8 @@ def _get_entity_type_by_parameter_id(parameters, parameter_id):

return entity_type

def get_tags_in_intents(self) -> pd.DataFrame:
"""Get all the tag_texts that are referenced to the specific parameter id & entity type id in the training phrases in the intents
Returns:
A dataframe with columns
intent_id - the intent name
intent - the intent display name
training_phrase - the training phrase in the intent
tag_text - the subset of the training phrase that is tagged with the specific entity type id
parameter_id - parameter id
entity_type_id - entity id
"""

df = pd.DataFrame({
def _set_intents_df(self) -> pd.DataFrame:
self.intents_df = pd.DataFrame({
"intent": pd.Series(dtype="str"),
"intent_id": pd.Series(dtype="str"),
"training_phrase": pd.Series(dtype="str"),
Expand Down Expand Up @@ -96,24 +92,13 @@ def get_tags_in_intents(self) -> pd.DataFrame:
"parameter_id": [pair[1]],
"entity_type_id": [pair[2]]
})
df = pd.concat([df, temp])
self.intents_df = pd.concat([self.intents_df, temp])

df = df.reset_index(drop=True)
self.intents_df = self.intents_df.reset_index(drop=True)

return df

def get_entity_types_df(self) -> pd.DataFrame:
"""Get all the entity types and store all the entity values and synonyms in one row
Returns:
A dataframe with columns
entity_type_id
entity_type
kind
entity_values - list of the [entity values]
synonyms - list of the [synonyms]
"""
df = pd.DataFrame({
def _set_entity_types_df(self):
self.entity_types_df = pd.DataFrame({
"entity_type_id": pd.Series(dtype="str"),
"entity_type": pd.Series(dtype="str"),
"kind": pd.Series(dtype="str"),
Expand All @@ -136,11 +121,10 @@ def get_entity_types_df(self) -> pd.DataFrame:
"kind": [entity_type.kind.name],
"entity_values": [entity_values],
"synonyms": [synonyms]})
df = pd.concat([df, temp])
self.entity_types_df = pd.concat([self.entity_types_df, temp])

df = df.reset_index(drop=True)
self.entity_types_df = self.entity_types_df.reset_index(drop=True)

return df

def _unpack_nested_entity_types(self, df, target_kind_type):
"""Unpacking the nested entity types to the comparable dataframe structure
Expand Down Expand Up @@ -186,6 +170,40 @@ def _unpack_nested_entity_types(self, df, target_kind_type):

return df

def get_tag_texts_in_intents(self) -> pd.DataFrame:
"""Get all the tag_texts that are referenced to the specific parameter id & entity type id in the training phrases in the intents
Returns:
A dataframe with columns
intent_id - the intent name
intent - the intent display name
training_phrase - the training phrase in the intent
tag_text - the subset of the training phrase that is tagged with the specific entity type id
parameter_id - parameter id
entity_type_id - entity id
"""

if self.intents_df.empty:
self._set_intents_df()

return self.intents_df

def get_entity_types_df(self) -> pd.DataFrame:
"""Get all the entity types and store all the entity values and synonyms in one row
Returns:
A dataframe with columns
entity_type_id
entity_type
kind
entity_values - list of the [entity values]
synonyms - list of the [synonyms]
"""
if self.entity_types_df.empty:
self._set_entity_types_df()

return self.entity_types_df

def generate_hidden_synonym_tags(self) -> pd.DataFrame:
""" Generate the overall stats that identify the incorrect tags in the training phrases by comparing with the entity type's synonyms
Merges the intents and the entity types dataframes to create the comparable dataframe
Expand All @@ -206,10 +224,14 @@ def generate_hidden_synonym_tags(self) -> pd.DataFrame:
synonyms
is_hidden
"""
tags_intents = self.get_tags_in_intents()
entity_types_mapper = self.get_entity_types_df()
entity_types_mapper = self._unpack_nested_entity_types(entity_types_mapper, 'KIND_MAP')
hidden_entities = pd.merge(tags_intents, entity_types_mapper, on = 'entity_type_id')
if self.intents_df.empty:
self._set_intents_df()

if self.entity_types_df.empty:
self._set_entity_types_df()

unpacked_entity_types_df = self._unpack_nested_entity_types(self.entity_types_df, 'KIND_MAP')
hidden_entities = pd.merge(self.intents_df, unpacked_entity_types_df, on = 'entity_type_id')
hidden_entities = hidden_entities.drop(hidden_entities[~hidden_entities.kind.str.contains('KIND_MAP')].index)
hidden_entities = hidden_entities.reset_index(drop=True)
hidden_entities['is_hidden'] = pd.Series(None, index=hidden_entities.index)
Expand All @@ -229,6 +251,7 @@ def generate_hidden_synonym_tags(self) -> pd.DataFrame:

def generate_hidden_regex_tags(self) -> pd.DataFrame:
""" Generate the overall stats that identify the incorrect tags in the training phrases by comparing with the entity type's regex
if the tag text in Intent is not matched with the regex then is_hidden = YES
Returns:
A dataframe with columns
Expand All @@ -244,10 +267,14 @@ def generate_hidden_regex_tags(self) -> pd.DataFrame:
synonyms
is_hidden
"""
tags_intents = self._get_tags_in_intents()
entity_types_mapper = self.get_entity_types_df()
entity_types_mapper = self._unpack_nested_entity_types(entity_types_mapper, 'KIND_REGEX')
hidden_entities = pd.merge(tags_intents, entity_types_mapper, on = 'entity_type_id')
if self.intents_df.empty:
self._set_intents_df()

if self.entity_types_df.empty:
self._set_entity_types_df()

unpacked_entity_types_df = self._unpack_nested_entity_types(self.entity_types_df, 'KIND_REGEX')
hidden_entities = pd.merge(self.intents_df, unpacked_entity_types_df, on = 'entity_type_id')
hidden_entities = hidden_entities.drop(hidden_entities[~hidden_entities.kind.str.contains('KIND_REGEX')].index)
hidden_entities = hidden_entities.reset_index(drop=True)
hidden_entities['is_hidden'] = pd.Series(None, index=hidden_entities.index)
Expand Down Expand Up @@ -277,7 +304,7 @@ def space_in_entity_values(self) -> pd.DataFrame:
has_space: if the entity value have the space(s) then YES else NO
entities_with_space: list of the entity values that have the space(s)
"""
entity_types_mapper = self._get_entity_types_df()
entity_types_mapper = self.get_entity_types_df()
entity_types_mapper = self._unpack_nested_entity_types(entity_types_mapper, 'KIND_MAP')
entity_types_mapper['has_space'] = pd.Series('NO', index=entity_types_mapper.index)
entity_types_mapper['entities_with_space'] = pd.Series('NA', index=entity_types_mapper.index)
Expand Down

0 comments on commit dfda1ea

Please sign in to comment.