From 2b56a2f3f06b2e7ddc72c35ea73e1d8bf387124a Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 24 Feb 2023 14:28:56 -0600 Subject: [PATCH 001/151] BUG #110 --- src/dfcx_scrapi/core/transition_route_groups.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/dfcx_scrapi/core/transition_route_groups.py b/src/dfcx_scrapi/core/transition_route_groups.py index 0d9990c9..074a4f78 100644 --- a/src/dfcx_scrapi/core/transition_route_groups.py +++ b/src/dfcx_scrapi/core/transition_route_groups.py @@ -320,9 +320,11 @@ def route_groups_to_dataframe( temp_dict.update({"route_group_name": route_group.display_name}) if route.target_page: - temp_dict.update( - {"target_page": all_pages_map[route.target_page]} - ) + t_p = all_pages_map.get(route.target_page) + if not t_p: + t_p = str(route.target_page).split("/")[-1] + + temp_dict.update({"target_page": t_p}) if route.intent: temp_dict.update({"intent": intents_map[route.intent]}) From e627dcbb08b62c026dec8221b11bc86c63d193fe Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 24 Feb 2023 14:29:31 -0600 Subject: [PATCH 002/151] BUG #111 --- src/dfcx_scrapi/core/webhooks.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/dfcx_scrapi/core/webhooks.py b/src/dfcx_scrapi/core/webhooks.py index 9731ecab..17128266 100644 --- a/src/dfcx_scrapi/core/webhooks.py +++ b/src/dfcx_scrapi/core/webhooks.py @@ -225,11 +225,13 @@ def update_webhook( webhook_obj.name = webhook_id - # set environment attributes from kwargs - for key, value in kwargs.items(): - setattr(webhook_obj, key, value) - paths = kwargs.keys() - mask = field_mask_pb2.FieldMask(paths=paths) + mask = None + if kwargs: + # set environment attributes from kwargs + for key, value in kwargs.items(): + setattr(webhook_obj, key, value) + paths = kwargs.keys() + mask = field_mask_pb2.FieldMask(paths=paths) client_options = 
self._set_region(webhook_id) client = services.webhooks.WebhooksClient( @@ -237,7 +239,8 @@ def update_webhook( request = types.webhook.UpdateWebhookRequest() request.webhook = webhook_obj - request.update_mask = mask + if mask: + request.update_mask = mask response = client.update_webhook(request) From dd49a64c4cc11747d90a6c87594c14db93154d4a Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 24 Feb 2023 14:38:29 -0600 Subject: [PATCH 003/151] BUG #112 --- src/dfcx_scrapi/tools/webhook_util.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/dfcx_scrapi/tools/webhook_util.py b/src/dfcx_scrapi/tools/webhook_util.py index 6bddb296..526d678c 100644 --- a/src/dfcx_scrapi/tools/webhook_util.py +++ b/src/dfcx_scrapi/tools/webhook_util.py @@ -74,7 +74,9 @@ def build_session_info(parameters): return session_info @staticmethod - def build_response(response_text=None, page_info=None, session_info=None): + def build_response( + response_text=None, page_info=None, session_info=None, action="replace" + ): """Builds a Response object for Dialogflow CX. Provides the JSON object structure expected by DFCX for the Response @@ -85,12 +87,18 @@ def build_response(response_text=None, page_info=None, session_info=None): response_text: The text response to be displayed to the user. Can also be empty string if no response to the user is required. page_info: (Optional) The JSON object returned by build_page_info() - session_info: (Optiona) The JSON object returned by + session_info: (Optional) The JSON object returned by build_session_info() + action: (Optional) Whether messages will replace or append to + the list of messages waiting to be sent to the user. 
+ """ + if action not in ["replace", "append"]: + raise ValueError("`action` should be in ['replace', 'append'].") + action = action.upper() if response_text: response_object = { - 'mergeBehavior': 'REPLACE', + 'mergeBehavior': action, 'messages': [ { 'text': { From bd79d8f843e6836eaf5c416ec805c87650fd0ff7 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 24 Feb 2023 14:46:31 -0600 Subject: [PATCH 004/151] lint fix --- src/dfcx_scrapi/core/transition_route_groups.py | 2 +- src/dfcx_scrapi/tools/webhook_util.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/dfcx_scrapi/core/transition_route_groups.py b/src/dfcx_scrapi/core/transition_route_groups.py index 074a4f78..96e6343e 100644 --- a/src/dfcx_scrapi/core/transition_route_groups.py +++ b/src/dfcx_scrapi/core/transition_route_groups.py @@ -322,7 +322,7 @@ def route_groups_to_dataframe( if route.target_page: t_p = all_pages_map.get(route.target_page) if not t_p: - t_p = str(route.target_page).split("/")[-1] + t_p = str(route.target_page).rsplit("/", maxsplit=1)[-1] temp_dict.update({"target_page": t_p}) diff --git a/src/dfcx_scrapi/tools/webhook_util.py b/src/dfcx_scrapi/tools/webhook_util.py index 526d678c..4a26ed7f 100644 --- a/src/dfcx_scrapi/tools/webhook_util.py +++ b/src/dfcx_scrapi/tools/webhook_util.py @@ -75,7 +75,7 @@ def build_session_info(parameters): @staticmethod def build_response( - response_text=None, page_info=None, session_info=None, action="replace" + response_text=None, page_info=None, session_info=None, action='replace' ): """Builds a Response object for Dialogflow CX. @@ -91,10 +91,9 @@ def build_response( build_session_info() action: (Optional) Whether messages will replace or append to the list of messages waiting to be sent to the user. 
- """ - if action not in ["replace", "append"]: - raise ValueError("`action` should be in ['replace', 'append'].") + if action.casefold() not in ['replace', 'append']: + raise ValueError('`action` should be in ["replace", "append"].') action = action.upper() if response_text: response_object = { From 6a7c857cdd858c8579c67aa3714b71e3add8d6f3 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Wed, 15 Mar 2023 12:40:27 -0500 Subject: [PATCH 005/151] BUG #114 --- src/dfcx_scrapi/builders/intents.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/builders/intents.py b/src/dfcx_scrapi/builders/intents.py index 124dbe05..0a99f3bc 100644 --- a/src/dfcx_scrapi/builders/intents.py +++ b/src/dfcx_scrapi/builders/intents.py @@ -60,6 +60,7 @@ def _include_spaces_to_phrase(self, phrase: List[str], annots: List[str]): A list of strings that represents parameter_id of each part in phrase. """ + chars_to_ignore_at_beginning = ["'", ",", ".", "?", "!"] i = 0 while True: p_curr, a_curr = phrase[i], annots[i] @@ -73,7 +74,13 @@ def _include_spaces_to_phrase(self, phrase: List[str], annots: List[str]): annots.insert(i+1, "") i += 2 elif a_curr and not a_next: - phrase[i+1] = " " + p_next + flag = any( + ch + for ch in chars_to_ignore_at_beginning + if p_next.startswith(ch) + ) + if not flag: + phrase[i+1] = " " + p_next i += 1 elif not a_curr and a_next: phrase[i] = p_curr + " " From d965ddba52315eb5c8952d81d6fbf2eb7a64bf29 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Wed, 15 Mar 2023 12:53:50 -0500 Subject: [PATCH 006/151] more readable code --- src/dfcx_scrapi/tools/webhook_util.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/dfcx_scrapi/tools/webhook_util.py b/src/dfcx_scrapi/tools/webhook_util.py index 4a26ed7f..cb799a95 100644 --- a/src/dfcx_scrapi/tools/webhook_util.py +++ b/src/dfcx_scrapi/tools/webhook_util.py @@ -75,7 +75,7 
@@ def build_session_info(parameters): @staticmethod def build_response( - response_text=None, page_info=None, session_info=None, action='replace' + response_text=None, page_info=None, session_info=None, append=False ): """Builds a Response object for Dialogflow CX. @@ -90,11 +90,10 @@ def build_response( session_info: (Optional) The JSON object returned by build_session_info() action: (Optional) Whether messages will replace or append to - the list of messages waiting to be sent to the user. + the list of messages waiting to be sent to the use. + Default behavior is to replace. """ - if action.casefold() not in ['replace', 'append']: - raise ValueError('`action` should be in ["replace", "append"].') - action = action.upper() + action = 'APPEND' if append else 'REPLACE' if response_text: response_object = { 'mergeBehavior': action, From 6bea27adb0d2dda318ac639fb5ae6db49522b597 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 24 Mar 2023 10:44:43 -0500 Subject: [PATCH 007/151] docs correction --- src/dfcx_scrapi/tools/webhook_util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dfcx_scrapi/tools/webhook_util.py b/src/dfcx_scrapi/tools/webhook_util.py index cb799a95..197a93c8 100644 --- a/src/dfcx_scrapi/tools/webhook_util.py +++ b/src/dfcx_scrapi/tools/webhook_util.py @@ -89,9 +89,9 @@ def build_response( page_info: (Optional) The JSON object returned by build_page_info() session_info: (Optional) The JSON object returned by build_session_info() - action: (Optional) Whether messages will replace or append to - the list of messages waiting to be sent to the use. - Default behavior is to replace. + append: (Optional) Whether messages will append or replace to + the list of messages waiting to be sent to the user. If append + set to False it will replace the messages. 
""" action = 'APPEND' if append else 'REPLACE' if response_text: From 98143ab13eb01cce2a11f1283e054995d7c124cc Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Mon, 27 Mar 2023 15:36:31 -0500 Subject: [PATCH 008/151] revert changes for update_webhook --- src/dfcx_scrapi/core/webhooks.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/dfcx_scrapi/core/webhooks.py b/src/dfcx_scrapi/core/webhooks.py index 17128266..9731ecab 100644 --- a/src/dfcx_scrapi/core/webhooks.py +++ b/src/dfcx_scrapi/core/webhooks.py @@ -225,13 +225,11 @@ def update_webhook( webhook_obj.name = webhook_id - mask = None - if kwargs: - # set environment attributes from kwargs - for key, value in kwargs.items(): - setattr(webhook_obj, key, value) - paths = kwargs.keys() - mask = field_mask_pb2.FieldMask(paths=paths) + # set environment attributes from kwargs + for key, value in kwargs.items(): + setattr(webhook_obj, key, value) + paths = kwargs.keys() + mask = field_mask_pb2.FieldMask(paths=paths) client_options = self._set_region(webhook_id) client = services.webhooks.WebhooksClient( @@ -239,8 +237,7 @@ def update_webhook( request = types.webhook.UpdateWebhookRequest() request.webhook = webhook_obj - if mask: - request.update_mask = mask + request.update_mask = mask response = client.update_webhook(request) From 4e64e03cc11f4cbc04eaaebc7edb1da3ca429d2c Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 27 Mar 2023 17:02:29 -0500 Subject: [PATCH 009/151] patch to v1.6.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ceea9628..a8dbbbdb 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='dfcx-scrapi', - version='1.6.0', + version='1.6.1', description='A high level scripting API for bot builders, developers, and\ maintainers.', long_description=long_description, From 92b33e9fc9882280431900998d51fc1315751db5 Mon Sep 17 00:00:00 2001 From: Patrick 
Marlow Date: Mon, 3 Apr 2023 15:06:19 -0500 Subject: [PATCH 010/151] correct arg assignment for copy entity method --- src/dfcx_scrapi/tools/copy_util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/copy_util.py b/src/dfcx_scrapi/tools/copy_util.py index 44704e9d..8e97cd14 100644 --- a/src/dfcx_scrapi/tools/copy_util.py +++ b/src/dfcx_scrapi/tools/copy_util.py @@ -628,7 +628,9 @@ def copy_entity_type_to_agent( # push to destination agent try: - self.entities.create_entity_type(destination_agent, entity_object) + self.entities.create_entity_type( + agent_id=destination_agent, + obj=entity_object) logging.info( "Entity Type %s created successfully", entity_object.display_name, From 4aa431d170ab22f9f5a2761dc981a6b5cf965c58 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 3 Apr 2023 15:12:48 -0500 Subject: [PATCH 011/151] patch to v1.6.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a8dbbbdb..d016d56e 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='dfcx-scrapi', - version='1.6.1', + version='1.6.2', description='A high level scripting API for bot builders, developers, and\ maintainers.', long_description=long_description, From 38c13d4782cd08654096eb2eafcc9ee634beb815 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 14 Apr 2023 12:55:43 -0500 Subject: [PATCH 012/151] refactor for backwards comp --- src/dfcx_scrapi/core/conversation.py | 51 ++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/src/dfcx_scrapi/core/conversation.py b/src/dfcx_scrapi/core/conversation.py index 8e23ec87..337f70e4 100644 --- a/src/dfcx_scrapi/core/conversation.py +++ b/src/dfcx_scrapi/core/conversation.py @@ -19,7 +19,7 @@ import traceback import uuid -from typing import Dict +from typing import Dict, Any from operator import attrgetter from threading import Thread @@ -64,15 +64,12 @@ def __init__( agent_id=agent_id, 
) - logging.info( + logging.debug( "create conversation with creds_path: %s | agent_id: %s", creds_path, agent_id) - if agent_id or config["agent_path"]: - self.agent_id = agent_id or config["agent_path"] - - self.language_code = language_code or config["language_code"] - + self.agent_id = self._set_agent_id(agent_id, config) + self.language_code = self._set_language_code(language_code, config) self.start_time = None self.query_result = None self.session_id = None @@ -82,6 +79,46 @@ def __init__( self.flows = flows.Flows(creds=self.creds) self.pages = pages.Pages(creds=self.creds) + @staticmethod + def _set_language_code(language_code: str, config: Dict[str, Any]) -> str: + """Determines how to set the language_code based on user inputs. + + We implement this for backwards compatability. + """ + # Config will take precedence if provided + if config: + config_lang_code = config.get("language_code", None) + + # We'll only return if it exist in the config on the off chance that + # some users have provided the langauge_code as a top level arg in + # addition to providing the config + if config_lang_code: + return config_lang_code + + return language_code + + @staticmethod + def _set_agent_id(input_agent_id: str, config: Dict[str, Any]) -> str: + """Determines how to set the agent_id based on user inputs. + + We implement this for backwards compatability. 
+ """ + + # Config will take precedence if provided + if config: + config_agent_path = config.get("agent_path", None) + + # We'll only return if it exist in the config on the off chance that + # some users have provided the agent_id as a top level arg in + # addition to providing the config + if config_agent_path: + return config_agent_path + + elif input_agent_id: + return input_agent_id + + return None + @staticmethod def _get_match_type_from_map(match_type: int): """Translates the match_type enum int value into a more descriptive From adff4427c1f1255565265d39aaf510c5b4be3671 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 14 Apr 2023 12:56:07 -0500 Subject: [PATCH 013/151] fix: quota project id not being implemented correctly --- src/dfcx_scrapi/core/scrapi_base.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/dfcx_scrapi/core/scrapi_base.py b/src/dfcx_scrapi/core/scrapi_base.py index 0fc3be9f..31c48c03 100644 --- a/src/dfcx_scrapi/core/scrapi_base.py +++ b/src/dfcx_scrapi/core/scrapi_base.py @@ -72,7 +72,7 @@ def __init__( self.agent_id = agent_id @staticmethod - def _set_region(item_id): + def _set_region(resource_id: str): """Different regions have different API endpoints Args: @@ -85,18 +85,27 @@ def _set_region(item_id): if the location is "global" """ try: - location = item_id.split("/")[3] + location = resource_id.split("/")[3] except IndexError as err: - logging.error("IndexError - path too short? %s", item_id) + logging.error("IndexError - path too short? 
%s", resource_id) raise err + project_id = resource_id.split("/")[1] + if location != "global": api_endpoint = f"{location}-dialogflow.googleapis.com:443" - client_options = {"api_endpoint": api_endpoint} + client_options = { + "api_endpoint": api_endpoint, + "quota_project_id": project_id} return client_options else: - return None # explicit None return when not required + api_endpoint = "dialogflow.googleapis.com:443" + client_options = { + "api_endpoint": api_endpoint, + "quota_project_id": project_id} + + return client_options @staticmethod def pbuf_to_dict(pbuf): From 6cb10a58d3544b6f3db176a08d793c2e675b628a Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 14 Apr 2023 13:02:38 -0500 Subject: [PATCH 014/151] linting --- src/dfcx_scrapi/core/conversation.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/dfcx_scrapi/core/conversation.py b/src/dfcx_scrapi/core/conversation.py index 337f70e4..4ebb1532 100644 --- a/src/dfcx_scrapi/core/conversation.py +++ b/src/dfcx_scrapi/core/conversation.py @@ -87,13 +87,13 @@ def _set_language_code(language_code: str, config: Dict[str, Any]) -> str: """ # Config will take precedence if provided if config: - config_lang_code = config.get("language_code", None) + config_lang_code = config.get("language_code", None) - # We'll only return if it exist in the config on the off chance that - # some users have provided the langauge_code as a top level arg in - # addition to providing the config - if config_lang_code: - return config_lang_code + # We'll only return if it exist in the config on the off chance that + # some users have provided the langauge_code as a top level arg in + # addition to providing the config + if config_lang_code: + return config_lang_code return language_code @@ -106,13 +106,13 @@ def _set_agent_id(input_agent_id: str, config: Dict[str, Any]) -> str: # Config will take precedence if provided if config: - config_agent_path = config.get("agent_path", None) 
+ config_agent_path = config.get("agent_path", None) - # We'll only return if it exist in the config on the off chance that - # some users have provided the agent_id as a top level arg in - # addition to providing the config - if config_agent_path: - return config_agent_path + # We'll only return if it exist in the config on the off chance that + # some users have provided the agent_id as a top level arg in + # addition to providing the config + if config_agent_path: + return config_agent_path elif input_agent_id: return input_agent_id From e3829f9b21a0b5a275ab109cf6e73d2ec2beb92a Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 14 Apr 2023 13:17:40 -0500 Subject: [PATCH 015/151] update to v1.6.3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d016d56e..373401d9 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='dfcx-scrapi', - version='1.6.2', + version='1.6.3', description='A high level scripting API for bot builders, developers, and\ maintainers.', long_description=long_description, From 483ee4621976c672fc10ce1ac42dd4f8bea7c728 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Thu, 27 Apr 2023 14:04:21 -0500 Subject: [PATCH 016/151] add force for delete page --- src/dfcx_scrapi/core/pages.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/dfcx_scrapi/core/pages.py b/src/dfcx_scrapi/core/pages.py index d43be972..d193ecd2 100644 --- a/src/dfcx_scrapi/core/pages.py +++ b/src/dfcx_scrapi/core/pages.py @@ -225,13 +225,16 @@ def update_page( return response - def delete_page(self, page_id: str = None) -> str: + def delete_page(self, page_id: str = None, force: bool = False) -> str: """Deletes the specified Page. Args: page_id: CX Page ID string in the following Format: ``projects//locations//agents// flows//pages/`` + force: (Optional) This field has no effect for pages with no incoming + transitions. 
If set to True, Dialogflow will remove the page, + as well as any transitions to the page. Returns: String "Page `{page_id}` successfully deleted." @@ -240,6 +243,7 @@ def delete_page(self, page_id: str = None) -> str: client = pages.PagesClient( credentials=self.creds, client_options=client_options ) - client.delete_page(name=page_id) + req = gcdc_page.DeletePageRequest(name=page_id, force=force) + client.delete_page(request=req) return f"Page `{page_id}` successfully deleted." From 593cd676f09d0df61d13e8f98bfd9add8455e380 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 28 Apr 2023 18:58:19 -0500 Subject: [PATCH 017/151] decorator for api calls added --- src/dfcx_scrapi/core/scrapi_base.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/dfcx_scrapi/core/scrapi_base.py b/src/dfcx_scrapi/core/scrapi_base.py index 31c48c03..e35c1ec5 100644 --- a/src/dfcx_scrapi/core/scrapi_base.py +++ b/src/dfcx_scrapi/core/scrapi_base.py @@ -275,3 +275,16 @@ def recurse_proto_marshal_to_dict(self, marshal_object): new_dict[k] = v return new_dict + + +def api_call_counter_decorator(func): + """Counts the number of API calls for the function `func`.""" + + def wrapper(*args, **kwargs): + wrapper.api_call_count += 1 + return func(*args, **kwargs) + + wrapper.api_call_count = 0 + wrapper.__name__ = func.__name__ + + return wrapper From 089c05c7dd962b4e0d9ce66d58750d72b4bce3ca Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 28 Apr 2023 18:59:58 -0500 Subject: [PATCH 018/151] A method to show number of api calls added --- src/dfcx_scrapi/core/scrapi_base.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/dfcx_scrapi/core/scrapi_base.py b/src/dfcx_scrapi/core/scrapi_base.py index e35c1ec5..f9dc4123 100644 --- a/src/dfcx_scrapi/core/scrapi_base.py +++ b/src/dfcx_scrapi/core/scrapi_base.py @@ -276,6 +276,18 @@ def recurse_proto_marshal_to_dict(self, 
marshal_object): return new_dict + def api_calls_count(self) -> int: + """Show the total number of API calls for this resource. + + Returns: + Total calls to the API so far. + """ + return sum( + getattr(getattr(self, f), "api_call_count", 0) + for f in dir(self) + if callable(getattr(self, f)) + ) + def api_call_counter_decorator(func): """Counts the number of API calls for the function `func`.""" From d5a65e99dc534214e673ce7b3e3850a08e9fe6ed Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 28 Apr 2023 19:02:46 -0500 Subject: [PATCH 019/151] decorator added for intents --- src/dfcx_scrapi/core/intents.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/dfcx_scrapi/core/intents.py b/src/dfcx_scrapi/core/intents.py index aa55e307..74e93701 100644 --- a/src/dfcx_scrapi/core/intents.py +++ b/src/dfcx_scrapi/core/intents.py @@ -23,7 +23,7 @@ from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -33,7 +33,7 @@ ) -class Intents(ScrapiBase): +class Intents(scrapi_base.ScrapiBase): """Core Class for CX Intent Resource functions.""" def __init__( @@ -409,6 +409,7 @@ def get_intents_map(self, agent_id: str = None, reverse: bool = False): return intents_dict + @scrapi_base.api_call_counter_decorator def list_intents( self, agent_id: str = None, @@ -445,6 +446,7 @@ def list_intents( return intents + @scrapi_base.api_call_counter_decorator def get_intent( self, intent_id: str = None, @@ -477,6 +479,7 @@ def get_intent( return response + @scrapi_base.api_call_counter_decorator def create_intent( self, agent_id: str, @@ -531,6 +534,7 @@ def create_intent( return response + @scrapi_base.api_call_counter_decorator def update_intent( self, intent_id: str = None, @@ -582,6 +586,7 @@ def update_intent( return response + 
@scrapi_base.api_call_counter_decorator def delete_intent(self, intent_id: str, obj: types.Intent = None) -> None: """Deletes an intent by Intent ID. From 516784d77dd135c19f5c0a586c8031cfcafb348d Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Tue, 2 May 2023 13:26:32 -0500 Subject: [PATCH 020/151] decorator added for agents, flows, pages, webhooks, route groups, and entity types --- src/dfcx_scrapi/core/agents.py | 9 +++++++++ src/dfcx_scrapi/core/entity_types.py | 9 +++++++-- src/dfcx_scrapi/core/flows.py | 9 +++++++++ src/dfcx_scrapi/core/pages.py | 5 +++++ src/dfcx_scrapi/core/transition_route_groups.py | 4 ++++ src/dfcx_scrapi/core/webhooks.py | 4 ++++ 6 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index 992c99dd..3ba3e04e 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -53,6 +53,7 @@ def __init__( self.agent_id = agent_id self.client_options = self._set_region(agent_id) + @scrapi_base.api_call_counter_decorator def _build_list_agents_client_request(self, location_id): """Builds the List Agents Request object.""" @@ -125,6 +126,7 @@ def list_agents( return agents + @scrapi_base.api_call_counter_decorator def get_agent(self, agent_id: str) -> types.Agent: """Retrieves a single CX Agent resource object. 
@@ -216,6 +218,7 @@ def get_agent_by_display_name( return matched_agent + @scrapi_base.api_call_counter_decorator def create_agent( self, project_id: str, @@ -273,6 +276,7 @@ def create_agent( return response + @scrapi_base.api_call_counter_decorator def validate_agent( self, agent_id: str = None, @@ -309,6 +313,7 @@ def validate_agent( return val_dict + @scrapi_base.api_call_counter_decorator def get_validation_result( self, agent_id: str = None, @@ -354,6 +359,7 @@ def get_validation_result( return val_results_dict + @scrapi_base.api_call_counter_decorator def export_agent( self, agent_id: str, @@ -402,6 +408,7 @@ def export_agent( return response.operation.name + @scrapi_base.api_call_counter_decorator def restore_agent(self, agent_id: str, gcs_bucket_uri: str) -> str: """Restores a CX agent from a gcs_bucket location. @@ -433,6 +440,7 @@ def restore_agent(self, agent_id: str, gcs_bucket_uri: str) -> str: return response.operation.name + @scrapi_base.api_call_counter_decorator def update_agent( self, agent_id: str, obj: types.Agent = None, **kwargs ) -> types.Agent: @@ -470,6 +478,7 @@ def update_agent( return response + @scrapi_base.api_call_counter_decorator def delete_agent(self, agent_id: str) -> str: """Deletes the specified Dialogflow CX Agent. 
diff --git a/src/dfcx_scrapi/core/entity_types.py b/src/dfcx_scrapi/core/entity_types.py index d90ba722..4ffaac6a 100644 --- a/src/dfcx_scrapi/core/entity_types.py +++ b/src/dfcx_scrapi/core/entity_types.py @@ -22,7 +22,7 @@ from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -32,7 +32,7 @@ ) -class EntityTypes(ScrapiBase): +class EntityTypes(scrapi_base.ScrapiBase): """Core Class for CX Entity Type Resource functions.""" def __init__( @@ -243,6 +243,7 @@ def get_entities_map(self, agent_id: str = None, reverse=False): return entities_dict + @scrapi_base.api_call_counter_decorator def list_entity_types(self, agent_id: str = None): """Returns a list of Entity Type objects. @@ -272,6 +273,7 @@ def list_entity_types(self, agent_id: str = None): return entities + @scrapi_base.api_call_counter_decorator def get_entity_type(self, entity_id: str = None): """Returns a single Entity Type object. @@ -292,6 +294,7 @@ def get_entity_type(self, entity_id: str = None): return response + @scrapi_base.api_call_counter_decorator def create_entity_type( self, agent_id: str = None, @@ -353,6 +356,7 @@ def create_entity_type( return response + @scrapi_base.api_call_counter_decorator def update_entity_type( self, entity_type_id: str = None, @@ -407,6 +411,7 @@ def update_entity_type( return response + @scrapi_base.api_call_counter_decorator def delete_entity_type(self, entity_id: str = None, obj=None) -> None: """Deletes a single Entity Type resouce object. 
diff --git a/src/dfcx_scrapi/core/flows.py b/src/dfcx_scrapi/core/flows.py index 3646898c..8fd7de97 100644 --- a/src/dfcx_scrapi/core/flows.py +++ b/src/dfcx_scrapi/core/flows.py @@ -129,6 +129,7 @@ def get_flows_map(self, agent_id: str, reverse=False): return flows_dict + @scrapi_base.api_call_counter_decorator def train_flow(self, flow_id: str) -> str: """Trains the specified flow. @@ -155,6 +156,7 @@ def train_flow(self, flow_id: str) -> str: return response + @scrapi_base.api_call_counter_decorator def list_flows(self, agent_id: str) -> List[types.Flow]: """Get a List of all Flows in the current Agent. @@ -208,6 +210,7 @@ def get_flow_by_display_name( return flow + @scrapi_base.api_call_counter_decorator def get_flow(self, flow_id: str) -> types.Flow: """Get a single CX Flow object. @@ -226,6 +229,7 @@ def get_flow(self, flow_id: str) -> types.Flow: return response + @scrapi_base.api_call_counter_decorator def create_flow( self, agent_id: str, @@ -274,6 +278,7 @@ def create_flow( return response + @scrapi_base.api_call_counter_decorator def update_flow( self, flow_id: str, obj: types.Flow = None, **kwargs ) -> types.Flow: @@ -325,6 +330,7 @@ def update_nlu_settings(self, flow_id: str, **kwargs): setattr(current_settings, key, value) self.update_flow(flow_id=flow_id, nlu_settings=current_settings) + @scrapi_base.api_call_counter_decorator def export_flow( self, flow_id: str, gcs_path: str, ref_flows: bool = True ) -> Dict[str, str]: @@ -355,6 +361,7 @@ def export_flow( return response.result() + @scrapi_base.api_call_counter_decorator def export_flow_inline(self, flow_id: str, ref_flows: bool = True) -> bytes: """Export a Flow, returning uncompressed raw byte content for flow. 
@@ -377,6 +384,7 @@ def export_flow_inline(self, flow_id: str, ref_flows: bool = True) -> bytes: return (response.result()).flow_content + @scrapi_base.api_call_counter_decorator def import_flow( self, agent_id: str, @@ -425,6 +433,7 @@ def import_flow( return response + @scrapi_base.api_call_counter_decorator def delete_flow(self, flow_id: str, force: bool = False): """Deletes a single CX Flow Object resource. diff --git a/src/dfcx_scrapi/core/pages.py b/src/dfcx_scrapi/core/pages.py index d43be972..e2cb989e 100644 --- a/src/dfcx_scrapi/core/pages.py +++ b/src/dfcx_scrapi/core/pages.py @@ -112,6 +112,7 @@ def get_pages_map( return pages_dict + @scrapi_base.api_call_counter_decorator def list_pages(self, flow_id: str = None) -> List[gcdc_page.Page]: """Get a List of all pages for the specified Flow ID. @@ -137,6 +138,7 @@ def list_pages(self, flow_id: str = None) -> List[gcdc_page.Page]: return cx_pages + @scrapi_base.api_call_counter_decorator def get_page(self, page_id: str = None) -> gcdc_page.Page: """Get a single CX Page object based on the provided Page ID. @@ -158,6 +160,7 @@ def get_page(self, page_id: str = None) -> gcdc_page.Page: return response + @scrapi_base.api_call_counter_decorator def create_page( self, flow_id: str = None, obj: gcdc_page.Page = None, **kwargs ) -> gcdc_page.Page: @@ -191,6 +194,7 @@ def create_page( return response + @scrapi_base.api_call_counter_decorator def update_page( self, page_id: str = None, obj: gcdc_page.Page = None, **kwargs ) -> gcdc_page.Page: @@ -225,6 +229,7 @@ def update_page( return response + @scrapi_base.api_call_counter_decorator def delete_page(self, page_id: str = None) -> str: """Deletes the specified Page. 
diff --git a/src/dfcx_scrapi/core/transition_route_groups.py b/src/dfcx_scrapi/core/transition_route_groups.py index 96e6343e..8c2c8984 100644 --- a/src/dfcx_scrapi/core/transition_route_groups.py +++ b/src/dfcx_scrapi/core/transition_route_groups.py @@ -134,6 +134,7 @@ def get_route_groups_map(self, flow_id: str = None, reverse=False): return pages_dict + @scrapi_base.api_call_counter_decorator def list_transition_route_groups(self, flow_id: str = None): """Exports List of all Route Groups in the specified CX Flow ID. @@ -164,6 +165,7 @@ def list_transition_route_groups(self, flow_id: str = None): return cx_route_groups + @scrapi_base.api_call_counter_decorator def get_transition_route_group(self, route_group_id): """Get a single Transition Route Group object. @@ -183,6 +185,7 @@ def get_transition_route_group(self, route_group_id): return response + @scrapi_base.api_call_counter_decorator def create_transition_route_group( self, flow_id: str = None, @@ -222,6 +225,7 @@ def create_transition_route_group( return response + @scrapi_base.api_call_counter_decorator def update_transition_route_group( self, route_group_id: str = None, diff --git a/src/dfcx_scrapi/core/webhooks.py b/src/dfcx_scrapi/core/webhooks.py index 9731ecab..3a10b4c2 100644 --- a/src/dfcx_scrapi/core/webhooks.py +++ b/src/dfcx_scrapi/core/webhooks.py @@ -88,6 +88,7 @@ def get_webhooks_map( return webhooks_dict + @scrapi_base.api_call_counter_decorator def list_webhooks(self, agent_id: str = None): """List all Webhooks in the specified CX Agent. @@ -116,6 +117,7 @@ def list_webhooks(self, agent_id: str = None): return cx_webhooks + @scrapi_base.api_call_counter_decorator def create_webhook( self, agent_id: str, @@ -149,6 +151,7 @@ def create_webhook( return response + @scrapi_base.api_call_counter_decorator def get_webhook(self, webhook_id:str): """Retrieves the specified webhook. 
@@ -202,6 +205,7 @@ def get_webhook_by_display_name( return webhook_obj + @scrapi_base.api_call_counter_decorator def update_webhook( self, webhook_id:str, From 1d3144d00b0ace67abefaac0c7f34e9e1a091b34 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Tue, 9 May 2023 16:51:17 -0500 Subject: [PATCH 021/151] breakdown api calls for each method --- src/dfcx_scrapi/core/scrapi_base.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/dfcx_scrapi/core/scrapi_base.py b/src/dfcx_scrapi/core/scrapi_base.py index f9dc4123..d0300ecd 100644 --- a/src/dfcx_scrapi/core/scrapi_base.py +++ b/src/dfcx_scrapi/core/scrapi_base.py @@ -276,17 +276,28 @@ def recurse_proto_marshal_to_dict(self, marshal_object): return new_dict - def api_calls_count(self) -> int: + def api_calls_count_dict(self) -> Dict[str, int]: + """The number of API calls corresponding to each method. + + Returns: + A dictionary with keys as the method names + and values as number of calls. + """ + out_dict = {} + for attr_name in dir(self): + attr = getattr(self, attr_name) + if callable(attr) and hasattr(attr, "api_call_count"): + out_dict[attr_name] = getattr(attr, "api_call_count") + + return out_dict + + def total_api_calls(self) -> int: """Show the total number of API calls for this resource. Returns: - Total calls to the API so far. + Total calls to the API so far as an int. """ - return sum( - getattr(getattr(self, f), "api_call_count", 0) - for f in dir(self) - if callable(getattr(self, f)) - ) + return sum(self.api_calls_count_dict().values()) def api_call_counter_decorator(func): From a6fc700647a557e90e762a60dd4bd7d6b30f8739 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 May 2023 02:25:03 +0000 Subject: [PATCH 022/151] Bump pyyaml from 5.3.1 to 5.4 Bumps [pyyaml](https://github.com/yaml/pyyaml) from 5.3.1 to 5.4. 
- [Changelog](https://github.com/yaml/pyyaml/blob/master/CHANGES) - [Commits](https://github.com/yaml/pyyaml/compare/5.3.1...5.4) --- updated-dependencies: - dependency-name: pyyaml dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a2d91b6d..b6b173fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ pylint==2.8.3 pytest==6.0.2 pytest-cov==2.11.1 pytest-xdist==2.1.0 -pyyaml==5.3.1 +pyyaml==5.4 torch transformers sentencepiece From 62bd05de7b74984b6ad78ec57d1282213cea8ef0 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Thu, 25 May 2023 19:53:18 -0500 Subject: [PATCH 023/151] api call related methods changed for reusability in tools --- src/dfcx_scrapi/core/scrapi_base.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/dfcx_scrapi/core/scrapi_base.py b/src/dfcx_scrapi/core/scrapi_base.py index d0300ecd..c9a2cc64 100644 --- a/src/dfcx_scrapi/core/scrapi_base.py +++ b/src/dfcx_scrapi/core/scrapi_base.py @@ -283,12 +283,19 @@ def api_calls_count_dict(self) -> Dict[str, int]: A dictionary with keys as the method names and values as number of calls. """ - out_dict = {} + out_dict, this_class_apis = {}, {} + for attr_name in dir(self): attr = getattr(self, attr_name) if callable(attr) and hasattr(attr, "api_call_count"): - out_dict[attr_name] = getattr(attr, "api_call_count") - + this_class_apis[attr_name] = getattr(attr, "api_call_count") + if any( + isinstance(attr, sub_class) + for sub_class in ScrapiBase.__subclasses__() + ): + out_dict[attr_name] = attr.api_calls_count_dict() + + out_dict["THIS"] = this_class_apis return out_dict def total_api_calls(self) -> int: @@ -297,7 +304,20 @@ def total_api_calls(self) -> int: Returns: Total calls to the API so far as an int. 
""" - return sum(self.api_calls_count_dict().values()) + return self._total_call_helper(self.api_calls_count_dict()) + + + def _total_call_helper(self, dict_): + count = 0 + for v in dict_.values(): + if isinstance(v, int): + count += v + elif isinstance(v, dict): + count += self._total_call_helper(v) + + return count + + def api_call_counter_decorator(func): From dcae059fd0d958da26b3331739d83336d7ee7164 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Wed, 31 May 2023 11:11:32 -0500 Subject: [PATCH 024/151] api_call decorator added for the rest of the core modules --- src/dfcx_scrapi/core/changelogs.py | 6 ++- src/dfcx_scrapi/core/conversation.py | 1 + src/dfcx_scrapi/core/environments.py | 42 ++++++++++++-------- src/dfcx_scrapi/core/experiments.py | 9 +++-- src/dfcx_scrapi/core/operations.py | 7 +++- src/dfcx_scrapi/core/project.py | 4 +- src/dfcx_scrapi/core/security_settings.py | 5 +++ src/dfcx_scrapi/core/session_entity_types.py | 5 +++ src/dfcx_scrapi/core/sessions.py | 7 +++- src/dfcx_scrapi/core/test_cases.py | 16 +++++++- src/dfcx_scrapi/core/versions.py | 6 +++ 11 files changed, 77 insertions(+), 31 deletions(-) diff --git a/src/dfcx_scrapi/core/changelogs.py b/src/dfcx_scrapi/core/changelogs.py index 21563268..9e8aedf4 100644 --- a/src/dfcx_scrapi/core/changelogs.py +++ b/src/dfcx_scrapi/core/changelogs.py @@ -23,7 +23,7 @@ from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -33,7 +33,7 @@ ) -class Changelogs(ScrapiBase): +class Changelogs(scrapi_base.ScrapiBase): """Tools class that contains methods to support Change History feature.""" def __init__( @@ -76,6 +76,7 @@ def _validate_epoch_time(create_time_epoch_seconds: str): else: return True + @scrapi_base.api_call_counter_decorator def list_changelogs(self, agent_id: str = 
None, **kwargs): """Lists all Change History logs for a CX Agent. @@ -147,6 +148,7 @@ def list_changelogs(self, agent_id: str = None, **kwargs): return changelogs + @scrapi_base.api_call_counter_decorator def get_changelog(self, changelog_id: str): """Get a single changelog resource object. diff --git a/src/dfcx_scrapi/core/conversation.py b/src/dfcx_scrapi/core/conversation.py index 4ebb1532..3d5cc98c 100644 --- a/src/dfcx_scrapi/core/conversation.py +++ b/src/dfcx_scrapi/core/conversation.py @@ -395,6 +395,7 @@ def checkpoint(self, msg=None, start=False): if msg: print(f"{duration:0.2f}s {msg}") + @scrapi_base.api_call_counter_decorator def reply( self, send_obj: Dict[str, str], diff --git a/src/dfcx_scrapi/core/environments.py b/src/dfcx_scrapi/core/environments.py index fe924aa2..6e516cb2 100644 --- a/src/dfcx_scrapi/core/environments.py +++ b/src/dfcx_scrapi/core/environments.py @@ -38,7 +38,7 @@ class Environments(scrapi_base.ScrapiBase): def __init__( self, creds_path: str = None, - creds_dict: Dict[str,str] = None, + creds_dict: Dict[str, str] = None, creds: service_account.Credentials = None, agent_id: str = None, ): @@ -129,7 +129,8 @@ def get_environments_map( return environments_dict - def list_environments(self, agent_id:str=None): + @scrapi_base.api_call_counter_decorator + def list_environments(self, agent_id: str = None): """List all Versions for a given Flow""" if not agent_id: @@ -152,9 +153,10 @@ def list_environments(self, agent_id:str=None): return environments + @scrapi_base.api_call_counter_decorator def get_environment( self, - environment_id:str) -> types.environment.Environment: + environment_id: str) -> types.environment.Environment: """Get Environment object for specified environment ID. 
Args: @@ -178,8 +180,8 @@ def get_environment( def get_environment_by_display_name( self, - display_name:str, - agent_id:str) -> types.environment.Environment: + display_name: str, + agent_id: str) -> types.environment.Environment: """Get Environment object for specific environment by its display name. Args: @@ -201,10 +203,11 @@ def get_environment_by_display_name( return result + @scrapi_base.api_call_counter_decorator def create_environment( self, - environment:types.environment.Environment, - agent_id:str=None): + environment: types.environment.Environment, + agent_id: str = None): """Create a new environment for a specified agent. Args: environment: The environment to create. @@ -235,10 +238,10 @@ def create_environment( def create_environment_by_display_name( self, - display_name:str, - version_configs:List[Tuple[str,str]], - description:str=None, - agent_id:str=None): + display_name: str, + version_configs: List[Tuple[str, str]], + description: str = None, + agent_id: str = None): """Create a new environment for a specified agent. Args: display_name: The display name of the Environment to create @@ -286,10 +289,11 @@ def create_environment_by_display_name( return response + @scrapi_base.api_call_counter_decorator def update_environment( self, environment_id: str, - environment_obj:types.Environment = None, + environment_obj: types.Environment = None, **kwargs): """Update an existing environment for a specified agent. @@ -330,7 +334,8 @@ def update_environment( return response - def delete_environment(self, environment_id:str): + @scrapi_base.api_call_counter_decorator + def delete_environment(self, environment_id: str): """Delete a specified environment. 
Args: @@ -350,10 +355,11 @@ def delete_environment(self, environment_id:str): client.delete_environment(request) + @scrapi_base.api_call_counter_decorator def deploy_flow_to_environment( self, - environment_id:str, - flow_version:str): + environment_id: str, + flow_version: str): """Deploys a flow to the specified environment. Args: @@ -382,9 +388,10 @@ def deploy_flow_to_environment( return response + @scrapi_base.api_call_counter_decorator def lookup_environment_history( self, - environment_id:str) -> List[types.Environment]: + environment_id: str) -> List[types.Environment]: """Looks up the history of the specified environment. Args: @@ -413,7 +420,8 @@ def lookup_environment_history( return history - def list_continuous_test_results(self, environment_id:str): + @scrapi_base.api_call_counter_decorator + def list_continuous_test_results(self, environment_id: str): """Fetches a list of continuous test results for a given environment. Args: diff --git a/src/dfcx_scrapi/core/experiments.py b/src/dfcx_scrapi/core/experiments.py index ca0655c2..eac0c84e 100644 --- a/src/dfcx_scrapi/core/experiments.py +++ b/src/dfcx_scrapi/core/experiments.py @@ -19,7 +19,7 @@ from typing import Dict from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -34,7 +34,7 @@ ] -class ScrapiExperiments(ScrapiBase): +class ScrapiExperiments(scrapi_base.ScrapiBase): """Wrapper for working with Experiments""" def __init__( @@ -55,7 +55,8 @@ def __init__( logging.info("created %s", self.agent_id) - def list_experiments(self, environment_id=None): + @scrapi_base.api_call_counter_decorator + def list_experiments(self, environment_id: str = None): """List out experiments. 
Args: @@ -75,7 +76,7 @@ def list_experiments(self, environment_id=None): client_options=client_options, credentials=self.creds ) response = client.list_experiments(request) - blob = ScrapiBase.cx_object_to_json(response) + blob = scrapi_base.ScrapiBase.cx_object_to_json(response) if len(blob) < 1: logging.warning( diff --git a/src/dfcx_scrapi/core/operations.py b/src/dfcx_scrapi/core/operations.py index 6cbcd070..05f2577f 100644 --- a/src/dfcx_scrapi/core/operations.py +++ b/src/dfcx_scrapi/core/operations.py @@ -16,8 +16,10 @@ import logging from typing import Dict + from google.api_core import operations_v1, grpc_helpers -from dfcx_scrapi.core.scrapi_base import ScrapiBase + +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -26,7 +28,7 @@ datefmt="%Y-%m-%d %H:%M:%S", ) -class Operations(ScrapiBase): +class Operations(scrapi_base.ScrapiBase): """Core class for Operations functions, primarily used to extract LRO information on long running jobs for CX. """ @@ -45,6 +47,7 @@ def __init__( scope=scope ) + @scrapi_base.api_call_counter_decorator def get_lro(self, lro: str): """Used to retrieve the status of LROs for Dialogflow CX. diff --git a/src/dfcx_scrapi/core/project.py b/src/dfcx_scrapi/core/project.py index 21d7dbfb..544b426b 100644 --- a/src/dfcx_scrapi/core/project.py +++ b/src/dfcx_scrapi/core/project.py @@ -18,7 +18,7 @@ import time from typing import Dict -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base from dfcx_scrapi.core.agents import Agents # logging config @@ -29,7 +29,7 @@ ) -class Project(ScrapiBase): +class Project(scrapi_base.ScrapiBase): """Top Level class representing the Project level resources when working on a Dialogflow CX project. 
This Class will allow you to extract information about your GCP project as a whole in relation to diff --git a/src/dfcx_scrapi/core/security_settings.py b/src/dfcx_scrapi/core/security_settings.py index d84a8cc2..0617478f 100644 --- a/src/dfcx_scrapi/core/security_settings.py +++ b/src/dfcx_scrapi/core/security_settings.py @@ -54,6 +54,7 @@ def __init__( self.ss_service = services.security_settings_service self.ss_types = types.security_settings + @scrapi_base.api_call_counter_decorator def list_security_settings(self, location_id: str): """List Security Settings for a given Project and Region. @@ -83,6 +84,7 @@ def list_security_settings(self, location_id: str): return security_settings + @scrapi_base.api_call_counter_decorator def get_security_settings(self, security_setting_id: str): """Get specified CCAI Security Setting. @@ -107,6 +109,7 @@ def get_security_settings(self, security_setting_id: str): return response + @scrapi_base.api_call_counter_decorator def create_security_settings( self, location_id: str, @@ -158,6 +161,7 @@ def create_security_settings( return response + @scrapi_base.api_call_counter_decorator def update_security_settings(self, security_setting_id: str, **kwargs): """Update specified CCAI Security Setting. @@ -192,6 +196,7 @@ def update_security_settings(self, security_setting_id: str, **kwargs): return response + @scrapi_base.api_call_counter_decorator def delete_security_settings(self, security_setting_id: str): """Delete the specified CCAI Security Setting. 
diff --git a/src/dfcx_scrapi/core/session_entity_types.py b/src/dfcx_scrapi/core/session_entity_types.py index c016acd1..62cbf215 100644 --- a/src/dfcx_scrapi/core/session_entity_types.py +++ b/src/dfcx_scrapi/core/session_entity_types.py @@ -193,6 +193,7 @@ def build_session_entity_type( return st + @scrapi_base.api_call_counter_decorator def list_session_entity_types( self, session_id: str, environment_id: str = None ) -> List[types.SessionEntityType]: @@ -236,6 +237,7 @@ def list_session_entity_types( return session_entities + @scrapi_base.api_call_counter_decorator def get_session_entity_type( self, session_entity_type_id: str, environment_id: str = None ) -> types.SessionEntityType: @@ -272,6 +274,7 @@ def get_session_entity_type( return response + @scrapi_base.api_call_counter_decorator def create_session_entity_type( self, session_id: str, session_entity_type: types.SessionEntityType ) -> types.SessionEntityType: @@ -299,6 +302,7 @@ def create_session_entity_type( return response + @scrapi_base.api_call_counter_decorator def update_session_entity_type( self, session_entity_type_id: str, @@ -361,6 +365,7 @@ def update_session_entity_type( return response + @scrapi_base.api_call_counter_decorator def delete_session_entity_type( self, session_entity_type_id: str, environment_id: str = None ) -> str: diff --git a/src/dfcx_scrapi/core/sessions.py b/src/dfcx_scrapi/core/sessions.py index 8922ecaf..a6cde6df 100644 --- a/src/dfcx_scrapi/core/sessions.py +++ b/src/dfcx_scrapi/core/sessions.py @@ -20,7 +20,7 @@ from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -29,7 +29,7 @@ datefmt="%Y-%m-%d %H:%M:%S", ) -class Sessions(ScrapiBase): +class Sessions(scrapi_base.ScrapiBase): """Core Class for CX Session Resource functions.""" def __init__( @@ -142,6 +142,7 @@ def 
run_conversation( response = session_client.detect_intent(request=request) + # TODO (miladt): Need to be refactored for api decorator to work for text in conversation: text_input = types.session.TextInput(text=text) query_input = types.session.QueryInput( @@ -225,6 +226,7 @@ def detect_intent( logging.info(f"Starting Session ID {session_id}") + # TODO (miladt): Extra response in if? if parameters: query_params = types.session.QueryParameters(parameters=parameters) @@ -249,6 +251,7 @@ def detect_intent( return query_result + @scrapi_base.api_call_counter_decorator def preset_parameters( self, agent_id: str = None, session_id: str = None, parameters=None ): diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index 9258bcf5..2055e9c9 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -22,7 +22,7 @@ from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -32,7 +32,7 @@ ) -class TestCases(ScrapiBase): +class TestCases(scrapi_base.ScrapiBase): """Core Class for CX Test Cases.""" def __init__( @@ -59,6 +59,7 @@ def __init__( self.test_case_id = test_case_id self.client_options = self._set_region(self.test_case_id) + @scrapi_base.api_call_counter_decorator def list_test_cases(self, agent_id: str = None): """List test cases from an agent. @@ -91,6 +92,7 @@ def list_test_cases(self, agent_id: str = None): return test_cases + @scrapi_base.api_call_counter_decorator def export_test_cases( self, gcs_uri: str, @@ -137,6 +139,7 @@ def export_test_cases( return response + @scrapi_base.api_call_counter_decorator def create_test_case(self, test_case: types.TestCase, agent_id: str = None): """Create a new Test Case in the specified CX Agent. 
@@ -162,6 +165,7 @@ def create_test_case(self, test_case: types.TestCase, agent_id: str = None): response = client.create_test_case(request) return response + @scrapi_base.api_call_counter_decorator def get_test_case(self, test_case_id: str): """Get test case object from CX Agent. @@ -184,6 +188,7 @@ def get_test_case(self, test_case_id: str): response = client.get_test_case(request) return response + @scrapi_base.api_call_counter_decorator def import_test_cases(self, gcs_uri: str, agent_id: str = None): """Import test cases from cloud storage. @@ -212,6 +217,7 @@ def import_test_cases(self, gcs_uri: str, agent_id: str = None): result = response.result() return result + @scrapi_base.api_call_counter_decorator def batch_delete_test_cases( self, test_case_ids: List[str], @@ -242,6 +248,7 @@ def batch_delete_test_cases( ) client.batch_delete_test_cases(request) + @scrapi_base.api_call_counter_decorator def list_test_case_results(self, test_case_id: str): """List the results from a specific Test Case. @@ -272,6 +279,7 @@ def list_test_case_results(self, test_case_id: str): return test_case_results + @scrapi_base.api_call_counter_decorator def batch_run_test_cases( self, test_cases: List[str], @@ -309,6 +317,7 @@ def batch_run_test_cases( results = response.result() return results + @scrapi_base.api_call_counter_decorator def update_test_case( self, test_case_id: str = None, @@ -348,6 +357,7 @@ def update_test_case( response = client.update_test_case(request) return response + @scrapi_base.api_call_counter_decorator def run_test_case(self, test_case_id: str, environment: str = None): """Run test case and get result for a specified test case. @@ -375,6 +385,7 @@ def run_test_case(self, test_case_id: str, environment: str = None): results = response.result() return results + @scrapi_base.api_call_counter_decorator def get_test_case_result(self, test_case_result_id: str): """Get test case result for a specified run on a specified test case. 
@@ -396,6 +407,7 @@ def get_test_case_result(self, test_case_result_id: str): response = client.get_test_case_result(request) return response + @scrapi_base.api_call_counter_decorator def calculate_coverage(self, coverage_type: int, agent_id: str = None): """Calculate coverage of different resources in the test case set. diff --git a/src/dfcx_scrapi/core/versions.py b/src/dfcx_scrapi/core/versions.py index 1d1d2658..cc94fd19 100644 --- a/src/dfcx_scrapi/core/versions.py +++ b/src/dfcx_scrapi/core/versions.py @@ -48,6 +48,7 @@ def __init__( if flow_id: self.flow_id = flow_id + @scrapi_base.api_call_counter_decorator def list_versions(self, flow_id:str): """List all Versions for a given Flow. @@ -79,6 +80,7 @@ def list_versions(self, flow_id:str): return versions + @scrapi_base.api_call_counter_decorator def get_version( self, version_id:str=None, @@ -140,6 +142,7 @@ def get_version_by_display_name(self, display_name:str, flow_id:str): return None + @scrapi_base.api_call_counter_decorator def load_version( self, version:types.version.Version, @@ -176,6 +179,7 @@ def load_version( response = client.load_version(request) return response + @scrapi_base.api_call_counter_decorator def create_version( self, flow_id:str, @@ -212,6 +216,7 @@ def create_version( return response + @scrapi_base.api_call_counter_decorator def delete_version(self, version_id:str): """Delete a specified Version. 
@@ -229,6 +234,7 @@ def delete_version(self, version_id:str): return client.delete_version(request) + @scrapi_base.api_call_counter_decorator def compare_versions( self, base_version_id:str, From cbcaf88f457375e120be81a86e3a1a2944370015 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Tue, 6 Jun 2023 16:43:58 -0500 Subject: [PATCH 025/151] get_api_calls updated --- src/dfcx_scrapi/core/scrapi_base.py | 30 ++++++++--------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/src/dfcx_scrapi/core/scrapi_base.py b/src/dfcx_scrapi/core/scrapi_base.py index c9a2cc64..3655e6b9 100644 --- a/src/dfcx_scrapi/core/scrapi_base.py +++ b/src/dfcx_scrapi/core/scrapi_base.py @@ -276,48 +276,34 @@ def recurse_proto_marshal_to_dict(self, marshal_object): return new_dict - def api_calls_count_dict(self) -> Dict[str, int]: + def get_api_calls_details(self) -> Dict[str, int]: """The number of API calls corresponding to each method. Returns: A dictionary with keys as the method names - and values as number of calls. + and values as the number of calls. """ - out_dict, this_class_apis = {}, {} + this_class_methods, sub_class_apis_dict = {}, {} for attr_name in dir(self): attr = getattr(self, attr_name) if callable(attr) and hasattr(attr, "api_call_count"): - this_class_apis[attr_name] = getattr(attr, "api_call_count") + this_class_methods[attr_name] = getattr(attr, "api_call_count") if any( isinstance(attr, sub_class) for sub_class in ScrapiBase.__subclasses__() ): - out_dict[attr_name] = attr.api_calls_count_dict() + sub_class_apis_dict.update(attr.get_api_calls_details()) - out_dict["THIS"] = this_class_apis - return out_dict + return {**this_class_methods, **sub_class_apis_dict} - def total_api_calls(self) -> int: + def get_api_calls_count(self) -> int: """Show the total number of API calls for this resource. Returns: Total calls to the API so far as an int. 
""" - return self._total_call_helper(self.api_calls_count_dict()) - - - def _total_call_helper(self, dict_): - count = 0 - for v in dict_.values(): - if isinstance(v, int): - count += v - elif isinstance(v, dict): - count += self._total_call_helper(v) - - return count - - + return sum(self.get_api_calls_details().values()) def api_call_counter_decorator(func): From 8fcef2ce1914577c54bcc0500791bb353921be00 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Mon, 12 Jun 2023 22:12:23 -0500 Subject: [PATCH 026/151] revert changes for core.sessions --- src/dfcx_scrapi/core/sessions.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dfcx_scrapi/core/sessions.py b/src/dfcx_scrapi/core/sessions.py index a6cde6df..c6e0a665 100644 --- a/src/dfcx_scrapi/core/sessions.py +++ b/src/dfcx_scrapi/core/sessions.py @@ -142,7 +142,6 @@ def run_conversation( response = session_client.detect_intent(request=request) - # TODO (miladt): Need to be refactored for api decorator to work for text in conversation: text_input = types.session.TextInput(text=text) query_input = types.session.QueryInput( @@ -226,7 +225,6 @@ def detect_intent( logging.info(f"Starting Session ID {session_id}") - # TODO (miladt): Extra response in if? 
if parameters: query_params = types.session.QueryParameters(parameters=parameters) @@ -251,7 +249,6 @@ def detect_intent( return query_result - @scrapi_base.api_call_counter_decorator def preset_parameters( self, agent_id: str = None, session_id: str = None, parameters=None ): From ea8c788886eb2a1a8f68f6379dc4ddd257a7b2dc Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Mon, 12 Jun 2023 22:14:51 -0500 Subject: [PATCH 027/151] update the api_call_counter_decorator and get_api_calls_details due to a bug --- src/dfcx_scrapi/core/scrapi_base.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/dfcx_scrapi/core/scrapi_base.py b/src/dfcx_scrapi/core/scrapi_base.py index 3655e6b9..71399a84 100644 --- a/src/dfcx_scrapi/core/scrapi_base.py +++ b/src/dfcx_scrapi/core/scrapi_base.py @@ -17,8 +17,10 @@ import logging import json import re - +import functools +from collections import defaultdict from typing import Dict + from google.oauth2 import service_account from google.auth.transport.requests import Request from google.protobuf import json_format # type: ignore @@ -71,6 +73,8 @@ def __init__( if agent_id: self.agent_id = agent_id + self.api_calls_dict = defaultdict(int) + @staticmethod def _set_region(resource_id: str): """Different regions have different API endpoints @@ -287,14 +291,17 @@ def get_api_calls_details(self) -> Dict[str, int]: for attr_name in dir(self): attr = getattr(self, attr_name) - if callable(attr) and hasattr(attr, "api_call_count"): - this_class_methods[attr_name] = getattr(attr, "api_call_count") + if callable(attr) and hasattr(attr, "calls_api"): + this_class_methods[attr_name] = 0 if any( isinstance(attr, sub_class) for sub_class in ScrapiBase.__subclasses__() ): sub_class_apis_dict.update(attr.get_api_calls_details()) + if hasattr(self, "api_calls_dict"): + this_class_methods.update(getattr(self, "api_calls_dict")) + return {**this_class_methods, 
**sub_class_apis_dict} def get_api_calls_count(self) -> int: @@ -309,11 +316,11 @@ def get_api_calls_count(self) -> int: def api_call_counter_decorator(func): """Counts the number of API calls for the function `func`.""" - def wrapper(*args, **kwargs): - wrapper.api_call_count += 1 - return func(*args, **kwargs) + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + self.api_calls_dict[func.__name__] += 1 + return func(self, *args, **kwargs) - wrapper.api_call_count = 0 - wrapper.__name__ = func.__name__ + wrapper.calls_api = True return wrapper From f5dc1042a847af4fa583861c9ee8d15acf73978c Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Tue, 13 Jun 2023 12:29:27 -0500 Subject: [PATCH 028/151] add include_conversation_turns to the list_test_cases --- src/dfcx_scrapi/core/test_cases.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index 9258bcf5..c3af41af 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -59,12 +59,17 @@ def __init__( self.test_case_id = test_case_id self.client_options = self._set_region(self.test_case_id) - def list_test_cases(self, agent_id: str = None): + def list_test_cases( + self, agent_id: str = None, include_conversation_turns: bool = False + ): """List test cases from an agent. Args: - agent_id: The agent to list all pages for. + agent_id: The agent to list all test cases for. `projects//locations//agents/` + include_conversation_turns: Whether to include the conversation turns + in the test cases or not. Default is False, + which shows only the basic metadata about the test cases. Returns: List of test cases from an agent.
@@ -73,8 +78,14 @@ def list_test_cases(self, agent_id: str = None): if not agent_id: agent_id = self.agent_id - request = types.test_case.ListTestCasesRequest() - request.parent = agent_id + if include_conversation_turns: + test_case_view = types.ListTestCasesRequest.TestCaseView.FULL + else: + test_case_view = types.ListTestCasesRequest.TestCaseView.BASIC + + request = types.test_case.ListTestCasesRequest( + parent=agent_id, view=test_case_view + ) client_options = self._set_region(agent_id) From 50a2a7232be9340db011008e255dcdf35ac6f61c Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sat, 17 Jun 2023 15:07:47 -0500 Subject: [PATCH 029/151] fix: refactor list_agents for clarity --- src/dfcx_scrapi/core/agents.py | 66 +++++++++++++++++----------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index 3ba3e04e..b525a6e4 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from typing import Dict, List +from typing import Dict, List, Tuple from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 @@ -54,8 +54,17 @@ def __init__( self.client_options = self._set_region(agent_id) @scrapi_base.api_call_counter_decorator - def _build_list_agents_client_request(self, location_id): - """Builds the List Agents Request object.""" + def _list_agents_client_request(self, location_id) -> List[ + types.agent.Agent + ]: + """Builds the List Agents Request object. 
+ + Args: + location_id: The GCP Location ID in the following format: + `projects//locations/` + + Returns: + List of types.agent.Agent""" request = types.agent.ListAgentsRequest() request.parent = location_id @@ -65,31 +74,34 @@ def list_agents( credentials=self.creds, client_options=client_options ) - return client, request + response = client.list_agents(request) + + agents = [] + for page in response.pages: + for agent in page.agents: + agents.append(agent) + + return agents def list_agents( self, - location_id: str = None, - project_id: str = None) -> List[types.Agent]: + project_id: str, + location: str = None) -> List[types.Agent]: """Get list of all CX agents in a given GCP Region or Project. - This method allows you to provide a specific Location ID consisting of - a GCP Project ID and Location ID (i.e. GCP Region Name) to retrieve all - of the CX agents associated with that Project/Region. Optionally, you - can provide just the Project ID and the funciton will traverse ALL - available GCP regions to list ALL agents across the regions. + This method allows you to provide a GCP Project ID to retrieve all of + the CX agents across ALL available GCP regions. If the optional location + ID is provided, the method will only pull the agents for that region. Args: - location_id: The GCP Project/Location ID in the following format - `projects//locations/` - `projects/my-gcp-project/locations/us-central1` - project_id: The GCP Project ID as a string + project_id: The GCP Project ID. Ex: `my-cool-gcp-project` + location: The GCP Location ID. Ex: `global`, `us-central1`, etc.
Returns: List of Agent objects """ - if project_id: + if not location: region_list = [ "global", "us-central1", @@ -106,23 +118,11 @@ def list_agents( agents = [] for region in region_list: location_path = f"projects/{project_id}/locations/{region}" - client, request = self._build_list_agents_client_request( - location_path - ) - - agents += self.list_agents(location_id=location_path) + agents += self._list_agents_client_request(location_path) else: - client, request = self._build_list_agents_client_request( - location_id - ) - - response = client.list_agents(request) - - agents = [] - for page in response.pages: - for agent in page.agents: - agents.append(agent) + location_path = f"projects/{project_id}/locations/{location}" + agents = self._list_agents_client_request(location_path) return agents @@ -181,12 +181,12 @@ def get_agent_by_display_name( if location_id: agent_list = self.list_agents( - location_id=location_id + location=location_id ) elif region: agent_list = self.list_agents( - location_id=f"projects/{project_id}/locations/{region}" + location=f"projects/{project_id}/locations/{region}" ) else: agent_list = self.list_agents(project_id=project_id) From 002181501d3e23edc3ee1d14b9d1da29895b42be Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sat, 17 Jun 2023 15:10:20 -0500 Subject: [PATCH 030/151] fix: resolving patch conflict --- src/dfcx_scrapi/core/pages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/core/pages.py b/src/dfcx_scrapi/core/pages.py index e2cb989e..63109372 100644 --- a/src/dfcx_scrapi/core/pages.py +++ b/src/dfcx_scrapi/core/pages.py @@ -230,7 +230,7 @@ def update_page( return response @scrapi_base.api_call_counter_decorator - def delete_page(self, page_id: str = None) -> str: + def delete_page(self, page_id: str = None, force: bool = False) -> str: """Deletes the specified Page. 
Args: From 274aa8c7df2db3ce9ee9f491c38a71c71dac7134 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sat, 17 Jun 2023 15:17:44 -0500 Subject: [PATCH 031/151] fix: linting and merge conflict resolution --- src/dfcx_scrapi/core/agents.py | 12 +++++------- src/dfcx_scrapi/core/pages.py | 6 +++++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index b525a6e4..2dde57fd 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from typing import Dict, List, Tuple +from typing import Dict, List from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 @@ -180,14 +180,12 @@ def get_agent_by_display_name( """ if location_id: - agent_list = self.list_agents( - location=location_id - ) + agent_list = self._list_agents_client_request(location_id) elif region: - agent_list = self.list_agents( - location=f"projects/{project_id}/locations/{region}" - ) + agent_list = self._list_agents_client_request( + f"projects/{project_id}/locations/{region}" + ) else: agent_list = self.list_agents(project_id=project_id) diff --git a/src/dfcx_scrapi/core/pages.py b/src/dfcx_scrapi/core/pages.py index 63109372..7438412c 100644 --- a/src/dfcx_scrapi/core/pages.py +++ b/src/dfcx_scrapi/core/pages.py @@ -237,6 +237,9 @@ def delete_page(self, page_id: str = None, force: bool = False) -> str: page_id: CX Page ID string in the following Format: ``projects//locations//agents// flows//pages/`` + force: (Optional) This field has no effect for pages with no incoming + transitions. If set to True, Dialogflow will remove the page, + as well as any transitions to the page. Returns: String "Page `{page_id}` successfully deleted." 
@@ -245,6 +248,7 @@ def delete_page(self, page_id: str = None, force: bool = False) -> str: client = pages.PagesClient( credentials=self.creds, client_options=client_options ) - client.delete_page(name=page_id) + req = gcdc_page.DeletePageRequest(name=page_id, force=force) + client.delete_page(request=req) return f"Page `{page_id}` successfully deleted." From d5cf67bd3b58792b6aaa7b360820b9a86e08c9a4 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sat, 17 Jun 2023 15:53:28 -0500 Subject: [PATCH 032/151] feat: add language code arg to several methods --- src/dfcx_scrapi/core/agents.py | 4 +++- src/dfcx_scrapi/core/entity_types.py | 15 ++++++++++++--- src/dfcx_scrapi/core/pages.py | 10 +++++++++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index 2dde57fd..4c8185f0 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -277,7 +277,8 @@ def create_agent( @scrapi_base.api_call_counter_decorator def validate_agent( self, - agent_id: str = None, + agent_id: str, + language_code: str = "en", timeout: float = None) -> Dict: """Initiates the Validation of the CX Agent or Flow. 
@@ -298,6 +299,7 @@ def validate_agent( request = types.agent.ValidateAgentRequest() request.name = agent_id + request.language_code = language_code client_options = self._set_region(agent_id) client = services.agents.AgentsClient( diff --git a/src/dfcx_scrapi/core/entity_types.py b/src/dfcx_scrapi/core/entity_types.py index 4ffaac6a..c805f644 100644 --- a/src/dfcx_scrapi/core/entity_types.py +++ b/src/dfcx_scrapi/core/entity_types.py @@ -43,6 +43,7 @@ def __init__( scope=False, entity_id: str = None, agent_id: str = None, + language_code: str = "en" ): super().__init__( creds_path=creds_path, @@ -53,6 +54,7 @@ def __init__( self.entity_id = entity_id self.agent_id = agent_id + self.language_code = language_code @staticmethod @@ -244,11 +246,12 @@ def get_entities_map(self, agent_id: str = None, reverse=False): return entities_dict @scrapi_base.api_call_counter_decorator - def list_entity_types(self, agent_id: str = None): + def list_entity_types(self, agent_id: str, language_code: str = "en"): """Returns a list of Entity Type objects. Args: agent_id: the formatted CX Agent ID to use + language_code: Specifies the language of the Entity Types listed Returns: List of Entity Type objects @@ -258,6 +261,7 @@ def list_entity_types(self, agent_id: str = None): request = types.entity_type.ListEntityTypesRequest() request.parent = agent_id + request.language_code = language_code client_options = self._set_region(agent_id) client = services.entity_types.EntityTypesClient( @@ -274,11 +278,12 @@ def list_entity_types(self, agent_id: str = None): return entities @scrapi_base.api_call_counter_decorator - def get_entity_type(self, entity_id: str = None): + def get_entity_type(self, entity_id: str = None, language_code: str = "en"): """Returns a single Entity Type object. 
Args: entity_id: the formatted CX Entity ID to get + language_code: Specifies the language of the Entity Types listed Returns: The single Entity Type object @@ -290,7 +295,11 @@ def get_entity_type(self, entity_id: str = None): client = services.entity_types.EntityTypesClient( credentials=self.creds, client_options=client_options ) - response = client.get_entity_type(name=entity_id) + request = types.entity_type.GetEntityTypeRequest() + request.name = entity_id + request.language_code = language_code + + response = client.get_entity_type(request=request) return response diff --git a/src/dfcx_scrapi/core/pages.py b/src/dfcx_scrapi/core/pages.py index 7438412c..8c98ec9f 100644 --- a/src/dfcx_scrapi/core/pages.py +++ b/src/dfcx_scrapi/core/pages.py @@ -113,17 +113,25 @@ def get_pages_map( return pages_dict @scrapi_base.api_call_counter_decorator - def list_pages(self, flow_id: str = None) -> List[gcdc_page.Page]: + def list_pages( + self, + flow_id: str = None, + language_code: str = "en") -> List[gcdc_page.Page]: """Get a List of all pages for the specified Flow ID. Args: flow_id: the properly formatted Flow ID string + language_code: Specifies the language of the Pages listed. While the + majority of contents of a Page is language agnostic, the contents + in the "Agent Says" and similar parts of a Page are affected by + language code. 
Returns: A List of CX Page objects for the specific Flow ID """ request = gcdc_page.ListPagesRequest() request.parent = flow_id + request.language_code = language_code client_options = self._set_region(flow_id) client = pages.PagesClient( From 28ef21b4ae24ade5b30197bb4c3f2e2fb1df95ec Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Tue, 20 Jun 2023 21:24:13 -0500 Subject: [PATCH 033/151] fix: deprecate reliance on diag_info --- src/dfcx_scrapi/core/conversation.py | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/dfcx_scrapi/core/conversation.py b/src/dfcx_scrapi/core/conversation.py index 3d5cc98c..7a65c78d 100644 --- a/src/dfcx_scrapi/core/conversation.py +++ b/src/dfcx_scrapi/core/conversation.py @@ -423,7 +423,7 @@ def reply( Returns: A dictionary for the agent reply to to the submitted text. Includes keys response_messages, confidence, page_name, - intent_name, match_type, match, other_intents, and params. + intent_name, match_type, match, and params. 
""" text = send_obj.get("text") send_params = send_obj.get("params") @@ -543,35 +543,12 @@ def reply( query_result.match.match_type ) reply["match"] = query_result.match - reply["other_intents"] = self.format_other_intents(query_result) reply["params"] = params logging.debug("reply %s", reply) return reply - def format_other_intents(self, query_result): - """Unwind protobufs into more friendly dict""" - other_intents = query_result.diagnostic_info.get( - "Alternative Matched Intents" - ) - items = [] - rank = 0 - for alt in other_intents: - items.append( - { - "name": alt.get("DisplayName"), - "score": alt.get("Score"), - "rank": rank, - } - ) - rank += 1 - - if self: - return items - - return None - def getpath(self, obj, xpath, default=None): """Get data at a pathed location out of object internals""" elem = obj From 1c9f81320ffb454718945f9caf47c27a0d72f232 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Tue, 20 Jun 2023 21:35:20 -0500 Subject: [PATCH 034/151] update v1.7.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 373401d9..595c8ce5 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='dfcx-scrapi', - version='1.6.3', + version='1.7.0', description='A high level scripting API for bot builders, developers, and\ maintainers.', long_description=long_description, From 0f0f5269443b91c87ef0ec3635120246f1920b72 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 7 Jul 2023 11:01:18 -0500 Subject: [PATCH 035/151] pylint depracation issue resolved --- .pylintrc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pylintrc b/.pylintrc index e1ec406e..4a16e611 100644 --- a/.pylintrc +++ b/.pylintrc @@ -421,6 +421,6 @@ valid-metaclass-classmethod-first-arg=mcs # Exceptions that will emit a warning when being caught. 
Defaults to # "Exception" -overgeneral-exceptions=StandardError, - Exception, - BaseException +overgeneral-exceptions=builtins.StandardError, + builtins.Exception, + builtins.BaseException From f93e5313dc7586926a33742b10d673c6c7912be1 Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 7 Jul 2023 11:08:06 -0500 Subject: [PATCH 036/151] more readable code for convert_tr_target_page --- src/dfcx_scrapi/tools/copy_util.py | 34 +++++++++--------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/dfcx_scrapi/tools/copy_util.py b/src/dfcx_scrapi/tools/copy_util.py index 8e97cd14..65498cd8 100644 --- a/src/dfcx_scrapi/tools/copy_util.py +++ b/src/dfcx_scrapi/tools/copy_util.py @@ -134,36 +134,22 @@ def _convert_entry_webhooks(page_object, webhooks_map): def __convert_tr_target_page( trans_route, pages_map, convert_type=None, flows_map=None, flow=None ): + special_pages = [ + "END_FLOW", "END_SESSION", + "CURRENT_PAGE", "PREVIOUS_PAGE", "START_PAGE" + ] if convert_type == "source": - if trans_route.target_page.split("/")[-1] == "END_FLOW": - trans_route.target_page = "END_FLOW" - elif trans_route.target_page.split("/")[-1] == "END_SESSION": - trans_route.target_page = "END_SESSION" - elif trans_route.target_page.split("/")[-1] == "CURRENT_PAGE": - trans_route.target_page = "CURRENT_PAGE" - elif trans_route.target_page.split("/")[-1] == "PREVIOUS_PAGE": - trans_route.target_page = "PREVIOUS_PAGE" - elif trans_route.target_page.split("/")[-1] == "START_PAGE": - trans_route.target_page = "START_PAGE" + last_part = trans_route.target_page.split("/")[-1] + if last_part in special_pages: + trans_route.target_page = last_part else: trans_route.target_page = pages_map[trans_route.target_page] elif convert_type == "destination": - if trans_route.target_page == "END_FLOW": - trans_route.target_page = flows_map[flow] + "/pages/END_FLOW" - elif trans_route.target_page == "END_SESSION": - trans_route.target_page = 
flows_map[flow] + "/pages/END_SESSION" - elif trans_route.target_page == "CURRENT_PAGE": - trans_route.target_page = ( - flows_map[flow] + "/pages/CURRENT_PAGE" - ) - elif trans_route.target_page == "PREVIOUS_PAGE": - trans_route.target_page = ( - flows_map[flow] + "/pages/PREVIOUS_PAGE" - ) - elif trans_route.target_page == "START_PAGE": - trans_route.target_page = flows_map[flow] + "/pages/START_PAGE" + if trans_route.target_page in special_pages: + new_page = f"{flows_map[flow]}/pages/{trans_route.target_page}" + trans_route.target_page = new_page else: trans_route.target_page = pages_map[trans_route.target_page] From 9e090630d0fd18d72b11d0d8fef1a189fa6740fd Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Fri, 7 Jul 2023 11:17:51 -0500 Subject: [PATCH 037/151] docs fix --- src/dfcx_scrapi/core/agents.py | 4 ++-- src/dfcx_scrapi/core/entity_types.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index 4c8185f0..326e2f17 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -418,8 +418,8 @@ def restore_agent(self, agent_id: str, gcs_bucket_uri: str) -> str: Args: agent_id: CX Agent ID string in the following format projects//locations//agents/ - gcs_bucket_uri: The Google Cloud Storage bucket/filepath to export the - agent to in the following format: + gcs_bucket_uri: The Google Cloud Storage bucket/filepath to restore + the agent from in the following format: `gs:///` Returns: diff --git a/src/dfcx_scrapi/core/entity_types.py b/src/dfcx_scrapi/core/entity_types.py index c805f644..526d840c 100644 --- a/src/dfcx_scrapi/core/entity_types.py +++ b/src/dfcx_scrapi/core/entity_types.py @@ -422,7 +422,7 @@ def update_entity_type( @scrapi_base.api_call_counter_decorator def delete_entity_type(self, entity_id: str = None, obj=None) -> None: - """Deletes a single Entity Type resouce object. 
+ """Deletes a single Entity Type resource object. Args: entity_id: the formatted CX Entity ID to delete From 89ff59285a8de399a27dcadfb02612fb343d563e Mon Sep 17 00:00:00 2001 From: Milad <63479762+MRyderOC@users.noreply.github.com> Date: Mon, 31 Jul 2023 15:36:00 -0500 Subject: [PATCH 038/151] change df.append to pd.concat due to deprecation warning --- src/dfcx_scrapi/core/changelogs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/core/changelogs.py b/src/dfcx_scrapi/core/changelogs.py index 9e8aedf4..db7ccf55 100644 --- a/src/dfcx_scrapi/core/changelogs.py +++ b/src/dfcx_scrapi/core/changelogs.py @@ -225,7 +225,7 @@ def changelogs_to_dataframe( ], ) - df = df.append(log_data) + df = pd.concat([df, log_data], ignore_index=True) df = df.reset_index(drop=True) From e651ff7dd5b7de56beb6b144f781470fc0bce545 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Thu, 17 Aug 2023 10:47:52 -0500 Subject: [PATCH 039/151] fix: revert change to location_id arg --- src/dfcx_scrapi/core/agents.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index 326e2f17..f4897250 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -86,7 +86,7 @@ def _list_agents_client_request(self, location_id) -> List[ def list_agents( self, project_id: str, - location: str = None) -> List[types.Agent]: + location_id: str = None) -> List[types.Agent]: """Get list of all CX agents in a given GCP Region or Project. 
This method allows you to provide a GCP Project ID to retrieve all of @@ -101,7 +101,7 @@ def list_agents( List of Agent objects """ - if not location: + if not location_id: region_list = [ "global", "us-central1", @@ -121,7 +121,7 @@ def list_agents( agents += self._list_agents_client_request(location_path) else: - location_path = f"projects/{project_id}/locations/{location}" + location_path = f"projects/{project_id}/locations/{location_id}" agents = self._list_agents_client_request(location_path) return agents From 5ace0030b86494977ac4c4bcc9de625f9397784b Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 16:56:11 +0000 Subject: [PATCH 040/151] Create agent_checker_util.py --- src/dfcx_scrapi/tools/agent_checker_util.py | 65 +++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 src/dfcx_scrapi/tools/agent_checker_util.py diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py new file mode 100644 index 00000000..6fb123df --- /dev/null +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -0,0 +1,65 @@ +"""A set of Utility methods to check DFCX Agents.""" + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + + +# logging config +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s %(levelname)-8s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', +) + +class AgentCheckerUtil(ScrapiBase): + """Utility class for checking DFCX Agents.""" + def __init__( + self, + creds_path: str = None, + creds_dict: Dict = None, + creds = None, + scope = False, + agent_id: str = None + ): + super().__init__( + creds_path=creds_path, + creds_dict=creds_dict, + creds=creds, + scope=scope, + ) + + self.agent_id = agent_id + + self.intents = Intents(creds=self.creds, agent_id=self.agent_id) + self.entities = EntityTypes(creds=self.creds, agent_id=self.agent_id) + self.flows = Flows(creds=self.creds, agent_id=self.agent_id) + self.pages = Pages(creds=self.creds) + self.webhooks = Webhooks(creds=self.creds, agent_id=self.agent_id) + self.route_groups = TransitionRouteGroups( + creds=self.creds, agent_id=self.agent_id) + + """ + TODO: Methods to implement: + - Restore from reference agent + - Retrain flows + - Run test cases and store results, and give a report + - Get condensed changelog + - Find unreachable/unused pages, intents, route groups, and possibly routes + - Find invalid test cases + - Check true routes + - Check events + - Check infinite loops + - Probably other things + """ From 91688f4378b358a1f2eb5ffb23caf5403b7ab8ae Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 17:11:22 +0000 Subject: [PATCH 041/151] Update notes --- src/dfcx_scrapi/tools/agent_checker_util.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 6fb123df..5124afb1 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -52,14 +52,24 @@ def __init__( """ TODO: Methods to implement: - - Restore from reference agent - - Retrain flows + x Restore from reference agent + - Beyond the scope of this class + x 
Retrain flows + - Easy enough to do manually - Run test cases and store results, and give a report - - Get condensed changelog + - Eeed to include a reference agent for this to give useful info about new failing test cases + - Get condensed changelog compared to a reference + - Ideally include test case changes, to include info that the CX UI can't provide - Find unreachable/unused pages, intents, route groups, and possibly routes + - Finding unreachable routes is hard, but the other problems have already been figured out - Find invalid test cases + - Test cases referencing pages or intents that don't exist, for example - Check true routes + - Pages with only conditional routes, and no intents or parameter filling, should have the last route be "true" to prevent getting stuck on the page - Check events + - Pages with user input should have a no-input-default and no-match-default event handler. + - Not sure if this applies to all agents in the same way - Check infinite loops + - Not possible to solve in general because of webhooks, but can find likely candidates - Probably other things """ From 332517dee8776ed87c9df2daf7d436187b3dc8c7 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 17:32:05 +0000 Subject: [PATCH 042/151] Add utility ID conversion functions --- src/dfcx_scrapi/tools/agent_checker_util.py | 38 ++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 5124afb1..25153c8d 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -50,12 +50,42 @@ def __init__( self.route_groups = TransitionRouteGroups( creds=self.creds, agent_id=self.agent_id) + def convert_intent(self, intent_id, agent_id, intents_map): + intent_id_converted = str(agent_id) + '/intents/' + str(intent_id) + if intent_id_converted in intents_map.keys(): + return intents_map[intent_id_converted] + 
return '' + + def convert_flow(self, flow_id, agent_id, flows_map): + if flow_id.split('/')[-1] == '-': + return '' + #flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) + if flow_id in flows_map.keys(): + return flows_map[flow_id] + # TODO: Should throw error instead of returning default + return 'Default Start Flow' + + # Note that flow id includes agent, normally... + def convert_page(self, page_id, flow_id, pages_map): + if page_id == 'END_SESSION': + return 'End Session' + elif page_id == 'END_FLOW': + return 'End Flow' + elif page_id == 'START_PAGE': + return 'Start' + page_id_converted = str(flow_id) + '/pages/' + str(page_id) + if flow_id in pages_map.keys(): + if page_id_converted in pages_map[flow_id].keys(): + return pages_map[flow_id][page_id_converted] + else: + # TODO: Should throw error instead of returning default + return 'Start' + print('Flow not found') + # TODO: Should throw error, but returning this probably will anyway + return 'Invalid' + """ TODO: Methods to implement: - x Restore from reference agent - - Beyond the scope of this class - x Retrain flows - - Easy enough to do manually - Run test cases and store results, and give a report - Eeed to include a reference agent for this to give useful info about new failing test cases - Get condensed changelog compared to a reference From 824007d863c6203a17fc9c2307989bf893da1df4 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 17:48:11 +0000 Subject: [PATCH 043/151] Add function get_test_case_results --- src/dfcx_scrapi/tools/agent_checker_util.py | 69 +++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 25153c8d..39583664 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -49,6 +49,7 @@ def __init__( self.webhooks = Webhooks(creds=self.creds, agent_id=self.agent_id) self.route_groups = 
TransitionRouteGroups( creds=self.creds, agent_id=self.agent_id) + self.test_cases = TestCases(creds=self.creds, agent_id=self.agent_id) def convert_intent(self, intent_id, agent_id, intents_map): intent_id_converted = str(agent_id) + '/intents/' + str(intent_id) @@ -84,6 +85,74 @@ def convert_page(self, page_id, flow_id, pages_map): # TODO: Should throw error, but returning this probably will anyway return 'Invalid' + def get_test_case_results(self, retest_all=False): + test_case_results = dfcx_tc.list_test_cases(self.agent_id) + retest = [] + retest_names = [] + + display_names = [] + ids = [] + short_ids = [] + tags = [] + creation_times = [] + flows = [] + pages = [] + test_results = [] + test_times = [] + passed = [] + + for response in test_case_results: + # Collect untested cases to be retested (or all if retest_all is True) + if retest_all or str(response.last_test_result.test_result) == 'TestResult.TEST_RESULT_UNSPECIFIED': + retest.append(response.name) + retest_names.append(response.display_name) + # Collect additional information for dataframe + display_names.append(response.display_name) + ids.append(response.name) + short_ids.append(response.name.split('/')[-1]) + tags.append(','.join(response.tags)) + creation_times.append(response.creation_time) + flows.append(convert_flow(response.test_config.flow, self.agent_id, self.flows_map)) + pages.append(convert_page(response.test_config.page, response.test_config.flow, self.pages_map)) + test_results.append(str(response.last_test_result.test_result)) + test_times.append(response.last_test_result.test_time) + passed.append(str(response.last_test_result.test_result) == 'TestResult.PASSED') + + # Create dataframe + test_case_df = pd.DataFrame({ + 'display_name': display_names, + 'id': ids, + 'short_id': short_ids, + 'tags': tags, + 'creation_time': creation_times, + 'start_flow': flows, + 'start_page': pages, + 'test_result': test_results, + 'passed': passed, + 'test_time': test_times}) + + # Retest any that 
haven't been run yet + print('To retest:', len(retest)) + if len(retest) > 0: + response = self.test_cases.batch_run_test_cases(retest, self.agent_id) + for result in response.results: + # Results may not be in the same order as they went in (oh well) + # Process the name a bit to remove the /results/id part at the end. + testCaseId_full = '/'.join(result.name.split('/')[:-2]) + index = retest.index(testCaseId_full) + testCaseId = testCaseId_full.split('/')[-1] + + # Update dataframe where id = testcaseId_full + #row = test_case_df.loc[test_case_df['id'] == testCaseId_full] + test_case_df.loc[test_case_df['id'] == testCaseId_full, 'short_id'] = testCaseId + test_case_df.loc[test_case_df['id'] == testCaseId_full, 'test_result'] = str(result.test_result) + test_case_df.loc[test_case_df['id'] == testCaseId_full, 'test_time'] = result.test_time + test_case_df.loc[test_case_df['id'] == testCaseId_full, 'passed'] = str(result.test_result) == 'TestResult.PASSED' + + # This column is redundant, since we have passed (bool) + test_case_df = test_case_df.drop(columns=['test_result']) + return test_case_df + """ TODO: Methods to implement: - Run test cases and store results, and give a report From 444ad71694542513e9841c6a9f453eca96251d8b Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 17:54:29 +0000 Subject: [PATCH 044/151] Add maps --- src/dfcx_scrapi/tools/agent_checker_util.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 39583664..c0f6c83e 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -51,6 +51,16 @@ def __init__( creds=self.creds, agent_id=self.agent_id) self.test_cases = TestCases(creds=self.creds, agent_id=self.agent_id) + # Generate maps + self.intents_map = self.intents.get_intents_map(self.agent_id) + self.flows_map = self.flows.get_flows_map(self.agent_id) + 
self.pages_map = {} + for flow_id in self.flows_map.keys(): + self.pages_map[flow_id] = self.pages.get_pages_map(flow_id) + self.route_groups_map = {} + for flow_id in self.flows_map.keys(): + self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id) + def convert_intent(self, intent_id, agent_id, intents_map): intent_id_converted = str(agent_id) + '/intents/' + str(intent_id) if intent_id_converted in intents_map.keys(): @@ -156,7 +166,7 @@ def get_test_case_results(self, retest_all=False): """ TODO: Methods to implement: - Run test cases and store results, and give a report - - Eeed to include a reference agent for this to give useful info about new failing test cases + - Need to include a reference agent for this to give useful info about new failing test cases - Get condensed changelog compared to a reference - Ideally include test case changes, to include info that the CX UI can't provide - Find unreachable/unused pages, intents, route groups, and possibly routes From df4b671d48411d71d6568bcfb17ca88388e08149 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 17:56:37 +0000 Subject: [PATCH 045/151] Simplify conversions --- src/dfcx_scrapi/tools/agent_checker_util.py | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index c0f6c83e..ec1cdfb1 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -61,23 +61,23 @@ def __init__( for flow_id in self.flows_map.keys(): self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id) - def convert_intent(self, intent_id, agent_id, intents_map): - intent_id_converted = str(agent_id) + '/intents/' + str(intent_id) - if intent_id_converted in intents_map.keys(): - return intents_map[intent_id_converted] + def convert_intent(self, intent_id): + intent_id_converted = str(self.agent_id) + 
'/intents/' + str(intent_id) + if intent_id_converted in self.intents_map.keys(): + return self.intents_map[intent_id_converted] return '' - def convert_flow(self, flow_id, agent_id, flows_map): + def convert_flow(self, flow_id): if flow_id.split('/')[-1] == '-': return '' #flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) - if flow_id in flows_map.keys(): - return flows_map[flow_id] + if flow_id in self.flows_map.keys(): + return self.flows_map[flow_id] # TODO: Should throw error instead of returning default return 'Default Start Flow' # Note that flow id includes agent, normally... - def convert_page(self, page_id, flow_id, pages_map): + def convert_page(self, page_id, flow_id): if page_id == 'END_SESSION': return 'End Session' elif page_id == 'END_FLOW': @@ -85,9 +85,9 @@ def convert_page(self, page_id, flow_id, pages_map): elif page_id == 'START_PAGE': return 'Start' page_id_converted = str(flow_id) + '/pages/' + str(page_id) - if flow_id in pages_map.keys(): - if page_id_converted in pages_map[flow_id].keys(): - return pages_map[flow_id][page_id_converted] + if flow_id in self.pages_map.keys(): + if page_id_converted in self.pages_map[flow_id].keys(): + return self.pages_map[flow_id][page_id_converted] else: # TODO: Should throw error instead of returning default return 'Start' @@ -122,8 +122,8 @@ def get_test_case_results(self, retest_all=False): short_ids.append(response.name.split('/')[-1]) tags.append(','.join(response.tags)) creation_times.append(response.creation_time) - flows.append(convert_flow(response.test_config.flow, self.agent_id, self.flows_map)) - pages.append(convert_page(response.test_config.page, response.test_config.flow, self.pages_map)) + flows.append(convert_flow(response.test_config.flow)) + pages.append(convert_page(response.test_config.page, response.test_config.flow)) test_results.append(str(response.last_test_result.test_result)) test_times.append(response.last_test_result.test_time) 
passed.append(str(response.last_test_result.test_result) == 'TestResult.PASSED') From c6b3271da6f35b9e24c3adad248520a41530f488 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 18:12:07 +0000 Subject: [PATCH 046/151] Add docstrings --- src/dfcx_scrapi/tools/agent_checker_util.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index ec1cdfb1..cad18f76 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -62,12 +62,14 @@ def __init__( self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id) def convert_intent(self, intent_id): + """Gets an intent display name from an intent ID""" intent_id_converted = str(self.agent_id) + '/intents/' + str(intent_id) if intent_id_converted in self.intents_map.keys(): return self.intents_map[intent_id_converted] return '' def convert_flow(self, flow_id): + """Gets a flow display name from a flow ID""" if flow_id.split('/')[-1] == '-': return '' #flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) @@ -78,6 +80,7 @@ def convert_flow(self, flow_id): # Note that flow id includes agent, normally... def convert_page(self, page_id, flow_id): + """Gets a page display name from a page and flow ID""" if page_id == 'END_SESSION': return 'End Session' elif page_id == 'END_FLOW': @@ -95,7 +98,23 @@ def convert_page(self, page_id, flow_id): # TODO: Should throw error, but returning this probably will anyway return 'Invalid' + # TODO: Should this function be in the base test_cases class, + # as get_test_case_results_df or something? def get_test_case_results(self, retest_all=False): + """Gets the test case results for this agent, + and generates a dataframe with their details. + Any tests without a result will be run in a batch. 
+ + Args: + retest_all: if true, all test cases are re-run, + regardless of whether or not they had a result + + Returns: + DataFrame of test case results for this agent, with columns: + display_name, id, short_id (excluding agent ID), + tags (comma-separated string), creation_time, + start_flow, start_page, passed, test_time + """ test_case_results = dfcx_tc.list_test_cases(self.agent_id) retest = [] retest_names = [] From da3a4f1c2168659f0fd37362a034a2235b90c6d5 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 18:18:48 +0000 Subject: [PATCH 047/151] Include imports --- src/dfcx_scrapi/tools/agent_checker_util.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index cad18f76..df8664a4 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -14,7 +14,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- +import logging +from typing import Dict, List + +from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core.intents import Intents +from dfcx_scrapi.core.entity_types import EntityTypes +from dfcx_scrapi.core.flows import Flows +from dfcx_scrapi.core.pages import Pages +from dfcx_scrapi.core.webhooks import Webhooks +from dfcx_scrapi.core.transition_route_groups import TransitionRouteGroups +from dfcx_scrapi.core.test_cases import TestCases # logging config logging.basicConfig( From 8a4d7302f00d6c2df5ad9da96685401d894bbc0a Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 18:33:15 +0000 Subject: [PATCH 048/151] Fix references and missing imports --- src/dfcx_scrapi/tools/agent_checker_util.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index df8664a4..78878467 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -16,6 +16,7 @@ import logging from typing import Dict, List +import pandas as pd from dfcx_scrapi.core.scrapi_base import ScrapiBase from dfcx_scrapi.core.intents import Intents @@ -51,6 +52,8 @@ def __init__( ) self.agent_id = agent_id + if not self.agent_id: + self.intents = Intents(creds=self.creds, agent_id=self.agent_id) self.entities = EntityTypes(creds=self.creds, agent_id=self.agent_id) @@ -62,14 +65,14 @@ def __init__( self.test_cases = TestCases(creds=self.creds, agent_id=self.agent_id) # Generate maps - self.intents_map = self.intents.get_intents_map(self.agent_id) - self.flows_map = self.flows.get_flows_map(self.agent_id) + self.intents_map = self.intents.get_intents_map(agent_id=self.agent_id) + self.flows_map = self.flows.get_flows_map(agent_id=self.agent_id) self.pages_map = {} for flow_id in self.flows_map.keys(): - self.pages_map[flow_id] = self.pages.get_pages_map(flow_id) + self.pages_map[flow_id] = 
self.pages.get_pages_map(flow_id=flow_id) self.route_groups_map = {} for flow_id in self.flows_map.keys(): - self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id) + self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id=flow_id) def convert_intent(self, intent_id): """Gets an intent display name from an intent ID""" @@ -125,7 +128,7 @@ def get_test_case_results(self, retest_all=False): tags (comma-separated string), creation_time, start_flow, start_page, passed, test_time """ - test_case_results = dfcx_tc.list_test_cases(self.agent_id) + test_case_results = self.test_cases.list_test_cases(self.agent_id) retest = [] retest_names = [] @@ -151,8 +154,8 @@ def get_test_case_results(self, retest_all=False): short_ids.append(response.name.split('/')[-1]) tags.append(','.join(response.tags)) creation_times.append(response.creation_time) - flows.append(convert_flow(response.test_config.flow)) - pages.append(convert_page(response.test_config.page, response.test_config.flow)) + flows.append(self.convert_flow(response.test_config.flow)) + pages.append(self.convert_page(response.test_config.page, response.test_config.flow)) test_results.append(str(response.last_test_result.test_result)) test_times.append(response.last_test_result.test_time) passed.append(str(response.last_test_result.test_result) == 'TestResult.PASSED') From d70e7c46933c621232e81774aa62bad55a78ae3f Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 19:10:38 +0000 Subject: [PATCH 049/151] Create functions for finding reachable pages --- src/dfcx_scrapi/tools/agent_checker_util.py | 319 +++++++++++++++++++- 1 file changed, 318 insertions(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 78878467..b501f618 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -53,7 +53,7 @@ def __init__( self.agent_id = agent_id if 
not self.agent_id: - + raise Exception("agent_id parameter is required") self.intents = Intents(creds=self.creds, agent_id=self.agent_id) self.entities = EntityTypes(creds=self.creds, agent_id=self.agent_id) @@ -67,6 +67,7 @@ def __init__( # Generate maps self.intents_map = self.intents.get_intents_map(agent_id=self.agent_id) self.flows_map = self.flows.get_flows_map(agent_id=self.agent_id) + self.flows_map_rev = self.flows.get_flows_map(agent_id=self.agent_id, reverse=True) self.pages_map = {} for flow_id in self.flows_map.keys(): self.pages_map[flow_id] = self.pages.get_pages_map(flow_id=flow_id) @@ -74,6 +75,8 @@ def __init__( for flow_id in self.flows_map.keys(): self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id=flow_id) + # Conversion utilities + def convert_intent(self, intent_id): """Gets an intent display name from an intent ID""" intent_id_converted = str(self.agent_id) + '/intents/' + str(intent_id) @@ -111,6 +114,8 @@ def convert_page(self, page_id, flow_id): # TODO: Should throw error, but returning this probably will anyway return 'Invalid' + # Test case results + # TODO: Should this function be in the base test_cases class, # as get_test_case_results_df or something? 
def get_test_case_results(self, retest_all=False): @@ -195,6 +200,318 @@ def get_test_case_results(self, retest_all=False): test_case_df = test_case_df.drop(columns=['test_result']) return test_case_df + # Test case comparison/report + + # Changelogs + + # Reachable and unreachable pages + + def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCXRoute, reachable: List[str], conversation_path: List[str], min_intent_counts: List[int], presets: Dict[str,str], intent_route_count: int = 0, intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = False, include_meta: bool = False, verbose: bool = False) -> None: + # TODO: Docstring + target_page = route.target_page + target_flow = route.target_flow + if intent_route_limit is None or not hasattr(route, 'intent') or route.intent == '' or intent_route_count < intent_route_limit: + if hasattr(page, 'form'): + for parameter in page.form.parameters: + parameter_name = parameter.display_name + # Need to also account for parameters being set by intents (or by webhooks...) 
+ if parameter_name not in presets.keys() or presets[parameter_name] == 'NULL': + # This page has an unfilled parameter + if limit_intent_to_initial and not is_initial: + return + if hasattr(route, 'intent') and route.intent != '': + if limit_intent_to_initial and not is_initial: + # Don't continue on this path + return + intent_route_count += 1 + if target_page in self.pages: + page_name = self.pages[target_page].display_name + if verbose: + print(page.display_name,'->',page_name) + # Move to this page (this is also the recursion limiting step to prevent infinite loops) + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + conversation_path.append(page_name) + if verbose: + print(conversation_path, intent_route_count) + + new_presets = presets.copy() + if hasattr(page, 'entry_fulfillment'): + if hasattr(page.entry_fulfillment, 'set_parameter_actions'): + for param_preset in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, 'form'): + for parameter in page.form.parameters: + if hasattr(parameter, 'fill_behavior'): + if hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): + if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): + for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(route, 'trigger_fulfillment'): + if hasattr(route.trigger_fulfillment, 'set_parameter_actions'): + for param_preset in route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + + if hasattr(route, 'intent') and route.intent != '': + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self.get_intent_parameters(intent_name) + for param in intent_params: + new_presets[param.id] = f'(potentially 
set by {intent_name})' + + self.find_reachable_pages_rec(self.pages[target_page], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) + conversation_path.pop(-1) + elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + # Better route found, traverse from here + min_intent_counts[reachable.index(page_name)] = intent_route_count + conversation_path.append(page_name) + if verbose: + print(conversation_path, intent_route_count) + + new_presets = presets.copy() + if hasattr(page, 'entry_fulfillment'): + if hasattr(page.entry_fulfillment, 'set_parameter_actions'): + for param_preset in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, 'form'): + for parameter in page.form.parameters: + if hasattr(parameter, 'fill_behavior'): + if hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): + if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): + for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(route, 'trigger_fulfillment'): + if hasattr(route.trigger_fulfillment, 'set_parameter_actions'): + for param_preset in route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + + if hasattr(route, 'intent') and route.intent != '': + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self.get_intent_parameters(intent_name) + for param in intent_params: + new_presets[param.id] = f'(potentially set by {intent_name})' + + 
self.find_reachable_pages_rec(self.pages[target_page], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) + conversation_path.pop(-1) + elif 'END_FLOW' in target_page: + if verbose: + print(page.display_name,'-> END FLOW') + if include_meta: + page_name = 'END FLOW' + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + min_intent_counts[reachable.index(page_name)] = intent_route_count + #reachable.append('END FLOW') + elif 'END_SESSION' in target_page: + if verbose: + print(page.display_name,'-> END SESSION') + if include_meta: + page_name = 'END SESSION' + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + min_intent_counts[reachable.index(page_name)] = intent_route_count + #reachable.append('END SESSION') + elif 'CURRENT_PAGE' in target_page: + if verbose: + print(page.display_name,'-> CURRENT PAGE') + page_name = page.display_name + if page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + min_intent_counts[reachable.index(page_name)] = intent_route_count + elif 'PREVIOUS_PAGE' in target_page: + if verbose: + print(page.display_name, '-> PREVIOUS PAGE') + if include_meta: + page_name = 'PREVIOUS PAGE' + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + 
min_intent_counts[reachable.index(page_name)] = intent_route_count + # TODO: This could cause huge problems... + elif 'START_PAGE' in target_page: + if verbose: + print(page.display_name, '-> START PAGE') + page_name = 'Start' + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + conversation_path.append(page_name) + if verbose: + print(conversation_path, intent_route_count) + + new_presets = presets.copy() + if hasattr(page, 'entry_fulfillment'): + if hasattr(page.entry_fulfillment, 'set_parameter_actions'): + for param_preset in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, 'form'): + for parameter in page.form.parameters: + if hasattr(parameter, 'fill_behavior'): + if hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): + if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): + for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(route, 'trigger_fulfillment'): + if hasattr(route.trigger_fulfillment, 'set_parameter_actions'): + for param_preset in route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + + if hasattr(route, 'intent') and route.intent != '': + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self.get_intent_parameters(intent_name) + for param in intent_params: + new_presets[param.id] = f'(potentially set by {intent_name})' + + self.find_reachable_pages_rec(self.flow_data, reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, 
is_initial=False, include_meta=include_meta, verbose=verbose) + conversation_path.pop(-1) + elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + # Better route found, traverse from here + min_intent_counts[reachable.index(page_name)] = intent_route_count + conversation_path.append(page_name) + if verbose: + print(conversation_path, intent_route_count) + + new_presets = presets.copy() + if hasattr(page, 'entry_fulfillment'): + if hasattr(page.entry_fulfillment, 'set_parameter_actions'): + for param_preset in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, 'form'): + for parameter in page.form.parameters: + if hasattr(parameter, 'fill_behavior'): + if hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): + if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): + for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(route, 'trigger_fulfillment'): + if hasattr(route.trigger_fulfillment, 'set_parameter_actions'): + for param_preset in route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + + if hasattr(route, 'intent') and route.intent != '': + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self.get_intent_parameters(intent_name) + for param in intent_params: + new_presets[param.id] = f'(potentially set by {intent_name})' + + self.find_reachable_pages_rec(self.flow_data, reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, 
include_meta=include_meta, verbose=verbose) + conversation_path.pop(-1) + elif len(target_page) > 0: + print(page.display_name,'->',target_page) + # This should not happen, and if it does it needs to be fixed + input() + elif len(target_flow) > 0: + flow_name = self.flows_map[route.target_flow] + if verbose: + print(page.display_name,'->',flow_name) + if flow_name not in reachable: + reachable.append(flow_name) + min_intent_counts.append(intent_route_count) + elif flow_name in reachable and intent_route_count < min_intent_counts[reachable.index(flow_name)]: + min_intent_counts[reachable.index(flow_name)] = intent_route_count + else: + if verbose: + print(page.display_name,'->',route.target_flow, '(empty)') + page_name = page.display_name + if page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + min_intent_counts[reachable.index(page_name)] = intent_route_count + + def find_reachable_pages_rec(self, page: DFCXPage | DFCXFlow, reachable: List[str], conversation_path: List[str], min_intent_counts: List[int], presets: Dict[str,str], intent_route_count: int = 0, intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = False, include_meta: bool = False, verbose: bool = False) -> None: + # TODO: Docstring + if hasattr(page, 'form'): + for parameter in page.form.parameters: + for event_handler in parameter.fill_behavior.reprompt_event_handlers: + if limit_intent_to_initial and not is_initial: + continue + if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): + self.find_reachable_pages_rec_helper(page, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, 
is_initial=is_initial, include_meta=include_meta, verbose=verbose) + for event_handler in page.event_handlers: + if limit_intent_to_initial and not is_initial: + continue + if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): + self.find_reachable_pages_rec_helper(page, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + for route in page.transition_routes: + self.find_reachable_pages_rec_helper(page, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + if include_groups: + for route_group in page.transition_route_groups: + for route in self.transition_route_groups[route_group].transition_routes: + self.find_reachable_pages_rec_helper(page, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + # Start page routes and route groups are also accessible from this page + if include_start_page_routes and page.display_name != self.flow_data.display_name and (not limit_intent_to_initial or is_initial): + for event_handler in self.flow_data.event_handlers: + if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): + 
self.find_reachable_pages_rec_helper(self.flow_data, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + for route in self.flow_data.transition_routes: + if hasattr(route, 'intent') and route.intent != '': + self.find_reachable_pages_rec_helper(self.flow_data, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + if include_groups: + for route_group in self.flow_data.transition_route_groups: + for route in self.transition_route_groups[route_group].transition_routes: + if hasattr(route, 'intent') and route.intent != '': + self.find_reachable_pages_rec_helper(self.flow_data, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + + def find_reachable_pages(self, flow_id: str, flow_name: str, from_page: str = 'Start', intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = True, include_meta: bool = False, verbose: bool = False) -> List[str]: + """Finds all pages which are reachable by transition routes, + starting from a given page in a given flow. Either flow_id or + flow_name must be used. 
+ + Args: + flow_id: The ID of the flow to find reachable pages for + flow_name: The display name of the flow to find reachable pages for + from_page: (Optional) The page to start from. If left blank, it will start on the Start Page + intent_route_limit: (Optional) Default None + include_groups: (Optional) If true, intents from transition route groups will be included, + but only if they are actually referenced on some page + include_start_page_routes: (Optional) Default true + limit_intent_to_initial: (Optional) Default False + is_initial: (Optional) Default True + include_meta: (Optional) Default False + verbose: (Optional) If true, print debug information about route traversal + + Returns: + The list of reachable pages in this flow + """ + # Start at the start page... + reachable = [from_page] + conversation_path = [from_page] + min_intent_counts = [25] # Technically this could be [0] or [1], or very rarely more than 1, depending on the routes that lead to current page... + presets = {} + page_data = self.get_page(flow_id=flow_id, flow_name=flow_name, page_id=None, page_name=from_page) + self.find_reachable_pages_rec(page_data, reachable, conversation_path, min_intent_counts, presets, intent_route_count=0, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + return reachable + + def find_unreachable_pages(self, flow_id: str = None, flow_name: str = None, include_groups: bool = True, verbose: bool = False) -> List[str]: + """Finds all pages which are unreachable by transition routes, + starting from the start page of a given flow. Either flow_id or + flow_name must be used. 
+ + Args: + flow_id: The ID of the flow to find unreachable pages for + flow_name: The display name of the flow to find unreachable pages for + include_groups: (Optional) If true, intents from transition route groups will be included, + but only if they are actually referenced on some page + verbose: (Optional) If true, print debug information about route traversal + + Returns: + The list of unreachable pages in this flow + """ + if not flow_id: + if not flow_name: + raise Exception("One of flow_id or flow_name must be set for find_unreachable_pages") + reachable = self.find_reachable_pages(flow_id, flow_name, include_groups=include_groups, verbose=verbose) + if flow_id: + return list(set(self.pages_map[self.flows_map_rev[flow_name]].keys()) - set(reachable)) + else: + return list(set(self.pages_map[self.flows_map[flow_id]].keys()) - set(reachable)) + """ TODO: Methods to implement: - Run test cases and store results, and give a report From 9c76cdfa66a013bc8e5d96e8d77a59ede8ecc8b3 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 19:18:17 +0000 Subject: [PATCH 050/151] Fix tabbing and imports --- src/dfcx_scrapi/tools/agent_checker_util.py | 123 +++++++++++--------- 1 file changed, 66 insertions(+), 57 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index b501f618..3ea523cb 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -14,10 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations + import logging -from typing import Dict, List +from typing import Dict, List, Optional import pandas as pd +import google.cloud.dialogflowcx_v3beta1.types as dfcx_types + from dfcx_scrapi.core.scrapi_base import ScrapiBase from dfcx_scrapi.core.intents import Intents from dfcx_scrapi.core.entity_types import EntityTypes @@ -27,6 +31,11 @@ from dfcx_scrapi.core.transition_route_groups import TransitionRouteGroups from dfcx_scrapi.core.test_cases import TestCases +# Type aliases +DFCXFlow = dfcx_types.flow.Flow +DFCXPage = dfcx_types.page.Page +DFCXRoute = dfcx_types.page.TransitionRoute + # logging config logging.basicConfig( level=logging.INFO, @@ -384,12 +393,12 @@ def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCX for param_preset in page.entry_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value if hasattr(page, 'form'): - for parameter in page.form.parameters: - if hasattr(parameter, 'fill_behavior'): - if hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): - if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): - for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value + for parameter in page.form.parameters: + if hasattr(parameter, 'fill_behavior'): + if hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): + if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): + for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value if hasattr(route, 'trigger_fulfillment'): if hasattr(route.trigger_fulfillment, 'set_parameter_actions'): for param_preset in route.trigger_fulfillment.set_parameter_actions: @@ -459,58 +468,58 @@ def find_reachable_pages_rec(self, page: DFCXPage | DFCXFlow, 
reachable: List[st self.find_reachable_pages_rec_helper(self.flow_data, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) def find_reachable_pages(self, flow_id: str, flow_name: str, from_page: str = 'Start', intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = True, include_meta: bool = False, verbose: bool = False) -> List[str]: - """Finds all pages which are reachable by transition routes, - starting from a given page in a given flow. Either flow_id or - flow_name must be used. - - Args: - flow_id: The ID of the flow to find reachable pages for - flow_name: The display name of the flow to find reachable pages for - from_page: (Optional) The page to start from. If left blank, it will start on the Start Page - intent_route_limit: (Optional) Default None - include_groups: (Optional) If true, intents from transition route groups will be included, - but only if they are actually referenced on some page - include_start_page_routes: (Optional) Default true - limit_intent_to_initial: (Optional) Default False - is_initial: (Optional) Default True - include_meta: (Optional) Default False - verbose: (Optional) If true, print debug information about route traversal - - Returns: - The list of reachable pages in this flow - """ - # Start at the start page... - reachable = [from_page] - conversation_path = [from_page] - min_intent_counts = [25] # Technically this could be [0] or [1], or very rarely more than 1, depending on the routes that lead to current page... 
- presets = {} - page_data = self.get_page(flow_id=flow_id, flow_name=flow_name, page_id=None, page_name=from_page) - self.find_reachable_pages_rec(page_data, reachable, conversation_path, min_intent_counts, presets, intent_route_count=0, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) - return reachable + """Finds all pages which are reachable by transition routes, + starting from a given page in a given flow. Either flow_id or + flow_name must be used. + + Args: + flow_id: The ID of the flow to find reachable pages for + flow_name: The display name of the flow to find reachable pages for + from_page: (Optional) The page to start from. If left blank, it will start on the Start Page + intent_route_limit: (Optional) Default None + include_groups: (Optional) If true, intents from transition route groups will be included, + but only if they are actually referenced on some page + include_start_page_routes: (Optional) Default true + limit_intent_to_initial: (Optional) Default False + is_initial: (Optional) Default True + include_meta: (Optional) Default False + verbose: (Optional) If true, print debug information about route traversal + + Returns: + The list of reachable pages in this flow + """ + # Start at the start page... + reachable = [from_page] + conversation_path = [from_page] + min_intent_counts = [25] # Technically this could be [0] or [1], or very rarely more than 1, depending on the routes that lead to current page... 
+ presets = {} + page_data = self.get_page(flow_id=flow_id, flow_name=flow_name, page_id=None, page_name=from_page) + self.find_reachable_pages_rec(page_data, reachable, conversation_path, min_intent_counts, presets, intent_route_count=0, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + return reachable def find_unreachable_pages(self, flow_id: str = None, flow_name: str = None, include_groups: bool = True, verbose: bool = False) -> List[str]: - """Finds all pages which are unreachable by transition routes, - starting from the start page of a given flow. Either flow_id or - flow_name must be used. - - Args: - flow_id: The ID of the flow to find unreachable pages for - flow_name: The display name of the flow to find unreachable pages for - include_groups: (Optional) If true, intents from transition route groups will be included, - but only if they are actually referenced on some page - verbose: (Optional) If true, print debug information about route traversal - - Returns: - The list of unreachable pages in this flow - """ - if not flow_id: - if not flow_name: - raise Exception("One of flow_id or flow_name must be set for find_unreachable_pages") - reachable = self.find_reachable_pages(flow_id, flow_name, include_groups=include_groups, verbose=verbose) - if flow_id: - return list(set(self.pages_map[self.flows_map_rev[flow_name]].keys()) - set(reachable)) - else: - return list(set(self.pages_map[self.flows_map[flow_id]].keys()) - set(reachable)) + """Finds all pages which are unreachable by transition routes, + starting from the start page of a given flow. Either flow_id or + flow_name must be used. 
+
+        Args:
+          flow_id: The ID of the flow to find unreachable pages for
+          flow_name: The display name of the flow to find unreachable pages for
+          include_groups: (Optional) If true, intents from transition route groups will be included,
+            but only if they are actually referenced on some page
+          verbose: (Optional) If true, print debug information about route traversal
+
+        Returns:
+          The list of unreachable pages in this flow
+        """
+        if not flow_id:
+            if not flow_name:
+                raise Exception("One of flow_id or flow_name must be set for find_unreachable_pages")
+        reachable = self.find_reachable_pages(flow_id, flow_name, include_groups=include_groups, verbose=verbose)
+        if flow_id:
+            return list(set(self.pages_map[flow_id].keys()) - set(reachable))
+        else:
+            return list(set(self.pages_map[self.flows_map_rev[flow_name]].keys()) - set(reachable))
 
 """
 TODO: Methods to implement:

From 5ac69a83c9b88062e6c4ec28542c943943707d00 Mon Sep 17 00:00:00 2001
From: SeanScripts
Date: Fri, 9 Dec 2022 19:32:03 +0000
Subject: [PATCH 051/151] Implement get_page

---
 src/dfcx_scrapi/tools/agent_checker_util.py | 41 +++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py
index 3ea523cb..4254cde2 100644
--- a/src/dfcx_scrapi/tools/agent_checker_util.py
+++ b/src/dfcx_scrapi/tools/agent_checker_util.py
@@ -123,6 +123,47 @@ def convert_page(self, page_id, flow_id):
         # TODO: Should throw error, but returning this probably will anyway
         return 'Invalid'
 
+    def get_page(self, flow_id: str = None, flow_name: str = None, page_id: str = None, page_name: str = None) -> DFCXPage | DFCXFlow:
+        """Gets the page data for a specified page within
+        a specified flow. The flow and page can be specified
+        by ID or by display name. 
+ + Args: + flow_id OR flow_name: The ID or display name of the flow + page_id OR page_name: The ID or display name of the page + + Returns: + A DFCX Page object for this page, or DFCX Flow object if it's the start page + + Raises: + KeyError, if the page is not found + """ + if flow_id is None and flow_name is None: + raise Exception('Please specify a flow') + elif flow_name is not None: + if flow_name in self.flows_map_rev.keys(): + flow_id = self.flows_map_rev[flow_name] + else: + raise Exception(f'Flow not found: {flow_name}') + # Now that flow_id is set, look up the page + if page_id is None and page_name is None: + raise Exception('Please specify a page') + elif page_name is not None: + if page_name == 'Start': + return self.flow_data[flow_id] + if page_name in self.pages_map_rev[flow_id].keys(): + page_id = self.pages_map_rev[flow_id][page_name] + return self.page_data[flow_id][page_id] + else: + raise KeyError('Page not found. Did you forget "page_name="?') + else: + if 'START_PAGE' in page_id: + return self.flow_data[flow_id] + elif page_id not in self.pages_map[flow_id].keys(): + raise KeyError('Page not found.') + else: + return self.page_data[flow_id][page_id] + # Test case results # TODO: Should this function be in the base test_cases class, From a8efb12fcfaccd465d7af21b5a6d76be21cf1bda Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 19:42:17 +0000 Subject: [PATCH 052/151] Get flow and page data --- src/dfcx_scrapi/tools/agent_checker_util.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 4254cde2..387a4a66 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -80,10 +80,22 @@ def __init__( self.pages_map = {} for flow_id in self.flows_map.keys(): self.pages_map[flow_id] = self.pages.get_pages_map(flow_id=flow_id) + self.pages_map_rev = {} + for flow_id in 
self.flows_map.keys(): + self.pages_map_rev[flow_id] = self.pages.get_pages_map(flow_id=flow_id, reverse=True) self.route_groups_map = {} for flow_id in self.flows_map.keys(): self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id=flow_id) + # Get flow and page data + self.flow_data = {} + for flow_id in self.flows_map.keys(): + self.flow_data[flow_id] = self.flows.get_flow(flow_id=flow_id) + self.page_data = {} + for flow_id in self.flows_map.keys(): + page_data_list = self.pages.list_pages(flow_id=flow_id) + self.page_data[flow_id] = {page.name: page for page in page_list} + # Conversion utilities def convert_intent(self, intent_id): From 8414538b537f2816e1de8a5952c90b81bfdfe206 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 19:42:41 +0000 Subject: [PATCH 053/151] Fix page data --- src/dfcx_scrapi/tools/agent_checker_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 387a4a66..cf3e7d9d 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -93,7 +93,7 @@ def __init__( self.flow_data[flow_id] = self.flows.get_flow(flow_id=flow_id) self.page_data = {} for flow_id in self.flows_map.keys(): - page_data_list = self.pages.list_pages(flow_id=flow_id) + page_list = self.pages.list_pages(flow_id=flow_id) self.page_data[flow_id] = {page.name: page for page in page_list} # Conversion utilities From e158d6178d7ff5c8afc4953e8897380993848765 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 20:46:43 +0000 Subject: [PATCH 054/151] Fix find_unreachable_pages function and dependencies --- src/dfcx_scrapi/tools/agent_checker_util.py | 92 ++++++++++++++------- 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index cf3e7d9d..278258f2 100644 --- 
a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -87,7 +87,8 @@ def __init__( for flow_id in self.flows_map.keys(): self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id=flow_id) - # Get flow and page data + # Get intent, flow, and page data + self.intent_data = self.intents.list_intents(agent_id=self.agent_id) self.flow_data = {} for flow_id in self.flows_map.keys(): self.flow_data[flow_id] = self.flows.get_flow(flow_id=flow_id) @@ -95,6 +96,10 @@ def __init__( for flow_id in self.flows_map.keys(): page_list = self.pages.list_pages(flow_id=flow_id) self.page_data[flow_id] = {page.name: page for page in page_list} + self.route_group_data = {} + for flow_id in self.flows_map.keys(): + route_group_list = self.route_groups.list_transition_route_groups(flow_id=flow_id) + self.route_group_data[flow_id] = {route_group.name: route_group for route_group in route_group_list} # Conversion utilities @@ -134,6 +139,11 @@ def convert_page(self, page_id, flow_id): print('Flow not found') # TODO: Should throw error, but returning this probably will anyway return 'Invalid' + + def get_intent_parameters(self, intent_name): + for intent in self.intent_data: + if intent.display_name == intent_name: + return intent.parameters def get_page(self, flow_id: str = None, flow_name: str = None, page_id: str = None, page_name: str = None) -> DFCXPage | DFCXFlow: """Gets the page data for a specified page within @@ -268,8 +278,10 @@ def get_test_case_results(self, retest_all=False): # Reachable and unreachable pages - def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCXRoute, reachable: List[str], conversation_path: List[str], min_intent_counts: List[int], presets: Dict[str,str], intent_route_count: int = 0, intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = False, 
include_meta: bool = False, verbose: bool = False) -> None: + def find_reachable_pages_rec_helper(self, flow_id: str, flow_name: str, page: DFCXPage | DFCXFlow, route: DFCXRoute, reachable: List[str], conversation_path: List[str], min_intent_counts: List[int], presets: Dict[str,str], intent_route_count: int = 0, intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = False, include_meta: bool = False, verbose: bool = False) -> None: # TODO: Docstring + if not flow_name: + flow_name = self.flows_map[flow_id] target_page = route.target_page target_flow = route.target_flow if intent_route_limit is None or not hasattr(route, 'intent') or route.intent == '' or intent_route_count < intent_route_limit: @@ -286,8 +298,8 @@ def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCX # Don't continue on this path return intent_route_count += 1 - if target_page in self.pages: - page_name = self.pages[target_page].display_name + if target_page in self.page_data[flow_id]: + page_name = self.page_data[flow_id][target_page].display_name if verbose: print(page.display_name,'->',page_name) # Move to this page (this is also the recursion limiting step to prevent infinite loops) @@ -322,7 +334,7 @@ def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCX for param in intent_params: new_presets[param.id] = f'(potentially set by {intent_name})' - self.find_reachable_pages_rec(self.pages[target_page], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec(flow_id, flow_name, self.page_data[flow_id][target_page], reachable, 
conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) conversation_path.pop(-1) elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: # Better route found, traverse from here @@ -355,7 +367,7 @@ def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCX for param in intent_params: new_presets[param.id] = f'(potentially set by {intent_name})' - self.find_reachable_pages_rec(self.pages[target_page], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec(flow_id, flow_name, self.page_data[flow_id][target_page], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) conversation_path.pop(-1) elif 'END_FLOW' in target_page: if verbose: @@ -431,7 +443,7 @@ def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCX for param in intent_params: new_presets[param.id] = f'(potentially set by {intent_name})' - self.find_reachable_pages_rec(self.flow_data, reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, 
include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec(flow_id, flow_name, self.flow_data[flow_id], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) conversation_path.pop(-1) elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: # Better route found, traverse from here @@ -464,7 +476,7 @@ def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCX for param in intent_params: new_presets[param.id] = f'(potentially set by {intent_name})' - self.find_reachable_pages_rec(self.flow_data, reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec(flow_id, flow_name, self.flow_data[flow_id], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) conversation_path.pop(-1) elif len(target_page) > 0: print(page.display_name,'->',target_page) @@ -486,39 +498,42 @@ def find_reachable_pages_rec_helper(self, page: DFCXPage | DFCXFlow, route: DFCX if page_name in reachable and intent_route_count < 
min_intent_counts[reachable.index(page_name)]: min_intent_counts[reachable.index(page_name)] = intent_route_count - def find_reachable_pages_rec(self, page: DFCXPage | DFCXFlow, reachable: List[str], conversation_path: List[str], min_intent_counts: List[int], presets: Dict[str,str], intent_route_count: int = 0, intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = False, include_meta: bool = False, verbose: bool = False) -> None: + def find_reachable_pages_rec(self, flow_id: str, flow_name: str, page: DFCXPage | DFCXFlow, reachable: List[str], conversation_path: List[str], min_intent_counts: List[int], presets: Dict[str,str], intent_route_count: int = 0, intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = False, include_meta: bool = False, verbose: bool = False) -> None: # TODO: Docstring + if not flow_name: + flow_name = self.flows_map[flow_id] if hasattr(page, 'form'): for parameter in page.form.parameters: for event_handler in parameter.fill_behavior.reprompt_event_handlers: if limit_intent_to_initial and not is_initial: continue if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): - self.find_reachable_pages_rec_helper(page, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec_helper(flow_id, flow_name, page, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, 
include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) for event_handler in page.event_handlers: if limit_intent_to_initial and not is_initial: continue if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): - self.find_reachable_pages_rec_helper(page, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec_helper(flow_id, flow_name, page, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) for route in page.transition_routes: - self.find_reachable_pages_rec_helper(page, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec_helper(flow_id, flow_name, page, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, 
include_meta=include_meta, verbose=verbose) if include_groups: for route_group in page.transition_route_groups: - for route in self.transition_route_groups[route_group].transition_routes: - self.find_reachable_pages_rec_helper(page, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + # TODO: Need to map by flow + for route in self.route_group_data[flow_id][route_group].transition_routes: + self.find_reachable_pages_rec_helper(flow_id, flow_name, page, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) # Start page routes and route groups are also accessible from this page - if include_start_page_routes and page.display_name != self.flow_data.display_name and (not limit_intent_to_initial or is_initial): - for event_handler in self.flow_data.event_handlers: + if include_start_page_routes and page.display_name != flow_name and (not limit_intent_to_initial or is_initial): + for event_handler in self.flow_data[flow_id].event_handlers: if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): - self.find_reachable_pages_rec_helper(self.flow_data, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, 
include_meta=include_meta, verbose=verbose) - for route in self.flow_data.transition_routes: + self.find_reachable_pages_rec_helper(flow_id, flow_name, self.flow_data[flow_id], event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + for route in self.flow_data[flow_id].transition_routes: if hasattr(route, 'intent') and route.intent != '': - self.find_reachable_pages_rec_helper(self.flow_data, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec_helper(flow_id, flow_name, self.flow_data[flow_id], route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) if include_groups: - for route_group in self.flow_data.transition_route_groups: - for route in self.transition_route_groups[route_group].transition_routes: + for route_group in self.flow_data[flow_id].transition_route_groups: + for route in self.route_group_data[flow_id][route_group].transition_routes: if hasattr(route, 'intent') and route.intent != '': - self.find_reachable_pages_rec_helper(self.flow_data, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, 
intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec_helper(flow_id, flow_name, self.flow_data[flow_id], route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) def find_reachable_pages(self, flow_id: str, flow_name: str, from_page: str = 'Start', intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = True, include_meta: bool = False, verbose: bool = False) -> List[str]: """Finds all pages which are reachable by transition routes, @@ -526,8 +541,7 @@ def find_reachable_pages(self, flow_id: str, flow_name: str, from_page: str = 'S flow_name must be used. Args: - flow_id: The ID of the flow to find reachable pages for - flow_name: The display name of the flow to find reachable pages for + flow_id OR flow_name: The ID or name of the flow to find reachable pages for from_page: (Optional) The page to start from. 
If left blank, it will start on the Start Page intent_route_limit: (Optional) Default None include_groups: (Optional) If true, intents from transition route groups will be included, @@ -541,13 +555,25 @@ def find_reachable_pages(self, flow_id: str, flow_name: str, from_page: str = 'S Returns: The list of reachable pages in this flow """ + if not flow_id: + if not flow_name: + raise Exception("One of flow_id or flow_name must be set") + if flow_name in self.flows_map_rev.keys(): + flow_id = self.flows_map_rev[flow_name] + else: + raise Exception(f"Flow not found: {flow_name}") + if flow_id in self.flows_map.keys(): + flow_name = self.flows_map[flow_id] + else: + raise Exception(f'Flow not found: {flow_id}') + # Start at the start page... reachable = [from_page] conversation_path = [from_page] min_intent_counts = [25] # Technically this could be [0] or [1], or very rarely more than 1, depending on the routes that lead to current page... presets = {} page_data = self.get_page(flow_id=flow_id, flow_name=flow_name, page_id=None, page_name=from_page) - self.find_reachable_pages_rec(page_data, reachable, conversation_path, min_intent_counts, presets, intent_route_count=0, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + self.find_reachable_pages_rec(flow_id, flow_name, page_data, reachable, conversation_path, min_intent_counts, presets, intent_route_count=0, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) return reachable def find_unreachable_pages(self, flow_id: str = None, flow_name: str = None, include_groups: bool = True, verbose: bool = False) -> List[str]: @@ -567,12 +593,18 @@ def 
find_unreachable_pages(self, flow_id: str = None, flow_name: str = None, inc """ if not flow_id: if not flow_name: - raise Exception("One of flow_id or flow_name must be set for find_unreachable_pages") - reachable = self.find_reachable_pages(flow_id, flow_name, include_groups=include_groups, verbose=verbose) - if flow_id: - return list(set(self.pages_map[self.flows_map_rev[flow_name]].keys()) - set(reachable)) + raise Exception("One of flow_id or flow_name must be set") + if flow_name in self.flows_map_rev.keys(): + flow_id = self.flows_map_rev[flow_name] + else: + raise Exception(f"Flow not found: {flow_name}") + if flow_id in self.flows_map.keys(): + flow_name = self.flows_map[flow_id] else: - return list(set(self.pages_map[self.flows_map[flow_id]].keys()) - set(reachable)) + raise Exception(f'Flow not found: {flow_id}') + + reachable = self.find_reachable_pages(flow_id, flow_name, include_groups=include_groups, verbose=verbose) + return list(set(self.pages_map[flow_id].values()) - set(reachable)) """ TODO: Methods to implement: From 0bda9bb8003c43f5180c9b93725955b5a53b3987 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 21:12:22 +0000 Subject: [PATCH 055/151] Clean up agent_checker_util.py --- src/dfcx_scrapi/tools/agent_checker_util.py | 867 +++++++++++++++----- 1 file changed, 650 insertions(+), 217 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 278258f2..40e90a91 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -39,19 +39,21 @@ # logging config logging.basicConfig( level=logging.INFO, - format='%(asctime)s %(levelname)-8s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", ) + class AgentCheckerUtil(ScrapiBase): """Utility class for checking DFCX Agents.""" + def __init__( self, creds_path: str = None, creds_dict: Dict = None, - 
creds = None, - scope = False, - agent_id: str = None + creds=None, + scope=False, + agent_id: str = None, ): super().__init__( creds_path=creds_path, @@ -70,22 +72,29 @@ def __init__( self.pages = Pages(creds=self.creds) self.webhooks = Webhooks(creds=self.creds, agent_id=self.agent_id) self.route_groups = TransitionRouteGroups( - creds=self.creds, agent_id=self.agent_id) + creds=self.creds, agent_id=self.agent_id + ) self.test_cases = TestCases(creds=self.creds, agent_id=self.agent_id) # Generate maps self.intents_map = self.intents.get_intents_map(agent_id=self.agent_id) self.flows_map = self.flows.get_flows_map(agent_id=self.agent_id) - self.flows_map_rev = self.flows.get_flows_map(agent_id=self.agent_id, reverse=True) + self.flows_map_rev = self.flows.get_flows_map( + agent_id=self.agent_id, reverse=True + ) self.pages_map = {} for flow_id in self.flows_map.keys(): self.pages_map[flow_id] = self.pages.get_pages_map(flow_id=flow_id) self.pages_map_rev = {} for flow_id in self.flows_map.keys(): - self.pages_map_rev[flow_id] = self.pages.get_pages_map(flow_id=flow_id, reverse=True) + self.pages_map_rev[flow_id] = self.pages.get_pages_map( + flow_id=flow_id, reverse=True + ) self.route_groups_map = {} for flow_id in self.flows_map.keys(): - self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map(flow_id=flow_id) + self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map( + flow_id=flow_id + ) # Get intent, flow, and page data self.intent_data = self.intents.list_intents(agent_id=self.agent_id) @@ -98,80 +107,91 @@ def __init__( self.page_data[flow_id] = {page.name: page for page in page_list} self.route_group_data = {} for flow_id in self.flows_map.keys(): - route_group_list = self.route_groups.list_transition_route_groups(flow_id=flow_id) - self.route_group_data[flow_id] = {route_group.name: route_group for route_group in route_group_list} + route_group_list = self.route_groups.list_transition_route_groups( + flow_id=flow_id + ) + 
self.route_group_data[flow_id] = { + route_group.name: route_group for route_group in route_group_list + } # Conversion utilities - - def convert_intent(self, intent_id): + + def _convert_intent(self, intent_id): """Gets an intent display name from an intent ID""" - intent_id_converted = str(self.agent_id) + '/intents/' + str(intent_id) + intent_id_converted = str(self.agent_id) + "/intents/" + str(intent_id) if intent_id_converted in self.intents_map.keys(): return self.intents_map[intent_id_converted] - return '' + return "" - def convert_flow(self, flow_id): + def _convert_flow(self, flow_id): """Gets a flow display name from a flow ID""" - if flow_id.split('/')[-1] == '-': - return '' - #flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) + if flow_id.split("/")[-1] == "-": + return "" + # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) if flow_id in self.flows_map.keys(): return self.flows_map[flow_id] # TODO: Should throw error instead of returning default - return 'Default Start Flow' + return "Default Start Flow" # Note that flow id includes agent, normally... 
- def convert_page(self, page_id, flow_id): + def _convert_page(self, page_id, flow_id): """Gets a page display name from a page and flow ID""" - if page_id == 'END_SESSION': - return 'End Session' - elif page_id == 'END_FLOW': - return 'End Flow' - elif page_id == 'START_PAGE': - return 'Start' - page_id_converted = str(flow_id) + '/pages/' + str(page_id) + if page_id == "END_SESSION": + return "End Session" + elif page_id == "END_FLOW": + return "End Flow" + elif page_id == "START_PAGE": + return "Start" + page_id_converted = str(flow_id) + "/pages/" + str(page_id) if flow_id in self.pages_map.keys(): if page_id_converted in self.pages_map[flow_id].keys(): return self.pages_map[flow_id][page_id_converted] else: # TODO: Should throw error instead of returning default - return 'Start' - print('Flow not found') + return "Start" + print("Flow not found") # TODO: Should throw error, but returning this probably will anyway - return 'Invalid' + return "Invalid" - def get_intent_parameters(self, intent_name): + def _get_intent_parameters(self, intent_name): + """Gets the parameters for a particular intent, by display name""" for intent in self.intent_data: if intent.display_name == intent_name: return intent.parameters - - def get_page(self, flow_id: str = None, flow_name: str = None, page_id: str = None, page_name: str = None) -> DFCXPage | DFCXFlow: + + def _get_page( + self, + flow_id: str = None, + flow_name: str = None, + page_id: str = None, + page_name: str = None, + ) -> DFCXPage | DFCXFlow: """Gets the page data for a specified page within a specified flow. The flow and page can be specified by ID or by display name. 
- + Args: - flow_id OR flow_name: The ID or display name of the flow + flow_id OR flow_name: The ID or display name of the flow page_id OR page_name: The ID or display name of the page - + Returns: A DFCX Page object for this page, or DFCX Flow object if it's the start page - + Raises: KeyError, if the page is not found """ if flow_id is None and flow_name is None: - raise Exception('Please specify a flow') + raise Exception("Please specify a flow") elif flow_name is not None: if flow_name in self.flows_map_rev.keys(): flow_id = self.flows_map_rev[flow_name] else: - raise Exception(f'Flow not found: {flow_name}') + raise Exception(f"Flow not found: {flow_name}") # Now that flow_id is set, look up the page if page_id is None and page_name is None: - raise Exception('Please specify a page') + raise Exception("Please specify a page") elif page_name is not None: - if page_name == 'Start': + if page_name == "Start": return self.flow_data[flow_id] if page_name in self.pages_map_rev[flow_id].keys(): page_id = self.pages_map_rev[flow_id][page_name] @@ -179,29 +199,29 @@ def get_page(self, flow_id: str = None, flow_name: str = None, page_id: str = No else: raise KeyError('Page not found. Did you forget "page_name="?') else: - if 'START_PAGE' in page_id: + if "START_PAGE" in page_id: return self.flow_data[flow_id] elif page_id not in self.pages_map[flow_id].keys(): - raise KeyError('Page not found.') + raise KeyError("Page not found.") else: return self.page_data[flow_id][page_id] - + # Test case results - - # TODO: Should this function be in the base test_cases class, + + # TODO: Should this function be in the base test_cases class, # as get_test_case_results_df or something? def get_test_case_results(self, retest_all=False): """Gets the test case results for this agent, and generates a dataframe with their details. Any tests without a result will be run in a batch. 
- + Args: retest_all: if true, all test cases are re-run, regardless of whether or not they had a result - + Returns: DataFrame of test case results for this agent, with columns: - display_name, id, short_id (excluding agent ID), + display_name, id, short_id (excluding agent ID), tags (comma-separated string), creation_time, start_flow, start_page, passed, test_time """ @@ -221,79 +241,125 @@ def get_test_case_results(self, retest_all=False): passed = [] for response in test_case_results: - # Collect untested cases to be retested (or all if retest_all is True) - if retest_all or str(response.last_test_result.test_result) == 'TestResult.TEST_RESULT_UNSPECIFIED': + # Collect untested cases to be retested + # (or all if retest_all is True) + if ( + retest_all + or str(response.last_test_result.test_result) + == "TestResult.TEST_RESULT_UNSPECIFIED" + ): retest.append(response.name) retest_names.append(response.display_name) # Collect additional information for dataframe display_names.append(response.display_name) ids.append(response.name) - short_ids.append(response.name.split('/')[-1]) - tags.append(','.join(response.tags)) + short_ids.append(response.name.split("/")[-1]) + tags.append(",".join(response.tags)) creation_times.append(response.creation_time) - flows.append(self.convert_flow(response.test_config.flow)) - pages.append(self.convert_page(response.test_config.page, response.test_config.flow)) + flows.append(self._convert_flow(response.test_config.flow)) + pages.append( + self._convert_page(response.test_config.page, response.test_config.flow) + ) test_results.append(str(response.last_test_result.test_result)) test_times.append(response.last_test_result.test_time) - passed.append(str(response.last_test_result.test_result) == 'TestResult.PASSED') + passed.append( + str(response.last_test_result.test_result) == "TestResult.PASSED" + ) # Create dataframe - test_case_df = pd.DataFrame({ - 'display_name': display_names, - 'id': ids, - 'short_id': short_ids, - 
'tags': tags, - 'creation_time': creation_times, - 'start_flow': flows, - 'start_page': pages, - 'test_result': test_results, - 'passed': passed, - 'test_time': test_times}) + test_case_df = pd.DataFrame( + { + "display_name": display_names, + "id": ids, + "short_id": short_ids, + "tags": tags, + "creation_time": creation_times, + "start_flow": flows, + "start_page": pages, + "test_result": test_results, + "passed": passed, + "test_time": test_times, + } + ) # Retest any that haven't been run yet - print('To retest:', len(retest)) + print("To retest:", len(retest)) if len(retest) > 0: response = self.test_cases.batch_run_test_cases(retest, self.agent_id) for result in response.results: # Results may not be in the same order as they went in (oh well) # Process the name a bit to remove the /results/id part at the end. - testCaseId_full = '/'.join(result.name.split('/')[:-2]) - index = retest.index(testCaseId_full) - testCaseId = testCaseId_full.split('/')[-1] + testCaseId_full = "/".join(result.name.split("/")[:-2]) + testCaseId = testCaseId_full.split("/")[-1] # Update dataframe where id = testcaseId_full - #row = test_case_df.loc[test_case_df['id'] == testCaseId_full] - test_case_df.loc[test_case_df['id'] == testCaseId_full, 'short_id'] = testCaseId - test_case_df.loc[test_case_df['id'] == testCaseId_full, 'test_result'] = str(result.test_result) - test_case_df.loc[test_case_df['id'] == testCaseId_full, 'test_time'] = result.test_time - test_case_df.loc[test_case_df['id'] == testCaseId_full, 'passed'] = str(result.test_result) == 'TestResult.PASSED' + # row = test_case_df.loc[test_case_df['id'] == testCaseId_full] + test_case_df.loc[ + test_case_df["id"] == testCaseId_full, "short_id" + ] = testCaseId + test_case_df.loc[ + test_case_df["id"] == testCaseId_full, "test_result" + ] = str(result.test_result) + test_case_df.loc[ + test_case_df["id"] == testCaseId_full, "test_time" + ] = result.test_time + test_case_df.loc[test_case_df["id"] == testCaseId_full, "passed"] 
= ( + str(result.test_result) == "TestResult.PASSED" + ) # This column is redundant, since we have passed (bool) - test_case_df = test_case_df.drop(columns=['test_result']) + test_case_df = test_case_df.drop(columns=["test_result"]) return test_case_df - + # Test case comparison/report - + # Changelogs - + # Reachable and unreachable pages - - def find_reachable_pages_rec_helper(self, flow_id: str, flow_name: str, page: DFCXPage | DFCXFlow, route: DFCXRoute, reachable: List[str], conversation_path: List[str], min_intent_counts: List[int], presets: Dict[str,str], intent_route_count: int = 0, intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = False, include_meta: bool = False, verbose: bool = False) -> None: - # TODO: Docstring + + def _find_reachable_pages_rec_helper( + self, + flow_id: str, + flow_name: str, + page: DFCXPage | DFCXFlow, + route: DFCXRoute, + reachable: List[str], + conversation_path: List[str], + min_intent_counts: List[int], + presets: Dict[str, str], + intent_route_count: int = 0, + intent_route_limit: Optional[int] = None, + include_groups: bool = True, + include_start_page_routes: bool = True, + limit_intent_to_initial: bool = False, + is_initial: bool = False, + include_meta: bool = False, + verbose: bool = False, + ) -> None: + """Helper function for the recursion involved in finding reachable pages""" if not flow_name: flow_name = self.flows_map[flow_id] target_page = route.target_page target_flow = route.target_flow - if intent_route_limit is None or not hasattr(route, 'intent') or route.intent == '' or intent_route_count < intent_route_limit: - if hasattr(page, 'form'): + if ( + intent_route_limit is None + or not hasattr(route, "intent") + or route.intent == "" + or intent_route_count < intent_route_limit + ): + if hasattr(page, "form"): for parameter in page.form.parameters: parameter_name = parameter.display_name - # 
Need to also account for parameters being set by intents (or by webhooks...) - if parameter_name not in presets.keys() or presets[parameter_name] == 'NULL': + # Need to also account for parameters being + # set by intents (or by webhooks...) + if ( + parameter_name not in presets.keys() + or presets[parameter_name] == "NULL" + ): # This page has an unfilled parameter if limit_intent_to_initial and not is_initial: return - if hasattr(route, 'intent') and route.intent != '': + if hasattr(route, "intent") and route.intent != "": if limit_intent_to_initial and not is_initial: # Don't continue on this path return @@ -301,8 +367,9 @@ def find_reachable_pages_rec_helper(self, flow_id: str, flow_name: str, page: DF if target_page in self.page_data[flow_id]: page_name = self.page_data[flow_id][target_page].display_name if verbose: - print(page.display_name,'->',page_name) - # Move to this page (this is also the recursion limiting step to prevent infinite loops) + print(page.display_name, "->", page_name) + # Move to this page (this is also the recursion limiting step + # to prevent infinite loops) if page_name not in reachable: reachable.append(page_name) min_intent_counts.append(intent_route_count) @@ -311,32 +378,70 @@ def find_reachable_pages_rec_helper(self, flow_id: str, flow_name: str, page: DF print(conversation_path, intent_route_count) new_presets = presets.copy() - if hasattr(page, 'entry_fulfillment'): - if hasattr(page.entry_fulfillment, 'set_parameter_actions'): - for param_preset in page.entry_fulfillment.set_parameter_actions: + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, 'form'): + if hasattr(page, "form"): for parameter in page.form.parameters: - if hasattr(parameter, 'fill_behavior'): - if hasattr(parameter.fill_behavior, 
'initial_prompt_fulfillment'): - if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): - for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, 'trigger_fulfillment'): - if hasattr(route.trigger_fulfillment, 'set_parameter_actions'): - for param_preset in route.trigger_fulfillment.set_parameter_actions: + if hasattr(parameter, "fill_behavior"): + if hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ): + if hasattr( + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", + ): + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions + ): + new_presets[ + param_preset.parameter + ] = param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in route.trigger_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, 'intent') and route.intent != '': + if hasattr(route, "intent") and route.intent != "": # Check the entities annotated on this intent intent_name = self.intents_map[route.intent] - intent_params = self.get_intent_parameters(intent_name) + intent_params = self._get_intent_parameters(intent_name) for param in intent_params: - new_presets[param.id] = f'(potentially set by {intent_name})' + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" - self.find_reachable_pages_rec(flow_id, flow_name, self.page_data[flow_id][target_page], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, 
verbose=verbose) + self._find_reachable_pages_rec( + flow_id, + flow_name, + self.page_data[flow_id][target_page], + reachable, + conversation_path, + min_intent_counts, + new_presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=False, + include_meta=include_meta, + verbose=verbose, + ) conversation_path.pop(-1) - elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + elif ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): # Better route found, traverse from here min_intent_counts[reachable.index(page_name)] = intent_route_count conversation_path.append(page_name) @@ -344,74 +449,130 @@ def find_reachable_pages_rec_helper(self, flow_id: str, flow_name: str, page: DF print(conversation_path, intent_route_count) new_presets = presets.copy() - if hasattr(page, 'entry_fulfillment'): - if hasattr(page.entry_fulfillment, 'set_parameter_actions'): - for param_preset in page.entry_fulfillment.set_parameter_actions: + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, 'form'): + if hasattr(page, "form"): for parameter in page.form.parameters: - if hasattr(parameter, 'fill_behavior'): - if hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): - if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): - for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, 'trigger_fulfillment'): - if hasattr(route.trigger_fulfillment, 
'set_parameter_actions'): - for param_preset in route.trigger_fulfillment.set_parameter_actions: + if hasattr(parameter, "fill_behavior"): + if hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ): + if hasattr( + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", + ): + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions + ): + new_presets[ + param_preset.parameter + ] = param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in route.trigger_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, 'intent') and route.intent != '': + if hasattr(route, "intent") and route.intent != "": # Check the entities annotated on this intent intent_name = self.intents_map[route.intent] - intent_params = self.get_intent_parameters(intent_name) + intent_params = self._get_intent_parameters(intent_name) for param in intent_params: - new_presets[param.id] = f'(potentially set by {intent_name})' + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" - self.find_reachable_pages_rec(flow_id, flow_name, self.page_data[flow_id][target_page], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) + self._find_reachable_pages_rec( + flow_id, + flow_name, + self.page_data[flow_id][target_page], + reachable, + conversation_path, + min_intent_counts, + new_presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + 
limit_intent_to_initial=limit_intent_to_initial, + is_initial=False, + include_meta=include_meta, + verbose=verbose, + ) conversation_path.pop(-1) - elif 'END_FLOW' in target_page: + elif "END_FLOW" in target_page: if verbose: - print(page.display_name,'-> END FLOW') + print(page.display_name, "-> END FLOW") if include_meta: - page_name = 'END FLOW' + page_name = "END FLOW" if page_name not in reachable: reachable.append(page_name) min_intent_counts.append(intent_route_count) - elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: - min_intent_counts[reachable.index(page_name)] = intent_route_count - #reachable.append('END FLOW') - elif 'END_SESSION' in target_page: + elif ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + min_intent_counts[ + reachable.index(page_name) + ] = intent_route_count + # reachable.append('END FLOW') + elif "END_SESSION" in target_page: if verbose: - print(page.display_name,'-> END SESSION') + print(page.display_name, "-> END SESSION") if include_meta: - page_name = 'END SESSION' + page_name = "END SESSION" if page_name not in reachable: reachable.append(page_name) min_intent_counts.append(intent_route_count) - elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: - min_intent_counts[reachable.index(page_name)] = intent_route_count - #reachable.append('END SESSION') - elif 'CURRENT_PAGE' in target_page: + elif ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + min_intent_counts[ + reachable.index(page_name) + ] = intent_route_count + # reachable.append('END SESSION') + elif "CURRENT_PAGE" in target_page: if verbose: - print(page.display_name,'-> CURRENT PAGE') + print(page.display_name, "-> CURRENT PAGE") page_name = page.display_name - if page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + 
if ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): min_intent_counts[reachable.index(page_name)] = intent_route_count - elif 'PREVIOUS_PAGE' in target_page: + elif "PREVIOUS_PAGE" in target_page: if verbose: - print(page.display_name, '-> PREVIOUS PAGE') + print(page.display_name, "-> PREVIOUS PAGE") if include_meta: - page_name = 'PREVIOUS PAGE' + page_name = "PREVIOUS PAGE" if page_name not in reachable: reachable.append(page_name) min_intent_counts.append(intent_route_count) - elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: - min_intent_counts[reachable.index(page_name)] = intent_route_count + elif ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + min_intent_counts[ + reachable.index(page_name) + ] = intent_route_count # TODO: This could cause huge problems... - elif 'START_PAGE' in target_page: + elif "START_PAGE" in target_page: if verbose: - print(page.display_name, '-> START PAGE') - page_name = 'Start' + print(page.display_name, "-> START PAGE") + page_name = "Start" if page_name not in reachable: reachable.append(page_name) min_intent_counts.append(intent_route_count) @@ -420,122 +581,366 @@ def find_reachable_pages_rec_helper(self, flow_id: str, flow_name: str, page: DF print(conversation_path, intent_route_count) new_presets = presets.copy() - if hasattr(page, 'entry_fulfillment'): - if hasattr(page.entry_fulfillment, 'set_parameter_actions'): - for param_preset in page.entry_fulfillment.set_parameter_actions: + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, 'form'): + if hasattr(page, "form"): for parameter in page.form.parameters: - if hasattr(parameter, 'fill_behavior'): - if 
hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): - if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): - for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, 'trigger_fulfillment'): - if hasattr(route.trigger_fulfillment, 'set_parameter_actions'): - for param_preset in route.trigger_fulfillment.set_parameter_actions: + if hasattr(parameter, "fill_behavior"): + if hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ): + if hasattr( + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", + ): + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions + ): + new_presets[ + param_preset.parameter + ] = param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in route.trigger_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, 'intent') and route.intent != '': + if hasattr(route, "intent") and route.intent != "": # Check the entities annotated on this intent intent_name = self.intents_map[route.intent] - intent_params = self.get_intent_parameters(intent_name) + intent_params = self._get_intent_parameters(intent_name) for param in intent_params: - new_presets[param.id] = f'(potentially set by {intent_name})' + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" - self.find_reachable_pages_rec(flow_id, flow_name, self.flow_data[flow_id], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, 
include_meta=include_meta, verbose=verbose) + self._find_reachable_pages_rec( + flow_id, + flow_name, + self.flow_data[flow_id], + reachable, + conversation_path, + min_intent_counts, + new_presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=False, + include_meta=include_meta, + verbose=verbose, + ) conversation_path.pop(-1) - elif page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: - # Better route found, traverse from here + elif ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + # Better route found, traverse from here min_intent_counts[reachable.index(page_name)] = intent_route_count conversation_path.append(page_name) if verbose: print(conversation_path, intent_route_count) new_presets = presets.copy() - if hasattr(page, 'entry_fulfillment'): - if hasattr(page.entry_fulfillment, 'set_parameter_actions'): - for param_preset in page.entry_fulfillment.set_parameter_actions: + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, 'form'): + if hasattr(page, "form"): for parameter in page.form.parameters: - if hasattr(parameter, 'fill_behavior'): - if hasattr(parameter.fill_behavior, 'initial_prompt_fulfillment'): - if hasattr(parameter.fill_behavior.initial_prompt_fulfillment, 'set_parameter_actions'): - for param_preset in parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, 'trigger_fulfillment'): - if hasattr(route.trigger_fulfillment, 'set_parameter_actions'): - for param_preset 
in route.trigger_fulfillment.set_parameter_actions: + if hasattr(parameter, "fill_behavior"): + if hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ): + if hasattr( + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", + ): + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions + ): + new_presets[ + param_preset.parameter + ] = param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in route.trigger_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, 'intent') and route.intent != '': + if hasattr(route, "intent") and route.intent != "": # Check the entities annotated on this intent intent_name = self.intents_map[route.intent] - intent_params = self.get_intent_parameters(intent_name) + intent_params = self._get_intent_parameters(intent_name) for param in intent_params: - new_presets[param.id] = f'(potentially set by {intent_name})' + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" - self.find_reachable_pages_rec(flow_id, flow_name, self.flow_data[flow_id], reachable, conversation_path, min_intent_counts, new_presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=False, include_meta=include_meta, verbose=verbose) + self._find_reachable_pages_rec( + flow_id, + flow_name, + self.flow_data[flow_id], + reachable, + conversation_path, + min_intent_counts, + new_presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=False, + 
include_meta=include_meta, + verbose=verbose, + ) conversation_path.pop(-1) elif len(target_page) > 0: - print(page.display_name,'->',target_page) + print(page.display_name, "->", target_page) # This should not happen, and if it does it needs to be fixed input() elif len(target_flow) > 0: flow_name = self.flows_map[route.target_flow] if verbose: - print(page.display_name,'->',flow_name) + print(page.display_name, "->", flow_name) if flow_name not in reachable: reachable.append(flow_name) min_intent_counts.append(intent_route_count) - elif flow_name in reachable and intent_route_count < min_intent_counts[reachable.index(flow_name)]: + elif ( + flow_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(flow_name)] + ): min_intent_counts[reachable.index(flow_name)] = intent_route_count else: if verbose: - print(page.display_name,'->',route.target_flow, '(empty)') + print(page.display_name, "->", route.target_flow, "(empty)") page_name = page.display_name - if page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)]: + if ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): min_intent_counts[reachable.index(page_name)] = intent_route_count - - def find_reachable_pages_rec(self, flow_id: str, flow_name: str, page: DFCXPage | DFCXFlow, reachable: List[str], conversation_path: List[str], min_intent_counts: List[int], presets: Dict[str,str], intent_route_count: int = 0, intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = False, include_meta: bool = False, verbose: bool = False) -> None: - # TODO: Docstring + + def _find_reachable_pages_rec( + self, + flow_id: str, + flow_name: str, + page: DFCXPage | DFCXFlow, + reachable: List[str], + conversation_path: List[str], + min_intent_counts: List[int], + presets: Dict[str, str], + 
intent_route_count: int = 0, + intent_route_limit: Optional[int] = None, + include_groups: bool = True, + include_start_page_routes: bool = True, + limit_intent_to_initial: bool = False, + is_initial: bool = False, + include_meta: bool = False, + verbose: bool = False, + ) -> None: + """Recursive function to find reachable pages within a given flow, + starting at a particular page. Other parameters here are used for + more general traversal, but not currently used.""" if not flow_name: flow_name = self.flows_map[flow_id] - if hasattr(page, 'form'): + if hasattr(page, "form"): for parameter in page.form.parameters: for event_handler in parameter.fill_behavior.reprompt_event_handlers: if limit_intent_to_initial and not is_initial: continue - if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): - self.find_reachable_pages_rec_helper(flow_id, flow_name, page, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + if hasattr(event_handler, "target_page") or hasattr( + event_handler, "target_flow" + ): + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + page, + event_handler, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) for event_handler in page.event_handlers: if limit_intent_to_initial and not is_initial: continue - if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): - 
self.find_reachable_pages_rec_helper(flow_id, flow_name, page, event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + if hasattr(event_handler, "target_page") or hasattr( + event_handler, "target_flow" + ): + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + page, + event_handler, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) for route in page.transition_routes: - self.find_reachable_pages_rec_helper(flow_id, flow_name, page, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + page, + route, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) if include_groups: for route_group in page.transition_route_groups: # TODO: Need to map by flow - for route in 
self.route_group_data[flow_id][route_group].transition_routes: - self.find_reachable_pages_rec_helper(flow_id, flow_name, page, route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + for route in self.route_group_data[flow_id][ + route_group + ].transition_routes: + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + page, + route, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) # Start page routes and route groups are also accessible from this page - if include_start_page_routes and page.display_name != flow_name and (not limit_intent_to_initial or is_initial): + if ( + include_start_page_routes + and page.display_name != flow_name + and (not limit_intent_to_initial or is_initial) + ): for event_handler in self.flow_data[flow_id].event_handlers: - if hasattr(event_handler, 'target_page') or hasattr(event_handler, 'target_flow'): - self.find_reachable_pages_rec_helper(flow_id, flow_name, self.flow_data[flow_id], event_handler, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + if hasattr(event_handler, "target_page") or hasattr( + event_handler, "target_flow" + ): + 
self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + self.flow_data[flow_id], + event_handler, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) for route in self.flow_data[flow_id].transition_routes: - if hasattr(route, 'intent') and route.intent != '': - self.find_reachable_pages_rec_helper(flow_id, flow_name, self.flow_data[flow_id], route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + if hasattr(route, "intent") and route.intent != "": + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + self.flow_data[flow_id], + route, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) if include_groups: for route_group in self.flow_data[flow_id].transition_route_groups: - for route in self.route_group_data[flow_id][route_group].transition_routes: - if hasattr(route, 'intent') and route.intent != '': - self.find_reachable_pages_rec_helper(flow_id, flow_name, self.flow_data[flow_id], route, reachable, conversation_path, min_intent_counts, presets, intent_route_count=intent_route_count, intent_route_limit=intent_route_limit, 
include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + for route in self.route_group_data[flow_id][ + route_group + ].transition_routes: + if hasattr(route, "intent") and route.intent != "": + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + self.flow_data[flow_id], + route, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) - def find_reachable_pages(self, flow_id: str, flow_name: str, from_page: str = 'Start', intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, limit_intent_to_initial: bool = False, is_initial: bool = True, include_meta: bool = False, verbose: bool = False) -> List[str]: + def find_reachable_pages( + self, + flow_id: str = None, + flow_name: str = None, + from_page: str = "Start", + intent_route_limit: Optional[int] = None, + include_groups: bool = True, + include_start_page_routes: bool = True, + limit_intent_to_initial: bool = False, + is_initial: bool = True, + include_meta: bool = False, + verbose: bool = False, + ) -> List[str]: """Finds all pages which are reachable by transition routes, starting from a given page in a given flow. Either flow_id or flow_name must be used. @@ -544,7 +949,7 @@ def find_reachable_pages(self, flow_id: str, flow_name: str, from_page: str = 'S flow_id OR flow_name: The ID or name of the flow to find reachable pages for from_page: (Optional) The page to start from. 
If left blank, it will start on the Start Page intent_route_limit: (Optional) Default None - include_groups: (Optional) If true, intents from transition route groups will be included, + include_groups: (Optional) If true, intents from transition route groups will be included, but only if they are actually referenced on some page include_start_page_routes: (Optional) Default true limit_intent_to_initial: (Optional) Default False @@ -565,26 +970,52 @@ def find_reachable_pages(self, flow_id: str, flow_name: str, from_page: str = 'S if flow_id in self.flows_map.keys(): flow_name = self.flows_map[flow_id] else: - raise Exception(f'Flow not found: {flow_id}') + raise Exception(f"Flow not found: {flow_id}") # Start at the start page... reachable = [from_page] conversation_path = [from_page] - min_intent_counts = [25] # Technically this could be [0] or [1], or very rarely more than 1, depending on the routes that lead to current page... + min_intent_counts = [ + 25 + ] # Technically this could be [0] or [1], or very rarely more than 1, depending on the routes that lead to current page... 
presets = {} - page_data = self.get_page(flow_id=flow_id, flow_name=flow_name, page_id=None, page_name=from_page) - self.find_reachable_pages_rec(flow_id, flow_name, page_data, reachable, conversation_path, min_intent_counts, presets, intent_route_count=0, intent_route_limit=intent_route_limit, include_groups=include_groups, include_start_page_routes=include_start_page_routes, limit_intent_to_initial=limit_intent_to_initial, is_initial=is_initial, include_meta=include_meta, verbose=verbose) + page_data = self._get_page( + flow_id=flow_id, flow_name=flow_name, page_id=None, page_name=from_page + ) + self._find_reachable_pages_rec( + flow_id, + flow_name, + page_data, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=0, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) return reachable - - def find_unreachable_pages(self, flow_id: str = None, flow_name: str = None, include_groups: bool = True, verbose: bool = False) -> List[str]: - """Finds all pages which are unreachable by transition routes, + + def find_unreachable_pages( + self, + flow_id: str = None, + flow_name: str = None, + include_groups: bool = True, + verbose: bool = False, + ) -> List[str]: + """Finds all pages which are unreachable by transition routes, starting from the start page of a given flow. Either flow_id or flow_name must be used. 
Args: flow_id: The ID of the flow to find unreachable pages for flow_name: The display name of the flow to find unreachable pages for - include_groups: (Optional) If true, intents from transition route groups will be included, + include_groups: (Optional) If true, intents from transition route groups will be included, but only if they are actually referenced on some page verbose: (Optional) If true, print debug information about route traversal @@ -601,9 +1032,11 @@ def find_unreachable_pages(self, flow_id: str = None, flow_name: str = None, inc if flow_id in self.flows_map.keys(): flow_name = self.flows_map[flow_id] else: - raise Exception(f'Flow not found: {flow_id}') + raise Exception(f"Flow not found: {flow_id}") - reachable = self.find_reachable_pages(flow_id, flow_name, include_groups=include_groups, verbose=verbose) + reachable = self.find_reachable_pages( + flow_id, flow_name, include_groups=include_groups, verbose=verbose + ) return list(set(self.pages_map[flow_id].values()) - set(reachable)) """ From 5ea1851a42d3bc9fb940c3bd8e3cffb2ba7aeb32 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 9 Dec 2022 21:36:32 +0000 Subject: [PATCH 056/151] Lint fixes --- src/dfcx_scrapi/tools/agent_checker_util.py | 134 +++++++++++--------- 1 file changed, 75 insertions(+), 59 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 40e90a91..df01e324 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -43,6 +43,31 @@ datefmt="%Y-%m-%d %H:%M:%S", ) +# TODO: Methods to implement: +# - Run test cases and store results, and give a report +# - Need to include a reference agent for this to give useful info +# about new failing test cases +# - Get condensed changelog compared to a reference +# - Ideally include test case changes, to include info that the CX UI +# can't provide +# - Find unreachable/unused pages, intents, route groups, and maybe routes 
+# - Finding unreachable routes is hard, but the other problems have +# already been figured out +# - Find invalid test cases +# - Test cases referencing pages or intents that don't exist, +# for example +# - Check true routes +# - Pages with only conditional routes, and no intents or parameter +# filling, should have the last route be "true" to prevent getting +# stuck on the page +# - Check events +# - Pages with user input should have a no-input-default and +# no-match-default event handler. +# - Not sure if this applies to all agents in the same way +# - Check infinite loops +# - Not possible to solve in general because of webhooks, +# but can find likely candidates +# - Probably other things class AgentCheckerUtil(ScrapiBase): """Utility class for checking DFCX Agents.""" @@ -119,7 +144,7 @@ def __init__( def _convert_intent(self, intent_id): """Gets an intent display name from an intent ID""" intent_id_converted = str(self.agent_id) + "/intents/" + str(intent_id) - if intent_id_converted in self.intents_map.keys(): + if intent_id_converted in self.intents_map: return self.intents_map[intent_id_converted] return "" @@ -128,7 +153,7 @@ def _convert_flow(self, flow_id): if flow_id.split("/")[-1] == "-": return "" # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) - if flow_id in self.flows_map.keys(): + if flow_id in self.flows_map: return self.flows_map[flow_id] # TODO: Should throw error instead of returning default return "Default Start Flow" @@ -143,8 +168,8 @@ def _convert_page(self, page_id, flow_id): elif page_id == "START_PAGE": return "Start" page_id_converted = str(flow_id) + "/pages/" + str(page_id) - if flow_id in self.pages_map.keys(): - if page_id_converted in self.pages_map[flow_id].keys(): + if flow_id in self.pages_map: + if page_id_converted in self.pages_map[flow_id]: return self.pages_map[flow_id][page_id_converted] else: # TODO: Should throw error instead of returning default @@ -175,7 +200,8 @@ def _get_page( page_id OR 
page_name: The ID or display name of the page Returns: - A DFCX Page object for this page, or DFCX Flow object if it's the start page + A DFCX Page object for this page, + or DFCX Flow object if it's the start page Raises: KeyError, if the page is not found @@ -183,7 +209,7 @@ def _get_page( if flow_id is None and flow_name is None: raise Exception("Please specify a flow") elif flow_name is not None: - if flow_name in self.flows_map_rev.keys(): + if flow_name in self.flows_map_rev: flow_id = self.flows_map_rev[flow_name] else: raise Exception(f"Flow not found: {flow_name}") @@ -193,7 +219,7 @@ def _get_page( elif page_name is not None: if page_name == "Start": return self.flow_data[flow_id] - if page_name in self.pages_map_rev[flow_id].keys(): + if page_name in self.pages_map_rev[flow_id]: page_id = self.pages_map_rev[flow_id][page_name] return self.page_data[flow_id][page_id] else: @@ -201,7 +227,7 @@ def _get_page( else: if "START_PAGE" in page_id: return self.flow_data[flow_id] - elif page_id not in self.pages_map[flow_id].keys(): + elif page_id not in self.pages_map[flow_id]: raise KeyError("Page not found.") else: return self.page_data[flow_id][page_id] @@ -258,7 +284,8 @@ def get_test_case_results(self, retest_all=False): creation_times.append(response.creation_time) flows.append(self._convert_flow(response.test_config.flow)) pages.append( - self._convert_page(response.test_config.page, response.test_config.flow) + self._convert_page(response.test_config.page, + response.test_config.flow) ) test_results.append(str(response.last_test_result.test_result)) test_times.append(response.last_test_result.test_time) @@ -285,25 +312,26 @@ def get_test_case_results(self, retest_all=False): # Retest any that haven't been run yet print("To retest:", len(retest)) if len(retest) > 0: - response = self.test_cases.batch_run_test_cases(retest, self.agent_id) + response = self.test_cases.batch_run_test_cases(retest, + self.agent_id) for result in response.results: - # Results 
may not be in the same order as they went in (oh well) - # Process the name a bit to remove the /results/id part at the end. - testCaseId_full = "/".join(result.name.split("/")[:-2]) - testCaseId = testCaseId_full.split("/")[-1] + # Results may not be in the same order as they went in + # Process the name a bit to remove the /results/id part + test_case_id_full = "/".join(result.name.split("/")[:-2]) + test_case_id = test_case_id_full.split("/")[-1] - # Update dataframe where id = testcaseId_full - # row = test_case_df.loc[test_case_df['id'] == testCaseId_full] + # Update dataframe where id = test_case_id_full + # row = test_case_df.loc[test_case_df['id']==test_case_id_full] test_case_df.loc[ - test_case_df["id"] == testCaseId_full, "short_id" - ] = testCaseId + test_case_df["id"] == test_case_id_full, "short_id" + ] = test_case_id test_case_df.loc[ - test_case_df["id"] == testCaseId_full, "test_result" + test_case_df["id"] == test_case_id_full, "test_result" ] = str(result.test_result) test_case_df.loc[ - test_case_df["id"] == testCaseId_full, "test_time" + test_case_df["id"] == test_case_id_full, "test_time" ] = result.test_time - test_case_df.loc[test_case_df["id"] == testCaseId_full, "passed"] = ( + test_case_df.loc[test_case_df["id"] == test_case_id_full, "passed"] = ( str(result.test_result) == "TestResult.PASSED" ) @@ -336,7 +364,9 @@ def _find_reachable_pages_rec_helper( include_meta: bool = False, verbose: bool = False, ) -> None: - """Helper function for the recursion involved in finding reachable pages""" + """Helper function for the recursion involved in + finding reachable pages + """ if not flow_name: flow_name = self.flows_map[flow_id] target_page = route.target_page @@ -353,7 +383,7 @@ def _find_reachable_pages_rec_helper( # Need to also account for parameters being # set by intents (or by webhooks...) 
if ( - parameter_name not in presets.keys() + parameter_name not in presets or presets[parameter_name] == "NULL" ): # This page has an unfilled parameter @@ -946,16 +976,19 @@ def find_reachable_pages( flow_name must be used. Args: - flow_id OR flow_name: The ID or name of the flow to find reachable pages for - from_page: (Optional) The page to start from. If left blank, it will start on the Start Page + flow_id OR flow_name: The ID or name of the flow + from_page: (Optional) The page to start from. If left blank, it will + start on the Start Page intent_route_limit: (Optional) Default None - include_groups: (Optional) If true, intents from transition route groups will be included, - but only if they are actually referenced on some page + include_groups: (Optional) If true, intents from transition route + groups will be included, but only if they are actually referenced + on some page include_start_page_routes: (Optional) Default true limit_intent_to_initial: (Optional) Default False is_initial: (Optional) Default True include_meta: (Optional) Default False - verbose: (Optional) If true, print debug information about route traversal + verbose: (Optional) If true, print debug information about + route traversal Returns: The list of reachable pages in this flow @@ -963,11 +996,11 @@ def find_reachable_pages( if not flow_id: if not flow_name: raise Exception("One of flow_id or flow_name must be set") - if flow_name in self.flows_map_rev.keys(): + if flow_name in self.flows_map_rev: flow_id = self.flows_map_rev[flow_name] else: raise Exception(f"Flow not found: {flow_name}") - if flow_id in self.flows_map.keys(): + if flow_id in self.flows_map: flow_name = self.flows_map[flow_id] else: raise Exception(f"Flow not found: {flow_id}") @@ -975,12 +1008,13 @@ def find_reachable_pages( # Start at the start page... 
reachable = [from_page] conversation_path = [from_page] - min_intent_counts = [ - 25 - ] # Technically this could be [0] or [1], or very rarely more than 1, depending on the routes that lead to current page... + # Technically this could be [0] or [1], or very rarely more than 1, + # depending on the routes that lead to current page... + min_intent_counts = [25] presets = {} page_data = self._get_page( - flow_id=flow_id, flow_name=flow_name, page_id=None, page_name=from_page + flow_id=flow_id, flow_name=flow_name, + page_id=None, page_name=from_page ) self._find_reachable_pages_rec( flow_id, @@ -1015,9 +1049,11 @@ def find_unreachable_pages( Args: flow_id: The ID of the flow to find unreachable pages for flow_name: The display name of the flow to find unreachable pages for - include_groups: (Optional) If true, intents from transition route groups will be included, - but only if they are actually referenced on some page - verbose: (Optional) If true, print debug information about route traversal + include_groups: (Optional) If true, intents from transition route + groups will be included, but only if they are actually referenced + on some page + verbose: (Optional) If true, print debug information about + route traversal Returns: The list of unreachable pages in this flow @@ -1025,11 +1061,11 @@ def find_unreachable_pages( if not flow_id: if not flow_name: raise Exception("One of flow_id or flow_name must be set") - if flow_name in self.flows_map_rev.keys(): + if flow_name in self.flows_map_rev: flow_id = self.flows_map_rev[flow_name] else: raise Exception(f"Flow not found: {flow_name}") - if flow_id in self.flows_map.keys(): + if flow_id in self.flows_map: flow_name = self.flows_map[flow_id] else: raise Exception(f"Flow not found: {flow_id}") @@ -1038,23 +1074,3 @@ def find_unreachable_pages( flow_id, flow_name, include_groups=include_groups, verbose=verbose ) return list(set(self.pages_map[flow_id].values()) - set(reachable)) - - """ - TODO: Methods to implement: - 
- Run test cases and store results, and give a report - - Need to include a reference agent for this to give useful info about new failing test cases - - Get condensed changelog compared to a reference - - Ideally include test case changes, to include info that the CX UI can't provide - - Find unreachable/unused pages, intents, route groups, and possibly routes - - Finding unreachable routes is hard, but the other problems have already been figured out - - Find invalid test cases - - Test cases referencing pages or intents that don't exist, for example - - Check true routes - - Pages with only conditional routes, and no intents or parameter filling, should have the last route be "true" to prevent getting stuck on the page - - Check events - - Pages with user input should have a no-input-default and no-match-default event handler. - - Not sure if this applies to all agents in the same way - - Check infinite loops - - Not possible to solve in general because of webhooks, but can find likely candidates - - Probably other things - """ From bdfa66f6d0b9490f3384fc82b1564a31af7da49d Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Wed, 14 Dec 2022 22:46:35 +0000 Subject: [PATCH 057/151] Optimize agent data loading --- src/dfcx_scrapi/tools/agent_checker_util.py | 35 ++++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index df01e324..45f0ec79 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -123,21 +123,32 @@ def __init__( # Get intent, flow, and page data self.intent_data = self.intents.list_intents(agent_id=self.agent_id) - self.flow_data = {} - for flow_id in self.flows_map.keys(): - self.flow_data[flow_id] = self.flows.get_flow(flow_id=flow_id) - self.page_data = {} - for flow_id in self.flows_map.keys(): + self.flow_data = self.get_all_flow_data() + self.page_data = 
self.get_all_page_data() + self.route_group_data = self.get_all_route_group_data() + + def get_all_flow_data(self): + flow_data = {} + flow_list = self.flows.list_flows(self.agent_id) + for flow in flow_list: + flow_data[flow.name] = flow + return flow_data + + def get_all_page_data(self): + page_data = {} + for flow_id in self.flows_map.values(): page_list = self.pages.list_pages(flow_id=flow_id) - self.page_data[flow_id] = {page.name: page for page in page_list} - self.route_group_data = {} - for flow_id in self.flows_map.keys(): - route_group_list = self.route_groups.list_transition_route_groups( + page_data[flow_id] = {page.name: page for page in page_list} + return page_data + + def get_all_route_group_data(self): + route_group_data = {} + for flow_id in self.flows_map.values(): + group_list = self.route_groups.list_transition_route_groups( flow_id=flow_id ) - self.route_group_data[flow_id] = { - route_group.name: route_group for route_group in route_group_list - } + route_group_data[flow_id] = {rg.name: rg for rg in group_list} + return route_group_data # Conversion utilities From feb6b8b55ff4ebde78511dd9327d6d3ffaf26402 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 18:35:50 +0000 Subject: [PATCH 058/151] Clean up excess indents in find_reachable_pages_rec with a couple of helper functions --- src/dfcx_scrapi/tools/agent_checker_util.py | 254 +++++++++++++------- 1 file changed, 163 insertions(+), 91 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 45f0ec79..e3f3a322 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -803,32 +803,26 @@ def _find_reachable_pages_rec( more general traversal, but not currently used.""" if not flow_name: flow_name = self.flows_map[flow_id] - if hasattr(page, "form"): + if hasattr(page, "form") and page.form: for parameter in page.form.parameters: - for event_handler in 
parameter.fill_behavior.reprompt_event_handlers: - if limit_intent_to_initial and not is_initial: - continue - if hasattr(event_handler, "target_page") or hasattr( - event_handler, "target_flow" - ): - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - page, - event_handler, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) + self._process_form_parameter_for_reachable_pages( + flow_id, + flow_name, + page, + parameter, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) for event_handler in page.event_handlers: if limit_intent_to_initial and not is_initial: continue @@ -902,72 +896,150 @@ def _find_reachable_pages_rec( and page.display_name != flow_name and (not limit_intent_to_initial or is_initial) ): - for event_handler in self.flow_data[flow_id].event_handlers: - if hasattr(event_handler, "target_page") or hasattr( - event_handler, "target_flow" - ): - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - self.flow_data[flow_id], - event_handler, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) - for route in 
self.flow_data[flow_id].transition_routes: - if hasattr(route, "intent") and route.intent != "": - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - self.flow_data[flow_id], - route, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) - if include_groups: - for route_group in self.flow_data[flow_id].transition_route_groups: - for route in self.route_group_data[flow_id][ - route_group - ].transition_routes: - if hasattr(route, "intent") and route.intent != "": - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - self.flow_data[flow_id], - route, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) + self._process_start_page_routes_for_reachable_pages( + flow_id, + flow_name, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) + + def _process_form_for_reachable_pages( + self, + flow_id: str, + flow_name: str, + page: DFCXPage | DFCXFlow, + parameter, # TODO: Data type for DFCX Parameter + reachable: List[str], + conversation_path: List[str], + min_intent_counts: List[int], + presets: Dict[str, str], + intent_route_count: 
int = 0, + intent_route_limit: Optional[int] = None, + include_groups: bool = True, + include_start_page_routes: bool = True, + limit_intent_to_initial: bool = False, + is_initial: bool = False, + include_meta: bool = False, + verbose: bool = False, + ) -> None: + for event_handler in parameter.fill_behavior.reprompt_event_handlers: + if limit_intent_to_initial and not is_initial: + continue + if hasattr(event_handler, "target_page") or hasattr( + event_handler, "target_flow" + ): + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + page, + event_handler, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) + + def _process_start_page_routes_for_reachable_pages( + self, + flow_id: str, + flow_name: str, + reachable: List[str], + conversation_path: List[str], + min_intent_counts: List[int], + presets: Dict[str, str], + intent_route_count: int = 0, + intent_route_limit: Optional[int] = None, + include_groups: bool = True, + include_start_page_routes: bool = True, + limit_intent_to_initial: bool = False, + is_initial: bool = False, + include_meta: bool = False, + verbose: bool = False, + ): + for event_handler in self.flow_data[flow_id].event_handlers: + if hasattr(event_handler, "target_page") or hasattr( + event_handler, "target_flow" + ): + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + self.flow_data[flow_id], + event_handler, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + 
is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) + for route in self.flow_data[flow_id].transition_routes: + if hasattr(route, "intent") and route.intent != "": + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + self.flow_data[flow_id], + route, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) + if include_groups: + for route_group in self.flow_data[flow_id].transition_route_groups: + for route in self.route_group_data[flow_id][ + route_group + ].transition_routes: + if hasattr(route, "intent") and route.intent != "": + self._find_reachable_pages_rec_helper( + flow_id, + flow_name, + self.flow_data[flow_id], + route, + reachable, + conversation_path, + min_intent_counts, + presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=is_initial, + include_meta=include_meta, + verbose=verbose, + ) def find_reachable_pages( self, From b02781f8d15ddc3af92e0847b56ab1a86f1432c5 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 18:38:27 +0000 Subject: [PATCH 059/151] Minor linting --- src/dfcx_scrapi/tools/agent_checker_util.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index e3f3a322..7a5b3846 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -301,7 +301,7 @@ def get_test_case_results(self, retest_all=False): 
test_results.append(str(response.last_test_result.test_result)) test_times.append(response.last_test_result.test_time) passed.append( - str(response.last_test_result.test_result) == "TestResult.PASSED" + str(response.last_test_result.test_result)=="TestResult.PASSED" ) # Create dataframe @@ -328,21 +328,21 @@ def get_test_case_results(self, retest_all=False): for result in response.results: # Results may not be in the same order as they went in # Process the name a bit to remove the /results/id part - test_case_id_full = "/".join(result.name.split("/")[:-2]) - test_case_id = test_case_id_full.split("/")[-1] + tc_id_full = "/".join(result.name.split("/")[:-2]) + tc_id = tc_id_full.split("/")[-1] - # Update dataframe where id = test_case_id_full - # row = test_case_df.loc[test_case_df['id']==test_case_id_full] + # Update dataframe where id = tc_id_full + # row = test_case_df.loc[test_case_df['id']==tc_id_full] test_case_df.loc[ - test_case_df["id"] == test_case_id_full, "short_id" - ] = test_case_id + test_case_df["id"] == tc_id_full, "short_id" + ] = tc_id test_case_df.loc[ - test_case_df["id"] == test_case_id_full, "test_result" + test_case_df["id"] == tc_id_full, "test_result" ] = str(result.test_result) test_case_df.loc[ - test_case_df["id"] == test_case_id_full, "test_time" + test_case_df["id"] == tc_id_full, "test_time" ] = result.test_time - test_case_df.loc[test_case_df["id"] == test_case_id_full, "passed"] = ( + test_case_df.loc[test_case_df["id"] == tc_id_full,"passed"] = ( str(result.test_result) == "TestResult.PASSED" ) From 6c925a51a29aff1432a706e3da1c012902e99875 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 18:40:32 +0000 Subject: [PATCH 060/151] More minor linting --- src/dfcx_scrapi/tools/agent_checker_util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 7a5b3846..994c654a 100644 --- 
a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -194,6 +194,7 @@ def _get_intent_parameters(self, intent_name): for intent in self.intent_data: if intent.display_name == intent_name: return intent.parameters + return None def _get_page( self, @@ -329,7 +330,7 @@ def get_test_case_results(self, retest_all=False): # Results may not be in the same order as they went in # Process the name a bit to remove the /results/id part tc_id_full = "/".join(result.name.split("/")[:-2]) - tc_id = tc_id_full.split("/")[-1] + tc_id = tc_id_full.rsplit("/", maxsplit=1)[-1] # Update dataframe where id = tc_id_full # row = test_case_df.loc[test_case_df['id']==tc_id_full] From 8416c0a6a21e12221adad82f8c755a02f9b80ec8 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 18:44:38 +0000 Subject: [PATCH 061/151] Use early return to remove one indentation layer in find_reachable_pages_rec_helper --- src/dfcx_scrapi/tools/agent_checker_util.py | 728 ++++++++++---------- 1 file changed, 363 insertions(+), 365 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 994c654a..5411aba0 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -383,403 +383,401 @@ def _find_reachable_pages_rec_helper( flow_name = self.flows_map[flow_id] target_page = route.target_page target_flow = route.target_flow - if ( - intent_route_limit is None - or not hasattr(route, "intent") - or route.intent == "" - or intent_route_count < intent_route_limit - ): - if hasattr(page, "form"): - for parameter in page.form.parameters: - parameter_name = parameter.display_name - # Need to also account for parameters being - # set by intents (or by webhooks...) 
- if ( - parameter_name not in presets - or presets[parameter_name] == "NULL" - ): - # This page has an unfilled parameter - if limit_intent_to_initial and not is_initial: - return - if hasattr(route, "intent") and route.intent != "": - if limit_intent_to_initial and not is_initial: - # Don't continue on this path - return - intent_route_count += 1 - if target_page in self.page_data[flow_id]: - page_name = self.page_data[flow_id][target_page].display_name + if hasattr(route, "intent") and route.intent != "": + return None + if intent_route_limit and intent_route_count < intent_route_limit: + return None + if hasattr(page, "form"): + for parameter in page.form.parameters: + parameter_name = parameter.display_name + # Need to also account for parameters being + # set by intents (or by webhooks...) + if ( + parameter_name not in presets + or presets[parameter_name] == "NULL" + ): + # This page has an unfilled parameter + if limit_intent_to_initial and not is_initial: + return + if hasattr(route, "intent") and route.intent != "": + if limit_intent_to_initial and not is_initial: + # Don't continue on this path + return + intent_route_count += 1 + if target_page in self.page_data[flow_id]: + page_name = self.page_data[flow_id][target_page].display_name + if verbose: + print(page.display_name, "->", page_name) + # Move to this page (this is also the recursion limiting step + # to prevent infinite loops) + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + conversation_path.append(page_name) if verbose: - print(page.display_name, "->", page_name) - # Move to this page (this is also the recursion limiting step - # to prevent infinite loops) - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) - conversation_path.append(page_name) - if verbose: - print(conversation_path, intent_route_count) + print(conversation_path, intent_route_count) - new_presets = 
presets.copy() - if hasattr(page, "entry_fulfillment"): - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, "form"): - for parameter in page.form.parameters: - if hasattr(parameter, "fill_behavior"): + new_presets = presets.copy() + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, "form"): + for parameter in page.form.parameters: + if hasattr(parameter, "fill_behavior"): + if hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ): if hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", ): - if hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions ): - for ( - param_preset - ) in ( - parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value + new_presets[ + param_preset.parameter + ] = param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, "intent") and route.intent != "": - # Check the entities 
annotated on this intent - intent_name = self.intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" + if hasattr(route, "intent") and route.intent != "": + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self._get_intent_parameters(intent_name) + for param in intent_params: + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" - self._find_reachable_pages_rec( - flow_id, - flow_name, - self.page_data[flow_id][target_page], - reachable, - conversation_path, - min_intent_counts, - new_presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=False, - include_meta=include_meta, - verbose=verbose, - ) - conversation_path.pop(-1) - elif ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] - ): - # Better route found, traverse from here - min_intent_counts[reachable.index(page_name)] = intent_route_count - conversation_path.append(page_name) - if verbose: - print(conversation_path, intent_route_count) + self._find_reachable_pages_rec( + flow_id, + flow_name, + self.page_data[flow_id][target_page], + reachable, + conversation_path, + min_intent_counts, + new_presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=False, + include_meta=include_meta, + verbose=verbose, + ) + conversation_path.pop(-1) + elif ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + # Better route found, traverse from here 
+ min_intent_counts[reachable.index(page_name)] = intent_route_count + conversation_path.append(page_name) + if verbose: + print(conversation_path, intent_route_count) - new_presets = presets.copy() - if hasattr(page, "entry_fulfillment"): - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, "form"): - for parameter in page.form.parameters: - if hasattr(parameter, "fill_behavior"): + new_presets = presets.copy() + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, "form"): + for parameter in page.form.parameters: + if hasattr(parameter, "fill_behavior"): + if hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ): if hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", ): - if hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions ): - for ( - param_preset - ) in ( - parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value + new_presets[ + param_preset.parameter + ] = param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in 
route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, "intent") and route.intent != "": - # Check the entities annotated on this intent - intent_name = self.intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" + if hasattr(route, "intent") and route.intent != "": + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self._get_intent_parameters(intent_name) + for param in intent_params: + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" - self._find_reachable_pages_rec( - flow_id, - flow_name, - self.page_data[flow_id][target_page], - reachable, - conversation_path, - min_intent_counts, - new_presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=False, - include_meta=include_meta, - verbose=verbose, - ) - conversation_path.pop(-1) - elif "END_FLOW" in target_page: - if verbose: - print(page.display_name, "-> END FLOW") - if include_meta: - page_name = "END FLOW" - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) - elif ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] - ): - min_intent_counts[ - reachable.index(page_name) - ] = intent_route_count - # reachable.append('END FLOW') - elif "END_SESSION" in target_page: - if verbose: - print(page.display_name, "-> END SESSION") - if include_meta: - page_name = "END SESSION" - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) - elif ( - page_name in reachable - and intent_route_count - < 
min_intent_counts[reachable.index(page_name)] - ): - min_intent_counts[ - reachable.index(page_name) - ] = intent_route_count - # reachable.append('END SESSION') - elif "CURRENT_PAGE" in target_page: - if verbose: - print(page.display_name, "-> CURRENT PAGE") - page_name = page.display_name - if ( + self._find_reachable_pages_rec( + flow_id, + flow_name, + self.page_data[flow_id][target_page], + reachable, + conversation_path, + min_intent_counts, + new_presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=False, + include_meta=include_meta, + verbose=verbose, + ) + conversation_path.pop(-1) + elif "END_FLOW" in target_page: + if verbose: + print(page.display_name, "-> END FLOW") + if include_meta: + page_name = "END FLOW" + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + elif ( page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)] ): - min_intent_counts[reachable.index(page_name)] = intent_route_count - elif "PREVIOUS_PAGE" in target_page: - if verbose: - print(page.display_name, "-> PREVIOUS PAGE") - if include_meta: - page_name = "PREVIOUS PAGE" - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) - elif ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] - ): - min_intent_counts[ - reachable.index(page_name) - ] = intent_route_count - # TODO: This could cause huge problems... 
- elif "START_PAGE" in target_page: - if verbose: - print(page.display_name, "-> START PAGE") - page_name = "Start" + min_intent_counts[ + reachable.index(page_name) + ] = intent_route_count + # reachable.append('END FLOW') + elif "END_SESSION" in target_page: + if verbose: + print(page.display_name, "-> END SESSION") + if include_meta: + page_name = "END SESSION" + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + elif ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + min_intent_counts[ + reachable.index(page_name) + ] = intent_route_count + # reachable.append('END SESSION') + elif "CURRENT_PAGE" in target_page: + if verbose: + print(page.display_name, "-> CURRENT PAGE") + page_name = page.display_name + if ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + min_intent_counts[reachable.index(page_name)] = intent_route_count + elif "PREVIOUS_PAGE" in target_page: + if verbose: + print(page.display_name, "-> PREVIOUS PAGE") + if include_meta: + page_name = "PREVIOUS PAGE" if page_name not in reachable: reachable.append(page_name) min_intent_counts.append(intent_route_count) - conversation_path.append(page_name) - if verbose: - print(conversation_path, intent_route_count) - - new_presets = presets.copy() - if hasattr(page, "entry_fulfillment"): - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, "form"): - for parameter in page.form.parameters: - if hasattr(parameter, "fill_behavior"): - if hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", - ): - if hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", - ): - for ( - param_preset - ) in ( - 
parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - - if hasattr(route, "intent") and route.intent != "": - # Check the entities annotated on this intent - intent_name = self.intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" - - self._find_reachable_pages_rec( - flow_id, - flow_name, - self.flow_data[flow_id], - reachable, - conversation_path, - min_intent_counts, - new_presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=False, - include_meta=include_meta, - verbose=verbose, - ) - conversation_path.pop(-1) elif ( page_name in reachable and intent_route_count < min_intent_counts[reachable.index(page_name)] ): - # Better route found, traverse from here - min_intent_counts[reachable.index(page_name)] = intent_route_count - conversation_path.append(page_name) - if verbose: - print(conversation_path, intent_route_count) + min_intent_counts[ + reachable.index(page_name) + ] = intent_route_count + # TODO: This could cause huge problems... 
+ elif "START_PAGE" in target_page: + if verbose: + print(page.display_name, "-> START PAGE") + page_name = "Start" + if page_name not in reachable: + reachable.append(page_name) + min_intent_counts.append(intent_route_count) + conversation_path.append(page_name) + if verbose: + print(conversation_path, intent_route_count) - new_presets = presets.copy() - if hasattr(page, "entry_fulfillment"): - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, "form"): - for parameter in page.form.parameters: - if hasattr(parameter, "fill_behavior"): + new_presets = presets.copy() + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, "form"): + for parameter in page.form.parameters: + if hasattr(parameter, "fill_behavior"): + if hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ): if hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", ): - if hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions ): - for ( - param_preset - ) in ( - parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value + new_presets[ + param_preset.parameter + ] = 
param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value - if hasattr(route, "intent") and route.intent != "": - # Check the entities annotated on this intent - intent_name = self.intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" + if hasattr(route, "intent") and route.intent != "": + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self._get_intent_parameters(intent_name) + for param in intent_params: + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" - self._find_reachable_pages_rec( - flow_id, - flow_name, - self.flow_data[flow_id], - reachable, - conversation_path, - min_intent_counts, - new_presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=False, - include_meta=include_meta, - verbose=verbose, - ) - conversation_path.pop(-1) - elif len(target_page) > 0: - print(page.display_name, "->", target_page) - # This should not happen, and if it does it needs to be fixed - input() - elif len(target_flow) > 0: - flow_name = self.flows_map[route.target_flow] - if verbose: - print(page.display_name, "->", flow_name) - if flow_name not in reachable: - reachable.append(flow_name) - min_intent_counts.append(intent_route_count) - elif ( - flow_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(flow_name)] - ): - min_intent_counts[reachable.index(flow_name)] = intent_route_count - else: + self._find_reachable_pages_rec( + flow_id, + 
flow_name, + self.flow_data[flow_id], + reachable, + conversation_path, + min_intent_counts, + new_presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=False, + include_meta=include_meta, + verbose=verbose, + ) + conversation_path.pop(-1) + elif ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + # Better route found, traverse from here + min_intent_counts[reachable.index(page_name)] = intent_route_count + conversation_path.append(page_name) if verbose: - print(page.display_name, "->", route.target_flow, "(empty)") - page_name = page.display_name - if ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] - ): - min_intent_counts[reachable.index(page_name)] = intent_route_count + print(conversation_path, intent_route_count) + + new_presets = presets.copy() + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, "form"): + for parameter in page.form.parameters: + if hasattr(parameter, "fill_behavior"): + if hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ): + if hasattr( + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", + ): + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions + ): + new_presets[ + param_preset.parameter + ] = param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = 
param_preset.value + + if hasattr(route, "intent") and route.intent != "": + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self._get_intent_parameters(intent_name) + for param in intent_params: + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" + + self._find_reachable_pages_rec( + flow_id, + flow_name, + self.flow_data[flow_id], + reachable, + conversation_path, + min_intent_counts, + new_presets, + intent_route_count=intent_route_count, + intent_route_limit=intent_route_limit, + include_groups=include_groups, + include_start_page_routes=include_start_page_routes, + limit_intent_to_initial=limit_intent_to_initial, + is_initial=False, + include_meta=include_meta, + verbose=verbose, + ) + conversation_path.pop(-1) + elif len(target_page) > 0: + print(page.display_name, "->", target_page) + # This should not happen, and if it does it needs to be fixed + input() + elif len(target_flow) > 0: + flow_name = self.flows_map[route.target_flow] + if verbose: + print(page.display_name, "->", flow_name) + if flow_name not in reachable: + reachable.append(flow_name) + min_intent_counts.append(intent_route_count) + elif ( + flow_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(flow_name)] + ): + min_intent_counts[reachable.index(flow_name)] = intent_route_count + else: + if verbose: + print(page.display_name, "->", route.target_flow, "(empty)") + page_name = page.display_name + if ( + page_name in reachable + and intent_route_count + < min_intent_counts[reachable.index(page_name)] + ): + min_intent_counts[reachable.index(page_name)] = intent_route_count def _find_reachable_pages_rec( self, From 6c4fecd9f0eb9192557468313ba534b7a4baac19 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 20:40:37 +0000 Subject: [PATCH 062/151] Clean up repeated code with new function get_new_presets --- src/dfcx_scrapi/tools/agent_checker_util.py | 217 
+++++--------------- 1 file changed, 50 insertions(+), 167 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 5411aba0..8fcd1eb7 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -384,10 +384,10 @@ def _find_reachable_pages_rec_helper( target_page = route.target_page target_flow = route.target_flow if hasattr(route, "intent") and route.intent != "": - return None + return if intent_route_limit and intent_route_count < intent_route_limit: - return None - if hasattr(page, "form"): + return + if hasattr(page, "form") and page.form: for parameter in page.form.parameters: parameter_name = parameter.display_name # Need to also account for parameters being @@ -417,47 +417,7 @@ def _find_reachable_pages_rec_helper( if verbose: print(conversation_path, intent_route_count) - new_presets = presets.copy() - if hasattr(page, "entry_fulfillment"): - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, "form"): - for parameter in page.form.parameters: - if hasattr(parameter, "fill_behavior"): - if hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", - ): - if hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", - ): - for ( - param_preset - ) in ( - parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - - if hasattr(route, "intent") and route.intent != "": - # Check the entities annotated on this intent - 
intent_name = self.intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" + new_presets = self._get_new_presets(presets, page, route) self._find_reachable_pages_rec( flow_id, @@ -488,47 +448,7 @@ def _find_reachable_pages_rec_helper( if verbose: print(conversation_path, intent_route_count) - new_presets = presets.copy() - if hasattr(page, "entry_fulfillment"): - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, "form"): - for parameter in page.form.parameters: - if hasattr(parameter, "fill_behavior"): - if hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", - ): - if hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", - ): - for ( - param_preset - ) in ( - parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - - if hasattr(route, "intent") and route.intent != "": - # Check the entities annotated on this intent - intent_name = self.intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" + new_presets = self._get_new_presets(presets, page, route) self._find_reachable_pages_rec( flow_id, @@ -591,6 +511,7 @@ def _find_reachable_pages_rec_helper( and intent_route_count < min_intent_counts[reachable.index(page_name)] ): + # TODO: barely too long 
min_intent_counts[reachable.index(page_name)] = intent_route_count elif "PREVIOUS_PAGE" in target_page: if verbose: @@ -620,47 +541,7 @@ def _find_reachable_pages_rec_helper( if verbose: print(conversation_path, intent_route_count) - new_presets = presets.copy() - if hasattr(page, "entry_fulfillment"): - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - if hasattr(page, "form"): - for parameter in page.form.parameters: - if hasattr(parameter, "fill_behavior"): - if hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", - ): - if hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", - ): - for ( - param_preset - ) in ( - parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - - if hasattr(route, "intent") and route.intent != "": - # Check the entities annotated on this intent - intent_name = self.intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" + new_presets = self._get_new_presets(presets, page, route) self._find_reachable_pages_rec( flow_id, @@ -691,47 +572,7 @@ def _find_reachable_pages_rec_helper( if verbose: print(conversation_path, intent_route_count) - new_presets = presets.copy() - if hasattr(page, "entry_fulfillment"): - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = 
param_preset.value - if hasattr(page, "form"): - for parameter in page.form.parameters: - if hasattr(parameter, "fill_behavior"): - if hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", - ): - if hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", - ): - for ( - param_preset - ) in ( - parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - - if hasattr(route, "intent") and route.intent != "": - # Check the entities annotated on this intent - intent_name = self.intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" + new_presets = self._get_new_presets(presets, page, route) self._find_reachable_pages_rec( flow_id, @@ -767,6 +608,7 @@ def _find_reachable_pages_rec_helper( and intent_route_count < min_intent_counts[reachable.index(flow_name)] ): + # TODO: barely too long min_intent_counts[reachable.index(flow_name)] = intent_route_count else: if verbose: @@ -777,8 +619,49 @@ def _find_reachable_pages_rec_helper( and intent_route_count < min_intent_counts[reachable.index(page_name)] ): + # TODO: barely too long min_intent_counts[reachable.index(page_name)] = intent_route_count + def _get_new_presets(self, presets, page, route): + new_presets = presets.copy() + if hasattr(page, "entry_fulfillment"): + if hasattr(page.entry_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in page.entry_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(page, "form"): + for parameter in 
page.form.parameters: + if (hasattr(parameter, "fill_behavior") + and hasattr(parameter.fill_behavior, "initial_prompt_fulfillment")): + if hasattr( + parameter.fill_behavior.initial_prompt_fulfillment, + "set_parameter_actions", + ): + for ( + param_preset + ) in ( + parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions + ): + new_presets[ + param_preset.parameter + ] = param_preset.value + if hasattr(route, "trigger_fulfillment"): + if hasattr(route.trigger_fulfillment, "set_parameter_actions"): + for ( + param_preset + ) in route.trigger_fulfillment.set_parameter_actions: + new_presets[param_preset.parameter] = param_preset.value + if hasattr(route, "intent") and route.intent != "": + # Check the entities annotated on this intent + intent_name = self.intents_map[route.intent] + intent_params = self._get_intent_parameters(intent_name) + for param in intent_params: + new_presets[ + param.id + ] = f"(potentially set by {intent_name})" + return new_presets + def _find_reachable_pages_rec( self, flow_id: str, From 3af4212e216fa8d2cc75396a5fb261b0b470e465 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 20:47:47 +0000 Subject: [PATCH 063/151] Lint fixes --- src/dfcx_scrapi/tools/agent_checker_util.py | 42 ++++++++++----------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 8fcd1eb7..c1a56979 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -116,9 +116,9 @@ def __init__( flow_id=flow_id, reverse=True ) self.route_groups_map = {} - for flow_id in self.flows_map.keys(): - self.route_groups_map[flow_id] = self.route_groups.get_route_groups_map( - flow_id=flow_id + for fid in self.flows_map.keys(): + self.route_groups_map[fid] = self.route_groups.get_route_groups_map( + flow_id=fid ) # Get intent, flow, and page data @@ -443,7 +443,7 @@ def 
_find_reachable_pages_rec_helper( < min_intent_counts[reachable.index(page_name)] ): # Better route found, traverse from here - min_intent_counts[reachable.index(page_name)] = intent_route_count + min_intent_counts[reachable.index(page_name)]=intent_route_count conversation_path.append(page_name) if verbose: print(conversation_path, intent_route_count) @@ -511,8 +511,7 @@ def _find_reachable_pages_rec_helper( and intent_route_count < min_intent_counts[reachable.index(page_name)] ): - # TODO: barely too long - min_intent_counts[reachable.index(page_name)] = intent_route_count + min_intent_counts[reachable.index(page_name)]=intent_route_count elif "PREVIOUS_PAGE" in target_page: if verbose: print(page.display_name, "-> PREVIOUS PAGE") @@ -567,7 +566,7 @@ def _find_reachable_pages_rec_helper( < min_intent_counts[reachable.index(page_name)] ): # Better route found, traverse from here - min_intent_counts[reachable.index(page_name)] = intent_route_count + min_intent_counts[reachable.index(page_name)]=intent_route_count conversation_path.append(page_name) if verbose: print(conversation_path, intent_route_count) @@ -608,8 +607,7 @@ def _find_reachable_pages_rec_helper( and intent_route_count < min_intent_counts[reachable.index(flow_name)] ): - # TODO: barely too long - min_intent_counts[reachable.index(flow_name)] = intent_route_count + min_intent_counts[reachable.index(flow_name)]=intent_route_count else: if verbose: print(page.display_name, "->", route.target_flow, "(empty)") @@ -619,8 +617,7 @@ def _find_reachable_pages_rec_helper( and intent_route_count < min_intent_counts[reachable.index(page_name)] ): - # TODO: barely too long - min_intent_counts[reachable.index(page_name)] = intent_route_count + min_intent_counts[reachable.index(page_name)]=intent_route_count def _get_new_presets(self, presets, page, route): new_presets = presets.copy() @@ -633,19 +630,20 @@ def _get_new_presets(self, presets, page, route): if hasattr(page, "form"): for parameter in 
page.form.parameters: if (hasattr(parameter, "fill_behavior") - and hasattr(parameter.fill_behavior, "initial_prompt_fulfillment")): - if hasattr( + and hasattr( + parameter.fill_behavior, + "initial_prompt_fulfillment", + ) + and hasattr( parameter.fill_behavior.initial_prompt_fulfillment, "set_parameter_actions", - ): - for ( - param_preset - ) in ( - parameter.fill_behavior.initial_prompt_fulfillment.set_parameter_actions - ): - new_presets[ - param_preset.parameter - ] = param_preset.value + ) + ): + ipf = parameter.fill_behavior.initial_prompt_fulfillment + for param_preset in ipf.set_parameter_actions: + new_presets[ + param_preset.parameter + ] = param_preset.value if hasattr(route, "trigger_fulfillment"): if hasattr(route.trigger_fulfillment, "set_parameter_actions"): for ( From 45a5dff834336e406d4f9619e2cf886cd4bc3142 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 20:57:09 +0000 Subject: [PATCH 064/151] Debug data mapping --- src/dfcx_scrapi/tools/agent_checker_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index c1a56979..37b12606 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -136,14 +136,14 @@ def get_all_flow_data(self): def get_all_page_data(self): page_data = {} - for flow_id in self.flows_map.values(): + for flow_id in self.flows_map.keys(): page_list = self.pages.list_pages(flow_id=flow_id) page_data[flow_id] = {page.name: page for page in page_list} return page_data def get_all_route_group_data(self): route_group_data = {} - for flow_id in self.flows_map.values(): + for flow_id in self.flows_map.keys(): group_list = self.route_groups.list_transition_route_groups( flow_id=flow_id ) From 6217286558414bc8798c2e6044d8401befb1bb6e Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 21:02:19 +0000 Subject: [PATCH 065/151] Correct negation 
logic error --- src/dfcx_scrapi/tools/agent_checker_util.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 37b12606..3dc5d1cc 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -383,9 +383,10 @@ def _find_reachable_pages_rec_helper( flow_name = self.flows_map[flow_id] target_page = route.target_page target_flow = route.target_flow - if hasattr(route, "intent") and route.intent != "": - return - if intent_route_limit and intent_route_count < intent_route_limit: + if ( + hasattr(route, "intent") and route.intent != "" + and intent_route_limit and intent_route_count >= intent_route_limit + ): return if hasattr(page, "form") and page.form: for parameter in page.form.parameters: From 68165355488a4b8eb3640579e4dbf5793101222c Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 21:04:20 +0000 Subject: [PATCH 066/151] Fix typo in function name --- src/dfcx_scrapi/tools/agent_checker_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 3dc5d1cc..5a9e2fba 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -794,7 +794,7 @@ def _find_reachable_pages_rec( verbose=verbose, ) - def _process_form_for_reachable_pages( + def _process_form_parameter_for_reachable_pages( self, flow_id: str, flow_name: str, From e997b519ff7f25c90a48de9c99955b44df66527b Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Fri, 16 Dec 2022 21:18:42 +0000 Subject: [PATCH 067/151] Add functions find_all_reachable_pages and find_all_unreachable_pages --- src/dfcx_scrapi/tools/agent_checker_util.py | 56 +++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py 
b/src/dfcx_scrapi/tools/agent_checker_util.py index 5a9e2fba..c58b55b9 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -1038,3 +1038,59 @@ def find_unreachable_pages( flow_id, flow_name, include_groups=include_groups, verbose=verbose ) return list(set(self.pages_map[flow_id].values()) - set(reachable)) + + def find_all_reachable_pages( + self, + include_groups: bool = True, + verbose: bool = False, + ): + """Gets a dataframe of all reachable pages in this agent + + Args: + include_groups: whether or not to consider route group routes + as being reachable. Defaults to True. + verbose: whether to display debug info in the agent structure + traversal. Defaults to False. + + Returns: + A dataframe with columns flow_name and page_name + """ + flow_names = [] + page_names = [] + for flow_id, flow_name in self.flows_map.items(): + reachable = self.find_reachable_pages( + flow_id=flow_id, + include_groups=include_groups, + verbose=verbose + ) + flow_names.extend([flow_name for _ in reachable]) + page_names.extend(reachable) + return pd.DataFrame({"flow_name": flow_names, "page_name": page_names}) + + def find_all_unreachable_pages( + self, + include_groups: bool = True, + verbose: bool = False, + ): + """Gets a dataframe of all unreachable pages in this agent + + Args: + include_groups: whether or not to consider route group routes + as being reachable. Defaults to True. + verbose: whether to display debug info in the agent structure + traversal. Defaults to False. 
+ + Returns: + A dataframe with columns flow_name and page_name + """ + flow_names = [] + page_names = [] + for flow_id, flow_name in self.flows_map.items(): + unreachable = self.find_unreachable_pages( + flow_id=flow_id, + include_groups=include_groups, + verbose=verbose + ) + flow_names.extend([flow_name for _ in unreachable]) + page_names.extend(unreachable) + return pd.DataFrame({"flow_name": flow_names, "page_name": page_names}) From 743020cf1ce86aad3627e10bd910a7b5da32e1bc Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Wed, 28 Dec 2022 18:25:29 +0000 Subject: [PATCH 068/151] Create function find_reachable_intents --- src/dfcx_scrapi/tools/agent_checker_util.py | 99 +++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index c58b55b9..134ea7a4 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -1094,3 +1094,102 @@ def find_all_unreachable_pages( flow_names.extend([flow_name for _ in unreachable]) page_names.extend(unreachable) return pd.DataFrame({"flow_name": flow_names, "page_name": page_names}) + + def add_intents_from_routes(self, + transition_list: List[DFCXRoute], + intents: List[str], + routegroups: List[str], + route_group + ) -> None: + """Helper function which adds intents from routes to a list of intents + + Args: + transition_list, The list of transition routes + intents, The list of intent names + + Returns: + Nothing (appends to the intent list) + """ + for route in transition_list: + # Ignore empty intents (such as the true condition) + if len(route.intent) == 0: + continue + intent = self.intents_map[route.intent] + if intent not in intents: + intents.append(intent) + if route_group is not None: + routegroups.append(route_group.display_name) + else: + routegroups.append('') + + def get_page_intents(self, + flow_id: Optional[str] = None, + flow_name: Optional[str] = None, + page_id: 
Optional[str] = None, + page_name: Optional[str] = None, + include_groups: bool = True + ) -> List[str]: + """Get the list of intents for a given page of this flow. + + Args: + flow_id OR flow_name: The ID or name of the flow + page_id OR page_name: The ID or name of the page + include_groups (Optional): If true, intents from transition route + groups on the given page will be included + + Returns: + List of intent names + """ + page = self.get_page(flow_id=flow_id, flow_name=flow_name, + page_id=page_id, page_name=page_name) + + page_routegroups = [] + page_intents = [] + transition_list = page.transition_routes + self.add_intents_from_routes(transition_list, + page_intents, + page_routegroups, + None) + + # Get intents in transition route groups + if include_groups: + for route_group_id in page.transition_route_groups: + route_group = self.transition_route_groups[route_group_id] + self.add_intents_from_routes(route_group.transition_routes, + page_intents, + page_routegroups, + route_group) + + return pd.DataFrame({ + 'route group': page_routegroups, + 'intent': page_intents + }) + + def find_reachable_intents(self, + flow_name, + include_groups: bool = True + ) -> List[str]: + """Finds all intents which are on reachable pages, starting from the + start page of the given flow. + + Args: + flow_name: The name of the flow to check for reachable intents. + include_groups (Optional): If true, intents from transition route + groups will be included, but only if they are actually referenced + on some page. 
+ + Returns: + The list of intents on reachable pages in this flow + """ + intents = set() + reachable_pages = self.find_reachable_pages( + flow_name=flow_name, + include_groups=include_groups) + for page_name in reachable_pages: + page_intents = set(self.get_page_intents( + flow_name=flow_name, + page_name=page_name, + include_groups=include_groups + )['intent']) + intents |= page_intents + return list(intents) From b6644dc41252cbacee92905c96d7cc57c5cef096 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Wed, 28 Dec 2022 18:49:33 +0000 Subject: [PATCH 069/151] Create functions find_all_(un)reachable_intents --- src/dfcx_scrapi/tools/agent_checker_util.py | 57 ++++++++++++++++++--- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 134ea7a4..e2ede03b 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -1120,7 +1120,7 @@ def add_intents_from_routes(self, if route_group is not None: routegroups.append(route_group.display_name) else: - routegroups.append('') + routegroups.append("") def get_page_intents(self, flow_id: Optional[str] = None, @@ -1161,12 +1161,12 @@ def get_page_intents(self, route_group) return pd.DataFrame({ - 'route group': page_routegroups, - 'intent': page_intents + "route group": page_routegroups, + "intent": page_intents }) - def find_reachable_intents(self, - flow_name, + def find_reachable_intents(self, + flow_name, include_groups: bool = True ) -> List[str]: """Finds all intents which are on reachable pages, starting from the @@ -1188,8 +1188,51 @@ def find_reachable_intents(self, for page_name in reachable_pages: page_intents = set(self.get_page_intents( flow_name=flow_name, - page_name=page_name, + page_name=page_name, include_groups=include_groups - )['intent']) + )["intent"]) intents |= page_intents return list(intents) + + def find_all_reachable_intents(self) -> pd.DataFrame: + 
"""Finds all intents referenced in the agent, across all flows, + and produces a dataframe listing which flows reference each intent. + + Returns: + A dataframe with columns + intent - the intent display name + flows - a list of flow display names that use this intent + """ + intents = {} + for flow_name in self.flows_map.values(): + flow_intents = self.find_reachable_intents(flow_name=flow_name, + include_groups=True) + for intent in flow_intents: + if intent in intents: + intents[intent].append(flow_name) + else: + intents[intent] = [flow_name] + # Also return the unreachable ones, because why not + return pd.DataFrame({ + "intent": intents.keys(), + "flows": intents.values() + }) + + def find_all_unreachable_intents(self) -> List[str]: + """Finds all unreachable intents, either because they are on + unreachable pages or they are unused in the agent. + + Returns: + A list of unreachable intent display names + """ + all_reachable_intents = set() + for flow_name in self.flows_map.values(): + flow_intents = self.find_reachable_intents(flow_name=flow_name, + include_groups=True) + all_reachable_intents |= set(flow_intents) + unreachable_intents = [] + for intent in self.intent_data: + if intent.display_name in all_reachable_intents: + continue + unreachable_intents.append(intent.display_name) + return unreachable_intents From 6646ac4d8db73486bb600a16cf185e2b9bb7980f Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Wed, 28 Dec 2022 18:52:41 +0000 Subject: [PATCH 070/151] Fix function name typo --- src/dfcx_scrapi/tools/agent_checker_util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index e2ede03b..cf0c629c 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -1122,7 +1122,7 @@ def add_intents_from_routes(self, else: routegroups.append("") - def get_page_intents(self, + def 
_get_page_intents(self, flow_id: Optional[str] = None, flow_name: Optional[str] = None, page_id: Optional[str] = None, @@ -1140,7 +1140,7 @@ def get_page_intents(self, Returns: List of intent names """ - page = self.get_page(flow_id=flow_id, flow_name=flow_name, + page = self._get_page(flow_id=flow_id, flow_name=flow_name, page_id=page_id, page_name=page_name) page_routegroups = [] @@ -1186,7 +1186,7 @@ def find_reachable_intents(self, flow_name=flow_name, include_groups=include_groups) for page_name in reachable_pages: - page_intents = set(self.get_page_intents( + page_intents = set(self._get_page_intents( flow_name=flow_name, page_name=page_name, include_groups=include_groups From 268d2672c9139b9434a88c9295789a0fc6c93245 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Wed, 28 Dec 2022 19:05:04 +0000 Subject: [PATCH 071/151] Prevent checking transitions to other flows as if they are pages --- src/dfcx_scrapi/tools/agent_checker_util.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index cf0c629c..96570b1b 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -1186,11 +1186,12 @@ def find_reachable_intents(self, flow_name=flow_name, include_groups=include_groups) for page_name in reachable_pages: - page_intents = set(self._get_page_intents( - flow_name=flow_name, - page_name=page_name, - include_groups=include_groups - )["intent"]) + if page_name not in self.flows_map_rev: + page_intents = set(self._get_page_intents( + flow_name=flow_name, + page_name=page_name, + include_groups=include_groups + )["intent"]) intents |= page_intents return list(intents) @@ -1204,7 +1205,7 @@ def find_all_reachable_intents(self) -> pd.DataFrame: flows - a list of flow display names that use this intent """ intents = {} - for flow_name in self.flows_map.values(): + for flow_name in self.flows_map_rev: 
flow_intents = self.find_reachable_intents(flow_name=flow_name, include_groups=True) for intent in flow_intents: @@ -1226,7 +1227,7 @@ def find_all_unreachable_intents(self) -> List[str]: A list of unreachable intent display names """ all_reachable_intents = set() - for flow_name in self.flows_map.values(): + for flow_name in self.flows_map_rev: flow_intents = self.find_reachable_intents(flow_name=flow_name, include_groups=True) all_reachable_intents |= set(flow_intents) From cd46afcece66f643f608445629630dbe9dd6fd49 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Wed, 28 Dec 2022 19:09:10 +0000 Subject: [PATCH 072/151] Debug find_all_reachable_intents --- src/dfcx_scrapi/tools/agent_checker_util.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 96570b1b..f336d2ab 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -1151,10 +1151,13 @@ def _get_page_intents(self, page_routegroups, None) + if not flow_id: + flow_id = self.flows_map_rev[flow_name] + # Get intents in transition route groups if include_groups: for route_group_id in page.transition_route_groups: - route_group = self.transition_route_groups[route_group_id] + route_group = self.route_group_data[flow_id][route_group_id] self.add_intents_from_routes(route_group.transition_routes, page_intents, page_routegroups, @@ -1221,7 +1224,8 @@ def find_all_reachable_intents(self) -> pd.DataFrame: def find_all_unreachable_intents(self) -> List[str]: """Finds all unreachable intents, either because they are on - unreachable pages or they are unused in the agent. + unreachable pages or they are unused in the agent. Note that + Default Negative Intent will always show up here. 
Returns: A list of unreachable intent display names From 2b97bf0af6d677ca5d363ab2a81034441c0c84bc Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 29 Dec 2022 17:07:14 +0000 Subject: [PATCH 073/151] Move get_test_case_results_df to TestCases --- src/dfcx_scrapi/core/test_cases.py | 154 +++++++++++++++++++- src/dfcx_scrapi/tools/agent_checker_util.py | 109 -------------- 2 files changed, 153 insertions(+), 110 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index 78976aea..0cfb04a4 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import pandas as pd import logging from typing import Dict, List @@ -22,7 +22,13 @@ from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 +<<<<<<< HEAD from dfcx_scrapi.core import scrapi_base +======= +from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core.flows import Flows +from dfcx_scrapi.core.pages import Pages +>>>>>>> Move get_test_case_results_df to TestCases # logging config logging.basicConfig( @@ -453,3 +459,149 @@ def calculate_coverage(self, coverage_type: int, agent_id: str = None): ) response = client.calculate_coverage(request) return response + + def _convert_flow(self, flow_id, flows_map): + """Gets a flow display name from a flow ID""" + if flow_id.split("/")[-1] == "-": + return "" + # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) + if flow_id in flows_map: + return flows_map[flow_id] + # TODO: Should throw error instead of returning default + return "Default Start Flow" + + # Note that flow id includes agent, normally... 
+ def _convert_page(self, page_id, flow_id, pages_map): + """Gets a page display name from a page and flow ID""" + if page_id == "END_SESSION": + return "End Session" + elif page_id == "END_FLOW": + return "End Flow" + elif page_id == "START_PAGE": + return "Start" + page_id_converted = str(flow_id) + "/pages/" + str(page_id) + if flow_id in pages_map: + if page_id_converted in pages_map[flow_id]: + return pages_map[flow_id][page_id_converted] + else: + # TODO: Should throw error instead of returning default + return "Start" + print("Flow not found") + # TODO: Should throw error, but returning this probably will anyway + return "Invalid" + + def get_test_case_results_df(self, agent_id=None, retest_all=False): + """Gets the test case results for this agent, + and generates a dataframe with their details. + Any tests without a result will be run in a batch. + + Args: + agent_id: required only if not set when initializing this class + retest_all: if true, all test cases are re-run, + regardless of whether or not they had a result + + Returns: + DataFrame of test case results for this agent, with columns: + display_name, id, short_id (excluding agent ID), + tags (comma-separated string), creation_time, + start_flow, start_page, passed, test_time + """ + if agent_id: + self.agent_id = agent_id + + dfcx_flows = Flows(creds=self.creds, agent_id=self.agent_id) + dfcx_pages = Pages(creds=self.creds) + flows_map = dfcx_flows.get_flows_map(agent_id=self.agent_id) + pages_map = {} + for flow_id in flows_map.keys(): + pages_map[flow_id] = dfcx_pages.get_pages_map(flow_id=flow_id) + + test_case_results = self.list_test_cases(self.agent_id) + retest = [] + retest_names = [] + + display_names = [] + ids = [] + short_ids = [] + tags = [] + creation_times = [] + flows = [] + pages = [] + test_results = [] + test_times = [] + passed = [] + + for response in test_case_results: + # Collect untested cases to be retested + # (or all if retest_all is True) + if ( + retest_all + or 
str(response.last_test_result.test_result) + == "TestResult.TEST_RESULT_UNSPECIFIED" + ): + retest.append(response.name) + retest_names.append(response.display_name) + # Collect additional information for dataframe + display_names.append(response.display_name) + ids.append(response.name) + short_ids.append(response.name.split("/")[-1]) + tags.append(",".join(response.tags)) + creation_times.append(response.creation_time) + flows.append(self._convert_flow(response.test_config.flow, + flows_map) + ) + pages.append( + self._convert_page(response.test_config.page, + response.test_config.flow, + pages_map) + ) + test_results.append(str(response.last_test_result.test_result)) + test_times.append(response.last_test_result.test_time) + passed.append( + str(response.last_test_result.test_result)=="TestResult.PASSED" + ) + + # Create dataframe + test_case_df = pd.DataFrame( + { + "display_name": display_names, + "id": ids, + "short_id": short_ids, + "tags": tags, + "creation_time": creation_times, + "start_flow": flows, + "start_page": pages, + "test_result": test_results, + "passed": passed, + "test_time": test_times, + } + ) + + # Retest any that haven't been run yet + print("To retest:", len(retest)) + if len(retest) > 0: + response = self.batch_run_test_cases(retest, self.agent_id) + for result in response.results: + # Results may not be in the same order as they went in + # Process the name a bit to remove the /results/id part + tc_id_full = "/".join(result.name.split("/")[:-2]) + tc_id = tc_id_full.rsplit("/", maxsplit=1)[-1] + + # Update dataframe where id = tc_id_full + # row = test_case_df.loc[test_case_df['id']==tc_id_full] + test_case_df.loc[ + test_case_df["id"] == tc_id_full, "short_id" + ] = tc_id + test_case_df.loc[ + test_case_df["id"] == tc_id_full, "test_result" + ] = str(result.test_result) + test_case_df.loc[ + test_case_df["id"] == tc_id_full, "test_time" + ] = result.test_time + test_case_df.loc[test_case_df["id"] == tc_id_full,"passed"] = ( + 
str(result.test_result) == "TestResult.PASSED" + ) + + # This column is redundant, since we have passed (bool) + test_case_df = test_case_df.drop(columns=["test_result"]) + return test_case_df diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index f336d2ab..7f1a5d0b 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -244,115 +244,6 @@ def _get_page( else: return self.page_data[flow_id][page_id] - # Test case results - - # TODO: Should this function be in the base test_cases class, - # as get_test_case_results_df or something? - def get_test_case_results(self, retest_all=False): - """Gets the test case results for this agent, - and generates a dataframe with their details. - Any tests without a result will be run in a batch. - - Args: - retest_all: if true, all test cases are re-run, - regardless of whether or not they had a result - - Returns: - DataFrame of test case results for this agent, with columns: - display_name, id, short_id (excluding agent ID), - tags (comma-separated string), creation_time, - start_flow, start_page, passed, test_time - """ - test_case_results = self.test_cases.list_test_cases(self.agent_id) - retest = [] - retest_names = [] - - display_names = [] - ids = [] - short_ids = [] - tags = [] - creation_times = [] - flows = [] - pages = [] - test_results = [] - test_times = [] - passed = [] - - for response in test_case_results: - # Collect untested cases to be retested - # (or all if retest_all is True) - if ( - retest_all - or str(response.last_test_result.test_result) - == "TestResult.TEST_RESULT_UNSPECIFIED" - ): - retest.append(response.name) - retest_names.append(response.display_name) - # Collect additional information for dataframe - display_names.append(response.display_name) - ids.append(response.name) - short_ids.append(response.name.split("/")[-1]) - tags.append(",".join(response.tags)) - 
creation_times.append(response.creation_time) - flows.append(self._convert_flow(response.test_config.flow)) - pages.append( - self._convert_page(response.test_config.page, - response.test_config.flow) - ) - test_results.append(str(response.last_test_result.test_result)) - test_times.append(response.last_test_result.test_time) - passed.append( - str(response.last_test_result.test_result)=="TestResult.PASSED" - ) - - # Create dataframe - test_case_df = pd.DataFrame( - { - "display_name": display_names, - "id": ids, - "short_id": short_ids, - "tags": tags, - "creation_time": creation_times, - "start_flow": flows, - "start_page": pages, - "test_result": test_results, - "passed": passed, - "test_time": test_times, - } - ) - - # Retest any that haven't been run yet - print("To retest:", len(retest)) - if len(retest) > 0: - response = self.test_cases.batch_run_test_cases(retest, - self.agent_id) - for result in response.results: - # Results may not be in the same order as they went in - # Process the name a bit to remove the /results/id part - tc_id_full = "/".join(result.name.split("/")[:-2]) - tc_id = tc_id_full.rsplit("/", maxsplit=1)[-1] - - # Update dataframe where id = tc_id_full - # row = test_case_df.loc[test_case_df['id']==tc_id_full] - test_case_df.loc[ - test_case_df["id"] == tc_id_full, "short_id" - ] = tc_id - test_case_df.loc[ - test_case_df["id"] == tc_id_full, "test_result" - ] = str(result.test_result) - test_case_df.loc[ - test_case_df["id"] == tc_id_full, "test_time" - ] = result.test_time - test_case_df.loc[test_case_df["id"] == tc_id_full,"passed"] = ( - str(result.test_result) == "TestResult.PASSED" - ) - - # This column is redundant, since we have passed (bool) - test_case_df = test_case_df.drop(columns=["test_result"]) - return test_case_df - - # Test case comparison/report - # Changelogs # Reachable and unreachable pages From 40152c9e7ae4b31e40586e6aef049584bd5ea6ef Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Wed, 4 Jan 2023 22:28:15 +0000 
Subject: [PATCH 074/151] Switch to parameter dictionary --- src/dfcx_scrapi/tools/agent_checker_util.py | 656 +++++++------------- 1 file changed, 210 insertions(+), 446 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 7f1a5d0b..a07c2722 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -250,266 +250,211 @@ def _get_page( def _find_reachable_pages_rec_helper( self, - flow_id: str, - flow_name: str, - page: DFCXPage | DFCXFlow, - route: DFCXRoute, - reachable: List[str], - conversation_path: List[str], - min_intent_counts: List[int], - presets: Dict[str, str], - intent_route_count: int = 0, - intent_route_limit: Optional[int] = None, - include_groups: bool = True, - include_start_page_routes: bool = True, - limit_intent_to_initial: bool = False, - is_initial: bool = False, - include_meta: bool = False, - verbose: bool = False, + params: Dict ) -> None: """Helper function for the recursion involved in finding reachable pages """ - if not flow_name: - flow_name = self.flows_map[flow_id] - target_page = route.target_page - target_flow = route.target_flow + if not params["flow_name"]: + params["flow_name"] = self.flows_map[params["flow_id"]] + target_page = params["route"].target_page + target_flow = params["route"].target_flow if ( - hasattr(route, "intent") and route.intent != "" - and intent_route_limit and intent_route_count >= intent_route_limit + hasattr(params["route"], "intent") and params["route"].intent != "" + and params["intent_route_limit"] and params["intent_route_count"] >= params["intent_route_limit"] ): return - if hasattr(page, "form") and page.form: - for parameter in page.form.parameters: + if hasattr(params["page"], "form") and params["page"].form: + for parameter in params["page"].form.parameters: parameter_name = parameter.display_name # Need to also account for parameters being # set by intents (or by webhooks...) 
if ( - parameter_name not in presets - or presets[parameter_name] == "NULL" + parameter_name not in params["presets"] + or params["presets"][parameter_name] == "NULL" ): # This page has an unfilled parameter - if limit_intent_to_initial and not is_initial: + if params["limit_intent_to_initial"] and not params["is_initial"]: return - if hasattr(route, "intent") and route.intent != "": - if limit_intent_to_initial and not is_initial: + if hasattr(params["route"], "intent") and params["route"].intent != "": + if params["limit_intent_to_initial"] and not params["is_initial"]: # Don't continue on this path return - intent_route_count += 1 - if target_page in self.page_data[flow_id]: - page_name = self.page_data[flow_id][target_page].display_name - if verbose: - print(page.display_name, "->", page_name) + params["intent_route_count"] += 1 + if target_page in self.page_data[params["flow_id"]]: + page_name = self.page_data[params["flow_id"]][target_page].display_name + if params["verbose"]: + print(params["page"].display_name, "->", page_name) # Move to this page (this is also the recursion limiting step # to prevent infinite loops) - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) - conversation_path.append(page_name) - if verbose: - print(conversation_path, intent_route_count) - - new_presets = self._get_new_presets(presets, page, route) - - self._find_reachable_pages_rec( - flow_id, - flow_name, - self.page_data[flow_id][target_page], - reachable, - conversation_path, - min_intent_counts, - new_presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=False, - include_meta=include_meta, - verbose=verbose, - ) - conversation_path.pop(-1) + if page_name not in params["reachable"]: + params["reachable"].append(page_name) + 
params["min_intent_counts"].append(params["intent_route_count"]) + params["conversation_path"].append(page_name) + if params["verbose"]: + print(params["conversation_path"], params["intent_route_count"]) + + old_presets = params["presets"].copy() + new_presets = self._get_new_presets(params["presets"], params["page"], params["route"]) + params["page"] = self.page_data[params["flow_id"]][target_page] + params["presets"] = new_presets + + self._find_reachable_pages_rec(params) + + params["conversation_path"].pop(-1) + # pop presets since we can't do it if we're passing a params dict like this + params["presets"] = old_presets elif ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] + page_name in params["reachable"] + and params["intent_route_count"] + < params["min_intent_counts"][params["reachable"].index(page_name)] ): # Better route found, traverse from here - min_intent_counts[reachable.index(page_name)]=intent_route_count - conversation_path.append(page_name) - if verbose: - print(conversation_path, intent_route_count) - - new_presets = self._get_new_presets(presets, page, route) - - self._find_reachable_pages_rec( - flow_id, - flow_name, - self.page_data[flow_id][target_page], - reachable, - conversation_path, - min_intent_counts, - new_presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=False, - include_meta=include_meta, - verbose=verbose, - ) - conversation_path.pop(-1) + params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] + params["conversation_path"].append(page_name) + if params["verbose"]: + print(params["conversation_path"], params["intent_route_count"]) + + old_presets = params["presets"].copy() + new_presets = self._get_new_presets(params["presets"], params["page"], 
params["route"]) + params["page"] = self.page_data[params["flow_id"]][target_page] + params["presets"] = new_presets + + self._find_reachable_pages_rec(params) + + params["conversation_path"].pop(-1) + # pop presets since we can't do it if we're passing a params dict like this + params["presets"] = old_presets elif "END_FLOW" in target_page: - if verbose: - print(page.display_name, "-> END FLOW") - if include_meta: + if params["verbose"]: + print(params["page"].display_name, "-> END FLOW") + if params["include_meta"]: page_name = "END FLOW" - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) + if page_name not in params["reachable"]: + params["reachable"].append(page_name) + params["min_intent_counts"].append(params["intent_route_count"]) elif ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] + page_name in params["reachable"] + and params["intent_route_count"] + < params["min_intent_counts"][params["reachable"].index(page_name)] ): - min_intent_counts[ - reachable.index(page_name) - ] = intent_route_count + params["min_intent_counts"][ + params["reachable"].index(page_name) + ] = params["intent_route_count"] # reachable.append('END FLOW') elif "END_SESSION" in target_page: - if verbose: - print(page.display_name, "-> END SESSION") - if include_meta: + if params["verbose"]: + print(params["page"].display_name, "-> END SESSION") + if params["include_meta"]: page_name = "END SESSION" - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) + if page_name not in params["reachable"]: + params["reachable"].append(page_name) + params["min_intent_counts"].append(params["intent_route_count"]) elif ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] + page_name in params["reachable"] + and params["intent_route_count"] + < 
params["min_intent_counts"][params["reachable"].index(page_name)] ): - min_intent_counts[ - reachable.index(page_name) - ] = intent_route_count + params["min_intent_counts"][ + params["reachable"].index(page_name) + ] = params["intent_route_count"] # reachable.append('END SESSION') elif "CURRENT_PAGE" in target_page: - if verbose: - print(page.display_name, "-> CURRENT PAGE") - page_name = page.display_name + if params["verbose"]: + print(params["page"].display_name, "-> CURRENT PAGE") + page_name = params["page"].display_name if ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] + page_name in params["reachable"] + and params["intent_route_count"] + < params["min_intent_counts"][params["reachable"].index(page_name)] ): - min_intent_counts[reachable.index(page_name)]=intent_route_count + params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] elif "PREVIOUS_PAGE" in target_page: - if verbose: - print(page.display_name, "-> PREVIOUS PAGE") - if include_meta: + if params["verbose"]: + print(params["page"].display_name, "-> PREVIOUS PAGE") + if params["include_meta"]: page_name = "PREVIOUS PAGE" - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) + if page_name not in params["reachable"]: + params["reachable"].append(page_name) + params["min_intent_counts"].append(params["intent_route_count"]) elif ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] + page_name in params["reachable"] + and params["intent_route_count"] + < params["min_intent_counts"][params["reachable"].index(page_name)] ): - min_intent_counts[ - reachable.index(page_name) - ] = intent_route_count + params["min_intent_counts"][ + params["reachable"].index(page_name) + ] = params["intent_route_count"] # TODO: This could cause huge problems... 
elif "START_PAGE" in target_page: - if verbose: - print(page.display_name, "-> START PAGE") + if params["verbose"]: + print(params["page"].display_name, "-> START PAGE") page_name = "Start" - if page_name not in reachable: - reachable.append(page_name) - min_intent_counts.append(intent_route_count) - conversation_path.append(page_name) - if verbose: - print(conversation_path, intent_route_count) - - new_presets = self._get_new_presets(presets, page, route) - - self._find_reachable_pages_rec( - flow_id, - flow_name, - self.flow_data[flow_id], - reachable, - conversation_path, - min_intent_counts, - new_presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=False, - include_meta=include_meta, - verbose=verbose, - ) - conversation_path.pop(-1) + if page_name not in params["reachable"]: + params["reachable"].append(page_name) + params["min_intent_counts"].append(params["intent_route_count"]) + params["conversation_path"].append(page_name) + if params["verbose"]: + print(params["conversation_path"], params["intent_route_count"]) + + old_presets = params["presets"].copy() + new_presets = self._get_new_presets(params["presets"], params["page"], params["route"]) + params["page"] = self.flow_data[params["flow_id"]] + params["presets"] = new_presets + + self._find_reachable_pages_rec(params) + + params["conversation_path"].pop(-1) + # pop presets since we can't do it if we're passing a params dict like this + params["presets"] = old_presets elif ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] + page_name in params["reachable"] + and params["intent_route_count"] + < params["min_intent_counts"][params["reachable"].index(page_name)] ): # Better route found, traverse from here - 
min_intent_counts[reachable.index(page_name)]=intent_route_count - conversation_path.append(page_name) - if verbose: - print(conversation_path, intent_route_count) - - new_presets = self._get_new_presets(presets, page, route) - - self._find_reachable_pages_rec( - flow_id, - flow_name, - self.flow_data[flow_id], - reachable, - conversation_path, - min_intent_counts, - new_presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=False, - include_meta=include_meta, - verbose=verbose, - ) - conversation_path.pop(-1) + params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] + params["conversation_path"].append(page_name) + if params["verbose"]: + print(params["conversation_path"], params["intent_route_count"]) + + old_presets = params["presets"].copy() + new_presets = self._get_new_presets(params["presets"], params["page"], params["route"]) + params["page"] = self.flow_data[params["flow_id"]] + params["presets"] = new_presets + + self._find_reachable_pages_rec(params) + + params["conversation_path"].pop(-1) + # pop presets since we can't do it if we're passing a params dict like this + params["presets"] = old_presets elif len(target_page) > 0: - print(page.display_name, "->", target_page) + print(params["page"].display_name, "->", target_page) # This should not happen, and if it does it needs to be fixed input() elif len(target_flow) > 0: - flow_name = self.flows_map[route.target_flow] - if verbose: - print(page.display_name, "->", flow_name) - if flow_name not in reachable: - reachable.append(flow_name) - min_intent_counts.append(intent_route_count) + flow_name = self.flows_map[params["route"].target_flow] + if params["verbose"]: + print(params["page"].display_name, "->", flow_name) + if flow_name not in params["reachable"]: + 
params["reachable"].append(flow_name) + params["min_intent_counts"].append(params["intent_route_count"]) elif ( - flow_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(flow_name)] + flow_name in params["reachable"] + and params["intent_route_count"] + < params["min_intent_counts"][params["reachable"].index(flow_name)] ): - min_intent_counts[reachable.index(flow_name)]=intent_route_count + params["min_intent_counts"][params["reachable"].index(flow_name)] = params["intent_route_count"] else: - if verbose: - print(page.display_name, "->", route.target_flow, "(empty)") - page_name = page.display_name + if params["verbose"]: + print(params["page"].display_name, "->", params["route"].target_flow, "(empty)") + page_name = params["page"].display_name if ( - page_name in reachable - and intent_route_count - < min_intent_counts[reachable.index(page_name)] + page_name in params["reachable"] + and params["intent_route_count"] + < params["min_intent_counts"][params["reachable"].index(page_name)] ): - min_intent_counts[reachable.index(page_name)]=intent_route_count + params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] def _get_new_presets(self, presets, page, route): new_presets = presets.copy() @@ -554,264 +499,83 @@ def _get_new_presets(self, presets, page, route): def _find_reachable_pages_rec( self, - flow_id: str, - flow_name: str, - page: DFCXPage | DFCXFlow, - reachable: List[str], - conversation_path: List[str], - min_intent_counts: List[int], - presets: Dict[str, str], - intent_route_count: int = 0, - intent_route_limit: Optional[int] = None, - include_groups: bool = True, - include_start_page_routes: bool = True, - limit_intent_to_initial: bool = False, - is_initial: bool = False, - include_meta: bool = False, - verbose: bool = False, + params: Dict ) -> None: """Recursive function to find reachable pages within a given flow, starting at a particular page. 
Other parameters here are used for more general traversal, but not currently used.""" - if not flow_name: - flow_name = self.flows_map[flow_id] - if hasattr(page, "form") and page.form: - for parameter in page.form.parameters: + if not params["flow_name"]: + params["flow_name"] = self.flows_map[params["flow_id"]] + if hasattr(params["page"], "form") and params["page"].form: + for parameter in params["page"].form.parameters: self._process_form_parameter_for_reachable_pages( - flow_id, - flow_name, - page, - parameter, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, + params, + parameter ) - for event_handler in page.event_handlers: - if limit_intent_to_initial and not is_initial: + for event_handler in params["page"].event_handlers: + if params["limit_intent_to_initial"] and not params["is_initial"]: continue if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - page, - event_handler, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) - for route in page.transition_routes: - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - page, - route, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - 
include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) - if include_groups: - for route_group in page.transition_route_groups: + params["route"] = event_handler + self._find_reachable_pages_rec_helper(params) + for route in params["page"].transition_routes: + params["route"] = route + self._find_reachable_pages_rec_helper(params) + if params["include_groups"]: + for route_group in params["page"].transition_route_groups: # TODO: Need to map by flow - for route in self.route_group_data[flow_id][ + for route in self.route_group_data[params["flow_id"]][ route_group ].transition_routes: - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - page, - route, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) + params["route"] = route + self._find_reachable_pages_rec_helper(params) # Start page routes and route groups are also accessible from this page if ( - include_start_page_routes - and page.display_name != flow_name - and (not limit_intent_to_initial or is_initial) + params["include_start_page_routes"] + and params["page"].display_name != params["flow_name"] + and (not params["limit_intent_to_initial"] or params["is_initial"]) ): - self._process_start_page_routes_for_reachable_pages( - flow_id, - flow_name, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - 
include_meta=include_meta, - verbose=verbose, - ) + self._process_start_page_routes_for_reachable_pages(params) def _process_form_parameter_for_reachable_pages( self, - flow_id: str, - flow_name: str, - page: DFCXPage | DFCXFlow, - parameter, # TODO: Data type for DFCX Parameter - reachable: List[str], - conversation_path: List[str], - min_intent_counts: List[int], - presets: Dict[str, str], - intent_route_count: int = 0, - intent_route_limit: Optional[int] = None, - include_groups: bool = True, - include_start_page_routes: bool = True, - limit_intent_to_initial: bool = False, - is_initial: bool = False, - include_meta: bool = False, - verbose: bool = False, + params: Dict, + parameter # TODO: Data type for DFCX Parameter ) -> None: for event_handler in parameter.fill_behavior.reprompt_event_handlers: - if limit_intent_to_initial and not is_initial: + if params["limit_intent_to_initial"] and not params["is_initial"]: continue if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - page, - event_handler, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) + params["route"] = event_handler + self._find_reachable_pages_rec_helper(params) def _process_start_page_routes_for_reachable_pages( self, - flow_id: str, - flow_name: str, - reachable: List[str], - conversation_path: List[str], - min_intent_counts: List[int], - presets: Dict[str, str], - intent_route_count: int = 0, - intent_route_limit: Optional[int] = None, - include_groups: bool = True, - include_start_page_routes: bool = True, - limit_intent_to_initial: bool = False, - is_initial: bool = False, - 
include_meta: bool = False, - verbose: bool = False, + params: Dict ): - for event_handler in self.flow_data[flow_id].event_handlers: + params["page"] = self.flow_data[params["flow_id"]] + for event_handler in params["page"].event_handlers: if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - self.flow_data[flow_id], - event_handler, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) - for route in self.flow_data[flow_id].transition_routes: + params["route"] = event_handler + self._find_reachable_pages_rec_helper(params) + for route in params["page"].transition_routes: if hasattr(route, "intent") and route.intent != "": - self._find_reachable_pages_rec_helper( - flow_id, - flow_name, - self.flow_data[flow_id], - route, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) - if include_groups: - for route_group in self.flow_data[flow_id].transition_route_groups: - for route in self.route_group_data[flow_id][ + params["route"] = route + self._find_reachable_pages_rec_helper(params) + if params["include_groups"]: + for route_group in params["page"].transition_route_groups: + for route in self.route_group_data[params["flow_id"]][ route_group ].transition_routes: if hasattr(route, "intent") and route.intent != "": - self._find_reachable_pages_rec_helper( - 
flow_id, - flow_name, - self.flow_data[flow_id], - route, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=intent_route_count, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) + params["route"] = route + self._find_reachable_pages_rec_helper(params) def find_reachable_pages( self, @@ -871,23 +635,23 @@ def find_reachable_pages( flow_id=flow_id, flow_name=flow_name, page_id=None, page_name=from_page ) - self._find_reachable_pages_rec( - flow_id, - flow_name, - page_data, - reachable, - conversation_path, - min_intent_counts, - presets, - intent_route_count=0, - intent_route_limit=intent_route_limit, - include_groups=include_groups, - include_start_page_routes=include_start_page_routes, - limit_intent_to_initial=limit_intent_to_initial, - is_initial=is_initial, - include_meta=include_meta, - verbose=verbose, - ) + params = { + 'flow_id': flow_id, + 'flow_name': flow_name, + 'page': from_page, + 'reachable': reachable, + 'conversation_path': conversation_path, + 'min_intent_counts': min_intent_counts, + 'presets': presets, + 'intent_route_limit': intent_route_limit, + 'include_groups': include_groups, + 'include_start_page_routes': include_start_page_routes, + 'limit_intent_to_initial': limit_intent_to_initial, + 'is_initial': is_initial, + 'include_meta': include_meta, + 'verbose': verbose + } + self._find_reachable_pages_rec(params) return reachable def find_unreachable_pages( From c8c4437ff9a50310cf82495fe68df618f13604b3 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Wed, 4 Jan 2023 23:27:41 +0000 Subject: [PATCH 075/151] Fix some bugs with parameter dict and simplify error checking --- src/dfcx_scrapi/tools/agent_checker_util.py | 58 ++++++++------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git 
a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index a07c2722..dbb538ce 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -155,19 +155,15 @@ def get_all_route_group_data(self): def _convert_intent(self, intent_id): """Gets an intent display name from an intent ID""" intent_id_converted = str(self.agent_id) + "/intents/" + str(intent_id) - if intent_id_converted in self.intents_map: - return self.intents_map[intent_id_converted] - return "" + return self.intents_map.get(intent_id_converted, "") def _convert_flow(self, flow_id): """Gets a flow display name from a flow ID""" if flow_id.split("/")[-1] == "-": return "" # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) - if flow_id in self.flows_map: - return self.flows_map[flow_id] + return self.flows_map.get(flow_id, "Default Start Flow") # TODO: Should throw error instead of returning default - return "Default Start Flow" # Note that flow id includes agent, normally... 
def _convert_page(self, page_id, flow_id): @@ -180,11 +176,8 @@ def _convert_page(self, page_id, flow_id): return "Start" page_id_converted = str(flow_id) + "/pages/" + str(page_id) if flow_id in self.pages_map: - if page_id_converted in self.pages_map[flow_id]: - return self.pages_map[flow_id][page_id_converted] - else: - # TODO: Should throw error instead of returning default - return "Start" + return self.pages_map[flow_id].get(page_id_converted, "Start") + # TODO: Should throw error instead of returning default print("Flow not found") # TODO: Should throw error, but returning this probably will anyway return "Invalid" @@ -218,31 +211,23 @@ def _get_page( Raises: KeyError, if the page is not found """ - if flow_id is None and flow_name is None: - raise Exception("Please specify a flow") - elif flow_name is not None: - if flow_name in self.flows_map_rev: - flow_id = self.flows_map_rev[flow_name] - else: - raise Exception(f"Flow not found: {flow_name}") + # Look up flow ID + if flow_name: + flow_id = self.flows_map_rev.get(flow_name, None) + if not flow_id: + raise Exception(f"Flow not found: {flow_name}") # Now that flow_id is set, look up the page - if page_id is None and page_name is None: - raise Exception("Please specify a page") - elif page_name is not None: - if page_name == "Start": - return self.flow_data[flow_id] - if page_name in self.pages_map_rev[flow_id]: - page_id = self.pages_map_rev[flow_id][page_name] - return self.page_data[flow_id][page_id] - else: + # Special case for the start page + if page_name == "Start" or (page_id and "START_PAGE" in page_id): + return self.flow_data[flow_id] + # Look up page ID + if page_name: + page_id = self.pages_map_rev[flow_id].get(page_name, None) + if not page_id: + if not page_name: raise KeyError('Page not found. 
Did you forget "page_name="?') - else: - if "START_PAGE" in page_id: - return self.flow_data[flow_id] - elif page_id not in self.pages_map[flow_id]: - raise KeyError("Page not found.") - else: - return self.page_data[flow_id][page_id] + raise KeyError(f"Page not found: {page_name}") + return self.page_data[flow_id][page_id] # Changelogs @@ -557,7 +542,7 @@ def _process_start_page_routes_for_reachable_pages( self, params: Dict ): - params["page"] = self.flow_data[params["flow_id"]] + page = self.flow_data[params["flow_id"]] for event_handler in params["page"].event_handlers: if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" @@ -638,12 +623,13 @@ def find_reachable_pages( params = { 'flow_id': flow_id, 'flow_name': flow_name, - 'page': from_page, + 'page': page_data, 'reachable': reachable, 'conversation_path': conversation_path, 'min_intent_counts': min_intent_counts, 'presets': presets, 'intent_route_limit': intent_route_limit, + 'intent_route_count': 0, 'include_groups': include_groups, 'include_start_page_routes': include_start_page_routes, 'limit_intent_to_initial': limit_intent_to_initial, From 7a80ce5552a4e8f8cfea7abecd114fbcd683c2b5 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 5 Jan 2023 16:42:36 +0000 Subject: [PATCH 076/151] Fix logic error in parameter dict --- src/dfcx_scrapi/tools/agent_checker_util.py | 115 ++++++++++---------- 1 file changed, 56 insertions(+), 59 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index dbb538ce..ee42b57f 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -235,6 +235,8 @@ def _get_page( def _find_reachable_pages_rec_helper( self, + page: DFCXPage | DFCXFlow, + route: DFCXRoute, params: Dict ) -> None: """Helper function for the recursion involved in @@ -242,15 +244,15 @@ def _find_reachable_pages_rec_helper( """ if not params["flow_name"]: 
params["flow_name"] = self.flows_map[params["flow_id"]] - target_page = params["route"].target_page - target_flow = params["route"].target_flow + target_page = route.target_page + target_flow = route.target_flow if ( - hasattr(params["route"], "intent") and params["route"].intent != "" + hasattr(route, "intent") and route.intent != "" and params["intent_route_limit"] and params["intent_route_count"] >= params["intent_route_limit"] ): return - if hasattr(params["page"], "form") and params["page"].form: - for parameter in params["page"].form.parameters: + if hasattr(page, "form") and page.form: + for parameter in page.form.parameters: parameter_name = parameter.display_name # Need to also account for parameters being # set by intents (or by webhooks...) @@ -261,7 +263,7 @@ def _find_reachable_pages_rec_helper( # This page has an unfilled parameter if params["limit_intent_to_initial"] and not params["is_initial"]: return - if hasattr(params["route"], "intent") and params["route"].intent != "": + if hasattr(route, "intent") and route.intent != "": if params["limit_intent_to_initial"] and not params["is_initial"]: # Don't continue on this path return @@ -269,7 +271,7 @@ def _find_reachable_pages_rec_helper( if target_page in self.page_data[params["flow_id"]]: page_name = self.page_data[params["flow_id"]][target_page].display_name if params["verbose"]: - print(params["page"].display_name, "->", page_name) + print(page.display_name, "->", page_name) # Move to this page (this is also the recursion limiting step # to prevent infinite loops) if page_name not in params["reachable"]: @@ -280,11 +282,11 @@ def _find_reachable_pages_rec_helper( print(params["conversation_path"], params["intent_route_count"]) old_presets = params["presets"].copy() - new_presets = self._get_new_presets(params["presets"], params["page"], params["route"]) - params["page"] = self.page_data[params["flow_id"]][target_page] + new_presets = self._get_new_presets(params["presets"], page, route) + 
next_page = self.page_data[params["flow_id"]][target_page] params["presets"] = new_presets - self._find_reachable_pages_rec(params) + self._find_reachable_pages_rec(next_page, params) params["conversation_path"].pop(-1) # pop presets since we can't do it if we're passing a params dict like this @@ -301,18 +303,18 @@ def _find_reachable_pages_rec_helper( print(params["conversation_path"], params["intent_route_count"]) old_presets = params["presets"].copy() - new_presets = self._get_new_presets(params["presets"], params["page"], params["route"]) - params["page"] = self.page_data[params["flow_id"]][target_page] + new_presets = self._get_new_presets(params["presets"], page, route) + next_page = self.page_data[params["flow_id"]][target_page] params["presets"] = new_presets - self._find_reachable_pages_rec(params) + self._find_reachable_pages_rec(next_page, params) params["conversation_path"].pop(-1) # pop presets since we can't do it if we're passing a params dict like this params["presets"] = old_presets elif "END_FLOW" in target_page: if params["verbose"]: - print(params["page"].display_name, "-> END FLOW") + print(page.display_name, "-> END FLOW") if params["include_meta"]: page_name = "END FLOW" if page_name not in params["reachable"]: @@ -329,7 +331,7 @@ def _find_reachable_pages_rec_helper( # reachable.append('END FLOW') elif "END_SESSION" in target_page: if params["verbose"]: - print(params["page"].display_name, "-> END SESSION") + print(page.display_name, "-> END SESSION") if params["include_meta"]: page_name = "END SESSION" if page_name not in params["reachable"]: @@ -346,8 +348,8 @@ def _find_reachable_pages_rec_helper( # reachable.append('END SESSION') elif "CURRENT_PAGE" in target_page: if params["verbose"]: - print(params["page"].display_name, "-> CURRENT PAGE") - page_name = params["page"].display_name + print(page.display_name, "-> CURRENT PAGE") + page_name = page.display_name if ( page_name in params["reachable"] and params["intent_route_count"] @@ 
-356,7 +358,7 @@ def _find_reachable_pages_rec_helper( params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] elif "PREVIOUS_PAGE" in target_page: if params["verbose"]: - print(params["page"].display_name, "-> PREVIOUS PAGE") + print(page.display_name, "-> PREVIOUS PAGE") if params["include_meta"]: page_name = "PREVIOUS PAGE" if page_name not in params["reachable"]: @@ -373,7 +375,7 @@ def _find_reachable_pages_rec_helper( # TODO: This could cause huge problems... elif "START_PAGE" in target_page: if params["verbose"]: - print(params["page"].display_name, "-> START PAGE") + print(page.display_name, "-> START PAGE") page_name = "Start" if page_name not in params["reachable"]: params["reachable"].append(page_name) @@ -383,11 +385,11 @@ def _find_reachable_pages_rec_helper( print(params["conversation_path"], params["intent_route_count"]) old_presets = params["presets"].copy() - new_presets = self._get_new_presets(params["presets"], params["page"], params["route"]) - params["page"] = self.flow_data[params["flow_id"]] + new_presets = self._get_new_presets(params["presets"], page, route) + next_page = self.flow_data[params["flow_id"]] params["presets"] = new_presets - self._find_reachable_pages_rec(params) + self._find_reachable_pages_rec(next_page, params) params["conversation_path"].pop(-1) # pop presets since we can't do it if we're passing a params dict like this @@ -404,23 +406,23 @@ def _find_reachable_pages_rec_helper( print(params["conversation_path"], params["intent_route_count"]) old_presets = params["presets"].copy() - new_presets = self._get_new_presets(params["presets"], params["page"], params["route"]) - params["page"] = self.flow_data[params["flow_id"]] + new_presets = self._get_new_presets(params["presets"], page, route) + next_page = self.flow_data[params["flow_id"]] params["presets"] = new_presets - self._find_reachable_pages_rec(params) + self._find_reachable_pages_rec(next_page, params) 
params["conversation_path"].pop(-1) # pop presets since we can't do it if we're passing a params dict like this params["presets"] = old_presets elif len(target_page) > 0: - print(params["page"].display_name, "->", target_page) + print(page.display_name, "->", target_page) # This should not happen, and if it does it needs to be fixed input() elif len(target_flow) > 0: - flow_name = self.flows_map[params["route"].target_flow] + flow_name = self.flows_map[route.target_flow] if params["verbose"]: - print(params["page"].display_name, "->", flow_name) + print(page.display_name, "->", flow_name) if flow_name not in params["reachable"]: params["reachable"].append(flow_name) params["min_intent_counts"].append(params["intent_route_count"]) @@ -432,8 +434,8 @@ def _find_reachable_pages_rec_helper( params["min_intent_counts"][params["reachable"].index(flow_name)] = params["intent_route_count"] else: if params["verbose"]: - print(params["page"].display_name, "->", params["route"].target_flow, "(empty)") - page_name = params["page"].display_name + print(page.display_name, "->", route.target_flow, "(empty)") + page_name = page.display_name if ( page_name in params["reachable"] and params["intent_route_count"] @@ -484,6 +486,7 @@ def _get_new_presets(self, presets, page, route): def _find_reachable_pages_rec( self, + page: DFCXPage | DFCXFlow, params: Dict ) -> None: """Recursive function to find reachable pages within a given flow, @@ -491,43 +494,42 @@ def _find_reachable_pages_rec( more general traversal, but not currently used.""" if not params["flow_name"]: params["flow_name"] = self.flows_map[params["flow_id"]] - if hasattr(params["page"], "form") and params["page"].form: - for parameter in params["page"].form.parameters: + if hasattr(page, "form") and page.form: + for parameter in page.form.parameters: self._process_form_parameter_for_reachable_pages( - params, - parameter + page, + parameter, + params ) - for event_handler in params["page"].event_handlers: + for 
event_handler in page.event_handlers: if params["limit_intent_to_initial"] and not params["is_initial"]: continue if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - params["route"] = event_handler - self._find_reachable_pages_rec_helper(params) - for route in params["page"].transition_routes: - params["route"] = route - self._find_reachable_pages_rec_helper(params) + self._find_reachable_pages_rec_helper(page, event_handler, params) + for route in page.transition_routes: + self._find_reachable_pages_rec_helper(page, route, params) if params["include_groups"]: - for route_group in params["page"].transition_route_groups: + for route_group in page.transition_route_groups: # TODO: Need to map by flow for route in self.route_group_data[params["flow_id"]][ route_group ].transition_routes: - params["route"] = route - self._find_reachable_pages_rec_helper(params) + self._find_reachable_pages_rec_helper(page, route, params) # Start page routes and route groups are also accessible from this page if ( params["include_start_page_routes"] - and params["page"].display_name != params["flow_name"] + and page.display_name != params["flow_name"] and (not params["limit_intent_to_initial"] or params["is_initial"]) ): self._process_start_page_routes_for_reachable_pages(params) def _process_form_parameter_for_reachable_pages( self, - params: Dict, - parameter # TODO: Data type for DFCX Parameter + page: DFCXPage | DFCXFlow, + parameter, # TODO: Data type for DFCX Parameter + params: Dict ) -> None: for event_handler in parameter.fill_behavior.reprompt_event_handlers: if params["limit_intent_to_initial"] and not params["is_initial"]: @@ -535,32 +537,28 @@ def _process_form_parameter_for_reachable_pages( if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - params["route"] = event_handler - self._find_reachable_pages_rec_helper(params) + self._find_reachable_pages_rec_helper(page, event_handler, params) def 
_process_start_page_routes_for_reachable_pages( self, params: Dict ): page = self.flow_data[params["flow_id"]] - for event_handler in params["page"].event_handlers: + for event_handler in page.event_handlers: if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - params["route"] = event_handler - self._find_reachable_pages_rec_helper(params) - for route in params["page"].transition_routes: + self._find_reachable_pages_rec_helper(page, event_handler, params) + for route in page.transition_routes: if hasattr(route, "intent") and route.intent != "": - params["route"] = route - self._find_reachable_pages_rec_helper(params) + self._find_reachable_pages_rec_helper(page, route, params) if params["include_groups"]: - for route_group in params["page"].transition_route_groups: + for route_group in page.transition_route_groups: for route in self.route_group_data[params["flow_id"]][ route_group ].transition_routes: if hasattr(route, "intent") and route.intent != "": - params["route"] = route - self._find_reachable_pages_rec_helper(params) + self._find_reachable_pages_rec_helper(page, route, params) def find_reachable_pages( self, @@ -623,7 +621,6 @@ def find_reachable_pages( params = { 'flow_id': flow_id, 'flow_name': flow_name, - 'page': page_data, 'reachable': reachable, 'conversation_path': conversation_path, 'min_intent_counts': min_intent_counts, @@ -637,7 +634,7 @@ def find_reachable_pages( 'include_meta': include_meta, 'verbose': verbose } - self._find_reachable_pages_rec(params) + self._find_reachable_pages_rec(page_data, params) return reachable def find_unreachable_pages( From 2d99b5ecb052ee6ffe16df34946214e3f47ad819 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 5 Jan 2023 16:53:36 +0000 Subject: [PATCH 077/151] Create additional recursion helper function --- src/dfcx_scrapi/tools/agent_checker_util.py | 114 ++++++++------------ 1 file changed, 42 insertions(+), 72 deletions(-) diff --git 
a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index ee42b57f..3ccd3389 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -232,6 +232,39 @@ def _get_page( # Changelogs # Reachable and unreachable pages + + def _continue_page_recursion( + self, + page: DFCXPage | DFCXFlow, + page_name: str, + route: DFCXRoute, + target_page: str, + params: Dict + ) -> None: + if page_name not in params["reachable"]: + params["reachable"].append(page_name) + params["min_intent_counts"].append(params["intent_route_count"]) + else: + # Better route found, traverse from here + params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] + + params["conversation_path"].append(page_name) + if params["verbose"]: + print(params["conversation_path"], params["intent_route_count"]) + + old_presets = params["presets"].copy() + new_presets = self._get_new_presets(params["presets"], page, route) + if "START_PAGE" in target_page: + next_page = self.flow_data[params["flow_id"]] + else: + next_page = self.page_data[params["flow_id"]][target_page] + params["presets"] = new_presets + + self._find_reachable_pages_rec(next_page, params) + + params["conversation_path"].pop(-1) + # pop presets since we can't do it if we're passing a params dict like this + params["presets"] = old_presets def _find_reachable_pages_rec_helper( self, @@ -274,44 +307,13 @@ def _find_reachable_pages_rec_helper( print(page.display_name, "->", page_name) # Move to this page (this is also the recursion limiting step # to prevent infinite loops) - if page_name not in params["reachable"]: - params["reachable"].append(page_name) - params["min_intent_counts"].append(params["intent_route_count"]) - params["conversation_path"].append(page_name) - if params["verbose"]: - print(params["conversation_path"], params["intent_route_count"]) - - old_presets = params["presets"].copy() - new_presets = 
self._get_new_presets(params["presets"], page, route) - next_page = self.page_data[params["flow_id"]][target_page] - params["presets"] = new_presets - - self._find_reachable_pages_rec(next_page, params) - - params["conversation_path"].pop(-1) - # pop presets since we can't do it if we're passing a params dict like this - params["presets"] = old_presets - elif ( - page_name in params["reachable"] + if ( + page_name not in params["reachable"] + or (page_name in params["reachable"] and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(page_name)] + < params["min_intent_counts"][params["reachable"].index(page_name)]) ): - # Better route found, traverse from here - params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] - params["conversation_path"].append(page_name) - if params["verbose"]: - print(params["conversation_path"], params["intent_route_count"]) - - old_presets = params["presets"].copy() - new_presets = self._get_new_presets(params["presets"], page, route) - next_page = self.page_data[params["flow_id"]][target_page] - params["presets"] = new_presets - - self._find_reachable_pages_rec(next_page, params) - - params["conversation_path"].pop(-1) - # pop presets since we can't do it if we're passing a params dict like this - params["presets"] = old_presets + self._continue_page_recursion(page, page_name, route, target_page, params) elif "END_FLOW" in target_page: if params["verbose"]: print(page.display_name, "-> END FLOW") @@ -377,44 +379,12 @@ def _find_reachable_pages_rec_helper( if params["verbose"]: print(page.display_name, "-> START PAGE") page_name = "Start" - if page_name not in params["reachable"]: - params["reachable"].append(page_name) - params["min_intent_counts"].append(params["intent_route_count"]) - params["conversation_path"].append(page_name) - if params["verbose"]: - print(params["conversation_path"], params["intent_route_count"]) - - old_presets = 
params["presets"].copy() - new_presets = self._get_new_presets(params["presets"], page, route) - next_page = self.flow_data[params["flow_id"]] - params["presets"] = new_presets - - self._find_reachable_pages_rec(next_page, params) - - params["conversation_path"].pop(-1) - # pop presets since we can't do it if we're passing a params dict like this - params["presets"] = old_presets - elif ( - page_name in params["reachable"] + if (page_name not in params["reachable"] + or (page_name in params["reachable"] and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(page_name)] + < params["min_intent_counts"][params["reachable"].index(page_name)]) ): - # Better route found, traverse from here - params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] - params["conversation_path"].append(page_name) - if params["verbose"]: - print(params["conversation_path"], params["intent_route_count"]) - - old_presets = params["presets"].copy() - new_presets = self._get_new_presets(params["presets"], page, route) - next_page = self.flow_data[params["flow_id"]] - params["presets"] = new_presets - - self._find_reachable_pages_rec(next_page, params) - - params["conversation_path"].pop(-1) - # pop presets since we can't do it if we're passing a params dict like this - params["presets"] = old_presets + self._continue_page_recursion(page, page_name, route, target_page, params) elif len(target_page) > 0: print(page.display_name, "->", target_page) # This should not happen, and if it does it needs to be fixed From 2625b3393d3c0dcb33e0c858ebb15a3779d01786 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 5 Jan 2023 17:17:35 +0000 Subject: [PATCH 078/151] Create helper function for meta-pages --- src/dfcx_scrapi/tools/agent_checker_util.py | 98 ++++++++------------- 1 file changed, 37 insertions(+), 61 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py 
index 3ccd3389..35a5c8fd 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -266,6 +266,37 @@ def _continue_page_recursion( # pop presets since we can't do it if we're passing a params dict like this params["presets"] = old_presets + def _handle_meta_page( + self, + page: DFCXPage | DFCXFlow, + target_page: str, + params: Dict + ) -> None: + page_name = page.display_name + if "END_SESSION" in target_page: + page_name = "END SESSION" + elif "END_FLOW" in target_page: + page_name = "END FLOW" + elif "PREVIOUS_PAGE" in target_page: + page_name = "PREVIOUS PAGE" + #elif "CURRENT_PAGE" in target_page: + # page_name = page.display_name + + if params["verbose"]: + print(page.display_name, "->", page_name) + if page_name == page.display_name or params["include_meta"]: + if page_name not in params["reachable"]: + params["reachable"].append(page_name) + params["min_intent_counts"].append(params["intent_route_count"]) + elif ( + page_name in params["reachable"] + and params["intent_route_count"] + < params["min_intent_counts"][params["reachable"].index(page_name)] + ): + params["min_intent_counts"][ + params["reachable"].index(page_name) + ] = params["intent_route_count"] + def _find_reachable_pages_rec_helper( self, page: DFCXPage | DFCXFlow, @@ -314,67 +345,12 @@ def _find_reachable_pages_rec_helper( < params["min_intent_counts"][params["reachable"].index(page_name)]) ): self._continue_page_recursion(page, page_name, route, target_page, params) - elif "END_FLOW" in target_page: - if params["verbose"]: - print(page.display_name, "-> END FLOW") - if params["include_meta"]: - page_name = "END FLOW" - if page_name not in params["reachable"]: - params["reachable"].append(page_name) - params["min_intent_counts"].append(params["intent_route_count"]) - elif ( - page_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(page_name)] - ): - 
params["min_intent_counts"][ - params["reachable"].index(page_name) - ] = params["intent_route_count"] - # reachable.append('END FLOW') - elif "END_SESSION" in target_page: - if params["verbose"]: - print(page.display_name, "-> END SESSION") - if params["include_meta"]: - page_name = "END SESSION" - if page_name not in params["reachable"]: - params["reachable"].append(page_name) - params["min_intent_counts"].append(params["intent_route_count"]) - elif ( - page_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(page_name)] - ): - params["min_intent_counts"][ - params["reachable"].index(page_name) - ] = params["intent_route_count"] - # reachable.append('END SESSION') - elif "CURRENT_PAGE" in target_page: - if params["verbose"]: - print(page.display_name, "-> CURRENT PAGE") - page_name = page.display_name - if ( - page_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(page_name)] - ): - params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] - elif "PREVIOUS_PAGE" in target_page: - if params["verbose"]: - print(page.display_name, "-> PREVIOUS PAGE") - if params["include_meta"]: - page_name = "PREVIOUS PAGE" - if page_name not in params["reachable"]: - params["reachable"].append(page_name) - params["min_intent_counts"].append(params["intent_route_count"]) - elif ( - page_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(page_name)] - ): - params["min_intent_counts"][ - params["reachable"].index(page_name) - ] = params["intent_route_count"] - # TODO: This could cause huge problems... 
+ elif ("END_FLOW" in target_page + or "END_SESSION" in target_page + or "PREVIOUS_PAGE" in target_page + or "CURRENT_PAGE" in target_page + ): + self._handle_meta_page(page, target_page, params) elif "START_PAGE" in target_page: if params["verbose"]: print(page.display_name, "-> START PAGE") From ec4de151ffdf84207661c5143d448bc7746babbb Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 5 Jan 2023 22:08:32 +0000 Subject: [PATCH 079/151] Lint fixes --- src/dfcx_scrapi/tools/agent_checker_util.py | 105 +++++++++++++------- 1 file changed, 68 insertions(+), 37 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 35a5c8fd..bd8be8c5 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -232,7 +232,7 @@ def _get_page( # Changelogs # Reachable and unreachable pages - + def _continue_page_recursion( self, page: DFCXPage | DFCXFlow, @@ -246,8 +246,10 @@ def _continue_page_recursion( params["min_intent_counts"].append(params["intent_route_count"]) else: # Better route found, traverse from here - params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] - + params["min_intent_counts"][ + params["reachable"].index(page_name) + ] = params["intent_route_count"] + params["conversation_path"].append(page_name) if params["verbose"]: print(params["conversation_path"], params["intent_route_count"]) @@ -263,7 +265,7 @@ def _continue_page_recursion( self._find_reachable_pages_rec(next_page, params) params["conversation_path"].pop(-1) - # pop presets since we can't do it if we're passing a params dict like this + # pop presets since we can't if we're passing a params dict like this params["presets"] = old_presets def _handle_meta_page( @@ -287,16 +289,20 @@ def _handle_meta_page( if page_name == page.display_name or params["include_meta"]: if page_name not in params["reachable"]: params["reachable"].append(page_name) - 
params["min_intent_counts"].append(params["intent_route_count"]) + params["min_intent_counts"].append( + params["intent_route_count"] + ) elif ( page_name in params["reachable"] and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(page_name)] + < params["min_intent_counts"][ + params["reachable"].index(page_name) + ] ): params["min_intent_counts"][ params["reachable"].index(page_name) ] = params["intent_route_count"] - + def _find_reachable_pages_rec_helper( self, page: DFCXPage | DFCXFlow, @@ -312,7 +318,8 @@ def _find_reachable_pages_rec_helper( target_flow = route.target_flow if ( hasattr(route, "intent") and route.intent != "" - and params["intent_route_limit"] and params["intent_route_count"] >= params["intent_route_limit"] + and params["intent_route_limit"] + and params["intent_route_count"] >= params["intent_route_limit"] ): return if hasattr(page, "form") and page.form: @@ -325,7 +332,9 @@ def _find_reachable_pages_rec_helper( or params["presets"][parameter_name] == "NULL" ): # This page has an unfilled parameter - if params["limit_intent_to_initial"] and not params["is_initial"]: + if (params["limit_intent_to_initial"] + and not params["is_initial"] + ): return if hasattr(route, "intent") and route.intent != "": if params["limit_intent_to_initial"] and not params["is_initial"]: @@ -333,7 +342,9 @@ def _find_reachable_pages_rec_helper( return params["intent_route_count"] += 1 if target_page in self.page_data[params["flow_id"]]: - page_name = self.page_data[params["flow_id"]][target_page].display_name + page_name = self.page_data[params["flow_id"]][ + target_page + ].display_name if params["verbose"]: print(page.display_name, "->", page_name) # Move to this page (this is also the recursion limiting step @@ -342,9 +353,12 @@ def _find_reachable_pages_rec_helper( page_name not in params["reachable"] or (page_name in params["reachable"] and params["intent_route_count"] - < 
params["min_intent_counts"][params["reachable"].index(page_name)]) + < params["min_intent_counts"][ + params["reachable"].index(page_name) + ]) ): - self._continue_page_recursion(page, page_name, route, target_page, params) + self._continue_page_recursion(page, page_name, route, + target_page, params) elif ("END_FLOW" in target_page or "END_SESSION" in target_page or "PREVIOUS_PAGE" in target_page @@ -358,9 +372,12 @@ def _find_reachable_pages_rec_helper( if (page_name not in params["reachable"] or (page_name in params["reachable"] and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(page_name)]) + < params["min_intent_counts"][ + params["reachable"].index(page_name) + ]) ): - self._continue_page_recursion(page, page_name, route, target_page, params) + self._continue_page_recursion(page, page_name, route, + target_page, params) elif len(target_page) > 0: print(page.display_name, "->", target_page) # This should not happen, and if it does it needs to be fixed @@ -371,13 +388,19 @@ def _find_reachable_pages_rec_helper( print(page.display_name, "->", flow_name) if flow_name not in params["reachable"]: params["reachable"].append(flow_name) - params["min_intent_counts"].append(params["intent_route_count"]) + params["min_intent_counts"].append( + params["intent_route_count"] + ) elif ( flow_name in params["reachable"] and params["intent_route_count"] - < params["min_intent_counts"][params["reachable"].index(flow_name)] + < params["min_intent_counts"][ + params["reachable"].index(flow_name) + ] ): - params["min_intent_counts"][params["reachable"].index(flow_name)] = params["intent_route_count"] + params["min_intent_counts"][ + params["reachable"].index(flow_name) + ] = params["intent_route_count"] else: if params["verbose"]: print(page.display_name, "->", route.target_flow, "(empty)") @@ -385,9 +408,13 @@ def _find_reachable_pages_rec_helper( if ( page_name in params["reachable"] and params["intent_route_count"] - < 
params["min_intent_counts"][params["reachable"].index(page_name)] + < params["min_intent_counts"][ + params["reachable"].index(page_name) + ] ): - params["min_intent_counts"][params["reachable"].index(page_name)] = params["intent_route_count"] + params["min_intent_counts"][ + params["reachable"].index(page_name) + ] = params["intent_route_count"] def _get_new_presets(self, presets, page, route): new_presets = presets.copy() @@ -453,7 +480,8 @@ def _find_reachable_pages_rec( if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper(page, event_handler, params) + self._find_reachable_pages_rec_helper(page, event_handler, + params) for route in page.transition_routes: self._find_reachable_pages_rec_helper(page, route, params) if params["include_groups"]: @@ -483,7 +511,8 @@ def _process_form_parameter_for_reachable_pages( if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper(page, event_handler, params) + self._find_reachable_pages_rec_helper(page, event_handler, + params) def _process_start_page_routes_for_reachable_pages( self, @@ -494,7 +523,8 @@ def _process_start_page_routes_for_reachable_pages( if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper(page, event_handler, params) + self._find_reachable_pages_rec_helper(page, event_handler, + params) for route in page.transition_routes: if hasattr(route, "intent") and route.intent != "": self._find_reachable_pages_rec_helper(page, route, params) @@ -504,7 +534,8 @@ def _process_start_page_routes_for_reachable_pages( route_group ].transition_routes: if hasattr(route, "intent") and route.intent != "": - self._find_reachable_pages_rec_helper(page, route, params) + self._find_reachable_pages_rec_helper(page, route, + params) def find_reachable_pages( self, @@ -565,20 +596,20 @@ def find_reachable_pages( 
page_id=None, page_name=from_page ) params = { - 'flow_id': flow_id, - 'flow_name': flow_name, - 'reachable': reachable, - 'conversation_path': conversation_path, - 'min_intent_counts': min_intent_counts, - 'presets': presets, - 'intent_route_limit': intent_route_limit, - 'intent_route_count': 0, - 'include_groups': include_groups, - 'include_start_page_routes': include_start_page_routes, - 'limit_intent_to_initial': limit_intent_to_initial, - 'is_initial': is_initial, - 'include_meta': include_meta, - 'verbose': verbose + "flow_id": flow_id, + "flow_name": flow_name, + "reachable": reachable, + "conversation_path": conversation_path, + "min_intent_counts": min_intent_counts, + "presets": presets, + "intent_route_limit": intent_route_limit, + "intent_route_count": 0, + "include_groups": include_groups, + "include_start_page_routes": include_start_page_routes, + "limit_intent_to_initial": limit_intent_to_initial, + "is_initial": is_initial, + "include_meta": include_meta, + "verbose": verbose } self._find_reachable_pages_rec(page_data, params) return reachable From 9f554e035670cc2cf0e47cc41283875362addcc9 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 9 Jan 2023 21:23:27 +0000 Subject: [PATCH 080/151] Clean up style and make compatible with Python 3.8 --- src/dfcx_scrapi/tools/agent_checker_util.py | 82 +++++++++++---------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index bd8be8c5..e6d90b65 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -17,7 +17,7 @@ from __future__ import annotations import logging -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union import pandas as pd import google.cloud.dialogflowcx_v3beta1.types as dfcx_types @@ -29,7 +29,6 @@ from dfcx_scrapi.core.pages import Pages from dfcx_scrapi.core.webhooks import Webhooks from 
dfcx_scrapi.core.transition_route_groups import TransitionRouteGroups -from dfcx_scrapi.core.test_cases import TestCases # Type aliases DFCXFlow = dfcx_types.flow.Flow @@ -99,7 +98,6 @@ def __init__( self.route_groups = TransitionRouteGroups( creds=self.creds, agent_id=self.agent_id ) - self.test_cases = TestCases(creds=self.creds, agent_id=self.agent_id) # Generate maps self.intents_map = self.intents.get_intents_map(agent_id=self.agent_id) @@ -195,7 +193,7 @@ def _get_page( flow_name: str = None, page_id: str = None, page_name: str = None, - ) -> DFCXPage | DFCXFlow: + ) -> Union[DFCXPage, DFCXFlow]: """Gets the page data for a specified page within a specified flow. The flow and page can be specified by ID or by display name. @@ -235,7 +233,7 @@ def _get_page( def _continue_page_recursion( self, - page: DFCXPage | DFCXFlow, + page: Union[DFCXPage, DFCXFlow], page_name: str, route: DFCXRoute, target_page: str, @@ -270,7 +268,7 @@ def _continue_page_recursion( def _handle_meta_page( self, - page: DFCXPage | DFCXFlow, + page: Union[DFCXPage, DFCXFlow], target_page: str, params: Dict ) -> None: @@ -305,7 +303,7 @@ def _handle_meta_page( def _find_reachable_pages_rec_helper( self, - page: DFCXPage | DFCXFlow, + page: Union[DFCXPage, DFCXFlow], route: DFCXRoute, params: Dict ) -> None: @@ -459,7 +457,7 @@ def _get_new_presets(self, presets, page, route): def _find_reachable_pages_rec( self, - page: DFCXPage | DFCXFlow, + page: Union[DFCXPage, DFCXFlow], params: Dict ) -> None: """Recursive function to find reachable pages within a given flow, @@ -501,7 +499,7 @@ def _find_reachable_pages_rec( def _process_form_parameter_for_reachable_pages( self, - page: DFCXPage | DFCXFlow, + page: Union[DFCXPage, DFCXFlow], parameter, # TODO: Data type for DFCX Parameter params: Dict ) -> None: @@ -710,21 +708,22 @@ def find_all_unreachable_pages( page_names.extend(unreachable) return pd.DataFrame({"flow_name": flow_names, "page_name": page_names}) - def 
add_intents_from_routes(self, - transition_list: List[DFCXRoute], - intents: List[str], - routegroups: List[str], - route_group - ) -> None: + def _get_intents_from_routes( + self, + transition_list: List[DFCXRoute], + route_group + ) -> Dict[str, List[str]]: """Helper function which adds intents from routes to a list of intents Args: transition_list, The list of transition routes - intents, The list of intent names Returns: - Nothing (appends to the intent list) + A dictionary with keys 'intents' and 'routegroups' which each contain + a list of intent/route group names to be added """ + intents = [] + routegroups = [] for route in transition_list: # Ignore empty intents (such as the true condition) if len(route.intent) == 0: @@ -736,13 +735,18 @@ def add_intents_from_routes(self, routegroups.append(route_group.display_name) else: routegroups.append("") + return { + 'intents': intents, + 'routegroups': routegroups + } - def _get_page_intents(self, - flow_id: Optional[str] = None, - flow_name: Optional[str] = None, - page_id: Optional[str] = None, - page_name: Optional[str] = None, - include_groups: bool = True + def _get_page_intents( + self, + flow_id: Optional[str] = None, + flow_name: Optional[str] = None, + page_id: Optional[str] = None, + page_name: Optional[str] = None, + include_groups: bool = True ) -> List[str]: """Get the list of intents for a given page of this flow. 
@@ -758,13 +762,12 @@ def _get_page_intents(self, page = self._get_page(flow_id=flow_id, flow_name=flow_name, page_id=page_id, page_name=page_name) - page_routegroups = [] page_intents = [] + page_routegroups = [] transition_list = page.transition_routes - self.add_intents_from_routes(transition_list, - page_intents, - page_routegroups, - None) + route_intent_dict = self._get_intents_from_routes(transition_list,None) + page_intents.extend(route_intent_dict["intents"]) + page_routegroups.extend(route_intent_dict["routegroups"]) if not flow_id: flow_id = self.flows_map_rev[flow_name] @@ -773,19 +776,22 @@ def _get_page_intents(self, if include_groups: for route_group_id in page.transition_route_groups: route_group = self.route_group_data[flow_id][route_group_id] - self.add_intents_from_routes(route_group.transition_routes, - page_intents, - page_routegroups, - route_group) + route_intent_dict = self._get_intents_from_routes( + route_group.transition_routes, + route_group + ) + page_intents.extend(route_intent_dict["intents"]) + page_routegroups.extend(route_intent_dict["routegroups"]) return pd.DataFrame({ "route group": page_routegroups, "intent": page_intents }) - def find_reachable_intents(self, - flow_name, - include_groups: bool = True + def find_reachable_intents( + self, + flow_name, + include_groups: bool = True ) -> List[str]: """Finds all intents which are on reachable pages, starting from the start page of the given flow. 
@@ -810,7 +816,7 @@ def find_reachable_intents(self, page_name=page_name, include_groups=include_groups )["intent"]) - intents |= page_intents + intents.update(page_intents) return list(intents) def find_all_reachable_intents(self) -> pd.DataFrame: @@ -831,7 +837,7 @@ def find_all_reachable_intents(self) -> pd.DataFrame: intents[intent].append(flow_name) else: intents[intent] = [flow_name] - # Also return the unreachable ones, because why not + return pd.DataFrame({ "intent": intents.keys(), "flows": intents.values() @@ -849,7 +855,7 @@ def find_all_unreachable_intents(self) -> List[str]: for flow_name in self.flows_map_rev: flow_intents = self.find_reachable_intents(flow_name=flow_name, include_groups=True) - all_reachable_intents |= set(flow_intents) + all_reachable_intents.update(set(flow_intents)) unreachable_intents = [] for intent in self.intent_data: if intent.display_name in all_reachable_intents: From f4a41092af0b67f122ad5bd40df77166d50a662c Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 9 Jan 2023 21:50:53 +0000 Subject: [PATCH 081/151] Continue cleaning up style --- src/dfcx_scrapi/tools/agent_checker_util.py | 32 ++++++++++++--------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index e6d90b65..fb008d62 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -19,6 +19,7 @@ import logging from typing import Dict, List, Optional, Union import pandas as pd +from collections import defaultdict import google.cloud.dialogflowcx_v3beta1.types as dfcx_types @@ -121,25 +122,22 @@ def __init__( # Get intent, flow, and page data self.intent_data = self.intents.list_intents(agent_id=self.agent_id) - self.flow_data = self.get_all_flow_data() - self.page_data = self.get_all_page_data() - self.route_group_data = self.get_all_route_group_data() + self.flow_data = self._get_all_flow_data() + 
self.page_data = self._get_all_page_data() + self.route_group_data = self._get_all_route_group_data() - def get_all_flow_data(self): - flow_data = {} + def _get_all_flow_data(self): flow_list = self.flows.list_flows(self.agent_id) - for flow in flow_list: - flow_data[flow.name] = flow - return flow_data + return {flow.name: flow for flow in flow_list} - def get_all_page_data(self): + def _get_all_page_data(self): page_data = {} for flow_id in self.flows_map.keys(): page_list = self.pages.list_pages(flow_id=flow_id) page_data[flow_id] = {page.name: page for page in page_list} return page_data - def get_all_route_group_data(self): + def _get_all_route_group_data(self): route_group_data = {} for flow_id in self.flows_map.keys(): group_list = self.route_groups.list_transition_route_groups( @@ -415,6 +413,8 @@ def _find_reachable_pages_rec_helper( ] = params["intent_route_count"] def _get_new_presets(self, presets, page, route): + """Gets parameter presets that have been added on a given route. + """ new_presets = presets.copy() if hasattr(page, "entry_fulfillment"): if hasattr(page.entry_fulfillment, "set_parameter_actions"): @@ -561,9 +561,12 @@ def find_reachable_pages( groups will be included, but only if they are actually referenced on some page include_start_page_routes: (Optional) Default true - limit_intent_to_initial: (Optional) Default False + limit_intent_to_initial: (Optional) Default False. If true, only + take intent routes on the initial page, rather than on any page + in the traversal. is_initial: (Optional) Default True - include_meta: (Optional) Default False + include_meta: (Optional) Default False. If true, includes special + transition targets like End Session, End Flow, etc. 
verbose: (Optional) If true, print debug information about route traversal @@ -828,15 +831,18 @@ def find_all_reachable_intents(self) -> pd.DataFrame: intent - the intent display name flows - a list of flow display names that use this intent """ - intents = {} + intents = defaultdict(lambda: []) # {} for flow_name in self.flows_map_rev: flow_intents = self.find_reachable_intents(flow_name=flow_name, include_groups=True) for intent in flow_intents: + intents[intent].append(flow_name) + """ if intent in intents: intents[intent].append(flow_name) else: intents[intent] = [flow_name] + """ return pd.DataFrame({ "intent": intents.keys(), From f6ed7f8bb66d5b75257bff5bd9dcad7f6339e8da Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 9 Jan 2023 21:53:18 +0000 Subject: [PATCH 082/151] Lint fixes --- src/dfcx_scrapi/tools/agent_checker_util.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index fb008d62..edb5b6b6 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -739,8 +739,8 @@ def _get_intents_from_routes( else: routegroups.append("") return { - 'intents': intents, - 'routegroups': routegroups + "intents": intents, + "routegroups": routegroups } def _get_page_intents( @@ -831,18 +831,12 @@ def find_all_reachable_intents(self) -> pd.DataFrame: intent - the intent display name flows - a list of flow display names that use this intent """ - intents = defaultdict(lambda: []) # {} + intents = defaultdict(lambda: []) for flow_name in self.flows_map_rev: flow_intents = self.find_reachable_intents(flow_name=flow_name, include_groups=True) for intent in flow_intents: intents[intent].append(flow_name) - """ - if intent in intents: - intents[intent].append(flow_name) - else: - intents[intent] = [flow_name] - """ return pd.DataFrame({ "intent": intents.keys(), From 
7db4f35d0f24cbc8f36b0f59aa793e29e965184b Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 30 Jan 2023 21:46:00 +0000 Subject: [PATCH 083/151] Clean up and add delays for API limits --- src/dfcx_scrapi/tools/agent_checker_util.py | 186 +++++++++++--------- 1 file changed, 102 insertions(+), 84 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index edb5b6b6..cc32904f 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -79,7 +79,18 @@ def __init__( creds=None, scope=False, agent_id: str = None, + delay: float = 1.0 ): + """ + Args: + delay (optional): The time in seconds to wait between CX API calls, + if you need to limit the rate. + + TODO: Total number of API calls made when + initializing this class is currently 5 + 5*(number of flows). This + can be optimized down to 2 + 2*(number of flows) by manually + creating the maps after listing the objects. + """ super().__init__( creds_path=creds_path, creds_dict=creds_dict, @@ -91,59 +102,68 @@ def __init__( if not self.agent_id: raise Exception("agent_id parameter is required") - self.intents = Intents(creds=self.creds, agent_id=self.agent_id) - self.entities = EntityTypes(creds=self.creds, agent_id=self.agent_id) - self.flows = Flows(creds=self.creds, agent_id=self.agent_id) - self.pages = Pages(creds=self.creds) - self.webhooks = Webhooks(creds=self.creds, agent_id=self.agent_id) - self.route_groups = TransitionRouteGroups( + self._intents = Intents(creds=self.creds, agent_id=self.agent_id) + self._entities = EntityTypes(creds=self.creds, agent_id=self.agent_id) + self._flows = Flows(creds=self.creds, agent_id=self.agent_id) + self._pages = Pages(creds=self.creds) + self._webhooks = Webhooks(creds=self.creds, agent_id=self.agent_id) + self._route_groups = TransitionRouteGroups( creds=self.creds, agent_id=self.agent_id ) # Generate maps - self.intents_map = 
self.intents.get_intents_map(agent_id=self.agent_id) - self.flows_map = self.flows.get_flows_map(agent_id=self.agent_id) - self.flows_map_rev = self.flows.get_flows_map( + self._intents_map = self._intents.get_intents_map(agent_id=self.agent_id) + time.sleep(delay) + self._flows_map = self._flows.get_flows_map(agent_id=self.agent_id) + time.sleep(delay) + self._flows_map_rev = self._flows.get_flows_map( agent_id=self.agent_id, reverse=True ) - self.pages_map = {} - for flow_id in self.flows_map.keys(): - self.pages_map[flow_id] = self.pages.get_pages_map(flow_id=flow_id) - self.pages_map_rev = {} - for flow_id in self.flows_map.keys(): - self.pages_map_rev[flow_id] = self.pages.get_pages_map( - flow_id=flow_id, reverse=True + time.sleep(delay) + + self._pages_map = {} + self._pages_map_rev = {} + self._route_groups_map = {} + for fid in self._flows_map.keys(): + self._pages_map[fid] = self._pages.get_pages_map(flow_id=fid) + time.sleep(delay) + self._pages_map_rev[fid] = self._pages.get_pages_map( + flow_id=fid, reverse=True ) - self.route_groups_map = {} - for fid in self.flows_map.keys(): - self.route_groups_map[fid] = self.route_groups.get_route_groups_map( + time.sleep(delay) + self._route_groups_map[fid] = self._route_groups.get_route_groups_map( flow_id=fid ) + time.sleep(delay) # Get intent, flow, and page data - self.intent_data = self.intents.list_intents(agent_id=self.agent_id) - self.flow_data = self._get_all_flow_data() - self.page_data = self._get_all_page_data() - self.route_group_data = self._get_all_route_group_data() - - def _get_all_flow_data(self): - flow_list = self.flows.list_flows(self.agent_id) + self._intent_data = self._intents.list_intents(agent_id=self.agent_id) + time.sleep(delay) + self._flow_data = self._get_all_flow_data(delay) + self._page_data = self._get_all_page_data(delay) + self._route_group_data = self._get_all_route_group_data(delay) + + def _get_all_flow_data(self, delay): + flow_list = self._flows.list_flows(self.agent_id) + 
time.sleep(delay) return {flow.name: flow for flow in flow_list} - def _get_all_page_data(self): + def _get_all_page_data(self, delay): page_data = {} - for flow_id in self.flows_map.keys(): - page_list = self.pages.list_pages(flow_id=flow_id) + for flow_id in self._flows_map.keys(): + page_list = self._pages.list_pages(flow_id=flow_id) page_data[flow_id] = {page.name: page for page in page_list} + time.sleep(delay) return page_data - def _get_all_route_group_data(self): + def _get_all_route_group_data(self, delay): route_group_data = {} - for flow_id in self.flows_map.keys(): - group_list = self.route_groups.list_transition_route_groups( + for flow_id in self._flows_map.keys(): + group_list = self._route_groups.list_transition_route_groups( flow_id=flow_id ) route_group_data[flow_id] = {rg.name: rg for rg in group_list} + time.sleep(delay) return route_group_data # Conversion utilities @@ -151,14 +171,14 @@ def _get_all_route_group_data(self): def _convert_intent(self, intent_id): """Gets an intent display name from an intent ID""" intent_id_converted = str(self.agent_id) + "/intents/" + str(intent_id) - return self.intents_map.get(intent_id_converted, "") + return self._intents_map.get(intent_id_converted, "") def _convert_flow(self, flow_id): """Gets a flow display name from a flow ID""" if flow_id.split("/")[-1] == "-": return "" # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) - return self.flows_map.get(flow_id, "Default Start Flow") + return self._flows_map.get(flow_id, "Default Start Flow") # TODO: Should throw error instead of returning default # Note that flow id includes agent, normally... 
@@ -171,16 +191,16 @@ def _convert_page(self, page_id, flow_id): elif page_id == "START_PAGE": return "Start" page_id_converted = str(flow_id) + "/pages/" + str(page_id) - if flow_id in self.pages_map: - return self.pages_map[flow_id].get(page_id_converted, "Start") + if flow_id in self._pages_map: + return self._pages_map[flow_id].get(page_id_converted, "Start") # TODO: Should throw error instead of returning default - print("Flow not found") + logging.info("Flow not found") # TODO: Should throw error, but returning this probably will anyway return "Invalid" def _get_intent_parameters(self, intent_name): """Gets the parameters for a particular intent, by display name""" - for intent in self.intent_data: + for intent in self._intent_data: if intent.display_name == intent_name: return intent.parameters return None @@ -209,21 +229,21 @@ def _get_page( """ # Look up flow ID if flow_name: - flow_id = self.flows_map_rev.get(flow_name, None) + flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: raise Exception(f"Flow not found: {flow_name}") # Now that flow_id is set, look up the page # Special case for the start page if page_name == "Start" or (page_id and "START_PAGE" in page_id): - return self.flow_data[flow_id] + return self._flow_data[flow_id] # Look up page ID if page_name: - page_id = self.pages_map_rev[flow_id].get(page_name, None) + page_id = self._pages_map_rev[flow_id].get(page_name, None) if not page_id: if not page_name: raise KeyError('Page not found. 
Did you forget "page_name="?') raise KeyError(f"Page not found: {page_name}") - return self.page_data[flow_id][page_id] + return self._page_data[flow_id][page_id] # Changelogs @@ -248,14 +268,14 @@ def _continue_page_recursion( params["conversation_path"].append(page_name) if params["verbose"]: - print(params["conversation_path"], params["intent_route_count"]) + logging.info(params["conversation_path"], params["intent_route_count"]) old_presets = params["presets"].copy() new_presets = self._get_new_presets(params["presets"], page, route) if "START_PAGE" in target_page: - next_page = self.flow_data[params["flow_id"]] + next_page = self._flow_data[params["flow_id"]] else: - next_page = self.page_data[params["flow_id"]][target_page] + next_page = self._page_data[params["flow_id"]][target_page] params["presets"] = new_presets self._find_reachable_pages_rec(next_page, params) @@ -281,7 +301,7 @@ def _handle_meta_page( # page_name = page.display_name if params["verbose"]: - print(page.display_name, "->", page_name) + logging.info(page.display_name, "->", page_name) if page_name == page.display_name or params["include_meta"]: if page_name not in params["reachable"]: params["reachable"].append(page_name) @@ -309,7 +329,7 @@ def _find_reachable_pages_rec_helper( finding reachable pages """ if not params["flow_name"]: - params["flow_name"] = self.flows_map[params["flow_id"]] + params["flow_name"] = self._flows_map[params["flow_id"]] target_page = route.target_page target_flow = route.target_flow if ( @@ -337,12 +357,12 @@ def _find_reachable_pages_rec_helper( # Don't continue on this path return params["intent_route_count"] += 1 - if target_page in self.page_data[params["flow_id"]]: - page_name = self.page_data[params["flow_id"]][ + if target_page in self._page_data[params["flow_id"]]: + page_name = self._page_data[params["flow_id"]][ target_page ].display_name if params["verbose"]: - print(page.display_name, "->", page_name) + logging.info(page.display_name, "->", 
page_name) # Move to this page (this is also the recursion limiting step # to prevent infinite loops) if ( @@ -363,7 +383,7 @@ def _find_reachable_pages_rec_helper( self._handle_meta_page(page, target_page, params) elif "START_PAGE" in target_page: if params["verbose"]: - print(page.display_name, "-> START PAGE") + logging.info(page.display_name, "-> START PAGE") page_name = "Start" if (page_name not in params["reachable"] or (page_name in params["reachable"] @@ -375,13 +395,13 @@ def _find_reachable_pages_rec_helper( self._continue_page_recursion(page, page_name, route, target_page, params) elif len(target_page) > 0: - print(page.display_name, "->", target_page) + logging.info(page.display_name, "->", target_page) # This should not happen, and if it does it needs to be fixed input() elif len(target_flow) > 0: - flow_name = self.flows_map[route.target_flow] + flow_name = self._flows_map[route.target_flow] if params["verbose"]: - print(page.display_name, "->", flow_name) + logging.info(page.display_name, "->", flow_name) if flow_name not in params["reachable"]: params["reachable"].append(flow_name) params["min_intent_counts"].append( @@ -399,7 +419,7 @@ def _find_reachable_pages_rec_helper( ] = params["intent_route_count"] else: if params["verbose"]: - print(page.display_name, "->", route.target_flow, "(empty)") + logging.info(page.display_name, "->", route.target_flow, "(empty)") page_name = page.display_name if ( page_name in params["reachable"] @@ -447,7 +467,7 @@ def _get_new_presets(self, presets, page, route): new_presets[param_preset.parameter] = param_preset.value if hasattr(route, "intent") and route.intent != "": # Check the entities annotated on this intent - intent_name = self.intents_map[route.intent] + intent_name = self._intents_map[route.intent] intent_params = self._get_intent_parameters(intent_name) for param in intent_params: new_presets[ @@ -464,8 +484,8 @@ def _find_reachable_pages_rec( starting at a particular page. 
Other parameters here are used for more general traversal, but not currently used.""" if not params["flow_name"]: - params["flow_name"] = self.flows_map[params["flow_id"]] - if hasattr(page, "form") and page.form: + params["flow_name"] = self._flows_map[params["flow_id"]] + if hasattr(page, "form") and page.form: # if getattr(page, "form", None): for parameter in page.form.parameters: self._process_form_parameter_for_reachable_pages( page, @@ -485,7 +505,7 @@ def _find_reachable_pages_rec( if params["include_groups"]: for route_group in page.transition_route_groups: # TODO: Need to map by flow - for route in self.route_group_data[params["flow_id"]][ + for route in self._route_group_data[params["flow_id"]][ route_group ].transition_routes: self._find_reachable_pages_rec_helper(page, route, params) @@ -528,7 +548,7 @@ def _process_start_page_routes_for_reachable_pages( self._find_reachable_pages_rec_helper(page, route, params) if params["include_groups"]: for route_group in page.transition_route_groups: - for route in self.route_group_data[params["flow_id"]][ + for route in self._route_group_data[params["flow_id"]][ route_group ].transition_routes: if hasattr(route, "intent") and route.intent != "": @@ -576,12 +596,12 @@ def find_reachable_pages( if not flow_id: if not flow_name: raise Exception("One of flow_id or flow_name must be set") - if flow_name in self.flows_map_rev: - flow_id = self.flows_map_rev[flow_name] + if flow_name in self._flows_map_rev: + flow_id = self._flows_map_rev[flow_name] else: raise Exception(f"Flow not found: {flow_name}") - if flow_id in self.flows_map: - flow_name = self.flows_map[flow_id] + if flow_id in self._flows_map: + flow_name = self._flows_map[flow_id] else: raise Exception(f"Flow not found: {flow_id}") @@ -641,19 +661,19 @@ def find_unreachable_pages( if not flow_id: if not flow_name: raise Exception("One of flow_id or flow_name must be set") - if flow_name in self.flows_map_rev: - flow_id = self.flows_map_rev[flow_name] + if 
flow_name in self._flows_map_rev: + flow_id = self._flows_map_rev[flow_name] else: raise Exception(f"Flow not found: {flow_name}") - if flow_id in self.flows_map: - flow_name = self.flows_map[flow_id] + if flow_id in self._flows_map: + flow_name = self._flows_map[flow_id] else: raise Exception(f"Flow not found: {flow_id}") reachable = self.find_reachable_pages( flow_id, flow_name, include_groups=include_groups, verbose=verbose ) - return list(set(self.pages_map[flow_id].values()) - set(reachable)) + return list(set(self._pages_map[flow_id].values()) - set(reachable)) def find_all_reachable_pages( self, @@ -673,7 +693,7 @@ def find_all_reachable_pages( """ flow_names = [] page_names = [] - for flow_id, flow_name in self.flows_map.items(): + for flow_id, flow_name in self._flows_map.items(): reachable = self.find_reachable_pages( flow_id=flow_id, include_groups=include_groups, @@ -701,7 +721,7 @@ def find_all_unreachable_pages( """ flow_names = [] page_names = [] - for flow_id, flow_name in self.flows_map.items(): + for flow_id, flow_name in self._flows_map.items(): unreachable = self.find_unreachable_pages( flow_id=flow_id, include_groups=include_groups, @@ -719,7 +739,9 @@ def _get_intents_from_routes( """Helper function which adds intents from routes to a list of intents Args: - transition_list, The list of transition routes + transition_list: The list of transition routes + route_group (Optional): The route group where the route is + located. 
Returns: A dictionary with keys 'intents' and 'routegroups' which each contain @@ -731,7 +753,7 @@ def _get_intents_from_routes( # Ignore empty intents (such as the true condition) if len(route.intent) == 0: continue - intent = self.intents_map[route.intent] + intent = self._intents_map[route.intent] if intent not in intents: intents.append(intent) if route_group is not None: @@ -773,12 +795,12 @@ def _get_page_intents( page_routegroups.extend(route_intent_dict["routegroups"]) if not flow_id: - flow_id = self.flows_map_rev[flow_name] + flow_id = self._flows_map_rev[flow_name] # Get intents in transition route groups if include_groups: for route_group_id in page.transition_route_groups: - route_group = self.route_group_data[flow_id][route_group_id] + route_group = self._route_group_data[flow_id][route_group_id] route_intent_dict = self._get_intents_from_routes( route_group.transition_routes, route_group @@ -813,7 +835,7 @@ def find_reachable_intents( flow_name=flow_name, include_groups=include_groups) for page_name in reachable_pages: - if page_name not in self.flows_map_rev: + if page_name not in self._flows_map_rev: page_intents = set(self._get_page_intents( flow_name=flow_name, page_name=page_name, @@ -832,7 +854,7 @@ def find_all_reachable_intents(self) -> pd.DataFrame: flows - a list of flow display names that use this intent """ intents = defaultdict(lambda: []) - for flow_name in self.flows_map_rev: + for flow_name in self._flows_map_rev: flow_intents = self.find_reachable_intents(flow_name=flow_name, include_groups=True) for intent in flow_intents: @@ -852,13 +874,9 @@ def find_all_unreachable_intents(self) -> List[str]: A list of unreachable intent display names """ all_reachable_intents = set() - for flow_name in self.flows_map_rev: + for flow_name in self._flows_map_rev: flow_intents = self.find_reachable_intents(flow_name=flow_name, include_groups=True) all_reachable_intents.update(set(flow_intents)) - unreachable_intents = [] - for intent in 
self.intent_data: - if intent.display_name in all_reachable_intents: - continue - unreachable_intents.append(intent.display_name) - return unreachable_intents + all_intents = {intent.display_name for intent in self._intent_data} + return all_intents - all_reachable_intents From 3bc51a7873f2713b8ada8a59ee510ae66efe7608 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 30 Jan 2023 22:05:36 +0000 Subject: [PATCH 084/151] Use flow display name instead of optional choice to use flow ID --- src/dfcx_scrapi/tools/agent_checker_util.py | 80 ++++++++------------- 1 file changed, 28 insertions(+), 52 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index cc32904f..0ad5ec47 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -167,6 +167,7 @@ def _get_all_route_group_data(self, delay): return route_group_data # Conversion utilities + # (Not currently used) def _convert_intent(self, intent_id): """Gets an intent display name from an intent ID""" @@ -207,18 +208,16 @@ def _get_intent_parameters(self, intent_name): def _get_page( self, - flow_id: str = None, - flow_name: str = None, - page_id: str = None, - page_name: str = None, + flow_name: str, + page_name: str ) -> Union[DFCXPage, DFCXFlow]: """Gets the page data for a specified page within a specified flow. The flow and page can be specified by ID or by display name. 
Args: - flow_id OR flow_name: The ID or display name of the flow - page_id OR page_name: The ID or display name of the page + flow_name: The display name of the flow + page_name: The display name of the page Returns: A DFCX Page object for this page, @@ -228,20 +227,16 @@ def _get_page( KeyError, if the page is not found """ # Look up flow ID - if flow_name: - flow_id = self._flows_map_rev.get(flow_name, None) + flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: raise Exception(f"Flow not found: {flow_name}") # Now that flow_id is set, look up the page # Special case for the start page - if page_name == "Start" or (page_id and "START_PAGE" in page_id): + if page_name == "Start": return self._flow_data[flow_id] # Look up page ID - if page_name: - page_id = self._pages_map_rev[flow_id].get(page_name, None) + page_id = self._pages_map_rev[flow_id].get(page_name, None) if not page_id: - if not page_name: - raise KeyError('Page not found. Did you forget "page_name="?') raise KeyError(f"Page not found: {page_name}") return self._page_data[flow_id][page_id] @@ -536,7 +531,7 @@ def _process_start_page_routes_for_reachable_pages( self, params: Dict ): - page = self.flow_data[params["flow_id"]] + page = self._flow_data[params["flow_id"]] for event_handler in page.event_handlers: if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" @@ -557,8 +552,7 @@ def _process_start_page_routes_for_reachable_pages( def find_reachable_pages( self, - flow_id: str = None, - flow_name: str = None, + flow_name: str, from_page: str = "Start", intent_route_limit: Optional[int] = None, include_groups: bool = True, @@ -573,7 +567,7 @@ def find_reachable_pages( flow_name must be used. Args: - flow_id OR flow_name: The ID or name of the flow + flow_name: The display name of the flow from_page: (Optional) The page to start from. 
If left blank, it will start on the Start Page intent_route_limit: (Optional) Default None @@ -593,17 +587,9 @@ def find_reachable_pages( Returns: The list of reachable pages in this flow """ + flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: - if not flow_name: - raise Exception("One of flow_id or flow_name must be set") - if flow_name in self._flows_map_rev: - flow_id = self._flows_map_rev[flow_name] - else: - raise Exception(f"Flow not found: {flow_name}") - if flow_id in self._flows_map: - flow_name = self._flows_map[flow_id] - else: - raise Exception(f"Flow not found: {flow_id}") + raise Exception(f"Flow not found: {flow_name}") # Start at the start page... reachable = [from_page] @@ -613,8 +599,8 @@ def find_reachable_pages( min_intent_counts = [25] presets = {} page_data = self._get_page( - flow_id=flow_id, flow_name=flow_name, - page_id=None, page_name=from_page + flow_name=flow_name, + page_name=from_page ) params = { "flow_id": flow_id, @@ -637,8 +623,7 @@ def find_reachable_pages( def find_unreachable_pages( self, - flow_id: str = None, - flow_name: str = None, + flow_name: str, include_groups: bool = True, verbose: bool = False, ) -> List[str]: @@ -658,20 +643,12 @@ def find_unreachable_pages( Returns: The list of unreachable pages in this flow """ + flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: - if not flow_name: - raise Exception("One of flow_id or flow_name must be set") - if flow_name in self._flows_map_rev: - flow_id = self._flows_map_rev[flow_name] - else: - raise Exception(f"Flow not found: {flow_name}") - if flow_id in self._flows_map: - flow_name = self._flows_map[flow_id] - else: - raise Exception(f"Flow not found: {flow_id}") + raise Exception(f"Flow not found: {flow_name}") reachable = self.find_reachable_pages( - flow_id, flow_name, include_groups=include_groups, verbose=verbose + flow_name, include_groups=include_groups, verbose=verbose ) return list(set(self._pages_map[flow_id].values()) - 
set(reachable)) @@ -767,25 +744,23 @@ def _get_intents_from_routes( def _get_page_intents( self, - flow_id: Optional[str] = None, - flow_name: Optional[str] = None, - page_id: Optional[str] = None, - page_name: Optional[str] = None, + flow_name: str, + page_name: str, include_groups: bool = True ) -> List[str]: """Get the list of intents for a given page of this flow. Args: - flow_id OR flow_name: The ID or name of the flow - page_id OR page_name: The ID or name of the page + flow_name: The display name of the flow + page_name: The display name of the page include_groups (Optional): If true, intents from transition route groups on the given page will be included Returns: List of intent names """ - page = self._get_page(flow_id=flow_id, flow_name=flow_name, - page_id=page_id, page_name=page_name) + page = self._get_page(flow_name=flow_name, + page_name=page_name) page_intents = [] page_routegroups = [] @@ -794,8 +769,9 @@ def _get_page_intents( page_intents.extend(route_intent_dict["intents"]) page_routegroups.extend(route_intent_dict["routegroups"]) + flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: - flow_id = self._flows_map_rev[flow_name] + raise Exception(f"Flow not found: {flow_name}") # Get intents in transition route groups if include_groups: @@ -815,7 +791,7 @@ def _get_page_intents( def find_reachable_intents( self, - flow_name, + flow_name: str, include_groups: bool = True ) -> List[str]: """Finds all intents which are on reachable pages, starting from the From 51eb98ba1ff0a03e348573617ad48bb4122cfbed Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Tue, 31 Jan 2023 00:09:33 +0000 Subject: [PATCH 085/151] Fix bug introduced before when trying to refactor is_initial --- src/dfcx_scrapi/tools/agent_checker_util.py | 137 +++++++++++++------- 1 file changed, 91 insertions(+), 46 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 0ad5ec47..1b1a280e 100644 --- 
a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -16,6 +16,7 @@ from __future__ import annotations +import time import logging from typing import Dict, List, Optional, Union import pandas as pd @@ -273,7 +274,7 @@ def _continue_page_recursion( next_page = self._page_data[params["flow_id"]][target_page] params["presets"] = new_presets - self._find_reachable_pages_rec(next_page, params) + self._find_reachable_pages_rec(next_page, params) # is_initial=False params["conversation_path"].pop(-1) # pop presets since we can't if we're passing a params dict like this @@ -297,6 +298,8 @@ def _handle_meta_page( if params["verbose"]: logging.info(page.display_name, "->", page_name) + # Only include the special "pages" like END SESSION + # if the include_meta parameter is set. if page_name == page.display_name or params["include_meta"]: if page_name not in params["reachable"]: params["reachable"].append(page_name) @@ -318,21 +321,26 @@ def _find_reachable_pages_rec_helper( self, page: Union[DFCXPage, DFCXFlow], route: DFCXRoute, - params: Dict + params: Dict, + # Having a default of False is absolutely critical + is_initial: bool = False ) -> None: """Helper function for the recursion involved in finding reachable pages """ + # TODO: Is this even used? if not params["flow_name"]: params["flow_name"] = self._flows_map[params["flow_id"]] target_page = route.target_page target_flow = route.target_flow + # TODO: Can this just be route.intent? Or do event routes give an error from this? 
if ( hasattr(route, "intent") and route.intent != "" and params["intent_route_limit"] and params["intent_route_count"] >= params["intent_route_limit"] ): return + # TODO: Could also change to a check on isinstance(page, DFCXPage) if hasattr(page, "form") and page.form: for parameter in page.form.parameters: parameter_name = parameter.display_name @@ -344,11 +352,12 @@ def _find_reachable_pages_rec_helper( ): # This page has an unfilled parameter if (params["limit_intent_to_initial"] - and not params["is_initial"] + and not is_initial ): return + # TODO: Just route.intent? if hasattr(route, "intent") and route.intent != "": - if params["limit_intent_to_initial"] and not params["is_initial"]: + if params["limit_intent_to_initial"] and not is_initial: # Don't continue on this path return params["intent_route_count"] += 1 @@ -360,6 +369,7 @@ def _find_reachable_pages_rec_helper( logging.info(page.display_name, "->", page_name) # Move to this page (this is also the recursion limiting step # to prevent infinite loops) + # TODO: Condition can be simplified if ( page_name not in params["reachable"] or (page_name in params["reachable"] @@ -380,6 +390,7 @@ def _find_reachable_pages_rec_helper( if params["verbose"]: logging.info(page.display_name, "-> START PAGE") page_name = "Start" + # TODO: Condition can be simplified if (page_name not in params["reachable"] or (page_name in params["reachable"] and params["intent_route_count"] @@ -431,14 +442,18 @@ def _get_new_presets(self, presets, page, route): """Gets parameter presets that have been added on a given route. 
""" new_presets = presets.copy() + # TODO: Change to check isinstance(page, DFCXPage) if hasattr(page, "entry_fulfillment"): + # TODO: Unnecessary if hasattr(page.entry_fulfillment, "set_parameter_actions"): for ( param_preset ) in page.entry_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value + # TODO: Combine into previous condition, since again, only DFCXPages have forms if hasattr(page, "form"): for parameter in page.form.parameters: + # TODO: Probably unnecessary if (hasattr(parameter, "fill_behavior") and hasattr( parameter.fill_behavior, @@ -454,12 +469,14 @@ def _get_new_presets(self, presets, page, route): new_presets[ param_preset.parameter ] = param_preset.value + # TODO: Probably unnecessary? if hasattr(route, "trigger_fulfillment"): if hasattr(route.trigger_fulfillment, "set_parameter_actions"): for ( param_preset ) in route.trigger_fulfillment.set_parameter_actions: new_presets[param_preset.parameter] = param_preset.value + # TODO: Just use route.intent? if hasattr(route, "intent") and route.intent != "": # Check the entities annotated on this intent intent_name = self._intents_map[route.intent] @@ -473,82 +490,108 @@ def _get_new_presets(self, presets, page, route): def _find_reachable_pages_rec( self, page: Union[DFCXPage, DFCXFlow], - params: Dict + params: Dict, + # Having a default of False is absolutely critical + is_initial: bool = False ) -> None: """Recursive function to find reachable pages within a given flow, starting at a particular page. Other parameters here are used for - more general traversal, but not currently used.""" + more general traversal options.""" + # TODO: Is this used? 
if not params["flow_name"]: params["flow_name"] = self._flows_map[params["flow_id"]] + # TODO: Change to check isinstance(page, DFCXPage) if hasattr(page, "form") and page.form: # if getattr(page, "form", None): for parameter in page.form.parameters: self._process_form_parameter_for_reachable_pages( page, parameter, - params + params, + is_initial=is_initial ) for event_handler in page.event_handlers: - if params["limit_intent_to_initial"] and not params["is_initial"]: + if params["limit_intent_to_initial"] and not is_initial: continue + # TODO: Pretty sure this is always true, but the idea was to not continue if there is no transition if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper(page, event_handler, - params) + self._find_reachable_pages_rec_helper(page, + event_handler, + params, + is_initial=is_initial) for route in page.transition_routes: - self._find_reachable_pages_rec_helper(page, route, params) + self._find_reachable_pages_rec_helper(page, + route, + params, + is_initial=is_initial) if params["include_groups"]: for route_group in page.transition_route_groups: - # TODO: Need to map by flow for route in self._route_group_data[params["flow_id"]][ route_group ].transition_routes: - self._find_reachable_pages_rec_helper(page, route, params) + self._find_reachable_pages_rec_helper(page, + route, + params, + is_initial=is_initial + ) # Start page routes and route groups are also accessible from this page if ( params["include_start_page_routes"] and page.display_name != params["flow_name"] - and (not params["limit_intent_to_initial"] or params["is_initial"]) + and (not params["limit_intent_to_initial"] or is_initial) ): - self._process_start_page_routes_for_reachable_pages(params) + self._process_start_page_routes_for_reachable_pages(params, + is_initial=is_initial) def _process_form_parameter_for_reachable_pages( self, page: Union[DFCXPage, DFCXFlow], parameter, # TODO: Data type for 
DFCX Parameter - params: Dict + params: Dict, + is_initial: bool = False ) -> None: for event_handler in parameter.fill_behavior.reprompt_event_handlers: - if params["limit_intent_to_initial"] and not params["is_initial"]: + if params["limit_intent_to_initial"] and not is_initial: continue + # TODO: Pretty sure this is always true, but the idea was to not continue if there is no transition if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper(page, event_handler, - params) + self._find_reachable_pages_rec_helper(page, + event_handler, + params, + is_initial=is_initial) def _process_start_page_routes_for_reachable_pages( self, - params: Dict + params: Dict, + is_initial: bool = False ): page = self._flow_data[params["flow_id"]] for event_handler in page.event_handlers: + # TODO: Pretty sure this is always true, but the idea was to not continue if there is no transition if hasattr(event_handler, "target_page") or hasattr( event_handler, "target_flow" ): - self._find_reachable_pages_rec_helper(page, event_handler, - params) + self._find_reachable_pages_rec_helper(page, + event_handler, + params, + is_initial=is_initial) for route in page.transition_routes: + # TODO: Just use route.intent? if hasattr(route, "intent") and route.intent != "": - self._find_reachable_pages_rec_helper(page, route, params) + self._find_reachable_pages_rec_helper( + page, route, params, is_initial=is_initial) if params["include_groups"]: for route_group in page.transition_route_groups: for route in self._route_group_data[params["flow_id"]][ route_group ].transition_routes: + # TODO: Just use route.intent? 
if hasattr(route, "intent") and route.intent != "": - self._find_reachable_pages_rec_helper(page, route, - params) + self._find_reachable_pages_rec_helper( + page, route, params, is_initial=is_initial) def find_reachable_pages( self, @@ -557,8 +600,6 @@ def find_reachable_pages( intent_route_limit: Optional[int] = None, include_groups: bool = True, include_start_page_routes: bool = True, - limit_intent_to_initial: bool = False, - is_initial: bool = True, include_meta: bool = False, verbose: bool = False, ) -> List[str]: @@ -567,22 +608,23 @@ def find_reachable_pages( flow_name must be used. Args: - flow_name: The display name of the flow + flow_name: The display name of the flow. from_page: (Optional) The page to start from. If left blank, it will - start on the Start Page - intent_route_limit: (Optional) Default None + start on the Start Page of the given flow. + intent_route_limit: (Optional) Default None. The maximum number of + intent routes to take. This can be used to answer questions like + "which pages can I reach within N turns, starting at this page?" include_groups: (Optional) If true, intents from transition route groups will be included, but only if they are actually referenced - on some page - include_start_page_routes: (Optional) Default true - limit_intent_to_initial: (Optional) Default False. If true, only - take intent routes on the initial page, rather than on any page - in the traversal. - is_initial: (Optional) Default True + on each given page in the traversal. + include_start_page_routes: (Optional) Default true. If true, intent + routes on the start page are always considered in scope. This is + how DFCX normally behaves. include_meta: (Optional) Default False. If true, includes special - transition targets like End Session, End Flow, etc. - verbose: (Optional) If true, print debug information about - route traversal + transition targets like End Session, End Flow, etc. as if they + are actual pages. 
+ verbose: (Optional) If true, prints debug information about + route traversal. Returns: The list of reachable pages in this flow @@ -613,12 +655,15 @@ def find_reachable_pages( "intent_route_count": 0, "include_groups": include_groups, "include_start_page_routes": include_start_page_routes, - "limit_intent_to_initial": limit_intent_to_initial, - "is_initial": is_initial, + "limit_intent_to_initial": False, + # This can't be stored here unless I want to add a lot of complex + # conditions to change it to False and back depending on the level + # of recursion + #"is_initial": True, "include_meta": include_meta, "verbose": verbose } - self._find_reachable_pages_rec(page_data, params) + self._find_reachable_pages_rec(page_data, params, is_initial=True) return reachable def find_unreachable_pages( @@ -670,9 +715,9 @@ def find_all_reachable_pages( """ flow_names = [] page_names = [] - for flow_id, flow_name in self._flows_map.items(): + for flow_name in self._flows_map_rev: reachable = self.find_reachable_pages( - flow_id=flow_id, + flow_name=flow_name, include_groups=include_groups, verbose=verbose ) @@ -698,9 +743,9 @@ def find_all_unreachable_pages( """ flow_names = [] page_names = [] - for flow_id, flow_name in self._flows_map.items(): + for flow_name in self._flows_map_rev: unreachable = self.find_unreachable_pages( - flow_id=flow_id, + flow_name=flow_name, include_groups=include_groups, verbose=verbose ) From d61af59dd106934fca66b9e9915200bf737c3505 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 9 Feb 2023 19:16:34 +0000 Subject: [PATCH 086/151] Optimize API calls --- src/dfcx_scrapi/tools/agent_checker_util.py | 61 ++++++++++++--------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 1b1a280e..5e9513fc 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -111,38 +111,45 @@ def __init__( 
self._route_groups = TransitionRouteGroups( creds=self.creds, agent_id=self.agent_id ) - - # Generate maps - self._intents_map = self._intents.get_intents_map(agent_id=self.agent_id) - time.sleep(delay) - self._flows_map = self._flows.get_flows_map(agent_id=self.agent_id) - time.sleep(delay) - self._flows_map_rev = self._flows.get_flows_map( - agent_id=self.agent_id, reverse=True - ) - time.sleep(delay) - self._pages_map = {} - self._pages_map_rev = {} - self._route_groups_map = {} - for fid in self._flows_map.keys(): - self._pages_map[fid] = self._pages.get_pages_map(flow_id=fid) - time.sleep(delay) - self._pages_map_rev[fid] = self._pages.get_pages_map( - flow_id=fid, reverse=True - ) - time.sleep(delay) - self._route_groups_map[fid] = self._route_groups.get_route_groups_map( - flow_id=fid - ) - time.sleep(delay) - - # Get intent, flow, and page data + # Intent data (1 API call) self._intent_data = self._intents.list_intents(agent_id=self.agent_id) - time.sleep(delay) + # Intents map (0 API calls) + self._intents_map = { + intent.name: intent.display_name for intent in self._intent_data + } + + # Flow data (1 API call) self._flow_data = self._get_all_flow_data(delay) + # Flows maps (0 API calls) + self._flows_map = { + flow.name: flow.display_name for flow in self._flow_data + } + self._flows_map_rev = { + flow.display_name: flow.name for flow in self._flow_data + } + + # Page data (len(flows) API calls) self._page_data = self._get_all_page_data(delay) + + # Route group data (len(flows) API calls) self._route_group_data = self._get_all_route_group_data(delay) + + # Pages and route groups maps (0 API calls) + self._pages_map = {} + self._pages_map_rev = {} + self._route_groups_map = {} + for fid in self._flows_map.keys(): + self._pages_map[fid] = { + page.name: page.display_name for page in self._page_data[fid] + } + self._pages_map_rev[fid] = { + page.display_name: page.name for page in self._page_data[fid] + } + self._route_groups_map[fid] = { + rg.name: 
rg.display_name for rg in self._route_group_data[fid] + } + # Total API calls: 2*len(flows) + 2 def _get_all_flow_data(self, delay): flow_list = self._flows.list_flows(self.agent_id) From 27cdf8dee0432a99992bfcad920865fd69c044de Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 9 Feb 2023 20:17:10 +0000 Subject: [PATCH 087/151] Clean up conditions and fix map init bug --- src/dfcx_scrapi/tools/agent_checker_util.py | 130 ++++++-------------- 1 file changed, 39 insertions(+), 91 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 5e9513fc..6b960ad3 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -84,13 +84,10 @@ def __init__( ): """ Args: + agent_id (required): The agent ID delay (optional): The time in seconds to wait between CX API calls, - if you need to limit the rate. - - TODO: Total number of API calls made when - initializing this class is currently 5 + 5*(number of flows). This - can be optimized down to 2 + 2*(number of flows) by manually - creating the maps after listing the objects. + if you need to limit the rate. The number of API calls used in this + initialization is 2*(number of flows) + 2. 
""" super().__init__( creds_path=creds_path, @@ -111,43 +108,43 @@ def __init__( self._route_groups = TransitionRouteGroups( creds=self.creds, agent_id=self.agent_id ) - + # Intent data (1 API call) self._intent_data = self._intents.list_intents(agent_id=self.agent_id) # Intents map (0 API calls) self._intents_map = { intent.name: intent.display_name for intent in self._intent_data } - + # Flow data (1 API call) self._flow_data = self._get_all_flow_data(delay) # Flows maps (0 API calls) self._flows_map = { - flow.name: flow.display_name for flow in self._flow_data + flow.name: flow.display_name for flow in self._flow_data.values() } self._flows_map_rev = { - flow.display_name: flow.name for flow in self._flow_data + flow.display_name: flow.name for flow in self._flow_data.values() } - + # Page data (len(flows) API calls) self._page_data = self._get_all_page_data(delay) - + # Route group data (len(flows) API calls) self._route_group_data = self._get_all_route_group_data(delay) - + # Pages and route groups maps (0 API calls) self._pages_map = {} self._pages_map_rev = {} self._route_groups_map = {} for fid in self._flows_map.keys(): self._pages_map[fid] = { - page.name: page.display_name for page in self._page_data[fid] + page.name: page.display_name for page in self._page_data[fid].values() } self._pages_map_rev[fid] = { - page.display_name: page.name for page in self._page_data[fid] + page.display_name: page.name for page in self._page_data[fid].values() } self._route_groups_map[fid] = { - rg.name: rg.display_name for rg in self._route_group_data[fid] + rg.name: rg.display_name for rg in self._route_group_data[fid].values() } # Total API calls: 2*len(flows) + 2 @@ -202,7 +199,6 @@ def _convert_page(self, page_id, flow_id): page_id_converted = str(flow_id) + "/pages/" + str(page_id) if flow_id in self._pages_map: return self._pages_map[flow_id].get(page_id_converted, "Start") - # TODO: Should throw error instead of returning default logging.info("Flow not found") # 
TODO: Should throw error, but returning this probably will anyway return "Invalid" @@ -335,20 +331,15 @@ def _find_reachable_pages_rec_helper( """Helper function for the recursion involved in finding reachable pages """ - # TODO: Is this even used? - if not params["flow_name"]: - params["flow_name"] = self._flows_map[params["flow_id"]] target_page = route.target_page target_flow = route.target_flow - # TODO: Can this just be route.intent? Or do event routes give an error from this? if ( - hasattr(route, "intent") and route.intent != "" + getattr(route, "intent", "") != "" and params["intent_route_limit"] and params["intent_route_count"] >= params["intent_route_limit"] ): return - # TODO: Could also change to a check on isinstance(page, DFCXPage) - if hasattr(page, "form") and page.form: + if isinstance(page, DFCXPage): for parameter in page.form.parameters: parameter_name = parameter.display_name # Need to also account for parameters being @@ -362,8 +353,7 @@ def _find_reachable_pages_rec_helper( and not is_initial ): return - # TODO: Just route.intent? 
- if hasattr(route, "intent") and route.intent != "": + if getattr(route, "intent", "") != "": if params["limit_intent_to_initial"] and not is_initial: # Don't continue on this path return @@ -376,7 +366,6 @@ def _find_reachable_pages_rec_helper( logging.info(page.display_name, "->", page_name) # Move to this page (this is also the recursion limiting step # to prevent infinite loops) - # TODO: Condition can be simplified if ( page_name not in params["reachable"] or (page_name in params["reachable"] @@ -397,7 +386,6 @@ def _find_reachable_pages_rec_helper( if params["verbose"]: logging.info(page.display_name, "-> START PAGE") page_name = "Start" - # TODO: Condition can be simplified if (page_name not in params["reachable"] or (page_name in params["reachable"] and params["intent_route_count"] @@ -410,7 +398,7 @@ def _find_reachable_pages_rec_helper( elif len(target_page) > 0: logging.info(page.display_name, "->", target_page) # This should not happen, and if it does it needs to be fixed - input() + logging.error(f"Page target not in list of pages: {target_page}") elif len(target_flow) > 0: flow_name = self._flows_map[route.target_flow] if params["verbose"]: @@ -432,7 +420,8 @@ def _find_reachable_pages_rec_helper( ] = params["intent_route_count"] else: if params["verbose"]: - logging.info(page.display_name, "->", route.target_flow, "(empty)") + logging.info(page.display_name, "->", + route.target_flow, "(empty)") page_name = page.display_name if ( page_name in params["reachable"] @@ -449,49 +438,21 @@ def _get_new_presets(self, presets, page, route): """Gets parameter presets that have been added on a given route. 
""" new_presets = presets.copy() - # TODO: Change to check isinstance(page, DFCXPage) - if hasattr(page, "entry_fulfillment"): - # TODO: Unnecessary - if hasattr(page.entry_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in page.entry_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - # TODO: Combine into previous condition, since again, only DFCXPages have forms - if hasattr(page, "form"): + if isinstance(page, DFCXPage): + for preset in page.entry_fulfillment.set_parameter_actions: + new_presets[preset.parameter] = preset.value for parameter in page.form.parameters: - # TODO: Probably unnecessary - if (hasattr(parameter, "fill_behavior") - and hasattr( - parameter.fill_behavior, - "initial_prompt_fulfillment", - ) - and hasattr( - parameter.fill_behavior.initial_prompt_fulfillment, - "set_parameter_actions", - ) - ): - ipf = parameter.fill_behavior.initial_prompt_fulfillment - for param_preset in ipf.set_parameter_actions: - new_presets[ - param_preset.parameter - ] = param_preset.value - # TODO: Probably unnecessary? - if hasattr(route, "trigger_fulfillment"): - if hasattr(route.trigger_fulfillment, "set_parameter_actions"): - for ( - param_preset - ) in route.trigger_fulfillment.set_parameter_actions: - new_presets[param_preset.parameter] = param_preset.value - # TODO: Just use route.intent? 
- if hasattr(route, "intent") and route.intent != "": + ipf = parameter.fill_behavior.initial_prompt_fulfillment + for preset in ipf.set_parameter_actions: + new_presets[preset.parameter] = preset.value + for preset in route.trigger_fulfillment.set_parameter_actions: + new_presets[preset.parameter] = preset.value + if getattr(route, "intent", "") != "": # Check the entities annotated on this intent intent_name = self._intents_map[route.intent] intent_params = self._get_intent_parameters(intent_name) for param in intent_params: - new_presets[ - param.id - ] = f"(potentially set by {intent_name})" + new_presets[param.id] = f"(potentially set by {intent_name})" return new_presets def _find_reachable_pages_rec( @@ -504,25 +465,18 @@ def _find_reachable_pages_rec( """Recursive function to find reachable pages within a given flow, starting at a particular page. Other parameters here are used for more general traversal options.""" - # TODO: Is this used? - if not params["flow_name"]: - params["flow_name"] = self._flows_map[params["flow_id"]] - # TODO: Change to check isinstance(page, DFCXPage) - if hasattr(page, "form") and page.form: # if getattr(page, "form", None): + if isinstance(page, DFCXPage): for parameter in page.form.parameters: self._process_form_parameter_for_reachable_pages( page, parameter, params, - is_initial=is_initial - ) + is_initial=is_initial) for event_handler in page.event_handlers: if params["limit_intent_to_initial"] and not is_initial: continue - # TODO: Pretty sure this is always true, but the idea was to not continue if there is no transition - if hasattr(event_handler, "target_page") or hasattr( - event_handler, "target_flow" - ): + if (event_handler.target_page != "" + or event_handler.target_flow != ""): self._find_reachable_pages_rec_helper(page, event_handler, params, @@ -561,10 +515,8 @@ def _process_form_parameter_for_reachable_pages( for event_handler in parameter.fill_behavior.reprompt_event_handlers: if 
params["limit_intent_to_initial"] and not is_initial: continue - # TODO: Pretty sure this is always true, but the idea was to not continue if there is no transition - if hasattr(event_handler, "target_page") or hasattr( - event_handler, "target_flow" - ): + if (event_handler.target_page != "" + or event_handler.target_flow != ""): self._find_reachable_pages_rec_helper(page, event_handler, params, @@ -577,17 +529,14 @@ def _process_start_page_routes_for_reachable_pages( ): page = self._flow_data[params["flow_id"]] for event_handler in page.event_handlers: - # TODO: Pretty sure this is always true, but the idea was to not continue if there is no transition - if hasattr(event_handler, "target_page") or hasattr( - event_handler, "target_flow" - ): + if (event_handler.target_page != "" + or event_handler.target_flow != ""): self._find_reachable_pages_rec_helper(page, event_handler, params, is_initial=is_initial) for route in page.transition_routes: - # TODO: Just use route.intent? - if hasattr(route, "intent") and route.intent != "": + if route.intent: self._find_reachable_pages_rec_helper( page, route, params, is_initial=is_initial) if params["include_groups"]: @@ -595,8 +544,7 @@ def _process_start_page_routes_for_reachable_pages( for route in self._route_group_data[params["flow_id"]][ route_group ].transition_routes: - # TODO: Just use route.intent? 
- if hasattr(route, "intent") and route.intent != "": + if route.intent: self._find_reachable_pages_rec_helper( page, route, params, is_initial=is_initial) From 0e08b8080112b1d0eee0af20a9f9bf5711e60977 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 9 Feb 2023 20:20:00 +0000 Subject: [PATCH 088/151] Lint fixes --- src/dfcx_scrapi/tools/agent_checker_util.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 6b960ad3..ebceac80 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -138,13 +138,16 @@ def __init__( self._route_groups_map = {} for fid in self._flows_map.keys(): self._pages_map[fid] = { - page.name: page.display_name for page in self._page_data[fid].values() + page.name: page.display_name + for page in self._page_data[fid].values() } self._pages_map_rev[fid] = { - page.display_name: page.name for page in self._page_data[fid].values() + page.display_name: page.name + for page in self._page_data[fid].values() } self._route_groups_map[fid] = { - rg.name: rg.display_name for rg in self._route_group_data[fid].values() + rg.name: rg.display_name + for rg in self._route_group_data[fid].values() } # Total API calls: 2*len(flows) + 2 @@ -267,7 +270,8 @@ def _continue_page_recursion( params["conversation_path"].append(page_name) if params["verbose"]: - logging.info(params["conversation_path"], params["intent_route_count"]) + logging.info(params["conversation_path"], + params["intent_route_count"]) old_presets = params["presets"].copy() new_presets = self._get_new_presets(params["presets"], page, route) @@ -420,7 +424,7 @@ def _find_reachable_pages_rec_helper( ] = params["intent_route_count"] else: if params["verbose"]: - logging.info(page.display_name, "->", + logging.info(page.display_name, "->", route.target_flow, "(empty)") page_name = page.display_name if ( @@ -475,7 
+479,7 @@ def _find_reachable_pages_rec( for event_handler in page.event_handlers: if params["limit_intent_to_initial"] and not is_initial: continue - if (event_handler.target_page != "" + if (event_handler.target_page != "" or event_handler.target_flow != ""): self._find_reachable_pages_rec_helper(page, event_handler, @@ -515,7 +519,7 @@ def _process_form_parameter_for_reachable_pages( for event_handler in parameter.fill_behavior.reprompt_event_handlers: if params["limit_intent_to_initial"] and not is_initial: continue - if (event_handler.target_page != "" + if (event_handler.target_page != "" or event_handler.target_flow != ""): self._find_reachable_pages_rec_helper(page, event_handler, @@ -529,7 +533,7 @@ def _process_start_page_routes_for_reachable_pages( ): page = self._flow_data[params["flow_id"]] for event_handler in page.event_handlers: - if (event_handler.target_page != "" + if (event_handler.target_page != "" or event_handler.target_flow != ""): self._find_reachable_pages_rec_helper(page, event_handler, From 657f7ee98aecefb4e9a08e572995695ef6de6fc1 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 9 Feb 2023 21:30:31 +0000 Subject: [PATCH 089/151] Clean up test cases and create process_test_cases function --- src/dfcx_scrapi/core/test_cases.py | 141 ++++++++++++++--------------- 1 file changed, 68 insertions(+), 73 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index 0cfb04a4..e503ccdb 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -473,20 +473,20 @@ def _convert_flow(self, flow_id, flows_map): # Note that flow id includes agent, normally... 
def _convert_page(self, page_id, flow_id, pages_map): """Gets a page display name from a page and flow ID""" - if page_id == "END_SESSION": - return "End Session" - elif page_id == "END_FLOW": - return "End Flow" - elif page_id == "START_PAGE": - return "Start" + special_page_dict = { + "END_SESSION": "End Session", + "END_FLOW": "End Flow", + "START_PAGE": "Start" + } + if page_id in special_page_dict: + return special_page_dict[page_id] + page_id_converted = str(flow_id) + "/pages/" + str(page_id) if flow_id in pages_map: - if page_id_converted in pages_map[flow_id]: - return pages_map[flow_id][page_id_converted] - else: - # TODO: Should throw error instead of returning default - return "Start" - print("Flow not found") + # page_id is sometimes left empty for the test case if it starts + # on the start page + return pages_map[flow_id].get(page_id_converted, "Start") + logging.info(f"Flow not found: {flow_id}") # TODO: Should throw error, but returning this probably will anyway return "Invalid" @@ -517,70 +517,24 @@ def get_test_case_results_df(self, agent_id=None, retest_all=False): pages_map[flow_id] = dfcx_pages.get_pages_map(flow_id=flow_id) test_case_results = self.list_test_cases(self.agent_id) - retest = [] - retest_names = [] - - display_names = [] - ids = [] - short_ids = [] - tags = [] - creation_times = [] - flows = [] - pages = [] - test_results = [] - test_times = [] - passed = [] - - for response in test_case_results: - # Collect untested cases to be retested - # (or all if retest_all is True) - if ( - retest_all - or str(response.last_test_result.test_result) - == "TestResult.TEST_RESULT_UNSPECIFIED" - ): - retest.append(response.name) - retest_names.append(response.display_name) - # Collect additional information for dataframe - display_names.append(response.display_name) - ids.append(response.name) - short_ids.append(response.name.split("/")[-1]) - tags.append(",".join(response.tags)) - creation_times.append(response.creation_time) - 
flows.append(self._convert_flow(response.test_config.flow, - flows_map) - ) - pages.append( - self._convert_page(response.test_config.page, - response.test_config.flow, - pages_map) - ) - test_results.append(str(response.last_test_result.test_result)) - test_times.append(response.last_test_result.test_time) - passed.append( - str(response.last_test_result.test_result)=="TestResult.PASSED" - ) + retest_ids = [] + test_case_rows = [] + + for test_case in test_case_results: + row = self.process_test_case(test_case, flows_map, pages_map) + test_case_rows.append(row) + test_result = str(test_case.last_test_result.test_result) + untested_str = "TestResult.TEST_RESULT_UNSPECIFIED" + if retest_all or test_result == untested_str: + retest_ids.append(test_case.name) # Create dataframe - test_case_df = pd.DataFrame( - { - "display_name": display_names, - "id": ids, - "short_id": short_ids, - "tags": tags, - "creation_time": creation_times, - "start_flow": flows, - "start_page": pages, - "test_result": test_results, - "passed": passed, - "test_time": test_times, - } - ) + test_case_df = pd.concat(test_case_rows) # Retest any that haven't been run yet - print("To retest:", len(retest)) - if len(retest) > 0: - response = self.batch_run_test_cases(retest, self.agent_id) + print("To retest:", len(retest_ids)) + if len(retest_ids) > 0: + response = self.batch_run_test_cases(retest_ids, self.agent_id) for result in response.results: # Results may not be in the same order as they went in # Process the name a bit to remove the /results/id part @@ -602,6 +556,47 @@ def get_test_case_results_df(self, agent_id=None, retest_all=False): str(result.test_result) == "TestResult.PASSED" ) - # This column is redundant, since we have passed (bool) + # This column is redundant, since we have "passed" (bool) test_case_df = test_case_df.drop(columns=["test_result"]) return test_case_df + + def process_test_case(self, test_case, flows_map, pages_map): + """Takes a response from list_test_cases and 
returns a single row + dataframe of the test case result. + + Args: + test_case: The test case response + flows_map: A dictionary mapping flow IDs to flow display names + pages_map: A dictionary with keys as flow IDs and values as + dictionaries mapping page IDs to page display names for that flow + + Returns: A dataframe with columns: + display_name, id, short_id, tags, creation_time, + start_flow, start_page, test_result, passed, test_time + """ + display_name = test_case.display_name + test_case_id = test_case.name + short_id = test_case.name.split("/")[-1] + tags = ",".join(test_case.tags) + creation_time = test_case.creation_time + flow = self._convert_flow(test_case.test_config.flow, flows_map) + page = self._convert_page(test_case.test_config.page, + test_case.test_config.flow, pages_map) + test_result = str(test_case.last_test_result.test_result) + passed_str = "TestResult.PASSED" + passed = str(test_case.last_test_result.test_result) == passed_str + test_time = test_case.last_test_result.test_time + return pd.DataFrame( + { + "display_name": [display_name], + "id": [test_case_id], + "short_id": [short_id], + "tags": [tags], + "creation_time": [creation_time], + "start_flow": [flow], + "start_page": [page], + "test_result": [test_result], + "passed": [passed], + "test_time": [test_time] + } + ) From ba5c20e1544d939fd6437b56ccb0d906576e6ba2 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 9 Feb 2023 21:51:35 +0000 Subject: [PATCH 090/151] Change exception types --- src/dfcx_scrapi/tools/agent_checker_util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index ebceac80..1884587d 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -98,7 +98,7 @@ def __init__( self.agent_id = agent_id if not self.agent_id: - raise Exception("agent_id parameter is required") + raise 
ValueError("agent_id parameter is required") self._intents = Intents(creds=self.creds, agent_id=self.agent_id) self._entities = EntityTypes(creds=self.creds, agent_id=self.agent_id) @@ -236,7 +236,7 @@ def _get_page( # Look up flow ID flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: - raise Exception(f"Flow not found: {flow_name}") + raise KeyError(f"Flow not found: {flow_name}") # Now that flow_id is set, look up the page # Special case for the start page if page_name == "Start": @@ -590,7 +590,7 @@ def find_reachable_pages( """ flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: - raise Exception(f"Flow not found: {flow_name}") + raise KeyError(f"Flow not found: {flow_name}") # Start at the start page... reachable = [from_page] @@ -649,7 +649,7 @@ def find_unreachable_pages( """ flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: - raise Exception(f"Flow not found: {flow_name}") + raise KeyError(f"Flow not found: {flow_name}") reachable = self.find_reachable_pages( flow_name, include_groups=include_groups, verbose=verbose @@ -775,7 +775,7 @@ def _get_page_intents( flow_id = self._flows_map_rev.get(flow_name, None) if not flow_id: - raise Exception(f"Flow not found: {flow_name}") + raise KeyError(f"Flow not found: {flow_name}") # Get intents in transition route groups if include_groups: From b11dc4af5a28d3eb8a1ff7f35107738336c4221e Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Thu, 9 Feb 2023 22:07:35 +0000 Subject: [PATCH 091/151] Simplify convert_page for v1.6 and remove unused functions in agent_checker_util --- src/dfcx_scrapi/core/test_cases.py | 12 ++------ src/dfcx_scrapi/tools/agent_checker_util.py | 32 --------------------- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index e503ccdb..d88789e1 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -473,22 +473,14 @@ def 
_convert_flow(self, flow_id, flows_map): # Note that flow id includes agent, normally... def _convert_page(self, page_id, flow_id, pages_map): """Gets a page display name from a page and flow ID""" - special_page_dict = { - "END_SESSION": "End Session", - "END_FLOW": "End Flow", - "START_PAGE": "Start" - } - if page_id in special_page_dict: - return special_page_dict[page_id] - page_id_converted = str(flow_id) + "/pages/" + str(page_id) if flow_id in pages_map: # page_id is sometimes left empty for the test case if it starts # on the start page - return pages_map[flow_id].get(page_id_converted, "Start") + return pages_map[flow_id].get(page_id_converted, "START_PAGE") logging.info(f"Flow not found: {flow_id}") # TODO: Should throw error, but returning this probably will anyway - return "Invalid" + return "INVALID" def get_test_case_results_df(self, agent_id=None, retest_all=False): """Gets the test case results for this agent, diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 1884587d..0a8a9cc3 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -174,38 +174,6 @@ def _get_all_route_group_data(self, delay): time.sleep(delay) return route_group_data - # Conversion utilities - # (Not currently used) - - def _convert_intent(self, intent_id): - """Gets an intent display name from an intent ID""" - intent_id_converted = str(self.agent_id) + "/intents/" + str(intent_id) - return self._intents_map.get(intent_id_converted, "") - - def _convert_flow(self, flow_id): - """Gets a flow display name from a flow ID""" - if flow_id.split("/")[-1] == "-": - return "" - # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) - return self._flows_map.get(flow_id, "Default Start Flow") - # TODO: Should throw error instead of returning default - - # Note that flow id includes agent, normally... 
- def _convert_page(self, page_id, flow_id): - """Gets a page display name from a page and flow ID""" - if page_id == "END_SESSION": - return "End Session" - elif page_id == "END_FLOW": - return "End Flow" - elif page_id == "START_PAGE": - return "Start" - page_id_converted = str(flow_id) + "/pages/" + str(page_id) - if flow_id in self._pages_map: - return self._pages_map[flow_id].get(page_id_converted, "Start") - logging.info("Flow not found") - # TODO: Should throw error, but returning this probably will anyway - return "Invalid" - def _get_intent_parameters(self, intent_name): """Gets the parameters for a particular intent, by display name""" for intent in self._intent_data: From eee3d99c79068511f44975c4bbd9e5dc212f7e7e Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 10 Jul 2023 15:38:46 +0000 Subject: [PATCH 092/151] Add agent checker util unit tests --- tests/test_agent_checker_util.py | 116 +++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 tests/test_agent_checker_util.py diff --git a/tests/test_agent_checker_util.py b/tests/test_agent_checker_util.py new file mode 100644 index 00000000..ec26b547 --- /dev/null +++ b/tests/test_agent_checker_util.py @@ -0,0 +1,116 @@ +"""Unit Tests for Agent Checker Util Class""" +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import pytest +import pandas as pd +from src.dfcx_scrapi.tools import agent_checker_util + +# logging config +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + +# Note: Each instantiation of the AgentCheckerUtil class with a particular +# agent ID will make 2*num_flows + 2 API calls. Recommended to test in an +# agent which contains only two small flows. + +@pytest.mark.unit +def test_instantiate_agent_checker_util(creds, agent_id): + scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + assert isinstance(scrapi_checker, agent_checker_util.AgentCheckerUtil) + assert scrapi_checker.creds_path == creds + +@pytest.mark.unit +def test_find_all_reachable_pages(creds, agent_id): + scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + df = scrapi_checker.find_all_reachable_pages() + assert isinstance(df, pd.DataFrame) + assert set(df.columns) == { + "flow_name", + "page_name"} + logging.info("All reachable pages:\n%s", df.to_string()) + +@pytest.mark.unit +def test_find_all_unreachable_pages(creds, agent_id): + scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + df = scrapi_checker.find_all_unreachable_pages() + assert isinstance(df, pd.DataFrame) + assert set(df.columns) == { + "flow_name", + "page_name"} + logging.info("All unreachable pages:\n%s", df.to_string()) + +@pytest.mark.unit +def test_find_all_reachable_intents(creds, agent_id): + scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + df = scrapi_checker.find_all_reachable_intents() + assert isinstance(df, pd.DataFrame) + assert set(df.columns) == { + "intent", + "flows"} + logging.info("All reachable intents:\n%s", df.to_string()) + +@pytest.mark.unit +def test_find_all_unreachable_intents(creds, agent_id): + scrapi_checker = 
agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + intents = scrapi_checker.find_all_unreachable_intents() + assert isinstance(intents, list) + logging.info(f"All unreachable intents: {str(intents)}") + +@pytest.mark.unit +def test_find_reachable_intents(creds, agent_id, flow_name): + scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + intents = scrapi_checker.find_reachable_intents(flow_name) + assert isinstance(intents, list) + logging.info(f"Reachable intents for flow {flow_name}: {str(intents)}") + +@pytest.mark.unit +def test_find_reachable_pages(creds, agent_id, flow_name, page_name): + scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + page_names = scrapi_checker.find_reachable_pages(flow_name=flow_name, + from_page=page_name, + intent_route_limit=None) + assert isinstance(page_names, list) + logging.info(f"Reachable pages for flow {flow_name} starting from \ + {page_name}: {str(page_names)}") + +@pytest.mark.unit +def test_find_one_turn_reachable_pages(creds, agent_id, flow_name, page_name): + scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + page_names = scrapi_checker.find_reachable_pages(flow_name=flow_name, + from_page=page_name, + intent_route_limit=1) + assert isinstance(page_names, list) + logging.info(f"Reachable pages for flow {flow_name} starting from \ + {page_name} in one turn: {str(page_names)}") + +@pytest.mark.unit +def test_find_unreachable_pages(creds, agent_id, flow_name): + scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, + agent_id=agent_id) + page_names = scrapi_checker.find_unreachable_pages(flow_name=flow_name) + assert isinstance(page_names, list) + logging.info(f"Unreachable pages for flow {flow_name}: {str(page_names)}") From 96916f1a34aa2d05898d37a41934914ecd32b4e4 Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 10 Jul 2023 18:03:28 +0000 Subject: 
[PATCH 093/151] Return page list instead of set; update test fixtures --- src/dfcx_scrapi/tools/agent_checker_util.py | 2 +- tests/conftest.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 0a8a9cc3..64581f1b 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -827,4 +827,4 @@ def find_all_unreachable_intents(self) -> List[str]: include_groups=True) all_reachable_intents.update(set(flow_intents)) all_intents = {intent.display_name for intent in self._intent_data} - return all_intents - all_reachable_intents + return list(all_intents - all_reachable_intents) diff --git a/tests/conftest.py b/tests/conftest.py index 70b7e65d..70245224 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,7 @@ # limitations under the License. import pytest +import json def pytest_addoption(parser): """Method to add option for creds in tests.""" @@ -22,6 +23,8 @@ def pytest_addoption(parser): parser.addoption("--project_id", action="store") parser.addoption("--gcs_bucket", action="store") parser.addoption("--agent_id", action="store") + parser.addoption("--flow_name", action="store") + parser.addoption("--page_name", action="store") @pytest.fixture(scope="session") @@ -42,3 +45,11 @@ def gcs_bucket(request): @pytest.fixture(scope="session") def agent_id(request): return request.config.getoption("agent_id") + +@pytest.fixture(scope="session") +def flow_name(request): + return request.config.getoption("flow_name") + +@pytest.fixture(scope="session") +def page_name(request): + return request.config.getoption("page_name") \ No newline at end of file From 51a95a4bc00ad5c93c517dd1fc995b758f9d102e Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 10 Jul 2023 18:05:16 +0000 Subject: [PATCH 094/151] Lint fixes --- tests/conftest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/tests/conftest.py b/tests/conftest.py index 70245224..f07da130 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,7 +15,6 @@ # limitations under the License. import pytest -import json def pytest_addoption(parser): """Method to add option for creds in tests.""" @@ -52,4 +51,4 @@ def flow_name(request): @pytest.fixture(scope="session") def page_name(request): - return request.config.getoption("page_name") \ No newline at end of file + return request.config.getoption("page_name") From 64707380c2d161858aee461b2e54d9693a7f83eb Mon Sep 17 00:00:00 2001 From: SeanScripts Date: Mon, 10 Jul 2023 18:33:09 +0000 Subject: [PATCH 095/151] Finish rebase and lint fixes --- src/dfcx_scrapi/core/test_cases.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index d88789e1..a492bcd1 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -22,13 +22,9 @@ from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 -<<<<<<< HEAD from dfcx_scrapi.core import scrapi_base -======= -from dfcx_scrapi.core.scrapi_base import ScrapiBase from dfcx_scrapi.core.flows import Flows from dfcx_scrapi.core.pages import Pages ->>>>>>> Move get_test_case_results_df to TestCases # logging config logging.basicConfig( From 601101f5930e9a6cc3317d4329ab63f1d3b4df4d Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 18 Aug 2023 19:59:08 -0500 Subject: [PATCH 096/151] fix: refactor to match import standards --- src/dfcx_scrapi/core/test_cases.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index a492bcd1..00adb229 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -23,8 +23,8 @@ from google.protobuf import field_mask_pb2 from dfcx_scrapi.core import scrapi_base -from dfcx_scrapi.core.flows import 
Flows -from dfcx_scrapi.core.pages import Pages +from dfcx_scrapi.core import flows +from dfcx_scrapi.core import pages # logging config logging.basicConfig( @@ -497,8 +497,8 @@ def get_test_case_results_df(self, agent_id=None, retest_all=False): if agent_id: self.agent_id = agent_id - dfcx_flows = Flows(creds=self.creds, agent_id=self.agent_id) - dfcx_pages = Pages(creds=self.creds) + dfcx_flows = flows.Flows(creds=self.creds, agent_id=self.agent_id) + dfcx_pages = pages.Pages(creds=self.creds) flows_map = dfcx_flows.get_flows_map(agent_id=self.agent_id) pages_map = {} for flow_id in flows_map.keys(): From f510f0bbe0779c76183c9045a74f11366ed635b1 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 18 Aug 2023 20:00:40 -0500 Subject: [PATCH 097/151] chore: resort code --- src/dfcx_scrapi/core/test_cases.py | 126 ++++++++++++++--------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index 00adb229..882a5b88 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -61,6 +61,69 @@ def __init__( self.test_case_id = test_case_id self.client_options = self._set_region(self.test_case_id) + def _convert_flow(self, flow_id, flows_map): + """Gets a flow display name from a flow ID""" + if flow_id.split("/")[-1] == "-": + return "" + # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) + if flow_id in flows_map: + return flows_map[flow_id] + # TODO: Should throw error instead of returning default + return "Default Start Flow" + + # Note that flow id includes agent, normally... 
+ def _convert_page(self, page_id, flow_id, pages_map): + """Gets a page display name from a page and flow ID""" + page_id_converted = str(flow_id) + "/pages/" + str(page_id) + if flow_id in pages_map: + # page_id is sometimes left empty for the test case if it starts + # on the start page + return pages_map[flow_id].get(page_id_converted, "START_PAGE") + logging.info(f"Flow not found: {flow_id}") + # TODO: Should throw error, but returning this probably will anyway + return "INVALID" + + def _process_test_case(self, test_case, flows_map, pages_map): + """Takes a response from list_test_cases and returns a single row + dataframe of the test case result. + + Args: + test_case: The test case response + flows_map: A dictionary mapping flow IDs to flow display names + pages_map: A dictionary with keys as flow IDs and values as + dictionaries mapping page IDs to page display names for that flow + + Returns: A dataframe with columns: + display_name, id, short_id, tags, creation_time, + start_flow, start_page, test_result, passed, test_time + """ + display_name = test_case.display_name + test_case_id = test_case.name + short_id = test_case.name.split("/")[-1] + tags = ",".join(test_case.tags) + creation_time = test_case.creation_time + flow = self._convert_flow(test_case.test_config.flow, flows_map) + page = self._convert_page(test_case.test_config.page, + test_case.test_config.flow, pages_map) + test_result = str(test_case.last_test_result.test_result) + passed_str = "TestResult.PASSED" + passed = str(test_case.last_test_result.test_result) == passed_str + test_time = test_case.last_test_result.test_time + return pd.DataFrame( + { + "display_name": [display_name], + "id": [test_case_id], + "short_id": [short_id], + "tags": [tags], + "creation_time": [creation_time], + "start_flow": [flow], + "start_page": [page], + "test_result": [test_result], + "passed": [passed], + "test_time": [test_time] + } + ) + @scrapi_base.api_call_counter_decorator def list_test_cases( self, 
agent_id: str = None, include_conversation_turns: bool = False @@ -456,28 +519,6 @@ def calculate_coverage(self, coverage_type: int, agent_id: str = None): response = client.calculate_coverage(request) return response - def _convert_flow(self, flow_id, flows_map): - """Gets a flow display name from a flow ID""" - if flow_id.split("/")[-1] == "-": - return "" - # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) - if flow_id in flows_map: - return flows_map[flow_id] - # TODO: Should throw error instead of returning default - return "Default Start Flow" - - # Note that flow id includes agent, normally... - def _convert_page(self, page_id, flow_id, pages_map): - """Gets a page display name from a page and flow ID""" - page_id_converted = str(flow_id) + "/pages/" + str(page_id) - if flow_id in pages_map: - # page_id is sometimes left empty for the test case if it starts - # on the start page - return pages_map[flow_id].get(page_id_converted, "START_PAGE") - logging.info(f"Flow not found: {flow_id}") - # TODO: Should throw error, but returning this probably will anyway - return "INVALID" - def get_test_case_results_df(self, agent_id=None, retest_all=False): """Gets the test case results for this agent, and generates a dataframe with their details. @@ -547,44 +588,3 @@ def get_test_case_results_df(self, agent_id=None, retest_all=False): # This column is redundant, since we have "passed" (bool) test_case_df = test_case_df.drop(columns=["test_result"]) return test_case_df - - def process_test_case(self, test_case, flows_map, pages_map): - """Takes a response from list_test_cases and returns a single row - dataframe of the test case result. 
- - Args: - test_case: The test case response - flows_map: A dictionary mapping flow IDs to flow display names - pages_map: A dictionary with keys as flow IDs and values as - dictionaries mapping page IDs to page display names for that flow - - Returns: A dataframe with columns: - display_name, id, short_id, tags, creation_time, - start_flow, start_page, test_result, passed, test_time - """ - display_name = test_case.display_name - test_case_id = test_case.name - short_id = test_case.name.split("/")[-1] - tags = ",".join(test_case.tags) - creation_time = test_case.creation_time - flow = self._convert_flow(test_case.test_config.flow, flows_map) - page = self._convert_page(test_case.test_config.page, - test_case.test_config.flow, pages_map) - test_result = str(test_case.last_test_result.test_result) - passed_str = "TestResult.PASSED" - passed = str(test_case.last_test_result.test_result) == passed_str - test_time = test_case.last_test_result.test_time - return pd.DataFrame( - { - "display_name": [display_name], - "id": [test_case_id], - "short_id": [short_id], - "tags": [tags], - "creation_time": [creation_time], - "start_flow": [flow], - "start_page": [page], - "test_result": [test_result], - "passed": [passed], - "test_time": [test_time] - } - ) From f98ddd3f53d9a47a7c34516c513d6d1d72c169d3 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 18 Aug 2023 22:06:37 -0500 Subject: [PATCH 098/151] fix: refactor process_test_case --- src/dfcx_scrapi/core/test_cases.py | 96 ++++++++++++++++-------------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index 882a5b88..42d2d3af 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -61,27 +61,42 @@ def __init__( self.test_case_id = test_case_id self.client_options = self._set_region(self.test_case_id) - def _convert_flow(self, flow_id, flows_map): - """Gets a flow display name from a flow ID""" - 
if flow_id.split("/")[-1] == "-": - return "" - # flow_id_converted = str(agent_id) + '/flows/' + str(flow_id) - if flow_id in flows_map: - return flows_map[flow_id] - # TODO: Should throw error instead of returning default - return "Default Start Flow" - - # Note that flow id includes agent, normally... - def _convert_page(self, page_id, flow_id, pages_map): - """Gets a page display name from a page and flow ID""" - page_id_converted = str(flow_id) + "/pages/" + str(page_id) - if flow_id in pages_map: - # page_id is sometimes left empty for the test case if it starts - # on the start page - return pages_map[flow_id].get(page_id_converted, "START_PAGE") - logging.info(f"Flow not found: {flow_id}") - # TODO: Should throw error, but returning this probably will anyway - return "INVALID" + def _convert_test_result(self, test_case: types.TestCase) -> bool: + """Converts the Enum result to a boolean.""" + if test_case.last_test_result.test_result == 1: + return True + else: + return False + + def _get_flow_id_from_test_config( + self, test_case: types.TestCase) -> str: + """Attempt to get the Flow ID from the Test Case Test Config.""" + if "flow" in test_case.test_config: + return test_case.test_config.flow + elif "page" in test_case.test_config: + return '/'.join(test_case.test_config.page.split("/")[:8]) + else: + agent_id = '/'.join(test_case.name.split('/')[:6]) + return f"{agent_id}/flows/00000000-0000-0000-0000-000000000000" + + def _get_page_id_from_test_config( + self, test_case: types.TestCase, flow_id: str) -> str: + """Attempt to get the Page ID from the Test Case Test Config.""" + if "page" in test_case.test_config: + return test_case.test_config.page + else: + return f"{flow_id}/pages/START_PAGE" + + def _get_page_display_name( + self, flow_id: str, page_id: str, + pages_map: Dict[str, Dict[str, str]]) -> str: + """Get the Page Display Name from the Pages Map based on the Page ID.""" + page_map = pages_map.get(flow_id, None) + page = "START_PAGE" + if 
page_map: + page = page_map.get(page_id, None) + + return page def _process_test_case(self, test_case, flows_map, pages_map): """Takes a response from list_test_cases and returns a single row @@ -94,33 +109,26 @@ def _process_test_case(self, test_case, flows_map, pages_map): dictionaries mapping page IDs to page display names for that flow Returns: A dataframe with columns: - display_name, id, short_id, tags, creation_time, - start_flow, start_page, test_result, passed, test_time + display_name, id, short_id, tags, creation_time, start_flow, + start_page, test_result, passed, test_time """ - display_name = test_case.display_name - test_case_id = test_case.name - short_id = test_case.name.split("/")[-1] - tags = ",".join(test_case.tags) - creation_time = test_case.creation_time - flow = self._convert_flow(test_case.test_config.flow, flows_map) - page = self._convert_page(test_case.test_config.page, - test_case.test_config.flow, pages_map) - test_result = str(test_case.last_test_result.test_result) - passed_str = "TestResult.PASSED" - passed = str(test_case.last_test_result.test_result) == passed_str - test_time = test_case.last_test_result.test_time + flow_id = self._get_flow_id_from_test_config(test_case) + page_id = self._get_page_id_from_test_config(test_case, flow_id) + page = self._get_page_display_name(flow_id, page_id, pages_map) + test_result = self._convert_test_result(test_case) + return pd.DataFrame( { - "display_name": [display_name], - "id": [test_case_id], - "short_id": [short_id], - "tags": [tags], - "creation_time": [creation_time], - "start_flow": [flow], + "display_name": [test_case.display_name], + "id": [test_case.name], + "short_id": [test_case.name.split("/")[-1]], + "tags": [",".join(test_case.tags)], + "creation_time": [test_case.creation_time], + "start_flow": [flows_map.get(flow_id, None)], "start_page": [page], "test_result": [test_result], - "passed": [passed], - "test_time": [test_time] + "passed": [test_result], + "test_time": 
[test_case.last_test_result.test_time] } ) @@ -550,7 +558,7 @@ def get_test_case_results_df(self, agent_id=None, retest_all=False): test_case_rows = [] for test_case in test_case_results: - row = self.process_test_case(test_case, flows_map, pages_map) + row = self._process_test_case(test_case, flows_map, pages_map) test_case_rows.append(row) test_result = str(test_case.last_test_result.test_result) untested_str = "TestResult.TEST_RESULT_UNSPECIFIED" From 1500f71890c92b2a47654f2273ba93c970daddd9 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 18 Aug 2023 22:37:57 -0500 Subject: [PATCH 099/151] fix: refactor get_test_case_results_df --- src/dfcx_scrapi/core/test_cases.py | 98 +++++++++++++++++------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index 42d2d3af..a59b6986 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -61,11 +61,22 @@ def __init__( self.test_case_id = test_case_id self.client_options = self._set_region(self.test_case_id) - def _convert_test_result(self, test_case: types.TestCase) -> bool: - """Converts the Enum result to a boolean.""" - if test_case.last_test_result.test_result == 1: + def _convert_test_result_to_string(self, test_case: types.TestCase) -> str: + """Converts the Enum result to a string.""" + if test_case.last_test_result.test_result == 0: + return "TEST_RESULT_UNSPECIFIED" + elif test_case.last_test_result.test_result == 1: + return "PASSED" + elif test_case.last_test_result.test_result == 2: + return "FAILED" + + def _convert_test_result_to_bool(self, test_case: types.TestCase) -> bool: + """Converts the String result to a boolean.""" + test_result = self._convert_test_result_to_string(test_case) + + if test_result == "PASSED": return True - else: + elif test_result == "FAILED": return False def _get_flow_id_from_test_config( @@ -115,7 +126,7 @@ def _process_test_case(self, test_case, 
flows_map, pages_map): flow_id = self._get_flow_id_from_test_config(test_case) page_id = self._get_page_id_from_test_config(test_case, flow_id) page = self._get_page_display_name(flow_id, page_id, pages_map) - test_result = self._convert_test_result(test_case) + test_result = self._convert_test_result_to_bool(test_case) return pd.DataFrame( { @@ -126,12 +137,40 @@ def _process_test_case(self, test_case, flows_map, pages_map): "creation_time": [test_case.creation_time], "start_flow": [flows_map.get(flow_id, None)], "start_page": [page], - "test_result": [test_result], + # "test_result": [test_result], "passed": [test_result], "test_time": [test_case.last_test_result.test_time] } ) + def _retest_cases( + self, test_case_df: pd.DataFrame, retest_ids: List[str] + ) -> pd.DataFrame: + print("To retest:", len(retest_ids)) + response = self.batch_run_test_cases(retest_ids, self.agent_id) + for result in response.results: + # Results may not be in the same order as they went in + # Process the name a bit to remove the /results/id part + tc_id_full = "/".join(result.name.split("/")[:-2]) + tc_id = tc_id_full.rsplit("/", maxsplit=1)[-1] + + # Update dataframe where id = tc_id_full + # row = test_case_df.loc[test_case_df['id']==tc_id_full] + test_case_df.loc[ + test_case_df["id"] == tc_id_full, "short_id" + ] = tc_id + # test_case_df.loc[ + # test_case_df["id"] == tc_id_full, "test_result" + # ] = str(result.test_result) + test_case_df.loc[ + test_case_df["id"] == tc_id_full, "test_time" + ] = result.test_time + test_case_df.loc[test_case_df["id"] == tc_id_full,"passed"] = ( + str(result.test_result) == "TestResult.PASSED" + ) + + return test_case_df + @scrapi_base.api_call_counter_decorator def list_test_cases( self, agent_id: str = None, include_conversation_turns: bool = False @@ -528,20 +567,21 @@ def calculate_coverage(self, coverage_type: int, agent_id: str = None): return response def get_test_case_results_df(self, agent_id=None, retest_all=False): - """Gets the test 
case results for this agent, - and generates a dataframe with their details. - Any tests without a result will be run in a batch. + """Convert Test Cases to Dataframe. + + Gets the test case results for this agent, and generates a dataframe + with their details. Any tests without a result will be run in a batch. Args: - agent_id: required only if not set when initializing this class + agent_id: The agent to create the test case for. Format: + `projects//locations//agents/` retest_all: if true, all test cases are re-run, regardless of whether or not they had a result Returns: DataFrame of test case results for this agent, with columns: - display_name, id, short_id (excluding agent ID), - tags (comma-separated string), creation_time, - start_flow, start_page, passed, test_time + display_name, id, short_id, tags, creation_time, start_flow, + start_page, passed, test_time """ if agent_id: self.agent_id = agent_id @@ -560,39 +600,15 @@ def get_test_case_results_df(self, agent_id=None, retest_all=False): for test_case in test_case_results: row = self._process_test_case(test_case, flows_map, pages_map) test_case_rows.append(row) - test_result = str(test_case.last_test_result.test_result) - untested_str = "TestResult.TEST_RESULT_UNSPECIFIED" - if retest_all or test_result == untested_str: + test_result = self._convert_test_result_to_string(test_case) + if retest_all or test_result == "TEST_RESULT_UNSPECIFIED": retest_ids.append(test_case.name) # Create dataframe test_case_df = pd.concat(test_case_rows) # Retest any that haven't been run yet - print("To retest:", len(retest_ids)) if len(retest_ids) > 0: - response = self.batch_run_test_cases(retest_ids, self.agent_id) - for result in response.results: - # Results may not be in the same order as they went in - # Process the name a bit to remove the /results/id part - tc_id_full = "/".join(result.name.split("/")[:-2]) - tc_id = tc_id_full.rsplit("/", maxsplit=1)[-1] - - # Update dataframe where id = tc_id_full - # row = 
test_case_df.loc[test_case_df['id']==tc_id_full] - test_case_df.loc[ - test_case_df["id"] == tc_id_full, "short_id" - ] = tc_id - test_case_df.loc[ - test_case_df["id"] == tc_id_full, "test_result" - ] = str(result.test_result) - test_case_df.loc[ - test_case_df["id"] == tc_id_full, "test_time" - ] = result.test_time - test_case_df.loc[test_case_df["id"] == tc_id_full,"passed"] = ( - str(result.test_result) == "TestResult.PASSED" - ) - - # This column is redundant, since we have "passed" (bool) - test_case_df = test_case_df.drop(columns=["test_result"]) + test_case_df = self._retest_cases(test_case_df,retest_ids) + return test_case_df From 9a36968ac7f230fdf67f00b9b4fe8c5fdb27d090 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 18 Aug 2023 22:43:56 -0500 Subject: [PATCH 100/151] fix: Lint fixes --- src/dfcx_scrapi/core/test_cases.py | 72 ++++++++++++++++-------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index a59b6986..b705a278 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -69,6 +69,8 @@ def _convert_test_result_to_string(self, test_case: types.TestCase) -> str: return "PASSED" elif test_case.last_test_result.test_result == 2: return "FAILED" + else: + return "" def _convert_test_result_to_bool(self, test_case: types.TestCase) -> bool: """Converts the String result to a boolean.""" @@ -78,6 +80,8 @@ def _convert_test_result_to_bool(self, test_case: types.TestCase) -> bool: return True elif test_result == "FAILED": return False + else: + return None def _get_flow_id_from_test_config( self, test_case: types.TestCase) -> str: @@ -85,9 +89,9 @@ def _get_flow_id_from_test_config( if "flow" in test_case.test_config: return test_case.test_config.flow elif "page" in test_case.test_config: - return '/'.join(test_case.test_config.page.split("/")[:8]) + return "/".join(test_case.test_config.page.split("/")[:8]) else: - 
agent_id = '/'.join(test_case.name.split('/')[:6]) + agent_id = "/".join(test_case.name.split("/")[:6]) return f"{agent_id}/flows/00000000-0000-0000-0000-000000000000" def _get_page_id_from_test_config( @@ -110,38 +114,38 @@ def _get_page_display_name( return page def _process_test_case(self, test_case, flows_map, pages_map): - """Takes a response from list_test_cases and returns a single row - dataframe of the test case result. - - Args: - test_case: The test case response - flows_map: A dictionary mapping flow IDs to flow display names - pages_map: A dictionary with keys as flow IDs and values as - dictionaries mapping page IDs to page display names for that flow - - Returns: A dataframe with columns: - display_name, id, short_id, tags, creation_time, start_flow, - start_page, test_result, passed, test_time - """ - flow_id = self._get_flow_id_from_test_config(test_case) - page_id = self._get_page_id_from_test_config(test_case, flow_id) - page = self._get_page_display_name(flow_id, page_id, pages_map) - test_result = self._convert_test_result_to_bool(test_case) - - return pd.DataFrame( - { - "display_name": [test_case.display_name], - "id": [test_case.name], - "short_id": [test_case.name.split("/")[-1]], - "tags": [",".join(test_case.tags)], - "creation_time": [test_case.creation_time], - "start_flow": [flows_map.get(flow_id, None)], - "start_page": [page], - # "test_result": [test_result], - "passed": [test_result], - "test_time": [test_case.last_test_result.test_time] - } - ) + """Takes a response from list_test_cases and returns a single row + dataframe of the test case result. 
+ + Args: + test_case: The test case response + flows_map: A dictionary mapping flow IDs to flow display names + pages_map: A dictionary with keys as flow IDs and values as + dictionaries mapping page IDs to page display names for that flow + + Returns: A dataframe with columns: + display_name, id, short_id, tags, creation_time, start_flow, + start_page, test_result, passed, test_time + """ + flow_id = self._get_flow_id_from_test_config(test_case) + page_id = self._get_page_id_from_test_config(test_case, flow_id) + page = self._get_page_display_name(flow_id, page_id, pages_map) + test_result = self._convert_test_result_to_bool(test_case) + + return pd.DataFrame( + { + "display_name": [test_case.display_name], + "id": [test_case.name], + "short_id": [test_case.name.split("/")[-1]], + "tags": [",".join(test_case.tags)], + "creation_time": [test_case.creation_time], + "start_flow": [flows_map.get(flow_id, None)], + "start_page": [page], + # "test_result": [test_result], + "passed": [test_result], + "test_time": [test_case.last_test_result.test_time] + } + ) def _retest_cases( self, test_case_df: pd.DataFrame, retest_ids: List[str] From a6a27d2877eb766ea887a16c11cc566642f7239e Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sat, 19 Aug 2023 19:54:34 -0500 Subject: [PATCH 101/151] fix: make agent_id required init arg --- src/dfcx_scrapi/tools/agent_checker_util.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 64581f1b..841c7e8e 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -1,6 +1,6 @@ """A set of Utility methods to check DFCX Agents.""" -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -75,11 +75,11 @@ class AgentCheckerUtil(ScrapiBase): def __init__( self, + agent_id: str, creds_path: str = None, creds_dict: Dict = None, creds=None, scope=False, - agent_id: str = None, delay: float = 1.0 ): """ @@ -97,8 +97,6 @@ def __init__( ) self.agent_id = agent_id - if not self.agent_id: - raise ValueError("agent_id parameter is required") self._intents = Intents(creds=self.creds, agent_id=self.agent_id) self._entities = EntityTypes(creds=self.creds, agent_id=self.agent_id) From 47e68d5f0c0a3ed1cc4e723fc258e84f1aac752e Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 20 Aug 2023 15:16:18 -0500 Subject: [PATCH 102/151] feat: Implement additional export_agent options --- src/dfcx_scrapi/core/agents.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index f4897250..bdfdf2b1 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -364,7 +364,11 @@ def export_agent( self, agent_id: str, gcs_bucket_uri: str, - environment_display_name: str = None + environment_display_name: str = None, + data_format: str = "BLOB", + git_branch: str = None, + git_commit_message: str = None, + include_bigquery_export_settings: bool = False ) -> str: """Exports the specified CX agent to Google Cloud Storage bucket. @@ -374,17 +378,39 @@ def export_agent( gcs_bucket_uri: The Google Cloud Storage bucket/filepath to export the agent to in the following format: `gs:///` - environment_display_name: (Optional) CX Agent environment display name + environment_display_name: CX Agent environment display name as string. If not set, DRAFT environment is assumed. + data_format: Optional. The data format of the exported agent. If not + specified, ``BLOB`` is assumed. + git_branch: Optional. The Git branch to commit the exported agent to. + git_commit_message: Optional. The Git Commit message to send. Only + applicable if using `git_branch` arg. 
+        if data_format in ("JSON", "ZIP", "JSON_PACKAGE"):
import logging +import time from typing import Dict, List from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.core import pages # logging config logging.basicConfig( @@ -54,6 +56,7 @@ def __init__( self.flow_id = flow_id self.agent_id = agent_id + self.pages = pages.Pages(creds=self.creds) # TODO: Migrate to Flow Builder class when ready @staticmethod @@ -129,6 +132,38 @@ def get_flows_map(self, agent_id: str, reverse=False): return flows_dict + def get_flow_page_map( + self, agent_id: str, rate_limit: float = 1.0 + ) -> Dict[str, Dict[str, str]]: + """Exports a user friendly dict containing Flows, Pages, and IDs + This method builds on top of `get_flows_map` and builds out a nested + dictionary containing all of the Page Display Names and UUIDs contained + within each Flow. Output Format: + { + : { + 'id': + 'pages': { : } + } + } + + Args: + agent_id: the formatted CX Agent ID to use + + Returns: + Dictionary containing Flow Names/UUIDs and Page Names/UUIDs + """ + flow_page_map = {} + + flows_map = self.get_flows_map(agent_id, reverse=True) + + for flow in flows_map: + pages_map = self.pages.get_pages_map( + flows_map[flow], reverse=True) + flow_page_map[flow] = {'id': flows_map[flow], 'pages': pages_map} + time.sleep(rate_limit) + + return flow_page_map + @scrapi_base.api_call_counter_decorator def train_flow(self, flow_id: str) -> str: """Trains the specified flow. 
From 1fcf35c961c82051922bee009782f4f6ae041934 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 20 Aug 2023 20:01:29 -0500 Subject: [PATCH 104/151] chore: update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0bdbf412..a60d4cf3 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ .ipynb_checkpoints *scratch.ipynb *scratch.py +tmp/ # IPython profile_default/ From f6bffdd388a736a1a927b1b5a0d831e6934a6616 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 20 Aug 2023 23:15:12 -0500 Subject: [PATCH 105/151] feat: adding agent_extract feature for offline processing --- src/dfcx_scrapi/agent_extract/__init__.py | 0 src/dfcx_scrapi/agent_extract/agents.py | 80 +++++ src/dfcx_scrapi/agent_extract/common.py | 73 +++++ src/dfcx_scrapi/agent_extract/entity_types.py | 173 +++++++++++ src/dfcx_scrapi/agent_extract/flows.py | 294 ++++++++++++++++++ src/dfcx_scrapi/agent_extract/gcs_utils.py | 68 ++++ src/dfcx_scrapi/agent_extract/graph.py | 45 +++ src/dfcx_scrapi/agent_extract/intents.py | 170 ++++++++++ src/dfcx_scrapi/agent_extract/pages.py | 135 ++++++++ src/dfcx_scrapi/agent_extract/route_groups.py | 100 ++++++ src/dfcx_scrapi/agent_extract/routes.py | 285 +++++++++++++++++ src/dfcx_scrapi/agent_extract/types.py | 207 ++++++++++++ src/dfcx_scrapi/agent_extract/webhooks.py | 95 ++++++ 13 files changed, 1725 insertions(+) create mode 100644 src/dfcx_scrapi/agent_extract/__init__.py create mode 100644 src/dfcx_scrapi/agent_extract/agents.py create mode 100644 src/dfcx_scrapi/agent_extract/common.py create mode 100644 src/dfcx_scrapi/agent_extract/entity_types.py create mode 100644 src/dfcx_scrapi/agent_extract/flows.py create mode 100644 src/dfcx_scrapi/agent_extract/gcs_utils.py create mode 100644 src/dfcx_scrapi/agent_extract/graph.py create mode 100644 src/dfcx_scrapi/agent_extract/intents.py create mode 100644 src/dfcx_scrapi/agent_extract/pages.py create mode 100644 
src/dfcx_scrapi/agent_extract/route_groups.py create mode 100644 src/dfcx_scrapi/agent_extract/routes.py create mode 100644 src/dfcx_scrapi/agent_extract/types.py create mode 100644 src/dfcx_scrapi/agent_extract/webhooks.py diff --git a/src/dfcx_scrapi/agent_extract/__init__.py b/src/dfcx_scrapi/agent_extract/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/dfcx_scrapi/agent_extract/agents.py b/src/dfcx_scrapi/agent_extract/agents.py new file mode 100644 index 00000000..0188276d --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/agents.py @@ -0,0 +1,80 @@ +"""Agent processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time +import os +from typing import Dict +import tempfile + +from dfcx_scrapi.core import agents +from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.agent_extract import flows +from dfcx_scrapi.agent_extract import intents +from dfcx_scrapi.agent_extract import entity_types +from dfcx_scrapi.agent_extract import webhooks +from dfcx_scrapi.agent_extract import gcs_utils +from dfcx_scrapi.agent_extract import types + +class Agents(scrapi_base.ScrapiBase): + """Agent Metadata methods and functions.""" + def __init__( + self, + agent_id: str, + creds_path: str = None, + creds_dict: Dict = None, + creds=None, + scope=False + ): + super().__init__( + creds_path=creds_path, + creds_dict=creds_dict, + creds=creds, + scope=scope, + ) + self.agent_id = agent_id + self._core_agents = agents.Agents(creds=creds) + self.gcs = gcs_utils.GcsUtils() + self.flows = flows.Flows() + self.intents = intents.Intents() + self.etypes = entity_types.EntityTypes() + self.webhooks = webhooks.Webhooks() + + def process_agent(self, agent_id: str, gcs_bucket_uri: str, + environment_display_name: str = None): + """Process the specified Agent for offline data gathering.""" + agent_local_path = 'tmp/agent' + lro = self._core_agents.export_agent( + agent_id=agent_id,gcs_bucket_uri=gcs_bucket_uri, data_format="JSON", + environment_display_name=environment_display_name) + + if not os.path.exists(agent_local_path): + os.makedirs(agent_local_path) + + time.sleep(2) + agent_file = self.gcs.download_gcs( + gcs_path=gcs_bucket_uri, local_path=agent_local_path) + + self.gcs.unzip(agent_file, agent_local_path) + + data = types.AgentData() + data.agent_id = agent_id + data = self.flows.process_flows_directory(agent_local_path, data) + data = self.intents.process_intents_directory(agent_local_path, data) + data = self.etypes.process_entity_types_directory( + agent_local_path, data) + data = self.webhooks.process_webhooks_directory(agent_local_path, data) + + return data diff --git 
a/src/dfcx_scrapi/agent_extract/common.py b/src/dfcx_scrapi/agent_extract/common.py new file mode 100644 index 00000000..4b1a6605 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/common.py @@ -0,0 +1,73 @@ +"""Common methods and helper functions used throughout library.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import re + +# logging config +logging.basicConfig( + level=logging.INFO, + format="%(message)s", +) + +class Common: + """Common methods and helper functions used throughout library.""" + + @staticmethod + def parse_filepath(in_path: str, resource_type: str) -> str: + """Parse file path to provide quick reference for resource.""" + + regex_map = { + "flow": r".*\/flows\/([^\/]*)", + "page": r".*\/pages\/([^\/]*)\.", + "entity_type": r".*\/entityTypes\/([^\/]*)", + "intent": r".*\/intents\/([^\/]*)", + "route_group": r".*\/transitionRouteGroups\/([^\/]*)", + "webhook": r".*\/webhooks\/([^\/]*)\." 
+        """Replace special characters from map for the given display name."""
+ + Ex: /path/to/agent/entityTypes/ + + This dir path can then be used to find the next level of information + in the directory by appending the appropriate next dir structures like: + - .json, for the Entity Type object + - /entities, for the Entities dir + """ + root_dir = agent_local_path + "/entityTypes" + + entity_type_paths = [] + + for entity_type_dir in os.listdir(root_dir): + entity_type_dir_path = f"{root_dir}/{entity_type_dir}" + entity_type_paths.append(entity_type_dir_path) + + return entity_type_paths + + @staticmethod + def build_lang_code_paths(etype: types.EntityType): + """Builds dict of lang codes and file locations. + + The language_codes and paths for each file are stored in a dictionary + inside of the Entity Type dataclass. This dict is accessed later to + lint each file and provide reporting based on each language code. + """ + root_dir = etype.dir_path + "/entities" + + for lang_file in os.listdir(root_dir): + lang_code = lang_file.split(".")[0] + lang_code_path = f"{root_dir}/{lang_file}" + etype.entities[lang_code] = {"file_path": lang_code_path} + + @staticmethod + def build_excluded_phrases_path(etype: types.EntityType, lang_code: str): + """Builds a dict of excluded phrases and file locations.""" + root_dir = etype.dir_path + "/excludedPhrases" + lang_code_path = f"{root_dir}/{lang_code}.json" + + return lang_code_path + + @staticmethod + def process_entity_type_metadata(etype: types.EntityType): + """Extract metadata for Entity Type for later processing.""" + metadata_file = etype.dir_path + f"/{etype.display_name}.json" + + with open(metadata_file, "r", encoding="UTF-8") as etype_file: + etype.data = json.load(etype_file) + etype.resource_id = etype.data.get("name", None) + etype.kind = etype.data.get("kind", None) + etype.auto_expansion = etype.data.get("autoExpansionMode", None) + etype.fuzzy_extraction = etype.data.get( + "enableFuzzyExtraction", False) + + etype_file.close() + + @staticmethod + def check_lang_code(lang_code: str, 
+        """Process all excluded phrases lang_code files."""
types.AgentData): + """Process a Single Entity Type dir and all subdirectories.""" + + etype.display_name = self.common.parse_filepath( + etype.dir_path, "entity_type") + + self.process_entity_type_metadata(etype) + stats = self.process_entities(etype, stats) + stats.total_entity_types += 1 + + return stats + + def process_entity_types_directory( + self, agent_local_path: str, stats: types.AgentData): + """Processing the Entity Types dir in the JSON Package structure.""" + # Create a list of all Entity Type paths to iter through + entity_type_paths = self.build_entity_type_path_list(agent_local_path) + + for entity_type_path in entity_type_paths: + etype = types.EntityType() + etype.dir_path = entity_type_path + + stats = self.process_entity_type(etype, stats) + full_etype_id = f"{stats.agent_id}/entityTypes/{etype.resource_id}" + stats.entity_types_map[etype.display_name] = full_etype_id + + return stats diff --git a/src/dfcx_scrapi/agent_extract/flows.py b/src/dfcx_scrapi/agent_extract/flows.py new file mode 100644 index 00000000..caf28f4e --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/flows.py @@ -0,0 +1,294 @@ +"""Flow extract methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import os + +from typing import List + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import graph +from dfcx_scrapi.agent_extract import types +from dfcx_scrapi.agent_extract import pages +from dfcx_scrapi.agent_extract import routes +from dfcx_scrapi.agent_extract import route_groups + + +class Flows: + """Flow processing methods and functions.""" + + def __init__(self): + self.common = common.Common() + self.pages = pages.Pages() + self.rgs = route_groups.RouteGroups() + self.routes = routes.Fulfillments() + self.special_pages = [ + "End Session", + "End Flow", + "Start Page", + "Current Page", + "Previous Page", + ] + + @staticmethod + def build_flow_path_list(agent_local_path: str): + """Builds a list of dirs, each representing a Flow directory. + + Ex: /path/to/agent/flows/ + + This dir path can then be used to find the next level of information + in the directory by appending the appropriate next dir structures like: + - .json, for the Flow object + - /transitionRouteGroups, for the Route Groups dir + - /pages, for the Pages dir + """ + root_dir = agent_local_path + "/flows" + + flow_paths = [] + + for flow_dir in os.listdir(root_dir): + flow_dir_path = f"{root_dir}/{flow_dir}" + flow_paths.append(flow_dir_path) + + return flow_paths + + @staticmethod + def remove_flow_pages_from_set(input_set: set) -> set: + """Remove any transitions tagged with FLOW. + + Some route transitions go to Flow instead of Page. For these + transitions, we tag them with `FLOW` for easier identification later. + However, when reporting on Graph inconsistencies like Dangling or + Unreachable pages, we want to remove these from any result sets as they + are not relevant. + """ + filtered_set = set() + + for page in input_set: + if "FLOW" not in page: + filtered_set.add(page) + + return filtered_set + + def find_unreachable_pages(self, flow: types.Flow): + """Find Unreachable Pages in the graph. 
+ + An Unreachable Page is defined as: + - A Page which has no incoming edge when traversed from Start Page. + That is, it is unreachable in the graph by any practical means. + - A Page which is connected to a root unreachable page. That is, a + page that could have both incoming or outgoing routes, but due to + its connectedness to the root orphan page, is unreachable in the + graph. + + Here we will compute the symmetric difference of 2 sets: + - Active Pages (i.e. Pages that were reachable in the graph) + - Used Pages (i.e. Pages that were used by some Route) + + If an Unreachable Page has children that it routes to, those children + will appear in Used Pages, although they will ultimately be + unreachable. It's possible for an Unreachable Page to route back to an + Active Page in the graph. For these instances, we don't want to count + those pages as unreachable, because they are reachable via other + sections of the graph. + """ + filtered_set = flow.active_pages.symmetric_difference( + flow.graph.used_nodes + ) + filtered_set = self.remove_flow_pages_from_set(filtered_set) + flow.unreachable_pages.update(filtered_set) + + return flow + + def find_unused_pages(self, flow: types.Flow): + """Find Unused Pages in the graph. + + An Unused Page is defined as: + - A Page which has no incoming or outgoing edge AND + - A Page which exists in the Agent design time, but which is not + present anywhere in the graph, either visible or non-visible. + + Here we will compute the difference of 2 sets: + - All Pages (i.e. Pages that exist in the Agent Design Time) + - Used Pages (i.e. Pages that were used by some Route) + + The resulting set will consist of 2 types of Pages: + - Truly Unused Pages + - Unreachable Root Pages + + Unreachable Root Pages end up in the results due to the fact that no + other Active Page is pointing to them. We remove these from the + resulting set before presenting the Truly Unused Pages. 
+ """ + + # Discard special pages as they are non-relevant for final outcome + for page in self.special_pages: + flow.all_pages.discard(page) + + prelim_unused = flow.all_pages.difference(flow.graph.used_nodes) + + # Filter out Unreachable Root Pages + filtered_set = set() + + for page in prelim_unused: + if page not in flow.graph.edges: + filtered_set.add(page) + else: + flow.unreachable_pages.add(page) + + flow.unused_pages = filtered_set + + return flow + + def recurse_edges( + self, edges: List, page: types.Page, dangling: set, visited: set + ): + """Recursive method searching graph edges for Active / Dangling Pages. + + A byproduct of searching for Dangling Pages in the graph is that we can + produce a set of Active Pages in the graph. These are pages that are + reachable when traversing from the Start Page. These can then be used + to determine Unreachable Pages in another method. + """ + if page in edges: + for inner_page in edges[page]: + if inner_page not in visited: + visited.add(inner_page) + dangling, visited = self.recurse_edges( + edges, inner_page, dangling, visited + ) + + else: + dangling.add(page) + + return dangling, visited + + def find_dangling_pages(self, flow: types.Flow): + """Find Dangling Pages in the graph. + + Dangling Page is defined as: + - Any page that exists in the graph that has no outgoing edge + Active Page is defined as: + - Any page that is reachable via an active route in the graph and can + be traced back to the Start Page. + + These pages can result in a conversational "dead end" which is + potentially unrecoverable. + A byproduct of searching for the dangling pages is locating all of the + "active" pages. These are the pages that are "visited" as we traverse + the graph. We'll also return Active Pages in this method since they + will be used for downstream tasks. 
+ """ + + flow.dangling_pages, flow.active_pages = self.recurse_edges( + flow.graph.edges, + "Start Page", + flow.dangling_pages, + flow.active_pages, + ) + + # Clean up Special Pages + for page in self.special_pages: + flow.dangling_pages.discard(page) + + flow.dangling_pages = self.remove_flow_pages_from_set( + flow.dangling_pages + ) + + return flow + + def process_start_page(self, flow: types.Flow, stats: types.AgentData): + """Process a single Flow Path file.""" + with open(flow.start_page_file, "r", encoding="UTF-8") as flow_file: + page = types.Page(flow=flow) + page.display_name = "Start Page" + + flow.graph.add_node(page.display_name) + + page.data = json.load(flow_file) + stats.flows.append(page.data) + + # page.events = page.data.get("eventHandlers", None) + # page.routes = page.data.get("transitionRoutes", None) + # page.route_groups = page.data.get("transitionRouteGroups", None) + + flow.resource_id = page.data.get("name", None) + # page.agent_id = flow.agent_id + # page.resource_id = "START_PAGE" + # flow.data[page.display_name] = page.resource_id + + # # Order of processing is important + # stats = self.routes.process_routes(page, stats) + # stats = self.routes.process_events(page, stats) + + if page.route_groups: + page = self.routes.set_route_group_targets(page) + + flow_file.close() + + full_flow_id = f"{stats.agent_id}/flows/{flow.resource_id}" + stats.flows_map[flow.display_name] = full_flow_id + stats.flow_page_map[flow.display_name] = { + "id": full_flow_id, + "pages": {} + } + + return stats + + def process_flow(self, flow: types.Flow, stats: types.AgentData): + """Process a Single Flow dir and all subdirectories.""" + flow.file_name = self.common.parse_filepath(flow.dir_path, "flow") + flow.display_name = self.common.clean_display_name(flow.file_name) + + flow.start_page_file = f"{flow.dir_path}/{flow.file_name}.json" + + stats.pages[flow.display_name] = [] + stats = self.process_start_page(flow, stats) + stats = 
self.pages.process_pages_directory(flow, stats) + stats = self.rgs.process_route_groups_directory(flow, stats) + + # Order of Find Operations is important here! + # flow = self.find_unused_pages(flow) + # flow = self.find_dangling_pages(flow) + # flow = self.find_unreachable_pages(flow) + + return stats + + def process_flows_directory( + self, agent_local_path: str, stats: types.AgentData): + """Process the top level Flows dir in the JSON Package structure. + + The following files/dirs exist under the `flows` dir: + - Flow object (i.e. Flow START_PAGE) + - transitionRouteGroups + - pages + + In Dialogflow CX, the START_PAGE of each Flow is a special kind of Page + that exists within the Flow object itself. In this method, we will lint + the Flow object, all files in the transitionRouteGroups dir and all + files in the pages dir. + """ + # Create a list of all Flow paths to iter through + flow_paths = self.build_flow_path_list(agent_local_path) + stats.total_flows = len(flow_paths) + + for flow_path in flow_paths: + flow = types.Flow() + flow.graph = graph.Graph() + flow.dir_path = flow_path + stats = self.process_flow(flow, stats) + + return stats diff --git a/src/dfcx_scrapi/agent_extract/gcs_utils.py b/src/dfcx_scrapi/agent_extract/gcs_utils.py new file mode 100644 index 00000000..c3daff89 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/gcs_utils.py @@ -0,0 +1,68 @@ +"""Utils for Cloud Storage and local file manipulation.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import zipfile +from google.cloud import storage +from google.oauth2 import service_account + + +class GcsUtils: + """Utils for Cloud Storage and local file manipulation.""" + + def __init__(self, creds_path: str = None, project_id: str = None): + if creds_path and project_id: + self.creds = service_account.Credentials.from_service_account_file( + creds_path + ) + self.gcs_client = storage.Client( + credentials=self.creds, project=project_id + ) + + else: + self.gcs_client = storage.Client() + + @staticmethod + def unzip(agent_zip_file_path: str, extract_path: str): + """Unzip file locally.""" + with zipfile.ZipFile(agent_zip_file_path, "r") as zip_ref: + zip_ref.extractall(extract_path) + + @staticmethod + def check_for_gcs_file(file_path: str) -> bool: + """Validates GCS path vs. local path.""" + is_gcs_file = False + + file_prefix = file_path.split("/")[0] + if file_prefix == "gs:": + is_gcs_file = True + + return is_gcs_file + + def download_gcs(self, gcs_path: str, local_path: str = None): + """Downloads the specified GCS file to local machine.""" + path = gcs_path.split("//")[1] + bucket = path.split("/", 1)[0] + gcs_object = path.split("/", 1)[1] + file_name = gcs_object.split("/")[-1] + bucket = self.gcs_client.bucket(bucket) + blob = storage.Blob(gcs_object, bucket) + + if local_path: + file_name = local_path + "/" + file_name + + blob.download_to_filename(file_name) + + return file_name diff --git a/src/dfcx_scrapi/agent_extract/graph.py b/src/dfcx_scrapi/agent_extract/graph.py new file mode 100644 index 00000000..b94217ce --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/graph.py @@ -0,0 +1,45 @@ +"""Utility class for managing graph structure.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+    """Utility class for managing graph structure."""
+
+import json
+import os
+
+from dfcx_scrapi.agent_extract import common
+from dfcx_scrapi.agent_extract import types
+
+
+class Intents:
+    """Intent processing methods and functions."""
+
+    def __init__(self):
+        self.common = common.Common()
+
+    @staticmethod
+    def parse_lang_code(lang_code_path: str) -> str:
+        """Extract the language_code from the given file path."""
+
+        first_parse = lang_code_path.split("/")[-1]
+        lang_code = first_parse.split(".")[0]
+
+        return lang_code
+
+    @staticmethod
+    def build_lang_code_paths(intent: types.Intent):
+        """Builds dict of lang codes and file locations.
+
+        The language_codes and paths for each file are stored in a dictionary
+        inside of the Intent dataclass. This dict is accessed later to process
+        each file and provide reporting based on each language code.
+        """
+        root_dir = intent.dir_path + "/trainingPhrases"
+
+        for lang_file in os.listdir(root_dir):
+            lang_code = lang_file.split(".")[0]
+            lang_code_path = f"{root_dir}/{lang_file}"
+            intent.training_phrases[lang_code] = {"file_path": lang_code_path}
+
+    @staticmethod
+    def build_intent_path_list(agent_local_path: str):
+        """Builds a list of dirs, each representing an Intent directory.
+ + Ex: /path/to/agent/intents/ + + This dir path can be used to find the next level of information + in the directory by appending the appropriate next dir structures like: + - .json, for the Intent object metadata + - /trainingPhrases, for the Training Phrases dir + """ + root_dir = agent_local_path + "/intents" + + intent_paths = [] + + for intent_dir in os.listdir(root_dir): + intent_dir_path = f"{root_dir}/{intent_dir}" + intent_paths.append(intent_dir_path) + + return intent_paths + + @staticmethod + def check_lang_code(lang_code: str, stats: types.AgentData): + """Check to see if lang_code already exists in dict, or create it.""" + res = stats.intents.get(lang_code, None) + if not res: + stats.intents[lang_code] = [] + + return stats + + def process_intent_metadata( + self, intent: types.Intent): + """Process the metadata file for a single Intent.""" + intent.metadata_file = f"{intent.dir_path}/{intent.display_name}.json" + + try: + with open(intent.metadata_file, "r", encoding="UTF-8") as meta_file: + intent.data = json.load(meta_file) + intent.resource_id = intent.data.get("name", None) + intent.labels = intent.data.get("labels", None) + intent.description = intent.data.get("description", None) + intent.parameters = intent.data.get("parameters", None) + + meta_file.close() + + except FileNotFoundError: + pass + + def process_language_codes( + self, intent: types.Intent, stats: types.AgentData): + """Process all training phrase lang_code files.""" + + for lang_code in intent.training_phrases: + tp_file = intent.training_phrases[lang_code]["file_path"] + + stats = self.check_lang_code(lang_code, stats) + + with open(tp_file, "r", encoding="UTF-8") as tps: + data = json.load(tps) + data['name'] = f"{stats.agent_id}/intents/{intent.resource_id}" + data['display_name'] = intent.display_name + data['labels'] = intent.labels + data['description'] = intent.description + data['parameters'] = intent.parameters + stats.intents[lang_code].append(data) + 
stats.total_training_phrases += len(data["trainingPhrases"]) + + tps.close() + + return stats + + def process_training_phrases( + self, intent: types.Intent, stats: types.AgentData): + """Process the Training Phrase dir for a single Intent.""" + if "trainingPhrases" in os.listdir(intent.dir_path): + self.build_lang_code_paths(intent) + stats = self.process_language_codes(intent, stats) + + return stats + + def process_intent(self, intent: types.Intent, stats: types.AgentData): + """Process a single Intent directory and associated files.""" + intent.display_name = self.common.parse_filepath(intent.dir_path, "intent") + + self.process_intent_metadata(intent) + stats = self.process_training_phrases(intent, stats) + stats.total_intents += 1 + + return stats + + def process_intents_directory( + self, agent_local_path: str, stats: types.AgentData): + """Processing the top level Intents Dir in the JSON Package structure. + + The following files/dirs exist under the `intents` dir: + - Directory + - trainingPhrases + - .json + - Object + + In Dialogflow CX, the Training Phrases of each Intent are stored in + individual .json files by language code under each Intent Display + Name. In this method, we will process all Intent dirs, including the + training phrase files and metadata objects for each Intent. 
+ """ + # Create a list of all Intent paths to iter through + intent_paths = self.build_intent_path_list(agent_local_path) + + for intent_path in intent_paths: + intent = types.Intent() + intent.dir_path = intent_path + + stats = self.process_intent(intent, stats) + full_intent_id = f"{stats.agent_id}/intents/{intent.resource_id}" + stats.intents_map[intent.display_name] = full_intent_id + + return stats diff --git a/src/dfcx_scrapi/agent_extract/pages.py b/src/dfcx_scrapi/agent_extract/pages.py new file mode 100644 index 00000000..45b1f540 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/pages.py @@ -0,0 +1,135 @@ +"""Pages processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os + +from typing import Dict, Any + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types #import Flow, Page, AgentData, FormParameter +from dfcx_scrapi.agent_extract import routes #import Fulfillments + + +class Pages: + """Pages processing methods and functions.""" + + def __init__(self): + self.common = common.Common() + self.routes = routes.Fulfillments() + + @staticmethod + def build_page_path_list(flow_path: str): + """Builds a list of files, each representing a Page. 
+ + Ex: /path/to/agent/flows//pages/.json + """ + pages_path = f"{flow_path}/pages" + + page_paths = [] + + for page in os.listdir(pages_path): + page_file_path = f"{pages_path}/{page}" + page_paths.append(page_file_path) + + return page_paths + + @staticmethod + def get_form_parameter_data(param: Dict[str, Any], page: types.Page): + fp = types.FormParameter(page=page) + fp.display_name = param.get("displayName", None) + fp.entity_type = param.get("entityType", None) + fp.required = param.get("required", None) + + fp.fill_behavior = param.get("fillBehavior", None) + + if fp.fill_behavior: + fp.init_fulfillment = fp.fill_behavior.get( + "initialPromptFulfillment", None) + fp.reprompt_handlers = fp.fill_behavior.get( + "repromptEventHandlers", None) + + fp.advanced_settings = page.form.get("advancedSettings", None) + + if fp.advanced_settings: + fp.dtmf_settings = fp.advanced_settings.get("dtmfSettings", None) + + return fp + + def process_form(self, page: types.Page, stats: types.AgentData): + """Process the Form and sub-resources within it for the Page.""" + parameters = page.form.get("parameters", None) + + if parameters: + for param in parameters: + fp = self.get_form_parameter_data(param, page) + stats = self.routes.process_reprompt_handlers(fp, stats) + + return stats + + + def process_page(self, page: types.Page, stats: types.AgentData): + """Process a Single Page file.""" + page.display_name = self.common.parse_filepath(page.page_file, "page") + page.display_name = self.common.clean_display_name(page.display_name) + + page.flow.graph.add_node(page.display_name) + + page.flow.all_pages.add(page.display_name) + + with open(page.page_file, "r", encoding="UTF-8") as page_file: + page.data = json.load(page_file) + # page.entry = page.data.get("entryFulfillment", None) + # page.events = page.data.get("eventHandlers", None) + page.form = page.data.get("form", None) + # page.routes = page.data.get("transitionRoutes", None) + # page.route_groups = 
page.data.get("transitionRouteGroups", None) + + page.resource_id = page.data.get("name", None) + # page.flow.data[page.display_name] = page.resource_id + + stats = self.process_form(page, stats) + + if page.route_groups: + page = self.routes.set_route_group_targets(page) + + page_file.close() + + full_flow_id = f"{stats.agent_id}/flows/{page.flow.resource_id}" + full_page_id = f"{full_flow_id}/pages/{page.resource_id}" + stats.pages[page.flow.display_name].append(page.data) + stats.flow_page_map[ + page.flow.display_name]['pages'][page.display_name] = full_page_id + + return stats + + def process_pages_directory(self, flow: types.Flow, stats: types.AgentData): + """Process the Pages dir inside a specific Flow dir. + + Some Flows may not contain Pages, so we check for the existence + of the directory before traversing + """ + if "pages" in os.listdir(flow.dir_path): + page_paths = self.build_page_path_list(flow.dir_path) + + for page_path in page_paths: + page = types.Page(flow=flow) + page.agent_id = flow.agent_id + page.page_file = page_path + stats.total_pages += 1 + stats = self.process_page(page, stats) + + return stats diff --git a/src/dfcx_scrapi/agent_extract/route_groups.py b/src/dfcx_scrapi/agent_extract/route_groups.py new file mode 100644 index 00000000..f69f4a9b --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/route_groups.py @@ -0,0 +1,100 @@ +"""Route Groups processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import json + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types +from dfcx_scrapi.agent_extract import routes + + +class RouteGroups: + """Route Groups processing methods and functions.""" + + def __init__(self): + self.special_pages = [ + "End Session", + "End Flow", + "Start Page", + "Current Page", + "Previous Page", + ] + + self.common = common.Common() + self.routes = routes.Fulfillments() + + @staticmethod + def build_route_group_path_list(flow_local_path: str): + """Builds a list of files, each representing a Route Group. + + Ex: /path/to/agent/flows//transitionRouteGroups/ + """ + root_dir = flow_local_path + "/transitionRouteGroups" + + if "transitionRouteGroups" in os.listdir(flow_local_path): + rg_paths = [] + + for rg_file in os.listdir(root_dir): + rg_file_path = f"{root_dir}/{rg_file}" + rg_paths.append(rg_file_path) + + return rg_paths + + def process_route_group(self, rg: types.RouteGroup, stats: types.AgentData): + """Process a single Route Group.""" + rg.display_name = self.common.parse_filepath(rg.rg_file, "route_group") + rg.display_name = self.common.clean_display_name(rg.display_name) + + with open(rg.rg_file, "r", encoding="UTF-8") as route_group_file: + rg.data = json.load(route_group_file) + rg.resource_id = rg.data.get("name", None) + rg.display_name = rg.data.get("displayName", None) + rg.routes = rg.data.get("transitionRoutes", None) + + # stats = self.routes.process_routes(rg, stats) + + route_group_file.close() + + full_flow_id = f"{stats.agent_id}/flows/{rg.flow.resource_id}" + full_rg_id = f"{full_flow_id}/transitionRouteGroups/{rg.resource_id}" + stats.route_groups_map[ + rg.flow.display_name]["route_groups"][rg.display_name] = full_rg_id + stats.route_groups[rg.flow.display_name].append(rg.data) + + return stats + + def process_route_groups_directory( + self, flow: 
types.Flow, stats: types.AgentData): + """Process Route Groups dir in the JSON Package structure.""" + if "transitionRouteGroups" in os.listdir(flow.dir_path): + # Create a list of all Route Group paths to iter through + rg_paths = self.build_route_group_path_list(flow.dir_path) + stats.total_route_groups += len(rg_paths) + + full_flow_id = f"{stats.agent_id}/flows/{flow.resource_id}" + stats.route_groups_map[flow.display_name] = { + "id": full_flow_id, + "route_groups": {} + } + stats.route_groups[flow.display_name] = [] + + for rg_path in rg_paths: + rg = types.RouteGroup(flow=flow) + rg.rg_file = rg_path + stats = self.process_route_group(rg, stats) + + return stats diff --git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py new file mode 100644 index 00000000..6ff69774 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -0,0 +1,285 @@ +"""Fulfillment routes processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from typing import Dict, Any
+
+from dfcx_scrapi.agent_extract import common
+from dfcx_scrapi.agent_extract import types
+
+
+class Fulfillments:
+    """Fulfillment routes processing methods and functions."""
+
+    def __init__(self):
+        self.common = common.Common()
+        self.route_parameters = {}
+
+    @staticmethod
+    def check_for_webhook(page: types.Page, path: Dict[str, Any]):
+        """Check the current route for existence of webhook."""
+        if "webhook" in path:
+            page.has_webhook = True
+
+    @staticmethod
+    def check_for_webhook_event_handlers(route: types.Fulfillment):
+        """Check for Webhook Error Event Handler on Page.
+
+        In this method, we're interested in the following conditions:
+          - Page is currently flagged w/webhook = True
+          - Page HAS NOT been flagged w/having a webhook error handler
+          - The trigger MATCHES the pattern 'webhook.error'
+
+        If a Page and its Route meet all the criteria, we'll flip the bit.
+        Otherwise, the webhook handler bit will remain False, causing a rule
+        flag."""
+
+        if all(
+            [
+                route.page.has_webhook,
+                not route.page.has_webhook_event_handler,
+                "webhook.error" in route.trigger,
+            ]
+        ):
+            route.page.has_webhook_event_handler = True
+
+    def collect_transition_route_trigger(self, route):
+        """Inspect route and return all Intent/Condition info."""
+
+        trigger = []
+        intent_name = None
+
+        if "intent" in route.data:
+            trigger.append("intent")
+            intent_name = route.data.get("intent", None)
+
+        if "condition" in route.data:
+            trigger.append("condition")
+
+        if len(trigger) > 0:
+            trigger = "+".join(trigger)
+
+        # Return the joined trigger string, or the empty list if none found.
+        return trigger
+
+    def get_trigger_info(self, route):
+        """Extract trigger info from route based on primary key."""
+
+        if route.fulfillment_type == "event":
+            trigger = f"event : {route.data.get('event', None)}"
+
+        if route.fulfillment_type == "reprompt_handler":
+            trigger = f"{route.parameter} : event : "\
+                f"{route.data.get('event', None)}"
+
+        if route.fulfillment_type == "transition_route":
+            intent_condition = 
self.collect_transition_route_trigger(route) + trigger = f"route : {intent_condition}" + + return trigger + + def set_route_group_targets(self, page: types.Page): + """Determine Route Targets for Route Group routes.""" + current_page = page.display_name + + for route_group in page.route_groups: + page.flow.graph.add_edge(current_page, route_group) + page.flow.graph.add_used_node(route_group) + + return page + + def set_route_targets(self, route: types.Fulfillment): + """Determine the Route Targets for the specified route. + + Primary function is to build out the graph structure for the + Flow based on the current page and where the routes are pointing to. + The graph structure can then be traversed later to determine any errors + or inconsistencies in design. + """ + current_page = route.page.display_name + + route.target_flow = route.data.get("targetFlow", None) + route.target_page = route.data.get("targetPage", None) + + if route.target_page: + route.page.flow.graph.add_edge(current_page, route.target_page) + route.page.flow.graph.add_used_node(route.target_page) + + if route.target_flow: + route.page.flow.graph.add_edge( + current_page, f"FLOW: {route.target_flow}" + ) + route.page.flow.graph.add_used_node(f"FLOW: {route.target_flow}") + + return route + + def update_route_parameters( + self, route: types.Fulfillment, item: Dict[str, str]): + """Update the Route Parameters map based on new info.""" + flow_name = route.page.flow.display_name + page_name = route.page.display_name + + flow_data = self.route_parameters.get(flow_name, None) + page_data = None + + if flow_data: + page_data = flow_data.get(page_name, None) + + # Flow and Page already exists, append to existing list. + if page_data: + self.route_parameters[flow_name][page_name].append(item) + + # Flow data exists, but not Page, so only create the Page list. 
+ elif flow_data and not page_data: + self.route_parameters[flow_name][page_name] = [item] + + # Neither the Flow or Page data exists, so create it all. + else: + self.route_parameters[flow_name] = {page_name: [item]} + + + def process_fulfillment_type( + self, stats: types.AgentData, route: types.Fulfillment, path: object, + key: str): + """Parse through specific fulfillment types.""" + fulfillment_data = path.get(key, None) + + if fulfillment_data: + for item in fulfillment_data: + # This is where each message type will exist + # text, custom payload, etc. + + # TODO pmarlow: create sub-method parsers per type + if "text" in item: + for text in item["text"]["text"]: + route.text = text + + if "parameter" in item: + self.update_route_parameters(route, item) + + return stats + + def process_reprompt_handlers( + self, fp: types.FormParameter, stats: types.AgentData): + """Processing for Reprompt Event Handlers inside Form parameters. + + While Reprompt Event Handlers are technically Events, they differ from + standard Page level Events because they act on the FormParameter data + structure, not Fulfillment Route data structure as standard Events do. 
+ """ + if not fp.reprompt_handlers: + return stats + + for handler in fp.reprompt_handlers: + route = types.Fulfillment(page=fp.page) + route.data = handler + route.agent_id = fp.page.agent_id + route.fulfillment_type = "reprompt_handler" + route.parameter = fp.display_name + route.trigger = self.get_trigger_info(route) + route = self.set_route_targets(route) + path = route.data.get("triggerFulfillment", None) + event = route.data.get("event", None) + + if not path and not event: + continue + + # Flag for Webhook Handler + self.check_for_webhook(fp.page, path) + + stats = self.process_fulfillment_type(stats, route, path, "messages") + + return stats + + def process_events(self, page: types.Page, stats: types.AgentData): + """Parse through all Page Event Handlers.""" + if not page.events: + return stats + + for route_data in page.events: + route = types.Fulfillment(page=page) + route.data = route_data + route.agent_id = page.agent_id + route.fulfillment_type = "event" + route.trigger = self.get_trigger_info(route) + route = self.set_route_targets(route) + path = route.data.get("triggerFulfillment", None) + event = route.data.get("event", None) + + if not path and not event: + continue + + # Flag for Webhook Handler + self.check_for_webhook_event_handlers(route) + + stats = self.process_fulfillment_type(stats, route, path, "messages") + + return stats + + def process_routes(self, page: types.Page, stats: types.AgentData): + """Parse through all Transition Routes.""" + tf_key = "triggerFulfillment" + + if not page.routes: + return stats + + for route_data in page.routes: + route = types.Fulfillment(page=page) + route.data = route_data + route.agent_id = page.agent_id + route.fulfillment_type = "transition_route" + route.trigger = self.get_trigger_info(route) + route = self.set_route_targets(route) + + path = route.data.get(tf_key, None) + + if not path: + continue + + # Flag for Webhook Handler + self.check_for_webhook(page, path) + + stats = 
self.process_fulfillment_type(stats, route, path, "messages") + + # Preset Params processed here + stats = self.process_fulfillment_type( + stats, route, path, "setParameterActions" + ) + + return stats + + def process_entry(self, page: types.Page, stats: types.AgentData): + """Process Entry Fulfillment on a single page file. + + The Entry Fulfillment to a Page only has 1 "route" (i.e. itself) so + there is no need to loop through multiple routes, as they don't + exist for Entry Fulfillment. + """ + + if not page.entry: + return stats + + route = types.Fulfillment(page=page) + route.data = page.entry + route.agent_id = page.agent_id + route.fulfillment_type = "entry" + route.trigger = "entry" + path = route.data + + self.check_for_webhook(page, path) + + stats = self.process_fulfillment_type(stats, route, path, "messages") + + return stats diff --git a/src/dfcx_scrapi/agent_extract/types.py b/src/dfcx_scrapi/agent_extract/types.py new file mode 100644 index 00000000..178bbbb3 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/types.py @@ -0,0 +1,207 @@ +"""Collection of Type Classes used for offline processing.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from typing import Dict, List, Any
+from dataclasses import dataclass, field
+
+from dfcx_scrapi.agent_extract import graph as graph_class
+
+@dataclass
+class AgentMetadata:
+    """Used to track the current Agent Metadata attributes."""
+
+    default_language_code: str = None
+    dtmf_settings: bool = False
+    logging_enabled: bool = False
+    speech_adaptation: bool = False
+
+
+@dataclass
+class Flow:
+    """Used to track current Flow Attributes."""
+
+    agent_id: str = None
+    all_pages: set = field(default_factory=set)
+    active_pages: set = field(default_factory=set)
+    data: Dict[str, Any] = field(default_factory=dict)
+    dangling_pages: set = field(default_factory=set)
+    dir_path: str = None # Full Directory Path for this Flow
+    display_name: str = None # Flow Display Name (removed special chars)
+    file_name: str = None # Original Name of Flow (includes special chars)
+    graph: graph_class.Graph = None
+    resource_id: str = None
+    resource_type: str = "flow"
+    start_page_file: str = None # File Path Location of START_PAGE
+    unreachable_pages: set = field(default_factory=set)
+    unused_pages: set = field(default_factory=set)
+
+@dataclass
+class Page:
+    """Used to track current Page Attributes."""
+
+    agent_id: str = None
+    data: Dict[str, Any] = None
+    display_name: str = None
+    entry: Dict[str, Any] = None
+    events: List[object] = None
+    flow: Flow = None
+    form: Dict[str, Any] = None
+    has_webhook: bool = False
+    has_webhook_event_handler: bool = False
+    page_file: str = None
+    resource_id: str = None
+    resource_type: str = "page"
+    routes: List[object] = None
+    route_groups: List[str] = None
+
+@dataclass
+class FormParameter:
+    """Tracks Form Parameter attributes within a Page."""
+
+    advanced_settings: str = None
+    agent_id: str = None
+    data: Dict[str, Any] = None
+    display_name: str = None
+    dtmf_settings: str = None
+    entity_type: str = None
+    fill_behavior: Dict[str, Any] = None
+    init_fulfillment: Dict[str, Any] = None
+    page: Page = None
+    reprompt_handlers: 
Dict[str, Any] = None + required: bool = True + + +@dataclass +class RouteGroup: + """Used to track current RouteGroup Attributes.""" + + agent_id: str = None + data: Dict[str, Any] = None + display_name: str = None + flow: Flow = None + resource_id: str = None + resource_type: str = "route_group" + rg_file: str = None + routes: List[object] = None + +@dataclass +class Fulfillment: + """Used to track current Fulfillment Attributes.""" + + agent_id: str = None + data: Dict[str, Any] = None + display_name: str = None # Inherit from Page easy logging + fulfillment_type: str = None # transition_route | event + page: Page = None + parameter: str = None # Used for Reprompt Event Handlers + target_flow: str = None + target_page: str = None + text: str = None + trigger: str = None + resource_type: str = "fulfillment" + +@dataclass +class Intent: + """Used to track current Intent Attributes.""" + + agent_id: str = None + data: Dict[str, Any] = None + description: str = None + display_name: str = None + dir_path: str = None + labels: Dict[str, str] = None + metadata_file: str = None + parameters: List[Dict[str, str]] = field(default_factory=list) + resource_id: str = None + resource_type: str = "intent" + training_phrases: Dict[str, Any] = field(default_factory=dict) + +@dataclass +class EntityType: + """Used to track current Flow Attributes.""" + + agent_id: str = None + auto_expansion: str = None + data: Dict[str, Any] = None + dir_path: str = None # Full Directory Path for this Entity Type + display_name: str = None # Entity Type Display Name + entities: Dict[str, Any] = field(default_factory=dict) # Map + excluded_phrases: Dict[str, Any] = field(default_factory=dict) # Map + fuzzy_extraction: bool = False + kind: str = None # The kind of Entity Type represented + resource_id: str = None + resource_type: str = "entity_type" + +@dataclass +class TestCase: + """Used to track current Test Case Attributes.""" + + associated_intent_data: Dict[str, Any] = None + agent_id: str = 
None + agent_path: str = None + conversation_turns: List[Any] = None + data: Dict[str, Any] = None + dir_path: str = None + display_name: str = None + has_invalid_intent: bool = False + intent_data: List[str] = None + qualified: bool = False + resource_id: str = None + resource_type: str = "test_case" + tags: List[str] = None + test_config: Dict[str, Any] = None + +@dataclass +class Webhook: + """Used to track current Webhook attributes.""" + + agent_id: str = None + agent_path: str = None + data: Dict[str, Any] = None + dir_path: str = None + display_name: str = None + resource_id: str = None + resource_type: str = "webhook" + service_type: str = None + timeout: int = 0 + +@dataclass +class AgentData: + """Used to track agent data for each section processed.""" + agent_id: str = None + graph: graph_class.Graph = None + flows: List[Dict[str, Any]] = field(default_factory=list) + pages: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + intents: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + entity_types: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + route_groups: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + test_cases: List[Dict[str, Any]] = field(default_factory=list) + webhooks: List[Dict[str, Any]] = field(default_factory=list) + flows_map: Dict[str, Any] = field(default_factory=dict) + flow_page_map: Dict[str, Any] = field(default_factory=dict) + intents_map: Dict[str, Any] = field(default_factory=dict) + entity_types_map: Dict[str, Any] = field(default_factory=dict) + route_groups_map: Dict[str, Any] = field(default_factory=dict) + webhooks_map: Dict[str, Any] = field(default_factory=dict) + + total_flows: int = 0 + total_pages: int = 0 + total_intents: int = 0 + total_training_phrases: int = 0 + total_entity_types: int = 0 + total_route_groups: int = 0 + total_test_cases: int = 0 + total_webhooks: int = 0 diff --git a/src/dfcx_scrapi/agent_extract/webhooks.py 
b/src/dfcx_scrapi/agent_extract/webhooks.py
new file mode 100644
index 00000000..38467080
--- /dev/null
+++ b/src/dfcx_scrapi/agent_extract/webhooks.py
@@ -0,0 +1,95 @@
+"""Webhook processing methods and functions."""
+
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+
+from dfcx_scrapi.agent_extract import common
+from dfcx_scrapi.agent_extract import types
+
+class Webhooks:
+    """Webhook linter methods and functions."""
+
+    def __init__(self):
+        self.common = common.Common()
+
+    @staticmethod
+    def build_webhook_path_list(agent_local_path: str):
+        """Builds a list of webhook file locations."""
+        root_dir = agent_local_path + "/webhooks"
+
+        webhook_paths = []
+
+        for webhook_file in os.listdir(root_dir):
+            webhook_file_path = f"{root_dir}/{webhook_file}"
+            webhook_paths.append(webhook_file_path)
+
+        return webhook_paths
+
+    @staticmethod
+    def get_service_type(webhook: types.Webhook) -> str:
+        """Get the type of Webhook Service that is configured."""
+        if "genericWebService" in webhook.data:
+            webhook.service_type = "Generic Web Service"
+
+        else:
+            webhook.service_type = "Other"
+
+        return webhook.service_type
+
+    def process_webhook(self, webhook: types.Webhook, stats: types.AgentData
+    ) -> types.AgentData:
+        """Process a single Webhook file."""
+
+        with open(webhook.dir_path, "r", encoding="UTF-8") as webhook_file:
+            webhook.data = json.load(webhook_file)
+            webhook.resource_id = webhook.data.get("name", None)
+ webhook.display_name = webhook.data.get("displayName", None) + webhook.service_type = self.get_service_type(webhook) + + timeout_dict = webhook.data.get("timeout", None) + if timeout_dict: + webhook.timeout = timeout_dict.get("seconds", None) + + webhook_file.close() + + full_webhook_id = f"{stats.agent_id}/webhooks/{webhook.resource_id}" + webhook.data["name"] = full_webhook_id + stats.webhooks.append(webhook.data) + stats.total_webhooks += 1 + + return stats + + def process_webhooks_directory(self, agent_local_path: str, + stats: types.AgentData) -> types.AgentData: + """Processing the top level Webhooks Dir in the JSON Package structure. + + The following files exist under the `webhooks` dir: + - .json + """ + # Create a list of all Webhook paths to iter through + webhook_paths = self.build_webhook_path_list(agent_local_path) + + for webhook_path in webhook_paths: + webhook = types.Webhook() + webhook.dir_path = webhook_path + + stats = self.process_webhook(webhook, stats) + + full_webhook_id = f"{stats.agent_id}/webhooks/{webhook.resource_id}" + stats.webhooks_map[webhook.display_name] = full_webhook_id + + return stats From dbbf9fefe0eb84a95772da73d72521e7b0891386 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 21 Aug 2023 21:02:58 -0500 Subject: [PATCH 106/151] feat: add test case parsing --- src/dfcx_scrapi/agent_extract/test_cases.py | 191 ++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 src/dfcx_scrapi/agent_extract/test_cases.py diff --git a/src/dfcx_scrapi/agent_extract/test_cases.py b/src/dfcx_scrapi/agent_extract/test_cases.py new file mode 100644 index 00000000..b06671d2 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/test_cases.py @@ -0,0 +1,191 @@ +"""Test Case processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
"""Test Case processing methods and functions."""

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from typing import Any, Dict, List

from dfcx_scrapi.agent_extract import common
from dfcx_scrapi.agent_extract import types


class TestCases:
    """Test Case processing methods and functions."""

    def __init__(self):
        self.common = common.Common()

    @staticmethod
    def build_test_case_path_list(agent_local_path: str) -> List[str]:
        """Build a list of files, each representing a test case."""
        root_dir = agent_local_path + "/testCases"

        test_case_paths = []

        for test_case in os.listdir(root_dir):
            # Only top-level .json files are test cases; skip anything else.
            if test_case.split(".")[-1] == "json":
                test_case_paths.append(f"{root_dir}/{test_case}")

        return test_case_paths

    @staticmethod
    def get_test_case_intent_phrase_pair(
        tc: types.TestCase) -> List[Dict[str, str]]:
        """Parse Test Case and return a list of intents in use.

        This method will produce a List of Dicts pairing each user input
        Training Phrase with the Intent the Test Case expects it to
        trigger. This information is used to compare the User Input
        training phrase with the actual training phrases that exist in the
        Intent resource.

        Each dict has the following keys:
            user_utterance: text of the user input for the turn
            intent: resource name of the expected triggered intent
            status: initialized to "valid"
            training_phrases: empty list, populated by gather_intent_tps()
        """
        intent_data = []

        if tc.conversation_turns:
            for turn in tc.conversation_turns:
                # Turns may omit either side; guard with .get to avoid
                # AttributeError on partially specified turns.
                user = turn.get("userInput", {})
                agent = turn.get("virtualAgentOutput", {})
                intent = agent.get("triggeredIntent", None)
                phrase = user.get("input", None)

                text = phrase.get("text", None) if phrase else None
                # TODO pmarlow: Add DTMF user inputs

                if text:
                    text = text["text"]

                if intent and text:
                    intent_data.append(
                        {
                            "user_utterance": text,
                            "intent": intent["name"],
                            "status": "valid",
                            "training_phrases": [],
                        }
                    )

        return intent_data

    @staticmethod
    def get_test_case_intent_data(agent_local_path: str):
        """Collect all Intent Files and Training Phrases for Test Case."""
        # TODO (pmarlow) consolidate into build_intent_paths
        intents_path = agent_local_path + "/intents"

        return [
            {"intent": intent_dir, "file_path": f"{intents_path}/{intent_dir}"}
            for intent_dir in os.listdir(intents_path)
        ]

    @staticmethod
    def flatten_tp_data(tp_data: Dict[str, Any]) -> List[str]:
        """Flatten the Training Phrase proto to a list of strings.

        Each training phrase is the lower-cased concatenation of its parts.
        """
        cleaned_tps = []

        for tp in tp_data["trainingPhrases"]:
            parts_list = [part["text"].lower() for part in tp["parts"]]
            cleaned_tps.append("".join(parts_list))

        return cleaned_tps

    def gather_intent_tps(self, tc: types.TestCase):
        """Collect all TPs associated with Intent data in Test Case."""
        # TODO Refactor
        tc.associated_intent_data = {}

        for i, pair in enumerate(tc.intent_data):
            intent_dir = tc.agent_path + "/intents/" + pair["intent"]

            try:
                if "trainingPhrases" in os.listdir(intent_dir):
                    training_phrases_path = intent_dir + "/trainingPhrases"

                    for lang_file in os.listdir(training_phrases_path):
                        lang_code_path = f"{training_phrases_path}/{lang_file}"

                        with open(
                            lang_code_path, "r", encoding="UTF-8"
                        ) as tp_file:
                            tp_data = json.load(tp_file)

                        cleaned_tps = self.flatten_tp_data(tp_data)

                        # TODO pmarlow: refactor to use tc.intent_data instead
                        # Need to create another level inside the Intent Dict
                        # that contains the language files as well.
                        tc.intent_data[i]["training_phrases"].extend(
                            cleaned_tps
                        )
                        tc.associated_intent_data[pair["intent"]] = cleaned_tps

            except FileNotFoundError:
                # The Intent referenced by the Test Case does not exist in
                # the agent package; flag it rather than failing the run.
                tc.intent_data[i]["status"] = "invalid_intent"
                tc.has_invalid_intent = True
                continue

        return tc

    def process_test_case(self, tc: types.TestCase, stats: types.AgentData):
        """Process a single Test Case file."""
        with open(tc.dir_path, "r", encoding="UTF-8") as tc_file:
            tc.data = json.load(tc_file)

        tc.resource_id = tc.data.get("name", None)
        tc.display_name = tc.data.get("displayName", None)
        tc.tags = tc.data.get("tags", None)
        tc.conversation_turns = tc.data.get(
            "testCaseConversationTurns", None
        )
        tc.test_config = tc.data.get("testConfig", None)

        # Rewrite the short resource name to a fully qualified ID before
        # accumulating the data.
        full_tc_id = f"{stats.agent_id}/testCases/{tc.resource_id}"
        tc.data["name"] = full_tc_id
        stats.test_cases.append(tc.data)

        return stats

    def process_test_cases_directory(
        self, agent_local_path: str, stats: types.AgentData):
        """Processing the test cases dir in the JSON package structure."""
        test_case_paths = self.build_test_case_path_list(agent_local_path)
        stats.total_test_cases = len(test_case_paths)

        for test_case_path in test_case_paths:
            tc = types.TestCase()
            tc.dir_path = test_case_path
            tc.agent_path = agent_local_path
            stats = self.process_test_case(tc, stats)

        return stats
src/dfcx_scrapi/agent_extract/pages.py | 2 +- src/dfcx_scrapi/agent_extract/routes.py | 45 +++---------------- 5 files changed, 23 insertions(+), 52 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/agents.py b/src/dfcx_scrapi/agent_extract/agents.py index 0188276d..49c1c0fe 100644 --- a/src/dfcx_scrapi/agent_extract/agents.py +++ b/src/dfcx_scrapi/agent_extract/agents.py @@ -17,13 +17,13 @@ import time import os from typing import Dict -import tempfile from dfcx_scrapi.core import agents from dfcx_scrapi.core import scrapi_base from dfcx_scrapi.agent_extract import flows from dfcx_scrapi.agent_extract import intents from dfcx_scrapi.agent_extract import entity_types +from dfcx_scrapi.agent_extract import test_cases from dfcx_scrapi.agent_extract import webhooks from dfcx_scrapi.agent_extract import gcs_utils from dfcx_scrapi.agent_extract import types @@ -51,12 +51,13 @@ def __init__( self.intents = intents.Intents() self.etypes = entity_types.EntityTypes() self.webhooks = webhooks.Webhooks() + self.tcs = test_cases.TestCases() def process_agent(self, agent_id: str, gcs_bucket_uri: str, environment_display_name: str = None): """Process the specified Agent for offline data gathering.""" - agent_local_path = 'tmp/agent' - lro = self._core_agents.export_agent( + agent_local_path = "tmp/agent" + _ = self._core_agents.export_agent( agent_id=agent_id,gcs_bucket_uri=gcs_bucket_uri, data_format="JSON", environment_display_name=environment_display_name) @@ -76,5 +77,6 @@ def process_agent(self, agent_id: str, gcs_bucket_uri: str, data = self.etypes.process_entity_types_directory( agent_local_path, data) data = self.webhooks.process_webhooks_directory(agent_local_path, data) + data = self.tcs.process_test_cases_directory(agent_local_path, data) return data diff --git a/src/dfcx_scrapi/agent_extract/entity_types.py b/src/dfcx_scrapi/agent_extract/entity_types.py index 084184ab..fa494edd 100644 --- a/src/dfcx_scrapi/agent_extract/entity_types.py +++ 
b/src/dfcx_scrapi/agent_extract/entity_types.py @@ -124,9 +124,10 @@ def process_language_codes( with open(ent_file_path, "r", encoding="UTF-8") as ent_file: data = json.load(ent_file) - data['name'] = f"{stats.agent_id}/entityTypes/{etype.resource_id}" - data['display_name'] = etype.display_name - data['kind'] = etype.kind + data["name"] = f"{stats.agent_id}/entityTypes/"\ + f"{etype.resource_id}" + data["display_name"] = etype.display_name + data["kind"] = etype.kind data["entities"] = data.get("entities", None) data = self.process_excluded_phrases(etype, lang_code, data) stats.entity_types[lang_code].append(data) diff --git a/src/dfcx_scrapi/agent_extract/intents.py b/src/dfcx_scrapi/agent_extract/intents.py index 5398fec3..be10aee3 100644 --- a/src/dfcx_scrapi/agent_extract/intents.py +++ b/src/dfcx_scrapi/agent_extract/intents.py @@ -110,11 +110,11 @@ def process_language_codes( with open(tp_file, "r", encoding="UTF-8") as tps: data = json.load(tps) - data['name'] = f"{stats.agent_id}/intents/{intent.resource_id}" - data['display_name'] = intent.display_name - data['labels'] = intent.labels - data['description'] = intent.description - data['parameters'] = intent.parameters + data["name"] = f"{stats.agent_id}/intents/{intent.resource_id}" + data["display_name"] = intent.display_name + data["labels"] = intent.labels + data["description"] = intent.description + data["parameters"] = intent.parameters stats.intents[lang_code].append(data) stats.total_training_phrases += len(data["trainingPhrases"]) @@ -133,7 +133,8 @@ def process_training_phrases( def process_intent(self, intent: types.Intent, stats: types.AgentData): """Process a single Intent directory and associated files.""" - intent.display_name = self.common.parse_filepath(intent.dir_path, "intent") + intent.display_name = self.common.parse_filepath( + intent.dir_path, "intent") self.process_intent_metadata(intent) stats = self.process_training_phrases(intent, stats) diff --git 
a/src/dfcx_scrapi/agent_extract/pages.py b/src/dfcx_scrapi/agent_extract/pages.py index 45b1f540..9ae23ff1 100644 --- a/src/dfcx_scrapi/agent_extract/pages.py +++ b/src/dfcx_scrapi/agent_extract/pages.py @@ -112,7 +112,7 @@ def process_page(self, page: types.Page, stats: types.AgentData): full_page_id = f"{full_flow_id}/pages/{page.resource_id}" stats.pages[page.flow.display_name].append(page.data) stats.flow_page_map[ - page.flow.display_name]['pages'][page.display_name] = full_page_id + page.flow.display_name]["pages"][page.display_name] = full_page_id return stats diff --git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py index 6ff69774..8a9a5a34 100644 --- a/src/dfcx_scrapi/agent_extract/routes.py +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -55,41 +55,6 @@ def check_for_webhook_event_handlers(route: types.Fulfillment): ): route.page.has_webhook_event_handler = True - def collect_transition_route_trigger(self, route): - """Inspect route and return all Intent/Condition info.""" - - trigger = [] - intent_name = None - - if "intent" in route.data: - trigger.append("intent") - intent_name = route.data.get("intent", None) - - if "condition" in route.data: - trigger.append("condition") - - if len(trigger) > 0: - trigger = "+".join(trigger) - - else: - return trigger - - def get_trigger_info(self, route): - """Extract trigger info from route based on primary key.""" - - if route.fulfillment_type == "event": - trigger = f"event : {route.data.get('event', None)}" - - if route.fulfillment_type == "reprompt_handler": - trigger = f"{route.parameter} : event : "\ - f"{route.data.get('event', None)}" - - if route.fulfillment_type == "transition_route": - intent_condition = self.collect_transition_route_trigger(route) - trigger = f"route : {intent_condition}" - - return trigger - def set_route_group_targets(self, page: types.Page): """Determine Route Targets for Route Group routes.""" current_page = page.display_name @@ -188,7 +153,6 @@ 
def process_reprompt_handlers( route.agent_id = fp.page.agent_id route.fulfillment_type = "reprompt_handler" route.parameter = fp.display_name - route.trigger = self.get_trigger_info(route) route = self.set_route_targets(route) path = route.data.get("triggerFulfillment", None) event = route.data.get("event", None) @@ -199,7 +163,8 @@ def process_reprompt_handlers( # Flag for Webhook Handler self.check_for_webhook(fp.page, path) - stats = self.process_fulfillment_type(stats, route, path, "messages") + stats = self.process_fulfillment_type( + stats, route, path, "messages") return stats @@ -224,7 +189,8 @@ def process_events(self, page: types.Page, stats: types.AgentData): # Flag for Webhook Handler self.check_for_webhook_event_handlers(route) - stats = self.process_fulfillment_type(stats, route, path, "messages") + stats = self.process_fulfillment_type( + stats, route, path, "messages") return stats @@ -251,7 +217,8 @@ def process_routes(self, page: types.Page, stats: types.AgentData): # Flag for Webhook Handler self.check_for_webhook(page, path) - stats = self.process_fulfillment_type(stats, route, path, "messages") + stats = self.process_fulfillment_type( + stats, route, path, "messages") # Preset Params processed here stats = self.process_fulfillment_type( From 3e4934ca23f7473a7a958b0fcf7797b34cf79b69 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 21 Aug 2023 22:24:59 -0500 Subject: [PATCH 108/151] feat: Implement graph structure --- src/dfcx_scrapi/agent_extract/agents.py | 2 + src/dfcx_scrapi/agent_extract/flows.py | 269 ++++++++++++------------ src/dfcx_scrapi/agent_extract/pages.py | 18 +- src/dfcx_scrapi/agent_extract/routes.py | 61 ++++-- 4 files changed, 190 insertions(+), 160 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/agents.py b/src/dfcx_scrapi/agent_extract/agents.py index 49c1c0fe..190c5946 100644 --- a/src/dfcx_scrapi/agent_extract/agents.py +++ b/src/dfcx_scrapi/agent_extract/agents.py @@ -20,6 +20,7 @@ from dfcx_scrapi.core 
import agents from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.agent_extract import graph from dfcx_scrapi.agent_extract import flows from dfcx_scrapi.agent_extract import intents from dfcx_scrapi.agent_extract import entity_types @@ -71,6 +72,7 @@ def process_agent(self, agent_id: str, gcs_bucket_uri: str, self.gcs.unzip(agent_file, agent_local_path) data = types.AgentData() + data.graph = graph.Graph() data.agent_id = agent_id data = self.flows.process_flows_directory(agent_local_path, data) data = self.intents.process_intents_directory(agent_local_path, data) diff --git a/src/dfcx_scrapi/agent_extract/flows.py b/src/dfcx_scrapi/agent_extract/flows.py index caf28f4e..d519a6dc 100644 --- a/src/dfcx_scrapi/agent_extract/flows.py +++ b/src/dfcx_scrapi/agent_extract/flows.py @@ -20,7 +20,6 @@ from typing import List from dfcx_scrapi.agent_extract import common -from dfcx_scrapi.agent_extract import graph from dfcx_scrapi.agent_extract import types from dfcx_scrapi.agent_extract import pages from dfcx_scrapi.agent_extract import routes @@ -83,132 +82,132 @@ def remove_flow_pages_from_set(input_set: set) -> set: return filtered_set - def find_unreachable_pages(self, flow: types.Flow): - """Find Unreachable Pages in the graph. - - An Unreachable Page is defined as: - - A Page which has no incoming edge when traversed from Start Page. - That is, it is unreachable in the graph by any practical means. - - A Page which is connected to a root unreachable page. That is, a - page that could have both incoming or outgoing routes, but due to - its connectedness to the root orphan page, is unreachable in the - graph. - - Here we will compute the symmetric difference of 2 sets: - - Active Pages (i.e. Pages that were reachable in the graph) - - Used Pages (i.e. Pages that were used by some Route) - - If an Unreachable Page has children that it routes to, those children - will appear in Used Pages, although they will ultimately be - unreachable. 
It's possible for an Unreachable Page to route back to an - Active Page in the graph. For these instances, we don't want to count - those pages as unreachable, because they are reachable via other - sections of the graph. - """ - filtered_set = flow.active_pages.symmetric_difference( - flow.graph.used_nodes - ) - filtered_set = self.remove_flow_pages_from_set(filtered_set) - flow.unreachable_pages.update(filtered_set) - - return flow - - def find_unused_pages(self, flow: types.Flow): - """Find Unused Pages in the graph. - - An Unused Page is defined as: - - A Page which has no incoming or outgoing edge AND - - A Page which exists in the Agent design time, but which is not - present anywhere in the graph, either visible or non-visible. - - Here we will compute the difference of 2 sets: - - All Pages (i.e. Pages that exist in the Agent Design Time) - - Used Pages (i.e. Pages that were used by some Route) - - The resulting set will consist of 2 types of Pages: - - Truly Unused Pages - - Unreachable Root Pages - - Unreachable Root Pages end up in the results due to the fact that no - other Active Page is pointing to them. We remove these from the - resulting set before presenting the Truly Unused Pages. - """ - - # Discard special pages as they are non-relevant for final outcome - for page in self.special_pages: - flow.all_pages.discard(page) - - prelim_unused = flow.all_pages.difference(flow.graph.used_nodes) - - # Filter out Unreachable Root Pages - filtered_set = set() - - for page in prelim_unused: - if page not in flow.graph.edges: - filtered_set.add(page) - else: - flow.unreachable_pages.add(page) - - flow.unused_pages = filtered_set - - return flow - - def recurse_edges( - self, edges: List, page: types.Page, dangling: set, visited: set - ): - """Recursive method searching graph edges for Active / Dangling Pages. - - A byproduct of searching for Dangling Pages in the graph is that we can - produce a set of Active Pages in the graph. 
These are pages that are - reachable when traversing from the Start Page. These can then be used - to determine Unreachable Pages in another method. - """ - if page in edges: - for inner_page in edges[page]: - if inner_page not in visited: - visited.add(inner_page) - dangling, visited = self.recurse_edges( - edges, inner_page, dangling, visited - ) - - else: - dangling.add(page) - - return dangling, visited - - def find_dangling_pages(self, flow: types.Flow): - """Find Dangling Pages in the graph. - - Dangling Page is defined as: - - Any page that exists in the graph that has no outgoing edge - Active Page is defined as: - - Any page that is reachable via an active route in the graph and can - be traced back to the Start Page. - - These pages can result in a conversational "dead end" which is - potentially unrecoverable. - A byproduct of searching for the dangling pages is locating all of the - "active" pages. These are the pages that are "visited" as we traverse - the graph. We'll also return Active Pages in this method since they - will be used for downstream tasks. - """ - - flow.dangling_pages, flow.active_pages = self.recurse_edges( - flow.graph.edges, - "Start Page", - flow.dangling_pages, - flow.active_pages, - ) - - # Clean up Special Pages - for page in self.special_pages: - flow.dangling_pages.discard(page) - - flow.dangling_pages = self.remove_flow_pages_from_set( - flow.dangling_pages - ) - - return flow + # def find_unreachable_pages(self, flow: types.Flow): + # """Find Unreachable Pages in the graph. + + # An Unreachable Page is defined as: + # - A Page which has no incoming edge when traversed from Start Page. + # That is, it is unreachable in the graph by any practical means. + # - A Page which is connected to a root unreachable page. That is, a + # page that could have both incoming or outgoing routes, but due to + # its connectedness to the root orphan page, is unreachable in the + # graph. 
+ + # Here we will compute the symmetric difference of 2 sets: + # - Active Pages (i.e. Pages that were reachable in the graph) + # - Used Pages (i.e. Pages that were used by some Route) + + # If an Unreachable Page has children that it routes to, those children + # will appear in Used Pages, although they will ultimately be + # unreachable. It's possible for an Unreachable Page to route back to an + # Active Page in the graph. For these instances, we don't want to count + # those pages as unreachable, because they are reachable via other + # sections of the graph. + # """ + # filtered_set = flow.active_pages.symmetric_difference( + # flow.graph.used_nodes + # ) + # filtered_set = self.remove_flow_pages_from_set(filtered_set) + # flow.unreachable_pages.update(filtered_set) + + # return flow + + # def find_unused_pages(self, flow: types.Flow): + # """Find Unused Pages in the graph. + + # An Unused Page is defined as: + # - A Page which has no incoming or outgoing edge AND + # - A Page which exists in the Agent design time, but which is not + # present anywhere in the graph, either visible or non-visible. + + # Here we will compute the difference of 2 sets: + # - All Pages (i.e. Pages that exist in the Agent Design Time) + # - Used Pages (i.e. Pages that were used by some Route) + + # The resulting set will consist of 2 types of Pages: + # - Truly Unused Pages + # - Unreachable Root Pages + + # Unreachable Root Pages end up in the results due to the fact that no + # other Active Page is pointing to them. We remove these from the + # resulting set before presenting the Truly Unused Pages. 
+ # """ + + # # Discard special pages as they are non-relevant for final outcome + # for page in self.special_pages: + # flow.all_pages.discard(page) + + # prelim_unused = flow.all_pages.difference(flow.graph.used_nodes) + + # # Filter out Unreachable Root Pages + # filtered_set = set() + + # for page in prelim_unused: + # if page not in flow.graph.edges: + # filtered_set.add(page) + # else: + # flow.unreachable_pages.add(page) + + # flow.unused_pages = filtered_set + + # return flow + + # def recurse_edges( + # self, edges: List, page: types.Page, dangling: set, visited: set + # ): + # """Recursive method searching graph edges for Active / Dangling Pages. + + # A byproduct of searching for Dangling Pages in the graph is that we can + # produce a set of Active Pages in the graph. These are pages that are + # reachable when traversing from the Start Page. These can then be used + # to determine Unreachable Pages in another method. + # """ + # if page in edges: + # for inner_page in edges[page]: + # if inner_page not in visited: + # visited.add(inner_page) + # dangling, visited = self.recurse_edges( + # edges, inner_page, dangling, visited + # ) + + # else: + # dangling.add(page) + + # return dangling, visited + + # def find_dangling_pages(self, flow: types.Flow): + # """Find Dangling Pages in the graph. + + # Dangling Page is defined as: + # - Any page that exists in the graph that has no outgoing edge + # Active Page is defined as: + # - Any page that is reachable via an active route in the graph and can + # be traced back to the Start Page. + + # These pages can result in a conversational "dead end" which is + # potentially unrecoverable. + # A byproduct of searching for the dangling pages is locating all of the + # "active" pages. These are the pages that are "visited" as we traverse + # the graph. We'll also return Active Pages in this method since they + # will be used for downstream tasks. 
+ # """ + + # flow.dangling_pages, flow.active_pages = self.recurse_edges( + # flow.graph.edges, + # "Start Page", + # flow.dangling_pages, + # flow.active_pages, + # ) + + # # Clean up Special Pages + # for page in self.special_pages: + # flow.dangling_pages.discard(page) + + # flow.dangling_pages = self.remove_flow_pages_from_set( + # flow.dangling_pages + # ) + + # return flow def process_start_page(self, flow: types.Flow, stats: types.AgentData): """Process a single Flow Path file.""" @@ -216,23 +215,16 @@ def process_start_page(self, flow: types.Flow, stats: types.AgentData): page = types.Page(flow=flow) page.display_name = "Start Page" - flow.graph.add_node(page.display_name) + stats.graph.add_node(page.display_name) page.data = json.load(flow_file) stats.flows.append(page.data) - # page.events = page.data.get("eventHandlers", None) - # page.routes = page.data.get("transitionRoutes", None) - # page.route_groups = page.data.get("transitionRouteGroups", None) - flow.resource_id = page.data.get("name", None) - # page.agent_id = flow.agent_id - # page.resource_id = "START_PAGE" - # flow.data[page.display_name] = page.resource_id - # # Order of processing is important - # stats = self.routes.process_routes(page, stats) - # stats = self.routes.process_events(page, stats) + # Order of processing is important + stats = self.routes.process_routes(page, stats) + stats = self.routes.process_events(page, stats) if page.route_groups: page = self.routes.set_route_group_targets(page) @@ -287,7 +279,6 @@ def process_flows_directory( for flow_path in flow_paths: flow = types.Flow() - flow.graph = graph.Graph() flow.dir_path = flow_path stats = self.process_flow(flow, stats) diff --git a/src/dfcx_scrapi/agent_extract/pages.py b/src/dfcx_scrapi/agent_extract/pages.py index 9ae23ff1..f76d6e37 100644 --- a/src/dfcx_scrapi/agent_extract/pages.py +++ b/src/dfcx_scrapi/agent_extract/pages.py @@ -86,25 +86,27 @@ def process_page(self, page: types.Page, stats: types.AgentData): 
page.display_name = self.common.parse_filepath(page.page_file, "page") page.display_name = self.common.clean_display_name(page.display_name) - page.flow.graph.add_node(page.display_name) + stats.graph.add_node(page.display_name) page.flow.all_pages.add(page.display_name) with open(page.page_file, "r", encoding="UTF-8") as page_file: page.data = json.load(page_file) - # page.entry = page.data.get("entryFulfillment", None) - # page.events = page.data.get("eventHandlers", None) + page.entry = page.data.get("entryFulfillment", None) + page.events = page.data.get("eventHandlers", None) page.form = page.data.get("form", None) - # page.routes = page.data.get("transitionRoutes", None) - # page.route_groups = page.data.get("transitionRouteGroups", None) - + page.routes = page.data.get("transitionRoutes", None) + page.route_groups = page.data.get("transitionRouteGroups", None) page.resource_id = page.data.get("name", None) - # page.flow.data[page.display_name] = page.resource_id + # Order of linting is important here + stats = self.routes.process_entry(page, stats) + stats = self.routes.process_routes(page, stats) + stats = self.routes.process_events(page, stats) stats = self.process_form(page, stats) if page.route_groups: - page = self.routes.set_route_group_targets(page) + page, stats = self.routes.set_route_group_targets(page, stats) page_file.close() diff --git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py index 8a9a5a34..b76905e5 100644 --- a/src/dfcx_scrapi/agent_extract/routes.py +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -55,17 +55,51 @@ def check_for_webhook_event_handlers(route: types.Fulfillment): ): route.page.has_webhook_event_handler = True - def set_route_group_targets(self, page: types.Page): + def collect_transition_route_trigger(self, route): + """Inspect route and return all Intent/Condition info.""" + + trigger = [] + + if "intent" in route.data: + trigger.append("intent") + + if "condition" in route.data: + 
trigger.append("condition") + + if len(trigger) > 0: + trigger = "+".join(trigger) + + else: + return trigger + + def get_trigger_info(self, route): + """Extract trigger info from route based on primary key.""" + + if route.fulfillment_type == "event": + trigger = f"event : {route.data.get('event', None)}" + + if route.fulfillment_type == "reprompt_handler": + trigger = f"{route.parameter} : event : "\ + f"{route.data.get('event', None)}" + + if route.fulfillment_type == "transition_route": + intent_condition = self.collect_transition_route_trigger(route) + trigger = f"route : {intent_condition}" + + return trigger + + def set_route_group_targets(self, page: types.Page, stats: types.AgentData): """Determine Route Targets for Route Group routes.""" current_page = page.display_name for route_group in page.route_groups: - page.flow.graph.add_edge(current_page, route_group) - page.flow.graph.add_used_node(route_group) + stats.graph.add_edge(current_page, route_group) + stats.graph.add_used_node(route_group) - return page + return page, stats - def set_route_targets(self, route: types.Fulfillment): + def set_route_targets( + self, route: types.Fulfillment, stats: types.AgentData): """Determine the Route Targets for the specified route. 
Primary function is to build out the graph structure for the @@ -79,16 +113,16 @@ def set_route_targets(self, route: types.Fulfillment): route.target_page = route.data.get("targetPage", None) if route.target_page: - route.page.flow.graph.add_edge(current_page, route.target_page) - route.page.flow.graph.add_used_node(route.target_page) + stats.graph.add_edge(current_page, route.target_page) + stats.graph.add_used_node(route.target_page) if route.target_flow: - route.page.flow.graph.add_edge( + stats.graph.add_edge( current_page, f"FLOW: {route.target_flow}" ) - route.page.flow.graph.add_used_node(f"FLOW: {route.target_flow}") + stats.graph.add_used_node(f"FLOW: {route.target_flow}") - return route + return route, stats def update_route_parameters( self, route: types.Fulfillment, item: Dict[str, str]): @@ -153,7 +187,8 @@ def process_reprompt_handlers( route.agent_id = fp.page.agent_id route.fulfillment_type = "reprompt_handler" route.parameter = fp.display_name - route = self.set_route_targets(route) + route.trigger = self.get_trigger_info(route) + route, stats = self.set_route_targets(route, stats) path = route.data.get("triggerFulfillment", None) event = route.data.get("event", None) @@ -179,7 +214,7 @@ def process_events(self, page: types.Page, stats: types.AgentData): route.agent_id = page.agent_id route.fulfillment_type = "event" route.trigger = self.get_trigger_info(route) - route = self.set_route_targets(route) + route, stats = self.set_route_targets(route, stats) path = route.data.get("triggerFulfillment", None) event = route.data.get("event", None) @@ -207,7 +242,7 @@ def process_routes(self, page: types.Page, stats: types.AgentData): route.agent_id = page.agent_id route.fulfillment_type = "transition_route" route.trigger = self.get_trigger_info(route) - route = self.set_route_targets(route) + route, stats = self.set_route_targets(route, stats) path = route.data.get(tf_key, None) From 5a66d8b480d7c597efec49c250291eb3819fd855 Mon Sep 17 00:00:00 2001 From: 
Patrick Marlow Date: Wed, 23 Aug 2023 20:51:17 -0500 Subject: [PATCH 109/151] feat: refactor graph recursion into extract class for finding graph discrepancies --- src/dfcx_scrapi/agent_extract/flows.py | 276 ++++++++++---------- src/dfcx_scrapi/agent_extract/pages.py | 1 + src/dfcx_scrapi/agent_extract/routes.py | 10 + src/dfcx_scrapi/tools/agent_checker_util.py | 203 ++------------ 4 files changed, 177 insertions(+), 313 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/flows.py b/src/dfcx_scrapi/agent_extract/flows.py index d519a6dc..a49cb13d 100644 --- a/src/dfcx_scrapi/agent_extract/flows.py +++ b/src/dfcx_scrapi/agent_extract/flows.py @@ -19,6 +19,7 @@ from typing import List +from dfcx_scrapi.agent_extract import graph from dfcx_scrapi.agent_extract import common from dfcx_scrapi.agent_extract import types from dfcx_scrapi.agent_extract import pages @@ -82,142 +83,151 @@ def remove_flow_pages_from_set(input_set: set) -> set: return filtered_set - # def find_unreachable_pages(self, flow: types.Flow): - # """Find Unreachable Pages in the graph. - - # An Unreachable Page is defined as: - # - A Page which has no incoming edge when traversed from Start Page. - # That is, it is unreachable in the graph by any practical means. - # - A Page which is connected to a root unreachable page. That is, a - # page that could have both incoming or outgoing routes, but due to - # its connectedness to the root orphan page, is unreachable in the - # graph. - - # Here we will compute the symmetric difference of 2 sets: - # - Active Pages (i.e. Pages that were reachable in the graph) - # - Used Pages (i.e. Pages that were used by some Route) - - # If an Unreachable Page has children that it routes to, those children - # will appear in Used Pages, although they will ultimately be - # unreachable. It's possible for an Unreachable Page to route back to an - # Active Page in the graph. 
For these instances, we don't want to count - # those pages as unreachable, because they are reachable via other - # sections of the graph. - # """ - # filtered_set = flow.active_pages.symmetric_difference( - # flow.graph.used_nodes - # ) - # filtered_set = self.remove_flow_pages_from_set(filtered_set) - # flow.unreachable_pages.update(filtered_set) - - # return flow - - # def find_unused_pages(self, flow: types.Flow): - # """Find Unused Pages in the graph. - - # An Unused Page is defined as: - # - A Page which has no incoming or outgoing edge AND - # - A Page which exists in the Agent design time, but which is not - # present anywhere in the graph, either visible or non-visible. - - # Here we will compute the difference of 2 sets: - # - All Pages (i.e. Pages that exist in the Agent Design Time) - # - Used Pages (i.e. Pages that were used by some Route) - - # The resulting set will consist of 2 types of Pages: - # - Truly Unused Pages - # - Unreachable Root Pages - - # Unreachable Root Pages end up in the results due to the fact that no - # other Active Page is pointing to them. We remove these from the - # resulting set before presenting the Truly Unused Pages. - # """ - - # # Discard special pages as they are non-relevant for final outcome - # for page in self.special_pages: - # flow.all_pages.discard(page) - - # prelim_unused = flow.all_pages.difference(flow.graph.used_nodes) - - # # Filter out Unreachable Root Pages - # filtered_set = set() - - # for page in prelim_unused: - # if page not in flow.graph.edges: - # filtered_set.add(page) - # else: - # flow.unreachable_pages.add(page) - - # flow.unused_pages = filtered_set - - # return flow - - # def recurse_edges( - # self, edges: List, page: types.Page, dangling: set, visited: set - # ): - # """Recursive method searching graph edges for Active / Dangling Pages. - - # A byproduct of searching for Dangling Pages in the graph is that we can - # produce a set of Active Pages in the graph. 
These are pages that are - # reachable when traversing from the Start Page. These can then be used - # to determine Unreachable Pages in another method. - # """ - # if page in edges: - # for inner_page in edges[page]: - # if inner_page not in visited: - # visited.add(inner_page) - # dangling, visited = self.recurse_edges( - # edges, inner_page, dangling, visited - # ) - - # else: - # dangling.add(page) - - # return dangling, visited - - # def find_dangling_pages(self, flow: types.Flow): - # """Find Dangling Pages in the graph. - - # Dangling Page is defined as: - # - Any page that exists in the graph that has no outgoing edge - # Active Page is defined as: - # - Any page that is reachable via an active route in the graph and can - # be traced back to the Start Page. - - # These pages can result in a conversational "dead end" which is - # potentially unrecoverable. - # A byproduct of searching for the dangling pages is locating all of the - # "active" pages. These are the pages that are "visited" as we traverse - # the graph. We'll also return Active Pages in this method since they - # will be used for downstream tasks. - # """ - - # flow.dangling_pages, flow.active_pages = self.recurse_edges( - # flow.graph.edges, - # "Start Page", - # flow.dangling_pages, - # flow.active_pages, - # ) - - # # Clean up Special Pages - # for page in self.special_pages: - # flow.dangling_pages.discard(page) - - # flow.dangling_pages = self.remove_flow_pages_from_set( - # flow.dangling_pages - # ) - - # return flow + def find_unreachable_pages(self, flow: types.Flow): + """Find Unreachable Pages in the graph. + + An Unreachable Page is defined as: + - A Page which has no incoming edge when traversed from Start Page. + That is, it is unreachable in the graph by any practical means. + - A Page which is connected to a root unreachable page. 
That is, a + page that could have both incoming or outgoing routes, but due to + its connectedness to the root orphan page, is unreachable in the + graph. + + Here we will compute the symmetric difference of 2 sets: + - Active Pages (i.e. Pages that were reachable in the graph) + - Used Pages (i.e. Pages that were used by some Route) + + If an Unreachable Page has children that it routes to, those children + will appear in Used Pages, although they will ultimately be + unreachable. It's possible for an Unreachable Page to route back to an + Active Page in the graph. For these instances, we don't want to count + those pages as unreachable, because they are reachable via other + sections of the graph. + """ + filtered_set = flow.active_pages.symmetric_difference( + flow.graph.used_nodes + ) + filtered_set = self.remove_flow_pages_from_set(filtered_set) + flow.unreachable_pages.update(filtered_set) + + return flow + + def find_unused_pages(self, flow: types.Flow): + """Find Unused Pages in the graph. + + An Unused Page is defined as: + - A Page which has no incoming or outgoing edge AND + - A Page which exists in the Agent design time, but which is not + present anywhere in the graph, either visible or non-visible. + + Here we will compute the difference of 2 sets: + - All Pages (i.e. Pages that exist in the Agent Design Time) + - Used Pages (i.e. Pages that were used by some Route) + + The resulting set will consist of 2 types of Pages: + - Truly Unused Pages + - Unreachable Root Pages + + Unreachable Root Pages end up in the results due to the fact that no + other Active Page is pointing to them. We remove these from the + resulting set before presenting the Truly Unused Pages. 
+ """ + + # Discard special pages as they are non-relevant for final outcome + for page in self.special_pages: + flow.all_pages.discard(page) + + prelim_unused = flow.all_pages.difference(flow.graph.used_nodes) + + # Filter out Unreachable Root Pages + filtered_set = set() + + for page in prelim_unused: + if page not in flow.graph.edges: + filtered_set.add(page) + else: + flow.unreachable_pages.add(page) + + flow.unused_pages = filtered_set + + return flow + + def recurse_edges( + self, edges: List, page: types.Page, dangling: set, visited: set + ): + """Recursive method searching graph edges for Active / Dangling Pages. + + A byproduct of searching for Dangling Pages in the graph is that we can + produce a set of Active Pages in the graph. These are pages that are + reachable when traversing from the Start Page. These can then be used + to determine Unreachable Pages in another method. + """ + # For Flow Start Pages, we prepend the Flow name for later + # identification. For this section, we'll need to strip it off to + # compare with the other sets. + if page in edges: + for inner_page in edges[page]: + if inner_page not in visited: + visited.add(inner_page) + dangling, visited = self.recurse_edges( + edges, inner_page, dangling, visited + ) + + else: + dangling.add(page) + + return dangling, visited + + def find_dangling_pages(self, flow: types.Flow): + """Find Dangling Pages in the graph. + + Dangling Page is defined as: + - Any page that exists in the graph that has no outgoing edge + Active Page is defined as: + - Any page that is reachable via an active route in the graph and can + be traced back to the Start Page. + + These pages can result in a conversational "dead end" which is + potentially unrecoverable. + A byproduct of searching for the dangling pages is locating all of the + "active" pages. These are the pages that are "visited" as we traverse + the graph. We'll also return Active Pages in this method since they + will be used for downstream tasks. 
+ """ + + flow.dangling_pages, flow.active_pages = self.recurse_edges( + flow.graph.edges, + f"{flow.display_name}: Start Page", + flow.dangling_pages, + flow.active_pages, + ) + + # Clean up Special Pages + for page in self.special_pages: + flow.dangling_pages.discard(page) + + flow.dangling_pages = self.remove_flow_pages_from_set( + flow.dangling_pages + ) + + return flow def process_start_page(self, flow: types.Flow, stats: types.AgentData): """Process a single Flow Path file.""" with open(flow.start_page_file, "r", encoding="UTF-8") as flow_file: page = types.Page(flow=flow) - page.display_name = "Start Page" + page.display_name = f"{flow.display_name}: Start Page" + # We keep track of an instance specific Flow graph for the current + # Flow, and then a main Graph for the entire agent. + flow.graph.add_node(page.display_name) stats.graph.add_node(page.display_name) page.data = json.load(flow_file) + page.events = page.data.get("eventHandlers", None) + page.routes = page.data.get("transitionRoutes", None) + page.route_groups = page.data.get("transitionRouteGroups", None) stats.flows.append(page.data) flow.resource_id = page.data.get("name", None) @@ -227,7 +237,7 @@ def process_start_page(self, flow: types.Flow, stats: types.AgentData): stats = self.routes.process_events(page, stats) if page.route_groups: - page = self.routes.set_route_group_targets(page) + page, stats = self.routes.set_route_group_targets(page, stats) flow_file.close() @@ -253,9 +263,12 @@ def process_flow(self, flow: types.Flow, stats: types.AgentData): stats = self.rgs.process_route_groups_directory(flow, stats) # Order of Find Operations is important here! 
- # flow = self.find_unused_pages(flow) - # flow = self.find_dangling_pages(flow) - # flow = self.find_unreachable_pages(flow) + flow = self.find_unused_pages(flow) + flow = self.find_dangling_pages(flow) + flow = self.find_unreachable_pages(flow) + + stats.unused_pages[flow.display_name] = flow.unused_pages + stats.unreachable_pages[flow.display_name] = flow.unreachable_pages return stats @@ -279,6 +292,7 @@ def process_flows_directory( for flow_path in flow_paths: flow = types.Flow() + flow.graph = graph.Graph() flow.dir_path = flow_path stats = self.process_flow(flow, stats) diff --git a/src/dfcx_scrapi/agent_extract/pages.py b/src/dfcx_scrapi/agent_extract/pages.py index f76d6e37..27616cbe 100644 --- a/src/dfcx_scrapi/agent_extract/pages.py +++ b/src/dfcx_scrapi/agent_extract/pages.py @@ -87,6 +87,7 @@ def process_page(self, page: types.Page, stats: types.AgentData): page.display_name = self.common.clean_display_name(page.display_name) stats.graph.add_node(page.display_name) + page.flow.graph.add_node(page.display_name) page.flow.all_pages.add(page.display_name) diff --git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py index b76905e5..03b0b309 100644 --- a/src/dfcx_scrapi/agent_extract/routes.py +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -93,6 +93,9 @@ def set_route_group_targets(self, page: types.Page, stats: types.AgentData): current_page = page.display_name for route_group in page.route_groups: + page.flow.graph.add_edge(current_page, route_group) + page.flow.graph.add_used_node(route_group) + stats.graph.add_edge(current_page, route_group) stats.graph.add_used_node(route_group) @@ -113,10 +116,17 @@ def set_route_targets( route.target_page = route.data.get("targetPage", None) if route.target_page: + route.page.flow.graph.add_edge(current_page, route.target_page) + route.page.flow.graph.add_used_node(route.target_page) + stats.graph.add_edge(current_page, route.target_page) 
stats.graph.add_used_node(route.target_page) if route.target_flow: + route.page.flow.graph.add_edge( + current_page, f"FLOW: {route.target_flow}") + route.page.flow.graph.add_used_node(f"FLOW: {route.target_flow}") + stats.graph.add_edge( current_page, f"FLOW: {route.target_flow}" ) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 841c7e8e..78c03501 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -22,20 +22,16 @@ import pandas as pd from collections import defaultdict -import google.cloud.dialogflowcx_v3beta1.types as dfcx_types +from google.cloud.dialogflowcx_v3beta1 import types -from dfcx_scrapi.core.scrapi_base import ScrapiBase -from dfcx_scrapi.core.intents import Intents -from dfcx_scrapi.core.entity_types import EntityTypes -from dfcx_scrapi.core.flows import Flows -from dfcx_scrapi.core.pages import Pages -from dfcx_scrapi.core.webhooks import Webhooks -from dfcx_scrapi.core.transition_route_groups import TransitionRouteGroups +from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.agent_extract import agents +from dfcx_scrapi.agent_extract import types as etypes # Type aliases -DFCXFlow = dfcx_types.flow.Flow -DFCXPage = dfcx_types.page.Page -DFCXRoute = dfcx_types.page.TransitionRoute +DFCXFlow = types.flow.Flow +DFCXPage = types.page.Page +DFCXRoute = types.page.TransitionRoute # logging config logging.basicConfig( @@ -44,51 +40,18 @@ datefmt="%Y-%m-%d %H:%M:%S", ) -# TODO: Methods to implement: -# - Run test cases and store results, and give a report -# - Need to include a reference agent for this to give useful info -# about new failing test cases -# - Get condensed changelog compared to a reference -# - Ideally include test case changes, to include info that the CX UI -# can't provide -# - Find unreachable/unused pages, intents, route groups, and maybe routes -# - Finding unreachable routes is hard, but the other problems have 
-# already been figured out -# - Find invalid test cases -# - Test cases referencing pages or intents that don't exist, -# for example -# - Check true routes -# - Pages with only conditional routes, and no intents or parameter -# filling, should have the last route be "true" to prevent getting -# stuck on the page -# - Check events -# - Pages with user input should have a no-input-default and -# no-match-default event handler. -# - Not sure if this applies to all agents in the same way -# - Check infinite loops -# - Not possible to solve in general because of webhooks, -# but can find likely candidates -# - Probably other things - -class AgentCheckerUtil(ScrapiBase): +class AgentCheckerUtil(scrapi_base.ScrapiBase): """Utility class for checking DFCX Agents.""" def __init__( self, agent_id: str, + gcs_bucket_uri: str, creds_path: str = None, creds_dict: Dict = None, creds=None, scope=False, - delay: float = 1.0 ): - """ - Args: - agent_id (required): The agent ID - delay (optional): The time in seconds to wait between CX API calls, - if you need to limit the rate. The number of API calls used in this - initialization is 2*(number of flows) + 2. 
- """ super().__init__( creds_path=creds_path, creds_dict=creds_dict, @@ -97,80 +60,15 @@ def __init__( ) self.agent_id = agent_id - - self._intents = Intents(creds=self.creds, agent_id=self.agent_id) - self._entities = EntityTypes(creds=self.creds, agent_id=self.agent_id) - self._flows = Flows(creds=self.creds, agent_id=self.agent_id) - self._pages = Pages(creds=self.creds) - self._webhooks = Webhooks(creds=self.creds, agent_id=self.agent_id) - self._route_groups = TransitionRouteGroups( - creds=self.creds, agent_id=self.agent_id - ) - - # Intent data (1 API call) - self._intent_data = self._intents.list_intents(agent_id=self.agent_id) - # Intents map (0 API calls) - self._intents_map = { - intent.name: intent.display_name for intent in self._intent_data - } - - # Flow data (1 API call) - self._flow_data = self._get_all_flow_data(delay) - # Flows maps (0 API calls) - self._flows_map = { - flow.name: flow.display_name for flow in self._flow_data.values() - } - self._flows_map_rev = { - flow.display_name: flow.name for flow in self._flow_data.values() - } - - # Page data (len(flows) API calls) - self._page_data = self._get_all_page_data(delay) - - # Route group data (len(flows) API calls) - self._route_group_data = self._get_all_route_group_data(delay) - - # Pages and route groups maps (0 API calls) - self._pages_map = {} - self._pages_map_rev = {} - self._route_groups_map = {} - for fid in self._flows_map.keys(): - self._pages_map[fid] = { - page.name: page.display_name - for page in self._page_data[fid].values() - } - self._pages_map_rev[fid] = { - page.display_name: page.name - for page in self._page_data[fid].values() - } - self._route_groups_map[fid] = { - rg.name: rg.display_name - for rg in self._route_group_data[fid].values() - } - # Total API calls: 2*len(flows) + 2 - - def _get_all_flow_data(self, delay): - flow_list = self._flows.list_flows(self.agent_id) - time.sleep(delay) - return {flow.name: flow for flow in flow_list} - - def _get_all_page_data(self, 
delay): - page_data = {} - for flow_id in self._flows_map.keys(): - page_list = self._pages.list_pages(flow_id=flow_id) - page_data[flow_id] = {page.name: page for page in page_list} - time.sleep(delay) - return page_data - - def _get_all_route_group_data(self, delay): - route_group_data = {} - for flow_id in self._flows_map.keys(): - group_list = self._route_groups.list_transition_route_groups( - flow_id=flow_id - ) - route_group_data[flow_id] = {rg.name: rg for rg in group_list} - time.sleep(delay) - return route_group_data + self.extract = agents.Agents(agent_id) + self.data = self.extract.process_agent(agent_id, gcs_bucket_uri) + self.special_pages = [ + "End Session", + "End Flow", + "Start Page", + "Current Page", + "Previous Page", + ] def _get_intent_parameters(self, intent_name): """Gets the parameters for a particular intent, by display name""" @@ -213,9 +111,6 @@ def _get_page( raise KeyError(f"Page not found: {page_name}") return self._page_data[flow_id][page_id] - # Changelogs - - # Reachable and unreachable pages def _continue_page_recursion( self, @@ -591,37 +486,6 @@ def find_reachable_pages( self._find_reachable_pages_rec(page_data, params, is_initial=True) return reachable - def find_unreachable_pages( - self, - flow_name: str, - include_groups: bool = True, - verbose: bool = False, - ) -> List[str]: - """Finds all pages which are unreachable by transition routes, - starting from the start page of a given flow. Either flow_id or - flow_name must be used. 
- - Args: - flow_id: The ID of the flow to find unreachable pages for - flow_name: The display name of the flow to find unreachable pages for - include_groups: (Optional) If true, intents from transition route - groups will be included, but only if they are actually referenced - on some page - verbose: (Optional) If true, print debug information about - route traversal - - Returns: - The list of unreachable pages in this flow - """ - flow_id = self._flows_map_rev.get(flow_name, None) - if not flow_id: - raise KeyError(f"Flow not found: {flow_name}") - - reachable = self.find_reachable_pages( - flow_name, include_groups=include_groups, verbose=verbose - ) - return list(set(self._pages_map[flow_id].values()) - set(reachable)) - def find_all_reachable_pages( self, include_groups: bool = True, @@ -650,34 +514,6 @@ def find_all_reachable_pages( page_names.extend(reachable) return pd.DataFrame({"flow_name": flow_names, "page_name": page_names}) - def find_all_unreachable_pages( - self, - include_groups: bool = True, - verbose: bool = False, - ): - """Gets a dataframe of all unreachable pages in this agent - - Args: - include_groups: whether or not to consider route group routes - as being reachable. Defaults to True. - verbose: whether to display debug info in the agent structure - traversal. Defaults to False. 
- - Returns: - A dataframe with columns flow_name and page_name - """ - flow_names = [] - page_names = [] - for flow_name in self._flows_map_rev: - unreachable = self.find_unreachable_pages( - flow_name=flow_name, - include_groups=include_groups, - verbose=verbose - ) - flow_names.extend([flow_name for _ in unreachable]) - page_names.extend(unreachable) - return pd.DataFrame({"flow_name": flow_names, "page_name": page_names}) - def _get_intents_from_routes( self, transition_list: List[DFCXRoute], @@ -811,6 +647,9 @@ def find_all_reachable_intents(self) -> pd.DataFrame: "flows": intents.values() }) + # TODO: Break this into 2 methods + # get_unused_intents() // i.e. intents not in use in the agent + # get_unreachable_intents // i.e. intents that overlap with unreachable pages def find_all_unreachable_intents(self) -> List[str]: """Finds all unreachable intents, either because they are on unreachable pages or they are unused in the agent. Note that From 8d2631688dc829ff9705fdf3efd373d9ff57b956 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Wed, 23 Aug 2023 20:51:40 -0500 Subject: [PATCH 110/151] fix: re sort AgentData class; add new fields and types --- src/dfcx_scrapi/agent_extract/types.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/types.py b/src/dfcx_scrapi/agent_extract/types.py index 178bbbb3..fd7ee19a 100644 --- a/src/dfcx_scrapi/agent_extract/types.py +++ b/src/dfcx_scrapi/agent_extract/types.py @@ -181,20 +181,25 @@ class Webhook: @dataclass class AgentData: """Used to track agent data for each section processed.""" + active_intents: Dict[str, set] = field(default_factory=dict) + active_pages: Dict[str, set] = field(default_factory=dict) agent_id: str = None - graph: graph_class.Graph = None + entity_types: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + entity_types_map: Dict[str, Any] = field(default_factory=dict) + flow_page_map: Dict[str, Any] = 
field(default_factory=dict) flows: List[Dict[str, Any]] = field(default_factory=list) - pages: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + flows_map: Dict[str, Any] = field(default_factory=dict) + graph: graph_class.Graph = None intents: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) - entity_types: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + intents_map: Dict[str, Any] = field(default_factory=dict) + pages: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) route_groups: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + route_groups_map: Dict[str, Any] = field(default_factory=dict) test_cases: List[Dict[str, Any]] = field(default_factory=list) + unreachable_intents: set = field(default_factory=set) + unreachable_pages: Dict[str, set] = field(default_factory=dict) + unused_pages: Dict[str, set] = field(default_factory=dict) webhooks: List[Dict[str, Any]] = field(default_factory=list) - flows_map: Dict[str, Any] = field(default_factory=dict) - flow_page_map: Dict[str, Any] = field(default_factory=dict) - intents_map: Dict[str, Any] = field(default_factory=dict) - entity_types_map: Dict[str, Any] = field(default_factory=dict) - route_groups_map: Dict[str, Any] = field(default_factory=dict) webhooks_map: Dict[str, Any] = field(default_factory=dict) total_flows: int = 0 From cc75794ccccde18c793d4b4f0c4c6f9740053dad Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Wed, 23 Aug 2023 21:09:01 -0500 Subject: [PATCH 111/151] fix: lint fixes --- src/dfcx_scrapi/agent_extract/flows.py | 1 + src/dfcx_scrapi/agent_extract/routes.py | 3 +-- src/dfcx_scrapi/core/agents.py | 6 +++--- src/dfcx_scrapi/core/flows.py | 2 +- src/dfcx_scrapi/tools/agent_checker_util.py | 6 ++---- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/flows.py b/src/dfcx_scrapi/agent_extract/flows.py index a49cb13d..ab088c1b 100644 --- a/src/dfcx_scrapi/agent_extract/flows.py +++ 
b/src/dfcx_scrapi/agent_extract/flows.py @@ -267,6 +267,7 @@ def process_flow(self, flow: types.Flow, stats: types.AgentData): flow = self.find_dangling_pages(flow) flow = self.find_unreachable_pages(flow) + stats.active_pages[flow.display_name] = flow.active_pages stats.unused_pages[flow.display_name] = flow.unused_pages stats.unreachable_pages[flow.display_name] = flow.unreachable_pages diff --git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py index 03b0b309..5eaa5cce 100644 --- a/src/dfcx_scrapi/agent_extract/routes.py +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -69,8 +69,7 @@ def collect_transition_route_trigger(self, route): if len(trigger) > 0: trigger = "+".join(trigger) - else: - return trigger + return trigger def get_trigger_info(self, route): """Extract trigger info from route based on primary key.""" diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index bdfdf2b1..7e441528 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -368,7 +368,7 @@ def export_agent( data_format: str = "BLOB", git_branch: str = None, git_commit_message: str = None, - include_bigquery_export_settings: bool = False + include_bq_export_settings: bool = False ) -> str: """Exports the specified CX agent to Google Cloud Storage bucket. 
@@ -399,9 +399,9 @@ def export_agent( request = types.agent.ExportAgentRequest() request.name = agent_id request.agent_uri = gcs_bucket_uri - request.include_bigquery_export_settings = include_bigquery_export_settings + request.include_bigquery_export_settings = include_bq_export_settings - if data_format == "JSON" or "ZIP" or "JSON_PACKAGE": + if data_format in ["JSON", "ZIP", "JSON_PACKAGE"]: request.data_format = json_format else: request.data_format = blob_format diff --git a/src/dfcx_scrapi/core/flows.py b/src/dfcx_scrapi/core/flows.py index 08d7967d..da236136 100644 --- a/src/dfcx_scrapi/core/flows.py +++ b/src/dfcx_scrapi/core/flows.py @@ -159,7 +159,7 @@ def get_flow_page_map( for flow in flows_map: pages_map = self.pages.get_pages_map( flows_map[flow], reverse=True) - flow_page_map[flow] = {'id': flows_map[flow], 'pages': pages_map} + flow_page_map[flow] = {"id": flows_map[flow], "pages": pages_map} time.sleep(rate_limit) return flow_page_map diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 78c03501..f326f29e 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -16,17 +16,14 @@ from __future__ import annotations -import time import logging from typing import Dict, List, Optional, Union import pandas as pd from collections import defaultdict from google.cloud.dialogflowcx_v3beta1 import types - from dfcx_scrapi.core import scrapi_base from dfcx_scrapi.agent_extract import agents -from dfcx_scrapi.agent_extract import types as etypes # Type aliases DFCXFlow = types.flow.Flow @@ -649,7 +646,8 @@ def find_all_reachable_intents(self) -> pd.DataFrame: # TODO: Break this into 2 methods # get_unused_intents() // i.e. intents not in use in the agent - # get_unreachable_intents // i.e. intents that overlap with unreachable pages + # get_unreachable_intents // i.e. 
intents that overlap with unreachable + # pages def find_all_unreachable_intents(self) -> List[str]: """Finds all unreachable intents, either because they are on unreachable pages or they are unused in the agent. Note that From 63e135620d8f943056ca7de019c2c22dfc4a98f9 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Wed, 23 Aug 2023 21:09:40 -0500 Subject: [PATCH 112/151] fix: formatting --- src/dfcx_scrapi/agent_extract/types.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dfcx_scrapi/agent_extract/types.py b/src/dfcx_scrapi/agent_extract/types.py index fd7ee19a..2a60d8f9 100644 --- a/src/dfcx_scrapi/agent_extract/types.py +++ b/src/dfcx_scrapi/agent_extract/types.py @@ -32,7 +32,6 @@ class AgentMetadata: @dataclass class Flow: """Used to track current Flow Attributes.""" - agent_id: str = None all_pages: set = field(default_factory=set) active_pages: set = field(default_factory=set) From c11721953efc2ef42c2f5473dd6dc4f5b840b72d Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 08:25:55 -0500 Subject: [PATCH 113/151] feat: add processing for intents; cleanup old code --- src/dfcx_scrapi/agent_extract/flows.py | 1 + src/dfcx_scrapi/agent_extract/routes.py | 34 + src/dfcx_scrapi/agent_extract/types.py | 1 + src/dfcx_scrapi/tools/agent_checker_util.py | 692 +++----------------- 4 files changed, 139 insertions(+), 589 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/flows.py b/src/dfcx_scrapi/agent_extract/flows.py index ab088c1b..686d9b59 100644 --- a/src/dfcx_scrapi/agent_extract/flows.py +++ b/src/dfcx_scrapi/agent_extract/flows.py @@ -258,6 +258,7 @@ def process_flow(self, flow: types.Flow, stats: types.AgentData): flow.start_page_file = f"{flow.dir_path}/{flow.file_name}.json" stats.pages[flow.display_name] = [] + stats.active_intents[flow.display_name] = set() stats = self.process_start_page(flow, stats) stats = self.pages.process_pages_directory(flow, stats) stats = self.rgs.process_route_groups_directory(flow, stats) diff 
--git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py index 5eaa5cce..e530933d 100644 --- a/src/dfcx_scrapi/agent_extract/routes.py +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -55,6 +55,34 @@ def check_for_webhook_event_handlers(route: types.Fulfillment): ): route.page.has_webhook_event_handler = True + @staticmethod + def check_for_intent(route: types.Fulfillment): + """Check route data to see if Intent is present.""" + intent = None + if "intent" in route.data: + intent = route.data["intent"] + + return intent + + @staticmethod + def check_intent_map(intent: str, stats: types.AgentData): + """Check to see if intent is currently in map.""" + res = stats.intents_page_map.get(intent) + if not res: + stats.intents_page_map[intent] = set() + + def process_intents_in_routes( + self, route: types.Fulfillment, stats: types.AgentData): + intent = self.check_for_intent(route) + if intent: + stats.active_intents[ + route.page.flow.display_name].add(intent) + + self.check_intent_map(intent, stats) + stats.intents_page_map[intent].add(route.page.display_name) + + return stats + def collect_transition_route_trigger(self, route): """Inspect route and return all Intent/Condition info.""" @@ -201,6 +229,8 @@ def process_reprompt_handlers( path = route.data.get("triggerFulfillment", None) event = route.data.get("event", None) + stats = self.process_intents_in_routes(route, stats) + if not path and not event: continue @@ -227,6 +257,8 @@ def process_events(self, page: types.Page, stats: types.AgentData): path = route.data.get("triggerFulfillment", None) event = route.data.get("event", None) + stats = self.process_intents_in_routes(route, stats) + if not path and not event: continue @@ -253,6 +285,8 @@ def process_routes(self, page: types.Page, stats: types.AgentData): route.trigger = self.get_trigger_info(route) route, stats = self.set_route_targets(route, stats) + stats = self.process_intents_in_routes(route, stats) + path = 
route.data.get(tf_key, None) if not path: diff --git a/src/dfcx_scrapi/agent_extract/types.py b/src/dfcx_scrapi/agent_extract/types.py index 2a60d8f9..beefdaa0 100644 --- a/src/dfcx_scrapi/agent_extract/types.py +++ b/src/dfcx_scrapi/agent_extract/types.py @@ -191,6 +191,7 @@ class AgentData: graph: graph_class.Graph = None intents: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) intents_map: Dict[str, Any] = field(default_factory=dict) + intents_page_map: Dict[str, set] = field(default_factory=dict) pages: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) route_groups: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) route_groups_map: Dict[str, Any] = field(default_factory=dict) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index f326f29e..64208cfb 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -1,4 +1,4 @@ -"""A set of Utility methods to check DFCX Agents.""" +"""A set of Utility methods to check resources stats on DFCX Agents.""" # Copyright 2023 Google LLC # @@ -17,9 +17,8 @@ from __future__ import annotations import logging -from typing import Dict, List, Optional, Union +from typing import Dict, List import pandas as pd -from collections import defaultdict from google.cloud.dialogflowcx_v3beta1 import types from dfcx_scrapi.core import scrapi_base @@ -66,565 +65,83 @@ def __init__( "Current Page", "Previous Page", ] - - def _get_intent_parameters(self, intent_name): - """Gets the parameters for a particular intent, by display name""" - for intent in self._intent_data: - if intent.display_name == intent_name: - return intent.parameters - return None - - def _get_page( - self, - flow_name: str, - page_name: str - ) -> Union[DFCXPage, DFCXFlow]: - """Gets the page data for a specified page within - a specified flow. The flow and page can be specified - by ID or by display name. 
- - Args: - flow_name: The display name of the flow - page_name: The display name of the page - - Returns: - A DFCX Page object for this page, - or DFCX Flow object if it's the start page - - Raises: - KeyError, if the page is not found - """ - # Look up flow ID - flow_id = self._flows_map_rev.get(flow_name, None) - if not flow_id: - raise KeyError(f"Flow not found: {flow_name}") - # Now that flow_id is set, look up the page - # Special case for the start page - if page_name == "Start": - return self._flow_data[flow_id] - # Look up page ID - page_id = self._pages_map_rev[flow_id].get(page_name, None) - if not page_id: - raise KeyError(f"Page not found: {page_name}") - return self._page_data[flow_id][page_id] - - - def _continue_page_recursion( - self, - page: Union[DFCXPage, DFCXFlow], - page_name: str, - route: DFCXRoute, - target_page: str, - params: Dict - ) -> None: - if page_name not in params["reachable"]: - params["reachable"].append(page_name) - params["min_intent_counts"].append(params["intent_route_count"]) - else: - # Better route found, traverse from here - params["min_intent_counts"][ - params["reachable"].index(page_name) - ] = params["intent_route_count"] - - params["conversation_path"].append(page_name) - if params["verbose"]: - logging.info(params["conversation_path"], - params["intent_route_count"]) - - old_presets = params["presets"].copy() - new_presets = self._get_new_presets(params["presets"], page, route) - if "START_PAGE" in target_page: - next_page = self._flow_data[params["flow_id"]] - else: - next_page = self._page_data[params["flow_id"]][target_page] - params["presets"] = new_presets - - self._find_reachable_pages_rec(next_page, params) # is_initial=False - - params["conversation_path"].pop(-1) - # pop presets since we can't if we're passing a params dict like this - params["presets"] = old_presets - - def _handle_meta_page( - self, - page: Union[DFCXPage, DFCXFlow], - target_page: str, - params: Dict - ) -> None: - page_name = 
page.display_name - if "END_SESSION" in target_page: - page_name = "END SESSION" - elif "END_FLOW" in target_page: - page_name = "END FLOW" - elif "PREVIOUS_PAGE" in target_page: - page_name = "PREVIOUS PAGE" - #elif "CURRENT_PAGE" in target_page: - # page_name = page.display_name - - if params["verbose"]: - logging.info(page.display_name, "->", page_name) - # Only include the special "pages" like END SESSION - # if the include_meta parameter is set. - if page_name == page.display_name or params["include_meta"]: - if page_name not in params["reachable"]: - params["reachable"].append(page_name) - params["min_intent_counts"].append( - params["intent_route_count"] - ) - elif ( - page_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][ - params["reachable"].index(page_name) - ] - ): - params["min_intent_counts"][ - params["reachable"].index(page_name) - ] = params["intent_route_count"] - - def _find_reachable_pages_rec_helper( - self, - page: Union[DFCXPage, DFCXFlow], - route: DFCXRoute, - params: Dict, - # Having a default of False is absolutely critical - is_initial: bool = False - ) -> None: - """Helper function for the recursion involved in - finding reachable pages - """ - target_page = route.target_page - target_flow = route.target_flow - if ( - getattr(route, "intent", "") != "" - and params["intent_route_limit"] - and params["intent_route_count"] >= params["intent_route_limit"] - ): - return - if isinstance(page, DFCXPage): - for parameter in page.form.parameters: - parameter_name = parameter.display_name - # Need to also account for parameters being - # set by intents (or by webhooks...) 
- if ( - parameter_name not in params["presets"] - or params["presets"][parameter_name] == "NULL" - ): - # This page has an unfilled parameter - if (params["limit_intent_to_initial"] - and not is_initial - ): - return - if getattr(route, "intent", "") != "": - if params["limit_intent_to_initial"] and not is_initial: - # Don't continue on this path - return - params["intent_route_count"] += 1 - if target_page in self._page_data[params["flow_id"]]: - page_name = self._page_data[params["flow_id"]][ - target_page - ].display_name - if params["verbose"]: - logging.info(page.display_name, "->", page_name) - # Move to this page (this is also the recursion limiting step - # to prevent infinite loops) - if ( - page_name not in params["reachable"] - or (page_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][ - params["reachable"].index(page_name) - ]) - ): - self._continue_page_recursion(page, page_name, route, - target_page, params) - elif ("END_FLOW" in target_page - or "END_SESSION" in target_page - or "PREVIOUS_PAGE" in target_page - or "CURRENT_PAGE" in target_page - ): - self._handle_meta_page(page, target_page, params) - elif "START_PAGE" in target_page: - if params["verbose"]: - logging.info(page.display_name, "-> START PAGE") - page_name = "Start" - if (page_name not in params["reachable"] - or (page_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][ - params["reachable"].index(page_name) - ]) - ): - self._continue_page_recursion(page, page_name, route, - target_page, params) - elif len(target_page) > 0: - logging.info(page.display_name, "->", target_page) - # This should not happen, and if it does it needs to be fixed - logging.error(f"Page target not in list of pages: {target_page}") - elif len(target_flow) > 0: - flow_name = self._flows_map[route.target_flow] - if params["verbose"]: - logging.info(page.display_name, "->", flow_name) - if flow_name not in params["reachable"]: 
- params["reachable"].append(flow_name) - params["min_intent_counts"].append( - params["intent_route_count"] - ) - elif ( - flow_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][ - params["reachable"].index(flow_name) - ] - ): - params["min_intent_counts"][ - params["reachable"].index(flow_name) - ] = params["intent_route_count"] - else: - if params["verbose"]: - logging.info(page.display_name, "->", - route.target_flow, "(empty)") - page_name = page.display_name - if ( - page_name in params["reachable"] - and params["intent_route_count"] - < params["min_intent_counts"][ - params["reachable"].index(page_name) - ] - ): - params["min_intent_counts"][ - params["reachable"].index(page_name) - ] = params["intent_route_count"] - - def _get_new_presets(self, presets, page, route): - """Gets parameter presets that have been added on a given route. - """ - new_presets = presets.copy() - if isinstance(page, DFCXPage): - for preset in page.entry_fulfillment.set_parameter_actions: - new_presets[preset.parameter] = preset.value - for parameter in page.form.parameters: - ipf = parameter.fill_behavior.initial_prompt_fulfillment - for preset in ipf.set_parameter_actions: - new_presets[preset.parameter] = preset.value - for preset in route.trigger_fulfillment.set_parameter_actions: - new_presets[preset.parameter] = preset.value - if getattr(route, "intent", "") != "": - # Check the entities annotated on this intent - intent_name = self._intents_map[route.intent] - intent_params = self._get_intent_parameters(intent_name) - for param in intent_params: - new_presets[param.id] = f"(potentially set by {intent_name})" - return new_presets - - def _find_reachable_pages_rec( - self, - page: Union[DFCXPage, DFCXFlow], - params: Dict, - # Having a default of False is absolutely critical - is_initial: bool = False - ) -> None: - """Recursive function to find reachable pages within a given flow, - starting at a particular page. 
Other parameters here are used for - more general traversal options.""" - if isinstance(page, DFCXPage): - for parameter in page.form.parameters: - self._process_form_parameter_for_reachable_pages( - page, - parameter, - params, - is_initial=is_initial) - for event_handler in page.event_handlers: - if params["limit_intent_to_initial"] and not is_initial: - continue - if (event_handler.target_page != "" - or event_handler.target_flow != ""): - self._find_reachable_pages_rec_helper(page, - event_handler, - params, - is_initial=is_initial) - for route in page.transition_routes: - self._find_reachable_pages_rec_helper(page, - route, - params, - is_initial=is_initial) - if params["include_groups"]: - for route_group in page.transition_route_groups: - for route in self._route_group_data[params["flow_id"]][ - route_group - ].transition_routes: - self._find_reachable_pages_rec_helper(page, - route, - params, - is_initial=is_initial - ) - # Start page routes and route groups are also accessible from this page - if ( - params["include_start_page_routes"] - and page.display_name != params["flow_name"] - and (not params["limit_intent_to_initial"] or is_initial) - ): - self._process_start_page_routes_for_reachable_pages(params, - is_initial=is_initial) - - def _process_form_parameter_for_reachable_pages( - self, - page: Union[DFCXPage, DFCXFlow], - parameter, # TODO: Data type for DFCX Parameter - params: Dict, - is_initial: bool = False - ) -> None: - for event_handler in parameter.fill_behavior.reprompt_event_handlers: - if params["limit_intent_to_initial"] and not is_initial: - continue - if (event_handler.target_page != "" - or event_handler.target_flow != ""): - self._find_reachable_pages_rec_helper(page, - event_handler, - params, - is_initial=is_initial) - - def _process_start_page_routes_for_reachable_pages( - self, - params: Dict, - is_initial: bool = False - ): - page = self._flow_data[params["flow_id"]] - for event_handler in page.event_handlers: - if 
(event_handler.target_page != "" - or event_handler.target_flow != ""): - self._find_reachable_pages_rec_helper(page, - event_handler, - params, - is_initial=is_initial) - for route in page.transition_routes: - if route.intent: - self._find_reachable_pages_rec_helper( - page, route, params, is_initial=is_initial) - if params["include_groups"]: - for route_group in page.transition_route_groups: - for route in self._route_group_data[params["flow_id"]][ - route_group - ].transition_routes: - if route.intent: - self._find_reachable_pages_rec_helper( - page, route, params, is_initial=is_initial) - - def find_reachable_pages( - self, - flow_name: str, - from_page: str = "Start", - intent_route_limit: Optional[int] = None, - include_groups: bool = True, - include_start_page_routes: bool = True, - include_meta: bool = False, - verbose: bool = False, - ) -> List[str]: - """Finds all pages which are reachable by transition routes, - starting from a given page in a given flow. Either flow_id or - flow_name must be used. - - Args: - flow_name: The display name of the flow. - from_page: (Optional) The page to start from. If left blank, it will - start on the Start Page of the given flow. - intent_route_limit: (Optional) Default None. The maximum number of - intent routes to take. This can be used to answer questions like - "which pages can I reach within N turns, starting at this page?" - include_groups: (Optional) If true, intents from transition route - groups will be included, but only if they are actually referenced - on each given page in the traversal. - include_start_page_routes: (Optional) Default true. If true, intent - routes on the start page are always considered in scope. This is - how DFCX normally behaves. - include_meta: (Optional) Default False. If true, includes special - transition targets like End Session, End Flow, etc. as if they - are actual pages. - verbose: (Optional) If true, prints debug information about - route traversal. 
- - Returns: - The list of reachable pages in this flow - """ - flow_id = self._flows_map_rev.get(flow_name, None) - if not flow_id: - raise KeyError(f"Flow not found: {flow_name}") - - # Start at the start page... - reachable = [from_page] - conversation_path = [from_page] - # Technically this could be [0] or [1], or very rarely more than 1, - # depending on the routes that lead to current page... - min_intent_counts = [25] - presets = {} - page_data = self._get_page( - flow_name=flow_name, - page_name=from_page - ) - params = { - "flow_id": flow_id, - "flow_name": flow_name, - "reachable": reachable, - "conversation_path": conversation_path, - "min_intent_counts": min_intent_counts, - "presets": presets, - "intent_route_limit": intent_route_limit, - "intent_route_count": 0, - "include_groups": include_groups, - "include_start_page_routes": include_start_page_routes, - "limit_intent_to_initial": False, - # This can't be stored here unless I want to add a lot of complex - # conditions to change it to False and back depending on the level - # of recursion - #"is_initial": True, - "include_meta": include_meta, - "verbose": verbose - } - self._find_reachable_pages_rec(page_data, params, is_initial=True) - return reachable - - def find_all_reachable_pages( - self, - include_groups: bool = True, - verbose: bool = False, - ): - """Gets a dataframe of all reachable pages in this agent - - Args: - include_groups: whether or not to consider route group routes - as being reachable. Defaults to True. - verbose: whether to display debug info in the agent structure - traversal. Defaults to False. 
- - Returns: - A dataframe with columns flow_name and page_name - """ - flow_names = [] - page_names = [] - for flow_name in self._flows_map_rev: - reachable = self.find_reachable_pages( - flow_name=flow_name, - include_groups=include_groups, - verbose=verbose - ) - flow_names.extend([flow_name for _ in reachable]) - page_names.extend(reachable) - return pd.DataFrame({"flow_name": flow_names, "page_name": page_names}) - - def _get_intents_from_routes( - self, - transition_list: List[DFCXRoute], - route_group - ) -> Dict[str, List[str]]: - """Helper function which adds intents from routes to a list of intents - - Args: - transition_list: The list of transition routes - route_group (Optional): The route group where the route is - located. - - Returns: - A dictionary with keys 'intents' and 'routegroups' which each contain - a list of intent/route group names to be added - """ - intents = [] - routegroups = [] - for route in transition_list: - # Ignore empty intents (such as the true condition) - if len(route.intent) == 0: - continue - intent = self._intents_map[route.intent] - if intent not in intents: - intents.append(intent) - if route_group is not None: - routegroups.append(route_group.display_name) - else: - routegroups.append("") - return { - "intents": intents, - "routegroups": routegroups - } - - def _get_page_intents( - self, - flow_name: str, - page_name: str, - include_groups: bool = True - ) -> List[str]: - """Get the list of intents for a given page of this flow. 
- - Args: - flow_name: The display name of the flow - page_name: The display name of the page - include_groups (Optional): If true, intents from transition route - groups on the given page will be included - - Returns: - List of intent names - """ - page = self._get_page(flow_name=flow_name, - page_name=page_name) - - page_intents = [] - page_routegroups = [] - transition_list = page.transition_routes - route_intent_dict = self._get_intents_from_routes(transition_list,None) - page_intents.extend(route_intent_dict["intents"]) - page_routegroups.extend(route_intent_dict["routegroups"]) - - flow_id = self._flows_map_rev.get(flow_name, None) - if not flow_id: - raise KeyError(f"Flow not found: {flow_name}") - - # Get intents in transition route groups - if include_groups: - for route_group_id in page.transition_route_groups: - route_group = self._route_group_data[flow_id][route_group_id] - route_intent_dict = self._get_intents_from_routes( - route_group.transition_routes, - route_group - ) - page_intents.extend(route_intent_dict["intents"]) - page_routegroups.extend(route_intent_dict["routegroups"]) - - return pd.DataFrame({ - "route group": page_routegroups, - "intent": page_intents - }) - - def find_reachable_intents( - self, - flow_name: str, - include_groups: bool = True - ) -> List[str]: - """Finds all intents which are on reachable pages, starting from the - start page of the given flow. - - Args: - flow_name: The name of the flow to check for reachable intents. - include_groups (Optional): If true, intents from transition route - groups will be included, but only if they are actually referenced - on some page. 
- - Returns: - The list of intents on reachable pages in this flow - """ - intents = set() - reachable_pages = self.find_reachable_pages( - flow_name=flow_name, - include_groups=include_groups) - for page_name in reachable_pages: - if page_name not in self._flows_map_rev: - page_intents = set(self._get_page_intents( - flow_name=flow_name, - page_name=page_name, - include_groups=include_groups - )["intent"]) - intents.update(page_intents) - return list(intents) - - def find_all_reachable_intents(self) -> pd.DataFrame: - """Finds all intents referenced in the agent, across all flows, + self.active_intents_df = pd.DataFrame() + + # def find_reachable_pages( + # self, + # flow_name: str, + # from_page: str = "Start", + # intent_route_limit: Optional[int] = None, + # include_groups: bool = True, + # include_start_page_routes: bool = True, + # include_meta: bool = False, + # verbose: bool = False, + # ) -> List[str]: + # """Finds all pages which are reachable by transition routes, + # starting from a given page in a given flow. Either flow_id or + # flow_name must be used. + + # Args: + # flow_name: The display name of the flow. + # from_page: (Optional) The page to start from. If left blank, it will + # start on the Start Page of the given flow. + # intent_route_limit: (Optional) Default None. The maximum number of + # intent routes to take. This can be used to answer questions like + # "which pages can I reach within N turns, starting at this page?" + # include_groups: (Optional) If true, intents from transition route + # groups will be included, but only if they are actually referenced + # on each given page in the traversal. + # include_start_page_routes: (Optional) Default true. If true, intent + # routes on the start page are always considered in scope. This is + # how DFCX normally behaves. + # include_meta: (Optional) Default False. If true, includes special + # transition targets like End Session, End Flow, etc. as if they + # are actual pages. 
+ # verbose: (Optional) If true, prints debug information about + # route traversal. + + # Returns: + # The list of reachable pages in this flow + # """ + # flow_id = self._flows_map_rev.get(flow_name, None) + # if not flow_id: + # raise KeyError(f"Flow not found: {flow_name}") + + # # Start at the start page... + # reachable = [from_page] + # conversation_path = [from_page] + # # Technically this could be [0] or [1], or very rarely more than 1, + # # depending on the routes that lead to current page... + # min_intent_counts = [25] + # presets = {} + # page_data = self._get_page( + # flow_name=flow_name, + # page_name=from_page + # ) + # params = { + # "flow_id": flow_id, + # "flow_name": flow_name, + # "reachable": reachable, + # "conversation_path": conversation_path, + # "min_intent_counts": min_intent_counts, + # "presets": presets, + # "intent_route_limit": intent_route_limit, + # "intent_route_count": 0, + # "include_groups": include_groups, + # "include_start_page_routes": include_start_page_routes, + # "limit_intent_to_initial": False, + # # This can't be stored here unless I want to add a lot of complex + # # conditions to change it to False and back depending on the level + # # of recursion + # #"is_initial": True, + # "include_meta": include_meta, + # "verbose": verbose + # } + # self._find_reachable_pages_rec(page_data, params, is_initial=True) + # return reachable + + def active_intents_to_dataframe(self) -> pd.DataFrame: + """Gets all intents referenced in the agent, across all flows, and produces a dataframe listing which flows reference each intent. 
Returns: @@ -632,34 +149,31 @@ def find_all_reachable_intents(self) -> pd.DataFrame: intent - the intent display name flows - a list of flow display names that use this intent """ - intents = defaultdict(lambda: []) - for flow_name in self._flows_map_rev: - flow_intents = self.find_reachable_intents(flow_name=flow_name, - include_groups=True) - for intent in flow_intents: - intents[intent].append(flow_name) + df = pd.DataFrame({"intent": [], "flow": []}) + for flow in self.data.active_intents: + for intent in self.data.active_intents[flow]: + temp = pd.DataFrame({"intent": [intent], "flow": [flow]}) + df = pd.concat([df, temp]) - return pd.DataFrame({ - "intent": intents.keys(), - "flows": intents.values() - }) + self.active_intents_df = df.reset_index(drop=True) - # TODO: Break this into 2 methods - # get_unused_intents() // i.e. intents not in use in the agent - # get_unreachable_intents // i.e. intents that overlap with unreachable - # pages - def find_all_unreachable_intents(self) -> List[str]: - """Finds all unreachable intents, either because they are on - unreachable pages or they are unused in the agent. Note that - Default Negative Intent will always show up here. + return self.active_intents_df - Returns: - A list of unreachable intent display names + def get_unused_intents(self) -> List: + """Get all unused Intents across the agent.""" + if self.active_intents_df.empty: + self.active_intents_df = self.active_intents_to_dataframe() + active_intents_set = set(self.active_intents_df.intent.to_list()) + all_intents_set = set(self.data.intents_map.keys()) + + return list(all_intents_set.difference(active_intents_set)) + + def get_unreachable_intents(self) -> List: + """Get all unreachable Intents across the agent. + + An Intent is unreachable if it resides on a page that is also + unreachable. 
""" - all_reachable_intents = set() - for flow_name in self._flows_map_rev: - flow_intents = self.find_reachable_intents(flow_name=flow_name, - include_groups=True) - all_reachable_intents.update(set(flow_intents)) - all_intents = {intent.display_name for intent in self._intent_data} - return list(all_intents - all_reachable_intents) + # Get Page / Intent mapping + # Find all unreachable pages + # From 81a78590ebb6066b66f1a3084992de0343dc95a6 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 08:26:33 -0500 Subject: [PATCH 114/151] fix: add logging; handle lros; add lang_code support --- src/dfcx_scrapi/agent_extract/agents.py | 55 ++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/agents.py b/src/dfcx_scrapi/agent_extract/agents.py index 190c5946..4cd53c4b 100644 --- a/src/dfcx_scrapi/agent_extract/agents.py +++ b/src/dfcx_scrapi/agent_extract/agents.py @@ -14,11 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import time import os from typing import Dict from dfcx_scrapi.core import agents +from dfcx_scrapi.core import operations from dfcx_scrapi.core import scrapi_base from dfcx_scrapi.agent_extract import graph from dfcx_scrapi.agent_extract import flows @@ -29,11 +31,19 @@ from dfcx_scrapi.agent_extract import gcs_utils from dfcx_scrapi.agent_extract import types +# logging config +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + class Agents(scrapi_base.ScrapiBase): """Agent Metadata methods and functions.""" def __init__( self, agent_id: str, + lang_code: str = "en", creds_path: str = None, creds_dict: Dict = None, creds=None, @@ -46,6 +56,7 @@ def __init__( scope=scope, ) self.agent_id = agent_id + self.lang_code = lang_code self._core_agents = agents.Agents(creds=creds) self.gcs = gcs_utils.GcsUtils() self.flows = flows.Flows() @@ -53,26 +64,59 @@ def __init__( self.etypes = entity_types.EntityTypes() self.webhooks = webhooks.Webhooks() self.tcs = test_cases.TestCases() + self.ops = operations.Operations() - def process_agent(self, agent_id: str, gcs_bucket_uri: str, + def await_lro(self, lro: str): + """Wait for long running operation to complete.""" + try: + i = 0 + while not self.ops.get_lro(lro).done: + time.sleep(1) + i += 1 + if i == 20: + break + + except UserWarning as uw: + uw("LRO Failed.") + + return True + + def export_agent(self, agent_id: str, gcs_bucket_uri: str, environment_display_name: str = None): - """Process the specified Agent for offline data gathering.""" - agent_local_path = "tmp/agent" - _ = self._core_agents.export_agent( + """Handle the agent export, LRO and logging.""" + logging.info("Exporting agent...") + lro = self._core_agents.export_agent( agent_id=agent_id,gcs_bucket_uri=gcs_bucket_uri, data_format="JSON", environment_display_name=environment_display_name) + + self.await_lro(lro) + logging.info("Export Complete.") + + def 
download_and_extract(self, agent_local_path: str, gcs_bucket_uri: str): + """Handle download from GCS and extracting ZIP file.""" if not os.path.exists(agent_local_path): os.makedirs(agent_local_path) - time.sleep(2) + logging.info("Downloading agent file from GCS Bucket...") agent_file = self.gcs.download_gcs( gcs_path=gcs_bucket_uri, local_path=agent_local_path) + logging.info("Download complete.") self.gcs.unzip(agent_file, agent_local_path) + + def process_agent(self, agent_id: str, gcs_bucket_uri: str, + environment_display_name: str = None): + """Process the specified Agent for offline data gathering.""" + agent_local_path = "tmp/agent" + self.export_agent(agent_id, gcs_bucket_uri, environment_display_name) + self.download_and_extract(agent_local_path, gcs_bucket_uri) + + logging.info("Processing Agent...") data = types.AgentData() data.graph = graph.Graph() + data.lang_code = self.lang_code data.agent_id = agent_id data = self.flows.process_flows_directory(agent_local_path, data) data = self.intents.process_intents_directory(agent_local_path, data) @@ -80,5 +124,6 @@ def process_agent(self, agent_id: str, gcs_bucket_uri: str, agent_local_path, data) data = self.webhooks.process_webhooks_directory(agent_local_path, data) data = self.tcs.process_test_cases_directory(agent_local_path, data) + logging.info("Processing Complete.") return data From be7fd26ae7d296d44d3a1316830fbd41f4a107e2 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 14:04:01 -0500 Subject: [PATCH 115/151] fix: added dir cleanup to avoid local file conflicts --- src/dfcx_scrapi/agent_extract/agents.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/dfcx_scrapi/agent_extract/agents.py b/src/dfcx_scrapi/agent_extract/agents.py index 4cd53c4b..043577ca 100644 --- a/src/dfcx_scrapi/agent_extract/agents.py +++ b/src/dfcx_scrapi/agent_extract/agents.py @@ -17,6 +17,7 @@ import logging import time import os +import shutil from typing import Dict from 
dfcx_scrapi.core import agents @@ -66,6 +67,17 @@ def __init__( self.tcs = test_cases.TestCases() self.ops = operations.Operations() + @staticmethod + def prep_local_dir(agent_local_path: str): + """Prepare the local directory for agent zip file.""" + if os.path.isdir(agent_local_path): + logging.info("Cleaning up old directory...") + shutil.rmtree(agent_local_path) + logging.info(f"Making temp directory: {agent_local_path}") + os.mkdir(agent_local_path) + else: + os.mkdir(agent_local_path) + def await_lro(self, lro: str): """Wait for long running operation to complete.""" try: @@ -84,6 +96,7 @@ def await_lro(self, lro: str): def export_agent(self, agent_id: str, gcs_bucket_uri: str, environment_display_name: str = None): """Handle the agent export, LRO and logging.""" + EXPORT_START = time.time() logging.info("Exporting agent...") lro = self._core_agents.export_agent( agent_id=agent_id,gcs_bucket_uri=gcs_bucket_uri, data_format="JSON", @@ -92,16 +105,19 @@ def export_agent(self, agent_id: str, gcs_bucket_uri: str, self.await_lro(lro) logging.info("Export Complete.") + logging.debug(f"EXPORT: {time.time() - EXPORT_START}") def download_and_extract(self, agent_local_path: str, gcs_bucket_uri: str): """Handle download from GCS and extracting ZIP file.""" if not os.path.exists(agent_local_path): os.makedirs(agent_local_path) + DOWNLOAD_START = time.time() logging.info("Downloading agent file from GCS Bucket...") agent_file = self.gcs.download_gcs( gcs_path=gcs_bucket_uri, local_path=agent_local_path) logging.info("Download complete.") + logging.debug(f"DOWNLOAD: {time.time() - DOWNLOAD_START}") self.gcs.unzip(agent_file, agent_local_path) @@ -110,6 +126,7 @@ def process_agent(self, agent_id: str, gcs_bucket_uri: str, environment_display_name: str = None): """Process the specified Agent for offline data gathering.""" agent_local_path = "tmp/agent" + self.prep_local_dir(agent_local_path) self.export_agent(agent_id, gcs_bucket_uri, environment_display_name) 
self.download_and_extract(agent_local_path, gcs_bucket_uri) From 5f035d719c934383bb5d3df09ca882d33fda803b Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 14:04:37 -0500 Subject: [PATCH 116/151] fix: fixed display_name parsing for Intents/Entity Types --- src/dfcx_scrapi/agent_extract/common.py | 7 +++++++ src/dfcx_scrapi/agent_extract/entity_types.py | 16 +++++----------- src/dfcx_scrapi/agent_extract/intents.py | 17 ++++++----------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/common.py b/src/dfcx_scrapi/agent_extract/common.py index 4b1a6605..26b86ff3 100644 --- a/src/dfcx_scrapi/agent_extract/common.py +++ b/src/dfcx_scrapi/agent_extract/common.py @@ -16,6 +16,7 @@ import logging import re +from dfcx_scrapi.agent_extract import types # logging config logging.basicConfig( @@ -53,6 +54,7 @@ def clean_display_name(display_name: str): "%27": "'", "%28": "(", "%29": ")", + "%2b": "+", "%2c": ",", "%2f": "/", "%3a": ":", @@ -71,3 +73,8 @@ def clean_display_name(display_name: str): display_name = display_name.replace(key, value) return display_name + + @staticmethod + def check_lang_code(lang_code: str, stats: types.AgentData): + """Check to see if file lang_code matches user input lang_code.""" + return stats.lang_code == lang_code diff --git a/src/dfcx_scrapi/agent_extract/entity_types.py b/src/dfcx_scrapi/agent_extract/entity_types.py index fa494edd..17469b58 100644 --- a/src/dfcx_scrapi/agent_extract/entity_types.py +++ b/src/dfcx_scrapi/agent_extract/entity_types.py @@ -87,15 +87,6 @@ def process_entity_type_metadata(etype: types.EntityType): etype_file.close() - @staticmethod - def check_lang_code(lang_code: str, stats: types.AgentData): - """Check to see if lang_code already exists in dict, or create it.""" - res = stats.entity_types.get(lang_code, None) - if not res: - stats.entity_types[lang_code] = [] - - return stats - def process_excluded_phrases_language_codes( self, data: Dict[str, 
str], lang_code_path: str): """Process all ecluded phrases lang_code files.""" @@ -120,7 +111,9 @@ def process_language_codes( """Process all Entity Type lang_code files.""" for lang_code in etype.entities: ent_file_path = etype.entities[lang_code]["file_path"] - stats = self.check_lang_code(lang_code, stats) + + if not self.common.check_lang_code(lang_code, stats): + continue with open(ent_file_path, "r", encoding="UTF-8") as ent_file: data = json.load(ent_file) @@ -130,7 +123,7 @@ def process_language_codes( data["kind"] = etype.kind data["entities"] = data.get("entities", None) data = self.process_excluded_phrases(etype, lang_code, data) - stats.entity_types[lang_code].append(data) + stats.entity_types.append(data) ent_file.close() @@ -150,6 +143,7 @@ def process_entity_type( etype.display_name = self.common.parse_filepath( etype.dir_path, "entity_type") + etype.display_name = self.common.clean_display_name(etype.display_name) self.process_entity_type_metadata(etype) stats = self.process_entities(etype, stats) diff --git a/src/dfcx_scrapi/agent_extract/intents.py b/src/dfcx_scrapi/agent_extract/intents.py index be10aee3..e8d46dca 100644 --- a/src/dfcx_scrapi/agent_extract/intents.py +++ b/src/dfcx_scrapi/agent_extract/intents.py @@ -72,15 +72,6 @@ def build_intent_path_list(agent_local_path: str): return intent_paths - @staticmethod - def check_lang_code(lang_code: str, stats: types.AgentData): - """Check to see if lang_code already exists in dict, or create it.""" - res = stats.intents.get(lang_code, None) - if not res: - stats.intents[lang_code] = [] - - return stats - def process_intent_metadata( self, intent: types.Intent): """Process the metadata file for a single Intent.""" @@ -106,7 +97,8 @@ def process_language_codes( for lang_code in intent.training_phrases: tp_file = intent.training_phrases[lang_code]["file_path"] - stats = self.check_lang_code(lang_code, stats) + if not self.common.check_lang_code(lang_code, stats): + continue with open(tp_file, "r", 
encoding="UTF-8") as tps: data = json.load(tps) @@ -115,7 +107,7 @@ def process_language_codes( data["labels"] = intent.labels data["description"] = intent.description data["parameters"] = intent.parameters - stats.intents[lang_code].append(data) + stats.intents.append(data) stats.total_training_phrases += len(data["trainingPhrases"]) tps.close() @@ -135,6 +127,8 @@ def process_intent(self, intent: types.Intent, stats: types.AgentData): """Process a single Intent directory and associated files.""" intent.display_name = self.common.parse_filepath( intent.dir_path, "intent") + intent.display_name = self.common.clean_display_name( + intent.display_name) self.process_intent_metadata(intent) stats = self.process_training_phrases(intent, stats) @@ -159,6 +153,7 @@ def process_intents_directory( """ # Create a list of all Intent paths to iter through intent_paths = self.build_intent_path_list(agent_local_path) + stats.intents = [] for intent_path in intent_paths: intent = types.Intent() From c73992091b6bc0ca3695de3cdb1c4ac4d136e6c2 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 14:05:26 -0500 Subject: [PATCH 117/151] fix: add lang_code support; fix class type outputs --- src/dfcx_scrapi/agent_extract/types.py | 5 +++-- src/dfcx_scrapi/tools/agent_checker_util.py | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/types.py b/src/dfcx_scrapi/agent_extract/types.py index beefdaa0..060481d4 100644 --- a/src/dfcx_scrapi/agent_extract/types.py +++ b/src/dfcx_scrapi/agent_extract/types.py @@ -183,15 +183,16 @@ class AgentData: active_intents: Dict[str, set] = field(default_factory=dict) active_pages: Dict[str, set] = field(default_factory=dict) agent_id: str = None - entity_types: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + entity_types: List[Dict[str, Any]] = field(default_factory=list) entity_types_map: Dict[str, Any] = field(default_factory=dict) flow_page_map: Dict[str, Any] = 
field(default_factory=dict) flows: List[Dict[str, Any]] = field(default_factory=list) flows_map: Dict[str, Any] = field(default_factory=dict) graph: graph_class.Graph = None - intents: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + intents: List[Dict[str, Any]] = field(default_factory=list) intents_map: Dict[str, Any] = field(default_factory=dict) intents_page_map: Dict[str, set] = field(default_factory=dict) + lang_code: str = "en" pages: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) route_groups: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) route_groups_map: Dict[str, Any] = field(default_factory=dict) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 64208cfb..81fa2e4c 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -14,9 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import annotations - import logging +import time from typing import Dict, List import pandas as pd @@ -55,9 +54,13 @@ def __init__( scope=scope, ) + STARTUP_TIME = time.time() self.agent_id = agent_id self.extract = agents.Agents(agent_id) + PROCESSING_TIME = time.time() + logging.debug(f"STARTUP: {PROCESSING_TIME - STARTUP_TIME}") self.data = self.extract.process_agent(agent_id, gcs_bucket_uri) + logging.debug(f"TOTAL PROCESSING: {time.time() - PROCESSING_TIME}") self.special_pages = [ "End Session", "End Flow", From 66f75780a50e800d5eb8e8f9aadd27943e434fcc Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 15:28:22 -0500 Subject: [PATCH 118/151] feat: implement recursion method for finding reachable pages in graph --- src/dfcx_scrapi/tools/agent_checker_util.py | 155 ++++++++++---------- 1 file changed, 75 insertions(+), 80 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 81fa2e4c..df84a61b 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -53,14 +53,8 @@ def __init__( creds=creds, scope=scope, ) - - STARTUP_TIME = time.time() self.agent_id = agent_id - self.extract = agents.Agents(agent_id) - PROCESSING_TIME = time.time() - logging.debug(f"STARTUP: {PROCESSING_TIME - STARTUP_TIME}") - self.data = self.extract.process_agent(agent_id, gcs_bucket_uri) - logging.debug(f"TOTAL PROCESSING: {time.time() - PROCESSING_TIME}") + self.active_intents_df = pd.DataFrame() self.special_pages = [ "End Session", "End Flow", @@ -68,80 +62,81 @@ def __init__( "Current Page", "Previous Page", ] - self.active_intents_df = pd.DataFrame() - # def find_reachable_pages( - # self, - # flow_name: str, - # from_page: str = "Start", - # intent_route_limit: Optional[int] = None, - # include_groups: bool = True, - # include_start_page_routes: bool = True, - # include_meta: bool = False, - # verbose: bool = False, - # ) -> 
List[str]: - # """Finds all pages which are reachable by transition routes, - # starting from a given page in a given flow. Either flow_id or - # flow_name must be used. - - # Args: - # flow_name: The display name of the flow. - # from_page: (Optional) The page to start from. If left blank, it will - # start on the Start Page of the given flow. - # intent_route_limit: (Optional) Default None. The maximum number of - # intent routes to take. This can be used to answer questions like - # "which pages can I reach within N turns, starting at this page?" - # include_groups: (Optional) If true, intents from transition route - # groups will be included, but only if they are actually referenced - # on each given page in the traversal. - # include_start_page_routes: (Optional) Default true. If true, intent - # routes on the start page are always considered in scope. This is - # how DFCX normally behaves. - # include_meta: (Optional) Default False. If true, includes special - # transition targets like End Session, End Flow, etc. as if they - # are actual pages. - # verbose: (Optional) If true, prints debug information about - # route traversal. - - # Returns: - # The list of reachable pages in this flow - # """ - # flow_id = self._flows_map_rev.get(flow_name, None) - # if not flow_id: - # raise KeyError(f"Flow not found: {flow_name}") - - # # Start at the start page... - # reachable = [from_page] - # conversation_path = [from_page] - # # Technically this could be [0] or [1], or very rarely more than 1, - # # depending on the routes that lead to current page... 
- # min_intent_counts = [25] - # presets = {} - # page_data = self._get_page( - # flow_name=flow_name, - # page_name=from_page - # ) - # params = { - # "flow_id": flow_id, - # "flow_name": flow_name, - # "reachable": reachable, - # "conversation_path": conversation_path, - # "min_intent_counts": min_intent_counts, - # "presets": presets, - # "intent_route_limit": intent_route_limit, - # "intent_route_count": 0, - # "include_groups": include_groups, - # "include_start_page_routes": include_start_page_routes, - # "limit_intent_to_initial": False, - # # This can't be stored here unless I want to add a lot of complex - # # conditions to change it to False and back depending on the level - # # of recursion - # #"is_initial": True, - # "include_meta": include_meta, - # "verbose": verbose - # } - # self._find_reachable_pages_rec(page_data, params, is_initial=True) - # return reachable + STARTUP_TIME = time.time() + self.extract = agents.Agents(agent_id) + PROCESSING_TIME = time.time() + logging.debug(f"STARTUP: {PROCESSING_TIME - STARTUP_TIME}") + + self.data = self.extract.process_agent(agent_id, gcs_bucket_uri) + logging.debug(f"TOTAL PROCESSING: {time.time() - PROCESSING_TIME}") + + def filter_special_pages(self, page: str, filter_special_pages: bool): + """Recursion helper to check for special page match.""" + if filter_special_pages and page in self.special_pages: + return True + + return False + + def recurse_edges(self, edges: Dict[str, List[str]], page: str, + visited: set, depth: int, max_depth: int, + filter_special_pages: bool): + """Recursion method used to traverse the agent graph for page data. + + Args: + edges: The set of graph edges collected from the agent. + page: The current Page Display Name + visited: A set of visited Page nodes + depth: The current recursion depth + max_depth: The max recursion depth + filter_special_pages: Will discard all self.special_pages from output + if set to False. 
+ """ + if depth == max_depth: + return visited + + if page in edges: + for inner_page in edges[page]: + if self.filter_special_pages(inner_page, filter_special_pages): + return visited + + if inner_page not in visited: + visited.add(inner_page) + visited = self.recurse_edges( + edges, inner_page, visited, depth+1, max_depth, + filter_special_pages) + + return visited + + def get_reachable_pages( + self, + flow_display_name: str, + page_display_name: str = "Start Page", + max_depth: int = 1, + filter_special_pages = True) -> List[str]: + """Get all pages in the graph that are reachable via transition routes, + starting from a given Flow and Page. + + Args: + flow_display_name: The display name of the flow + page_display_name: The display name of the page. Defaults to + "Start Page" + max_depth: The max recursion depth to search the graph from the + provided starting point. For example, a max_depth of 2 would produce + all reachable Pages that are 2 transition routes away from the + starting Flow/Page. Defaults to 1. + filter_special_pages: Will filter out all self.special_pages. Defaults + to True. 
+ """ + if page_display_name in ["START", "START_PAGE", "Start", "Start Page"]: + page_display_name = "Start Page" + page_display_name = f"{flow_display_name}: {page_display_name}" + + visited = self.recurse_edges( + self.data.graph.edges, page_display_name, set(), 0, max_depth, + filter_special_pages) + + return list(visited) def active_intents_to_dataframe(self) -> pd.DataFrame: """Gets all intents referenced in the agent, across all flows, From f676476f323f8d8582990933139029cbdee6d532 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 15:32:23 -0500 Subject: [PATCH 119/151] fix: lint fixes --- src/dfcx_scrapi/agent_extract/agents.py | 12 ++++++------ src/dfcx_scrapi/tools/agent_checker_util.py | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/agents.py b/src/dfcx_scrapi/agent_extract/agents.py index 043577ca..eaa4153d 100644 --- a/src/dfcx_scrapi/agent_extract/agents.py +++ b/src/dfcx_scrapi/agent_extract/agents.py @@ -88,15 +88,15 @@ def await_lro(self, lro: str): if i == 20: break - except UserWarning as uw: - uw("LRO Failed.") + except UserWarning: + print("LRO Failed.") return True def export_agent(self, agent_id: str, gcs_bucket_uri: str, environment_display_name: str = None): """Handle the agent export, LRO and logging.""" - EXPORT_START = time.time() + export_start = time.time() logging.info("Exporting agent...") lro = self._core_agents.export_agent( agent_id=agent_id,gcs_bucket_uri=gcs_bucket_uri, data_format="JSON", @@ -105,19 +105,19 @@ def export_agent(self, agent_id: str, gcs_bucket_uri: str, self.await_lro(lro) logging.info("Export Complete.") - logging.debug(f"EXPORT: {time.time() - EXPORT_START}") + logging.debug(f"EXPORT: {time.time() - export_start}") def download_and_extract(self, agent_local_path: str, gcs_bucket_uri: str): """Handle download from GCS and extracting ZIP file.""" if not os.path.exists(agent_local_path): os.makedirs(agent_local_path) - DOWNLOAD_START = 
time.time() + download_start = time.time() logging.info("Downloading agent file from GCS Bucket...") agent_file = self.gcs.download_gcs( gcs_path=gcs_bucket_uri, local_path=agent_local_path) logging.info("Download complete.") - logging.debug(f"DOWNLOAD: {time.time() - DOWNLOAD_START}") + logging.debug(f"DOWNLOAD: {time.time() - download_start}") self.gcs.unzip(agent_file, agent_local_path) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index df84a61b..28a35ed7 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -63,13 +63,13 @@ def __init__( "Previous Page", ] - STARTUP_TIME = time.time() + startup_time = time.time() self.extract = agents.Agents(agent_id) - PROCESSING_TIME = time.time() - logging.debug(f"STARTUP: {PROCESSING_TIME - STARTUP_TIME}") + processing_time = time.time() + logging.debug(f"STARTUP: {processing_time - startup_time}") self.data = self.extract.process_agent(agent_id, gcs_bucket_uri) - logging.debug(f"TOTAL PROCESSING: {time.time() - PROCESSING_TIME}") + logging.debug(f"TOTAL PROCESSING: {time.time() - processing_time}") def filter_special_pages(self, page: str, filter_special_pages: bool): """Recursion helper to check for special page match.""" From 66361c771a0f50695ec68d3c29718e6884d04587 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 15:34:37 -0500 Subject: [PATCH 120/151] chore: reverting tests due to testing refactor coming soon --- tests/conftest.py | 10 --- tests/test_agent_checker_util.py | 116 ------------------------------- 2 files changed, 126 deletions(-) delete mode 100644 tests/test_agent_checker_util.py diff --git a/tests/conftest.py b/tests/conftest.py index f07da130..70b7e65d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,8 +22,6 @@ def pytest_addoption(parser): parser.addoption("--project_id", action="store") parser.addoption("--gcs_bucket", action="store") 
parser.addoption("--agent_id", action="store") - parser.addoption("--flow_name", action="store") - parser.addoption("--page_name", action="store") @pytest.fixture(scope="session") @@ -44,11 +42,3 @@ def gcs_bucket(request): @pytest.fixture(scope="session") def agent_id(request): return request.config.getoption("agent_id") - -@pytest.fixture(scope="session") -def flow_name(request): - return request.config.getoption("flow_name") - -@pytest.fixture(scope="session") -def page_name(request): - return request.config.getoption("page_name") diff --git a/tests/test_agent_checker_util.py b/tests/test_agent_checker_util.py deleted file mode 100644 index ec26b547..00000000 --- a/tests/test_agent_checker_util.py +++ /dev/null @@ -1,116 +0,0 @@ -"""Unit Tests for Agent Checker Util Class""" -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import pytest -import pandas as pd -from src.dfcx_scrapi.tools import agent_checker_util - -# logging config -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s %(levelname)-8s %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", -) - -# Note: Each instantiation of the AgentCheckerUtil class with a particular -# agent ID will make 2*num_flows + 2 API calls. Recommended to test in an -# agent which contains only two small flows. 
- -@pytest.mark.unit -def test_instantiate_agent_checker_util(creds, agent_id): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - assert isinstance(scrapi_checker, agent_checker_util.AgentCheckerUtil) - assert scrapi_checker.creds_path == creds - -@pytest.mark.unit -def test_find_all_reachable_pages(creds, agent_id): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - df = scrapi_checker.find_all_reachable_pages() - assert isinstance(df, pd.DataFrame) - assert set(df.columns) == { - "flow_name", - "page_name"} - logging.info("All reachable pages:\n%s", df.to_string()) - -@pytest.mark.unit -def test_find_all_unreachable_pages(creds, agent_id): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - df = scrapi_checker.find_all_unreachable_pages() - assert isinstance(df, pd.DataFrame) - assert set(df.columns) == { - "flow_name", - "page_name"} - logging.info("All unreachable pages:\n%s", df.to_string()) - -@pytest.mark.unit -def test_find_all_reachable_intents(creds, agent_id): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - df = scrapi_checker.find_all_reachable_intents() - assert isinstance(df, pd.DataFrame) - assert set(df.columns) == { - "intent", - "flows"} - logging.info("All reachable intents:\n%s", df.to_string()) - -@pytest.mark.unit -def test_find_all_unreachable_intents(creds, agent_id): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - intents = scrapi_checker.find_all_unreachable_intents() - assert isinstance(intents, list) - logging.info(f"All unreachable intents: {str(intents)}") - -@pytest.mark.unit -def test_find_reachable_intents(creds, agent_id, flow_name): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - intents = scrapi_checker.find_reachable_intents(flow_name) - assert 
isinstance(intents, list) - logging.info(f"Reachable intents for flow {flow_name}: {str(intents)}") - -@pytest.mark.unit -def test_find_reachable_pages(creds, agent_id, flow_name, page_name): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - page_names = scrapi_checker.find_reachable_pages(flow_name=flow_name, - from_page=page_name, - intent_route_limit=None) - assert isinstance(page_names, list) - logging.info(f"Reachable pages for flow {flow_name} starting from \ - {page_name}: {str(page_names)}") - -@pytest.mark.unit -def test_find_one_turn_reachable_pages(creds, agent_id, flow_name, page_name): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - page_names = scrapi_checker.find_reachable_pages(flow_name=flow_name, - from_page=page_name, - intent_route_limit=1) - assert isinstance(page_names, list) - logging.info(f"Reachable pages for flow {flow_name} starting from \ - {page_name} in one turn: {str(page_names)}") - -@pytest.mark.unit -def test_find_unreachable_pages(creds, agent_id, flow_name): - scrapi_checker = agent_checker_util.AgentCheckerUtil(creds_path=creds, - agent_id=agent_id) - page_names = scrapi_checker.find_unreachable_pages(flow_name=flow_name) - assert isinstance(page_names, list) - logging.info(f"Unreachable pages for flow {flow_name}: {str(page_names)}") From a750daf1641a055e362e1d1643d199511260861c Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 17:01:24 -0500 Subject: [PATCH 121/151] fix: modify active_intents type for downstream processing --- src/dfcx_scrapi/agent_extract/flows.py | 2 +- src/dfcx_scrapi/agent_extract/routes.py | 13 ++----------- src/dfcx_scrapi/agent_extract/types.py | 7 +++---- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/flows.py b/src/dfcx_scrapi/agent_extract/flows.py index 686d9b59..6d991dac 100644 --- a/src/dfcx_scrapi/agent_extract/flows.py +++ 
b/src/dfcx_scrapi/agent_extract/flows.py @@ -258,7 +258,7 @@ def process_flow(self, flow: types.Flow, stats: types.AgentData): flow.start_page_file = f"{flow.dir_path}/{flow.file_name}.json" stats.pages[flow.display_name] = [] - stats.active_intents[flow.display_name] = set() + stats.active_intents[flow.display_name] = [] stats = self.process_start_page(flow, stats) stats = self.pages.process_pages_directory(flow, stats) stats = self.rgs.process_route_groups_directory(flow, stats) diff --git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py index e530933d..8a151024 100644 --- a/src/dfcx_scrapi/agent_extract/routes.py +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -64,22 +64,13 @@ def check_for_intent(route: types.Fulfillment): return intent - @staticmethod - def check_intent_map(intent: str, stats: types.AgentData): - """Check to see if intent is currently in map.""" - res = stats.intents_page_map.get(intent) - if not res: - stats.intents_page_map[intent] = set() - def process_intents_in_routes( self, route: types.Fulfillment, stats: types.AgentData): intent = self.check_for_intent(route) if intent: + pair = (intent, route.page.display_name) stats.active_intents[ - route.page.flow.display_name].add(intent) - - self.check_intent_map(intent, stats) - stats.intents_page_map[intent].add(route.page.display_name) + route.page.flow.display_name].append(pair) return stats diff --git a/src/dfcx_scrapi/agent_extract/types.py b/src/dfcx_scrapi/agent_extract/types.py index 060481d4..ac25c34d 100644 --- a/src/dfcx_scrapi/agent_extract/types.py +++ b/src/dfcx_scrapi/agent_extract/types.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, List, Any +from typing import Dict, List, Any, Tuple from dataclasses import dataclass, field from dfcx_scrapi.agent_extract import graph as graph_class @@ -180,7 +180,8 @@ class Webhook: @dataclass class AgentData: """Used to track agent data for each section processed.""" - active_intents: Dict[str, set] = field(default_factory=dict) + active_intents: Dict[str, List[Tuple[str, str]]] = field( + default_factory=dict) active_pages: Dict[str, set] = field(default_factory=dict) agent_id: str = None entity_types: List[Dict[str, Any]] = field(default_factory=list) @@ -191,13 +192,11 @@ class AgentData: graph: graph_class.Graph = None intents: List[Dict[str, Any]] = field(default_factory=list) intents_map: Dict[str, Any] = field(default_factory=dict) - intents_page_map: Dict[str, set] = field(default_factory=dict) lang_code: str = "en" pages: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) route_groups: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) route_groups_map: Dict[str, Any] = field(default_factory=dict) test_cases: List[Dict[str, Any]] = field(default_factory=list) - unreachable_intents: set = field(default_factory=set) unreachable_pages: Dict[str, set] = field(default_factory=dict) unused_pages: Dict[str, set] = field(default_factory=dict) webhooks: List[Dict[str, Any]] = field(default_factory=list) From 85aa288c0caf91822c687313c2a3b93e122edf08 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 17:02:16 -0500 Subject: [PATCH 122/151] feat: refactor df code; implement get_unreachable_intents --- src/dfcx_scrapi/tools/agent_checker_util.py | 66 +++++++++++++++------ 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 28a35ed7..af50ec4c 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -54,7 +54,6 @@ def __init__( scope=scope, ) 
self.agent_id = agent_id - self.active_intents_df = pd.DataFrame() self.special_pages = [ "End Session", "End Flow", @@ -71,14 +70,16 @@ def __init__( self.data = self.extract.process_agent(agent_id, gcs_bucket_uri) logging.debug(f"TOTAL PROCESSING: {time.time() - processing_time}") - def filter_special_pages(self, page: str, filter_special_pages: bool): + self.active_intents_df = self.active_intents_to_dataframe() + + def _filter_special_pages(self, page: str, filter_special_pages: bool): """Recursion helper to check for special page match.""" if filter_special_pages and page in self.special_pages: return True return False - def recurse_edges(self, edges: Dict[str, List[str]], page: str, + def _recurse_edges(self, edges: Dict[str, List[str]], page: str, visited: set, depth: int, max_depth: int, filter_special_pages: bool): """Recursion method used to traverse the agent graph for page data. @@ -97,17 +98,26 @@ def recurse_edges(self, edges: Dict[str, List[str]], page: str, if page in edges: for inner_page in edges[page]: - if self.filter_special_pages(inner_page, filter_special_pages): + if self._filter_special_pages(inner_page, filter_special_pages): return visited if inner_page not in visited: visited.add(inner_page) - visited = self.recurse_edges( + visited = self._recurse_edges( edges, inner_page, visited, depth+1, max_depth, filter_special_pages) return visited + def _mark_unreachable_pages(self, df: pd.DataFrame) -> pd.DataFrame: + """Mark dataframe rows True if the page is unreachable in graph.""" + for idx, row in df.iterrows(): + for page in self.data.unreachable_pages[row["flow"]]: + if row['page'] == page: + df.loc[idx, 'unreachable'] = True + + return df + def get_reachable_pages( self, flow_display_name: str, @@ -132,28 +142,49 @@ def get_reachable_pages( page_display_name = "Start Page" page_display_name = f"{flow_display_name}: {page_display_name}" - visited = self.recurse_edges( + visited = self._recurse_edges( self.data.graph.edges, 
page_display_name, set(), 0, max_depth, filter_special_pages) return list(visited) def active_intents_to_dataframe(self) -> pd.DataFrame: - """Gets all intents referenced in the agent, across all flows, - and produces a dataframe listing which flows reference each intent. + """Gets all intents referenced in the agent, across all flows and pages, + and produces a dataframe listing which flows/pages reference each + intent. Returns: A dataframe with columns intent - the intent display name - flows - a list of flow display names that use this intent + flow - the Flow Display Name where the intent resides + page - the Page Display Name where the intent resides + unreachable - Denotes whether the Flow/Page/Intent combination is + unreachable in the graph. """ - df = pd.DataFrame({"intent": [], "flow": []}) + df = pd.DataFrame({ + "intent": pd.Series(dtype='str'), + "flow": pd.Series(dtype='str'), + "page": pd.Series(dtype='str'), + "unreachable": pd.Series(dtype='bool') + }) + + # Loop over active_intents, create temp dataframe, then concat with the + # main dataframe to build out the complete Flow/Page/Intent dataset. for flow in self.data.active_intents: - for intent in self.data.active_intents[flow]: - temp = pd.DataFrame({"intent": [intent], "flow": [flow]}) + for pair in self.data.active_intents[flow]: + intent = pair[0] + page = pair[1] + temp = pd.DataFrame({ + "intent": [intent], + "flow": [flow], + "page": [page], + "unreachable": [False]}) df = pd.concat([df, temp]) - self.active_intents_df = df.reset_index(drop=True) + df = df.reset_index(drop=True) + + # Finally, determine what rows are unreachable. + self.active_intents_df = self._mark_unreachable_pages(df) return self.active_intents_df @@ -166,12 +197,13 @@ def get_unused_intents(self) -> List: return list(all_intents_set.difference(active_intents_set)) - def get_unreachable_intents(self) -> List: + def get_unreachable_intents(self) -> pd.DataFrame: """Get all unreachable Intents across the agent. 
An Intent is unreachable if it resides on a page that is also unreachable. """ - # Get Page / Intent mapping - # Find all unreachable pages - # + if self.active_intents_df.empty: + self.active_intents_df = self.active_intents_to_dataframe() + + return self.active_intents_df[self.active_intents_df["unreachable"]] From 67cf89335a2e2b33dafcc39aeec2ce3ce80aa87a Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Fri, 25 Aug 2023 17:04:05 -0500 Subject: [PATCH 123/151] fix: lint fixes --- src/dfcx_scrapi/tools/agent_checker_util.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index af50ec4c..968a7422 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -113,8 +113,8 @@ def _mark_unreachable_pages(self, df: pd.DataFrame) -> pd.DataFrame: """Mark dataframe rows True if the page is unreachable in graph.""" for idx, row in df.iterrows(): for page in self.data.unreachable_pages[row["flow"]]: - if row['page'] == page: - df.loc[idx, 'unreachable'] = True + if row["page"] == page: + df.loc[idx, "unreachable"] = True return df @@ -162,10 +162,10 @@ def active_intents_to_dataframe(self) -> pd.DataFrame: unreachable in the graph. 
""" df = pd.DataFrame({ - "intent": pd.Series(dtype='str'), - "flow": pd.Series(dtype='str'), - "page": pd.Series(dtype='str'), - "unreachable": pd.Series(dtype='bool') + "intent": pd.Series(dtype="str"), + "flow": pd.Series(dtype="str"), + "page": pd.Series(dtype="str"), + "unreachable": pd.Series(dtype="bool") }) # Loop over active_intents, create temp dataframe, then concat with the From da42187d5579f2fe290be1c0d120808b86e1ddcd Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 27 Aug 2023 12:58:54 -0500 Subject: [PATCH 124/151] chore: comment cleanup; unused code cleanup --- src/dfcx_scrapi/agent_extract/pages.py | 4 ++-- src/dfcx_scrapi/agent_extract/route_groups.py | 2 +- src/dfcx_scrapi/agent_extract/routes.py | 1 - src/dfcx_scrapi/agent_extract/test_cases.py | 7 ------- src/dfcx_scrapi/tools/agent_checker_util.py | 5 ----- 5 files changed, 3 insertions(+), 16 deletions(-) diff --git a/src/dfcx_scrapi/agent_extract/pages.py b/src/dfcx_scrapi/agent_extract/pages.py index 27616cbe..51a710c6 100644 --- a/src/dfcx_scrapi/agent_extract/pages.py +++ b/src/dfcx_scrapi/agent_extract/pages.py @@ -20,8 +20,8 @@ from typing import Dict, Any from dfcx_scrapi.agent_extract import common -from dfcx_scrapi.agent_extract import types #import Flow, Page, AgentData, FormParameter -from dfcx_scrapi.agent_extract import routes #import Fulfillments +from dfcx_scrapi.agent_extract import types +from dfcx_scrapi.agent_extract import routes class Pages: diff --git a/src/dfcx_scrapi/agent_extract/route_groups.py b/src/dfcx_scrapi/agent_extract/route_groups.py index f69f4a9b..3a73e50a 100644 --- a/src/dfcx_scrapi/agent_extract/route_groups.py +++ b/src/dfcx_scrapi/agent_extract/route_groups.py @@ -65,7 +65,7 @@ def process_route_group(self, rg: types.RouteGroup, stats: types.AgentData): rg.display_name = rg.data.get("displayName", None) rg.routes = rg.data.get("transitionRoutes", None) - # stats = self.routes.process_routes(rg, stats) + stats = self.routes.process_routes(rg, 
stats) route_group_file.close() diff --git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py index 8a151024..f91d61ed 100644 --- a/src/dfcx_scrapi/agent_extract/routes.py +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -188,7 +188,6 @@ def process_fulfillment_type( # This is where each message type will exist # text, custom payload, etc. - # TODO pmarlow: create sub-method parsers per type if "text" in item: for text in item["text"]["text"]: route.text = text diff --git a/src/dfcx_scrapi/agent_extract/test_cases.py b/src/dfcx_scrapi/agent_extract/test_cases.py index b06671d2..ee5be205 100644 --- a/src/dfcx_scrapi/agent_extract/test_cases.py +++ b/src/dfcx_scrapi/agent_extract/test_cases.py @@ -71,7 +71,6 @@ def get_test_case_intent_phrase_pair( phrase = user.get("input", None) text = phrase.get("text", None) - # TODO pmarlow: Add DTMF user inputs if text: text = text["text"] @@ -91,8 +90,6 @@ def get_test_case_intent_phrase_pair( @staticmethod def get_test_case_intent_data(agent_local_path: str): """Collect all Intent Files and Training Phrases for Test Case.""" - # TODO (pmarlow) consolidate into build_intent_paths - intents_path = agent_local_path + "/intents" intent_paths = [] @@ -117,7 +114,6 @@ def flatten_tp_data(tp_data: List[Any]): return cleaned_tps def gather_intent_tps(self, tc: types.TestCase): - # TODO Refactor """Collect all TPs associated with Intent data in Test Case.""" tc.associated_intent_data = {} @@ -140,9 +136,6 @@ def gather_intent_tps(self, tc: types.TestCase): tp_file.close() - # TODO pmarlow: refactor to use tc.intent_data instead - # Need to create another level inside the Intent Dict - # that contains the language files as well. 
tc.intent_data[i]["training_phrases"].extend( cleaned_tps ) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 968a7422..4beae38f 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -23,11 +23,6 @@ from dfcx_scrapi.core import scrapi_base from dfcx_scrapi.agent_extract import agents -# Type aliases -DFCXFlow = types.flow.Flow -DFCXPage = types.page.Page -DFCXRoute = types.page.TransitionRoute - # logging config logging.basicConfig( level=logging.INFO, From c4edde853c236b599b9fb66df24a9a554aed7ce0 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 27 Aug 2023 13:11:09 -0500 Subject: [PATCH 125/151] fix: linting --- src/dfcx_scrapi/tools/agent_checker_util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py index 4beae38f..ff6e331a 100644 --- a/src/dfcx_scrapi/tools/agent_checker_util.py +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -19,7 +19,6 @@ from typing import Dict, List import pandas as pd -from google.cloud.dialogflowcx_v3beta1 import types from dfcx_scrapi.core import scrapi_base from dfcx_scrapi.agent_extract import agents From ad8d01ce3b9bc2de0357f559df1efad3711f08de Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 28 Aug 2023 14:59:45 -0700 Subject: [PATCH 126/151] fix: remove duplication of START_PAGE rows --- src/dfcx_scrapi/core/conversation.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/dfcx_scrapi/core/conversation.py b/src/dfcx_scrapi/core/conversation.py index 7a65c78d..191d8216 100644 --- a/src/dfcx_scrapi/core/conversation.py +++ b/src/dfcx_scrapi/core/conversation.py @@ -294,19 +294,6 @@ def _page_id_mapper(self): flow_mapped["page_id"] = flow_mapped.index flow_mapped = flow_mapped.rename(columns={0: "page_display_name"}) - - # add start page - start_page_id = flow_id + "/pages/START_PAGE" - 
flow_mapped = pd.concat( - [ - flow_mapped, - pd.DataFrame( - columns=["page_display_name", "page_id"], - data=[["START_PAGE", start_page_id]], - ), - ] - ) - flow_mapped.insert(0, "flow_display_name", flow_map[flow_id]) agent_pages_map = pd.concat([agent_pages_map, flow_mapped]) From 055452aaafea35fa92fb4e005d04f96b2d4f63b1 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 28 Aug 2023 15:00:14 -0700 Subject: [PATCH 127/151] chore: set fixed versions for some libraries --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b6b173fb..f8e90c5b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,9 +3,10 @@ google-cloud-dialogflow-cx google-auth google-oauth oauth2client +pyparsing==2.4.7 pandas tabulate -gspread +gspread==5.10.0 gspread_dataframe numpy requests From 1bdba418f39b4124e4147a1f312910cc7062517d Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 28 Aug 2023 15:01:05 -0700 Subject: [PATCH 128/151] fix: creds inheritence not being passed correctly --- src/dfcx_scrapi/tools/dataframe_functions.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/dfcx_scrapi/tools/dataframe_functions.py b/src/dfcx_scrapi/tools/dataframe_functions.py index 139d377c..59686b1e 100644 --- a/src/dfcx_scrapi/tools/dataframe_functions.py +++ b/src/dfcx_scrapi/tools/dataframe_functions.py @@ -70,7 +70,10 @@ def __init__( if scope: scopes += scope - if creds_path: + if creds: + self.sheets_client = gspread.authorize(creds) + + elif creds_path: creds = ServiceAccountCredentials.from_json_keyfile_name( filename=creds_path, scopes=scopes ) @@ -89,15 +92,11 @@ def __init__( creds = google.auth.default(scopes=scopes)[0] self.sheets_client = gspread.authorize(creds) - logging.info("create dfcx creds %s", creds_path) - self.entities = EntityTypes(creds_path, creds_dict) - self.intents = Intents(creds_path, creds_dict) - self.flows = Flows(creds_path, 
creds_dict) - self.pages = Pages(creds_path, creds_dict) - self.route_groups = TransitionRouteGroups( - creds_path, creds_dict - ) - self.creds_path = creds_path + self.entities = EntityTypes(creds=self.creds) + self.intents = Intents(creds=self.creds) + self.flows = Flows(creds=self.creds) + self.pages = Pages(creds=self.creds) + self.route_groups = TransitionRouteGroups(creds=self.creds) @staticmethod def progress_bar(current, total, bar_length=50, type_="Progress"): From 6ea847dea48a682735114746d49815c07fd24a1c Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 28 Aug 2023 15:47:28 -0700 Subject: [PATCH 129/151] feat: add evals class to expose nlu evlas notebook --- src/dfcx_scrapi/tools/evals_util.py | 433 ++++++++++++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 src/dfcx_scrapi/tools/evals_util.py diff --git a/src/dfcx_scrapi/tools/evals_util.py b/src/dfcx_scrapi/tools/evals_util.py new file mode 100644 index 00000000..079a4bd8 --- /dev/null +++ b/src/dfcx_scrapi/tools/evals_util.py @@ -0,0 +1,433 @@ +"""A set of Utility methods to check resources stats on DFCX Agents.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Dict + +import logging +import datetime +import pandas as pd +import gspread + +from tabulate import tabulate + +from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.core import agents +from dfcx_scrapi.core import flows +from dfcx_scrapi.core import pages +from dfcx_scrapi.core import intents +from dfcx_scrapi.core import conversation +from dfcx_scrapi.tools import dataframe_functions + +pd.options.display.max_colwidth = 200 + +GLOBAL_SCOPE = [ + "https://spreadsheets.google.com/feeds", + "https://www.googleapis.com/auth/drive", +] + +# logging config +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + + +class EvalTesting(scrapi_base.ScrapiBase): + """NLU Evaluation Class for Dialogflow CX Testing.""" + def __init__( + self, + agent_id: str, + creds_path: str = None, + creds_dict: Dict[str, str] = None, + creds=None, + ): + + super().__init__( + creds_path=creds_path, + creds_dict=creds_dict, + creds=creds, + scope=GLOBAL_SCOPE + ) + + self.agent_id = agent_id + + self._a = agents.Agents(creds=self.creds) + self._i = intents.Intents(creds=self.creds) + self._f = flows.Flows(creds=self.creds) + self._p = pages.Pages(creds=self.creds) + self._dc = conversation.DialogflowConversation( + creds_path=creds_path, agent_id=agent_id + ) + self._dffx = dataframe_functions.DataframeFunctions(creds=self.creds) + + @staticmethod + def _create_schema(df): + """Reusable DataFrame schema method.""" + df = df[ + [ + "flow_display_name", + "page_display_name", + "utterance", + "expected_intent", + "expected_parameters", + "agent_display_name", + "data_source", + "sheet_source", + ] + ] + + return df + + def _clean_dataframe(self, df): + """Various Dataframe cleaning functions.""" + + df.columns = df.columns.str.lower() + df = df.replace("Start Page", "START_PAGE") + df.rename( + columns={ + "source": "data_source", + }, + inplace=True, + ) + + df = 
self._add_agent_display_name_column(df) + df = self._create_schema(df) + + return df + + def _add_agent_display_name_column(self, df): + """Add the Agent Display name to the output dataframe.""" + + df["agent_display_name"] = self._a.get_agent(self.agent_id).display_name + + return df + + def _build_sheets_client(self): + client = gspread.authorize(self.creds) + + return client + + def format_preprocessed_conversation_logs( + self, + input_format: str = "gsheet", + gsheet_name: str = None, + gsheet_tab: str = None, + file_path: str = None, + ) -> pd.DataFrame: + """Transforms preprocssed data to dataframe for eval testing. + + The input for this method should be a Google Sheet that contains the + following columns: + flow_display_name: The name of the Dialogflow CX Flow + utterance: The user utterance to test + page_display_name: The display name of the Dialogflow CX page that + the eval test should start on. If not provided, START_PAGE is + assumed. + expected_intent: The Intent Display Name that is expected to trigger + for the given eval test. + expected_parameters: Optional parameters expected to be collected + for the given eval test. + source: Optional source of the eval dataa. + + Args: + input_format: The input format of the file. ONEOF: `csv`, `gsheet` + gsheet_name: Title of the Google Sheet where the data lives + gsheet_tab: Title of the Tab on the Sheet where the data lives + file_path: Optional file path if `csv` format is used + start_page_flow: In the case of a special page like START_PAGE, when no + additional flow information is provided, the script will default to + this Flow Display Name. Default value is Default Start Flow. + + Returns: + A formatted DataFrame ready to be used for multithreaded testing + """ + if input_format == "csv": + if not file_path: + raise ValueError( + "Must provide file_path with `csv` format." 
+ ) + df = pd.read_csv( + file_path, + usecols=[ + "flow_display_name", + "utterance", + "page_display_name", + "expected_intent", + "expected_parameters", + "source" + ], + ) + + elif input_format == "gsheet": + if not gsheet_name and not gsheet_tab: + raise ValueError( + "Must provide `gsheet_name` and `gsheet_tab` with `gsheet` " + "format." + ) + + df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) + + df["sheet_source"] = gsheet_tab + df = self._clean_dataframe(df) + + return df + + def get_flow_display_name_mapping( + self, + df: pd.DataFrame, + agent_id: str, + start_page_flow: str = "Default Start Flow", + ) -> pd.DataFrame: + """Retrieve Page/Flow Display Name Map. + + If a Flow Display Name is not provided, this method will attempt to + infer the correct Flow Display Name basd on the provided Page Display + Name. If a Flow Display Name is already provided, the method will honor + this user input. + """ + + flows_map = self._f.get_flows_map(agent_id) + + all_pages = {} + all_pages[ + "START_PAGE" + ] = start_page_flow # Case where source_page is the first turn + for flow in flows_map: + temp_pages = list(self._p.get_pages_map(flow, reverse=True).keys()) + for page in temp_pages: + all_pages[page] = flows_map[flow] + + # Fill blank flow names with the inferred one from the list of pages + # Otherwise use the user specified flow name + # NOTE: If multiple pages with the same name exist across different + # flows and Flow Display Name is not provided, the inferred Flow could + # be incorrect as the map will pick the first Flow encountered. 
+ df["flow_display_name"] = df.apply( + lambda row: row["flow_display_name"] + if all([ + row["flow_display_name"] != "", + row["flow_display_name"] is not None + ]) + else all_pages[row["page_display_name"]], + axis=1, + ) + + return df + + def run_tests( + self, df: pd.DataFrame, chunk_size: int = 300, rate_limit: float = 20 + ) -> pd.DataFrame: + """Tests a set of utterances for intent detection against a CX Agent. + + This function uses Python Threading to run tests in parallel to + expedite intent detection testing for Dialogflow CX agents. The default + quota for Text requests/min is 1200. Ref: + https://cloud.google.com/dialogflow/quotas#table + """ + + results = self._dc.run_intent_detection( + test_set=df, chunk_size=chunk_size, rate_limit=rate_limit + ) + + if "agent_display_name" in df.columns: + temp_column = results.pop("agent_display_name") + results.insert(len(results.columns), "agent_display_name", temp_column) + + if "data_source" in df.columns: + temp_column = results.pop("data_source") + results.insert(len(results.columns), "data_source", temp_column) + + if "sheet_source" in df.columns: + temp_column = results.pop("sheet_source") + results.insert(len(results.columns), "sheet_source", temp_column) + + # When a NO_MATCH occurs, the detected_intent field will be blank + # this replaces with NO_MATCH string, which will allow for easier stats + # calculation downstream + results.detected_intent.replace({'': 'NO_MATCH'}, inplace=True) + + return results + + def generate_report( + self, + results: pd.DataFrame, + report_timestamp: datetime.datetime, + ): + """Generates a printable report and dataframe. + + Args: + results: Input dataframe of testing results from run_tests method. + + Returns: + A dataframe with report summary stats. 
+ """ + # Calc fields + failed_df = results[results.detected_intent != results.expected_intent] + no_match_count = ( + results[results.detected_intent == "NO_MATCH"] + .groupby("detected_intent") + .size() + .sum() + ) + no_match_df = results[results.detected_intent == "NO_MATCH"] + no_match_rate = no_match_count / results.shape[0] + pass_count = ( + results[results.detected_intent == results.expected_intent] + .groupby("detected_intent") + .size() + .sum() + ) + pass_rate = pass_count / results.shape[0] + timestamp = report_timestamp + test_agent = results.agent_display_name.unique()[0] + flow_display_name = results.flow_display_name.unique()[0] + data_source = results.sheet_source.unique()[0] + + # Get Failure list of Utterance / Page pairs + failure_list = [] + for _, row in failed_df.iterrows(): + failure_list.append( + [ + row["utterance"], + row["flow_display_name"], + row["page_display_name"], + row["expected_intent"], + row["detected_intent"], + row["expected_parameters"], + row["parameters_set"], + ] + ) + + # Generate Dataframe format + df_report = pd.DataFrame( + columns=[ + "test_run_timestamp", + "total_tests", + "pass_count", + "pass_rate", + "no_match_count", + "no_match_rate", + "test_agent", + "flow_display_name", + "data_source", + ], + data=[ + [ + timestamp, + results.shape[0], + pass_count, + pass_rate, + no_match_count, + no_match_rate, + test_agent, + flow_display_name, + data_source, + ] + ], + ) + + # Printable Report Format + print("---------- RESULTS ----------") + print(f"Test Agent: {test_agent}") + print(f"Total Tests: {results.shape[0]}") + print(f"Pass Count: {pass_count}") + print(f"Pass Rate: {pass_rate:.2%}") + print(f"No Match Count: {no_match_count}") + print(f"No Match Rate: {no_match_rate:.2%}") + print(f"Test Run Timestamp: {timestamp}") + print(f"Test Set Data Source: {data_source}") + print("\n") + + return df_report + + def write_report_summary_to_log( + self, df: pd.DataFrame, sheet_name: str, sheet_tab: str + ): + 
"""Writes the output report summary to Google Sheets.""" + + client = self._build_sheets_client() + gsheet = client.open(sheet_name) + sheet = gsheet.worksheet(sheet_tab) + + df["test_run_timestamp"] = df.test_run_timestamp.astype("str") + + sheet.append_row( + df.values.flatten().tolist(), value_input_option="USER_ENTERED" + ) + + def write_test_results_to_sheets( + self, results: pd.DataFrame, sheet_name: str, sheet_tab: str + ): + """Writes the output result details to Google Sheets.""" + + client = self._build_sheets_client() + gsheet = client.open(sheet_name) + sheet = gsheet.worksheet(sheet_tab) + + sheet.clear() + + self._dffx.dataframe_to_sheets(sheet_name, sheet_tab, results) + + def append_test_results_to_sheets( + self, results: pd.DataFrame, sheet_name: str, sheet_tab: str + ): + """Adds results to an existing Google Sheet collection.""" + + client = self._build_sheets_client() + gsheet = client.open(sheet_name) + sheet = gsheet.worksheet(sheet_tab) + + # Fixes an error that sometimes happens when trying to write parameters + # to the sheet because they are formatted as objects + result_list = results.values.tolist() + result_list = [list(map(str, row)) for row in result_list] + + sheet.append_rows(result_list, value_input_option="USER_ENTERED") + + def run_evals(self, google_sheet_name: str, google_sheet_tab: str, + google_sheet_output_tab: str, google_sheet_summary_tab: str, + eval_run_display_name: str = "Evals", append=False): + """Run the full Eval dataset.""" + logsx = "-" * 10 + + logging.info(f"{logsx} STARTING {eval_run_display_name} {logsx}") + + report_timestamp = datetime.datetime.now() + + df = self.format_preprocessed_conversation_logs( + "gsheet", google_sheet_name, google_sheet_tab + ) + df = self.get_flow_display_name_mapping(df, self.agent_id) + df_results = self.run_tests(df) + df_report = self.generate_report(df_results, report_timestamp) + + self.write_report_summary_to_log( + df_report, google_sheet_name, google_sheet_summary_tab + 
) + + if append: + self.append_test_results_to_sheets( + df_results, google_sheet_name, google_sheet_output_tab + ) + + else: + self.write_test_results_to_sheets( + df_results, google_sheet_name, google_sheet_output_tab + ) + logging.info(f"{logsx} {eval_run_display_name} COMPLETE {logsx}") + + return df_results From a46f261a7dd35d3654353ba1d975a0039753b514 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 10 Sep 2023 19:56:37 -0500 Subject: [PATCH 130/151] fix: rename file; refactor code to be more streamlined --- .../tools/{evals_util.py => nlu_evals.py} | 269 +++++++++--------- 1 file changed, 132 insertions(+), 137 deletions(-) rename src/dfcx_scrapi/tools/{evals_util.py => nlu_evals.py} (68%) diff --git a/src/dfcx_scrapi/tools/evals_util.py b/src/dfcx_scrapi/tools/nlu_evals.py similarity index 68% rename from src/dfcx_scrapi/tools/evals_util.py rename to src/dfcx_scrapi/tools/nlu_evals.py index 079a4bd8..8bd43524 100644 --- a/src/dfcx_scrapi/tools/evals_util.py +++ b/src/dfcx_scrapi/tools/nlu_evals.py @@ -21,8 +21,6 @@ import pandas as pd import gspread -from tabulate import tabulate - from dfcx_scrapi.core import scrapi_base from dfcx_scrapi.core import agents from dfcx_scrapi.core import flows @@ -37,6 +35,32 @@ "https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive", ] +INPUT_SCHEMA_COLUMNS = [ + "flow_display_name", + "page_display_name", + "utterance", + "expected_intent", + "expected_parameters", + "agent_display_name", + "data_source", + "input_source", + ] + +OUTPUT_SCHEMA_COLUMNS = [ + "flow_display_name", + "page_display_name", + "utterance", + "expected_intent", + "expected_parameters", + "target_page", + "match_type", + "confidence", + "parameters_set", + "detected_intent", + "agent_display_name", + "data_source", + "input_source" + ] # logging config logging.basicConfig( @@ -46,7 +70,7 @@ ) -class EvalTesting(scrapi_base.ScrapiBase): +class NluEvals(scrapi_base.ScrapiBase): """NLU Evaluation Class for Dialogflow CX 
Testing.""" def __init__( self, @@ -74,27 +98,9 @@ def __init__( ) self._dffx = dataframe_functions.DataframeFunctions(creds=self.creds) - @staticmethod - def _create_schema(df): - """Reusable DataFrame schema method.""" - df = df[ - [ - "flow_display_name", - "page_display_name", - "utterance", - "expected_intent", - "expected_parameters", - "agent_display_name", - "data_source", - "sheet_source", - ] - ] - - return df def _clean_dataframe(self, df): """Various Dataframe cleaning functions.""" - df.columns = df.columns.str.lower() df = df.replace("Start Page", "START_PAGE") df.rename( @@ -104,16 +110,11 @@ def _clean_dataframe(self, df): inplace=True, ) - df = self._add_agent_display_name_column(df) - df = self._create_schema(df) - - return df - - def _add_agent_display_name_column(self, df): - """Add the Agent Display name to the output dataframe.""" - df["agent_display_name"] = self._a.get_agent(self.agent_id).display_name + # Validate input schema + df = df[INPUT_SCHEMA_COLUMNS] + return df def _build_sheets_client(self): @@ -121,71 +122,88 @@ def _build_sheets_client(self): return client - def format_preprocessed_conversation_logs( - self, - input_format: str = "gsheet", - gsheet_name: str = None, - gsheet_tab: str = None, - file_path: str = None, - ) -> pd.DataFrame: - """Transforms preprocssed data to dataframe for eval testing. - - The input for this method should be a Google Sheet that contains the - following columns: - flow_display_name: The name of the Dialogflow CX Flow - utterance: The user utterance to test - page_display_name: The display name of the Dialogflow CX page that - the eval test should start on. If not provided, START_PAGE is - assumed. - expected_intent: The Intent Display Name that is expected to trigger - for the given eval test. - expected_parameters: Optional parameters expected to be collected - for the given eval test. - source: Optional source of the eval dataa. - - Args: - input_format: The input format of the file. 
ONEOF: `csv`, `gsheet` - gsheet_name: Title of the Google Sheet where the data lives - gsheet_tab: Title of the Tab on the Sheet where the data lives - file_path: Optional file path if `csv` format is used - start_page_flow: In the case of a special page like START_PAGE, when no - additional flow information is provided, the script will default to - this Flow Display Name. Default value is Default Start Flow. - - Returns: - A formatted DataFrame ready to be used for multithreaded testing - """ - if input_format == "csv": - if not file_path: - raise ValueError( - "Must provide file_path with `csv` format." - ) - df = pd.read_csv( - file_path, - usecols=[ - "flow_display_name", - "utterance", - "page_display_name", - "expected_intent", - "expected_parameters", - "source" - ], - ) - - elif input_format == "gsheet": - if not gsheet_name and not gsheet_tab: - raise ValueError( - "Must provide `gsheet_name` and `gsheet_tab` with `gsheet` " - "format." - ) + def process_input_csv(self, input_file_path: str): + """Process the input data in CSV format.""" + df = pd.read_csv(input_file_path) + df = df.fillna('') + df["input_source"] = input_file_path + df = self._clean_dataframe(df) - df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) + return df - df["sheet_source"] = gsheet_tab - df = self._clean_dataframe(df) + def process_input_google_sheet(self, gsheet_name: str, gsheet_tab: str): + """Process the input data in Google Sheets format.""" + df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) + df["input_source"] = gsheet_tab + df = self._clean_dataframe(df) return df + # def format_preprocessed_conversation_logs( + # self, + # input_format: str = "gsheet", + # gsheet_name: str = None, + # gsheet_tab: str = None, + # file_path: str = None, + # ) -> pd.DataFrame: + # """Transforms preprocssed data to dataframe for eval testing. 
+ + # The input for this method should be a Google Sheet that contains the + # following columns: + # flow_display_name: The name of the Dialogflow CX Flow + # utterance: The user utterance to test + # page_display_name: The display name of the Dialogflow CX page that + # the eval test should start on. If not provided, START_PAGE is + # assumed. + # expected_intent: The Intent Display Name that is expected to trigger + # for the given eval test. + # expected_parameters: Optional parameters expected to be collected + # for the given eval test. + # source: Optional source of the eval dataa. + + # Args: + # input_format: The input format of the file. ONEOF: `csv`, `gsheet` + # gsheet_name: Title of the Google Sheet where the data lives + # gsheet_tab: Title of the Tab on the Sheet where the data lives + # file_path: Optional file path if `csv` format is used + # start_page_flow: In the case of a special page like START_PAGE, when no + # additional flow information is provided, the script will default to + # this Flow Display Name. Default value is Default Start Flow. + + # Returns: + # A formatted DataFrame ready to be used for multithreaded testing + # """ + # if input_format == "csv": + # if not file_path: + # raise ValueError( + # "Must provide file_path with `csv` format." + # ) + # df = pd.read_csv( + # file_path, + # usecols=[ + # "flow_display_name", + # "utterance", + # "page_display_name", + # "expected_intent", + # "expected_parameters", + # "source" + # ], + # ) + + # elif input_format == "gsheet": + # if not gsheet_name and not gsheet_tab: + # raise ValueError( + # "Must provide `gsheet_name` and `gsheet_tab` with `gsheet` " + # "format." 
+ # ) + + # df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) + + # df["input_source"] = gsheet_tab + # df = self._clean_dataframe(df) + + # return df + def get_flow_display_name_mapping( self, df: pd.DataFrame, @@ -228,40 +246,6 @@ def get_flow_display_name_mapping( return df - def run_tests( - self, df: pd.DataFrame, chunk_size: int = 300, rate_limit: float = 20 - ) -> pd.DataFrame: - """Tests a set of utterances for intent detection against a CX Agent. - - This function uses Python Threading to run tests in parallel to - expedite intent detection testing for Dialogflow CX agents. The default - quota for Text requests/min is 1200. Ref: - https://cloud.google.com/dialogflow/quotas#table - """ - - results = self._dc.run_intent_detection( - test_set=df, chunk_size=chunk_size, rate_limit=rate_limit - ) - - if "agent_display_name" in df.columns: - temp_column = results.pop("agent_display_name") - results.insert(len(results.columns), "agent_display_name", temp_column) - - if "data_source" in df.columns: - temp_column = results.pop("data_source") - results.insert(len(results.columns), "data_source", temp_column) - - if "sheet_source" in df.columns: - temp_column = results.pop("sheet_source") - results.insert(len(results.columns), "sheet_source", temp_column) - - # When a NO_MATCH occurs, the detected_intent field will be blank - # this replaces with NO_MATCH string, which will allow for easier stats - # calculation downstream - results.detected_intent.replace({'': 'NO_MATCH'}, inplace=True) - - return results - def generate_report( self, results: pd.DataFrame, @@ -295,7 +279,7 @@ def generate_report( timestamp = report_timestamp test_agent = results.agent_display_name.unique()[0] flow_display_name = results.flow_display_name.unique()[0] - data_source = results.sheet_source.unique()[0] + data_source = results.input_source.unique()[0] # Get Failure list of Utterance / Page pairs failure_list = [] @@ -398,21 +382,11 @@ def append_test_results_to_sheets( 
sheet.append_rows(result_list, value_input_option="USER_ENTERED") - def run_evals(self, google_sheet_name: str, google_sheet_tab: str, + def generate_report(google_sheet_name: str, google_sheet_tab: str, google_sheet_output_tab: str, google_sheet_summary_tab: str, eval_run_display_name: str = "Evals", append=False): - """Run the full Eval dataset.""" - logsx = "-" * 10 - - logging.info(f"{logsx} STARTING {eval_run_display_name} {logsx}") - + """""" report_timestamp = datetime.datetime.now() - - df = self.format_preprocessed_conversation_logs( - "gsheet", google_sheet_name, google_sheet_tab - ) - df = self.get_flow_display_name_mapping(df, self.agent_id) - df_results = self.run_tests(df) df_report = self.generate_report(df_results, report_timestamp) self.write_report_summary_to_log( @@ -428,6 +402,27 @@ def run_evals(self, google_sheet_name: str, google_sheet_tab: str, self.write_test_results_to_sheets( df_results, google_sheet_name, google_sheet_output_tab ) + + def run_evals(self, df: pd.DataFrame, chunk_size: int = 300, + rate_limit: float = 10.0, + eval_run_display_name: str = "Evals"): + """Run the full Eval dataset.""" + logsx = "-" * 10 + + logging.info(f"{logsx} STARTING {eval_run_display_name} {logsx}") + results = self._dc.run_intent_detection( + test_set=df, chunk_size=chunk_size, rate_limit=rate_limit + ) + + # Reorder Columns + results = results.reindex(columns=OUTPUT_SCHEMA_COLUMNS) + + # When a NO_MATCH occurs, the detected_intent field will be blank + # this replaces with NO_MATCH string, which will allow for easier stats + # calculation downstream + results.detected_intent.replace({'': 'NO_MATCH'}, inplace=True) + logging.info(f"{logsx} {eval_run_display_name} COMPLETE {logsx}") - return df_results + return results + From 97e05e86cd937a9da1013dde5f46dda29ecbff52 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 10 Sep 2023 19:56:52 -0500 Subject: [PATCH 131/151] feat: add sample input dataset --- data/nlu_evals_sample.csv | 19 
+++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 data/nlu_evals_sample.csv diff --git a/data/nlu_evals_sample.csv b/data/nlu_evals_sample.csv new file mode 100644 index 00000000..6bedd5c4 --- /dev/null +++ b/data/nlu_evals_sample.csv @@ -0,0 +1,19 @@ +flow_display_name,utterance,page_display_name,expected_intent,expected_parameters,source +Default Start Flow,I need to get my order status,START_PAGE,head_intent.order_status,,Demo Tests +Default Start Flow,Trying to check the status of my order,START_PAGE,head_intent.order_status,,Demo Tests +Default Start Flow,I hate this order status agent!,START_PAGE,head_intent.order_status,,Demo Tests +Default Start Flow,Wha'ts the point of ordering anything?,START_PAGE,NO_MATCH,,Demo Tests +Default Start Flow,I was looking at the order of operations yesterday but couldn't figure it out,START_PAGE,NO_MATCH,,Demo Tests +Default Start Flow,Thanks for getting my that status so quickly!,START_PAGE,head_intent.order_status,,Demo Tests +Default Start Flow,I need to make a payment,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Default Start Flow,I'm trying to setup a new payment,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Default Start Flow,Did you get the pavement,START_PAGE,NO_MATCH,,Demo Tests +Default Start Flow,I've been trying to pay my bill all day!,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Default Start Flow,Why can't I get my bill paid at all online?,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Default Start Flow,Why can't I get my bill paid at all online? 
Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Date Collection,12/13/2022,Collect Date,-,"{'collected_date': +{ ""day"": 13, ""month"": 12, ""year"": 2022 }, 'formatted_date': '13/12/2022'}}",Demo Tests +Proper Names Demo,Patrick Marlow,names,-,"{'people_names': +{ ""original"": ""Patrick Marlow"", ""name"": ""Patrick Marlow"" }}",Demo Tests +Proper Names Demo,Andrew Smith,names,,"{'people_names': +{ ""original"": ""Andrew Smith"", ""name"": ""Andrew Smith"" }}",Demo Tests From c5ba92ae568b05410b162d25ae73c563b752c842 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 10 Sep 2023 20:24:38 -0500 Subject: [PATCH 132/151] fix: replace print statements with logging.info --- src/dfcx_scrapi/core/conversation.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/dfcx_scrapi/core/conversation.py b/src/dfcx_scrapi/core/conversation.py index 191d8216..5e8a9da1 100644 --- a/src/dfcx_scrapi/core/conversation.py +++ b/src/dfcx_scrapi/core/conversation.py @@ -159,10 +159,9 @@ def progress_bar(current, total, bar_length=50, type_="Progress"): percent = float(current) * 100 / total arrow = "-" * int(percent / 100 * bar_length - 1) + ">" spaces = " " * (bar_length - len(arrow)) - print( - f"{type_}({current}/{total})" + f"[{arrow}{spaces}] {percent:.2f}%", - end="\r", - ) + logging.info( + f"{type_}({current}/{total})" + f"[{arrow}{spaces}] {percent:.2f}%" + ) @staticmethod def _build_query_params_object(parameters, current_page, disable_webhook): @@ -371,7 +370,7 @@ def set_agent_env(self, param, value): self.agent_env[param] = value def checkpoint(self, msg=None, start=False): - """Print a 
checkpoint to time progress and debug bottleneck""" + """Log a checkpoint to time progress and debug bottleneck""" if start: start_time = time.perf_counter() self.start_time = start_time @@ -380,7 +379,7 @@ def checkpoint(self, msg=None, start=False): duration = round((time.perf_counter() - start_time), 2) if duration > 2: if msg: - print(f"{duration:0.2f}s {msg}") + logging.info(f"{duration:0.2f}s {msg}") @scrapi_base.api_call_counter_decorator def reply( From afe3ee1df2b1d8213566bd35fa65a0697f185734 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 10 Sep 2023 20:25:03 -0500 Subject: [PATCH 133/151] fix: replace default input schema columns --- data/nlu_evals_sample.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/nlu_evals_sample.csv b/data/nlu_evals_sample.csv index 6bedd5c4..f6d1e45e 100644 --- a/data/nlu_evals_sample.csv +++ b/data/nlu_evals_sample.csv @@ -1,4 +1,4 @@ -flow_display_name,utterance,page_display_name,expected_intent,expected_parameters,source +flow_display_name,utterance,page_display_name,expected_intent,expected_parameters,description Default Start Flow,I need to get my order status,START_PAGE,head_intent.order_status,,Demo Tests Default Start Flow,Trying to check the status of my order,START_PAGE,head_intent.order_status,,Demo Tests Default Start Flow,I hate this order status agent!,START_PAGE,head_intent.order_status,,Demo Tests From 4f7a41e780fb2ec68ad702ba26634baa8789e9c9 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 10 Sep 2023 20:25:46 -0500 Subject: [PATCH 134/151] fix: clean up dataframe schemas; drop unused code --- src/dfcx_scrapi/tools/nlu_evals.py | 87 +++++------------------------- 1 file changed, 12 insertions(+), 75 deletions(-) diff --git a/src/dfcx_scrapi/tools/nlu_evals.py b/src/dfcx_scrapi/tools/nlu_evals.py index 8bd43524..1e84956d 100644 --- a/src/dfcx_scrapi/tools/nlu_evals.py +++ b/src/dfcx_scrapi/tools/nlu_evals.py @@ -41,9 +41,7 @@ "utterance", "expected_intent", 
"expected_parameters", - "agent_display_name", - "data_source", - "input_source", + "description", ] OUTPUT_SCHEMA_COLUMNS = [ @@ -58,7 +56,7 @@ "parameters_set", "detected_intent", "agent_display_name", - "data_source", + "description", "input_source" ] @@ -105,15 +103,19 @@ def _clean_dataframe(self, df): df = df.replace("Start Page", "START_PAGE") df.rename( columns={ - "source": "data_source", + "source": "description", }, inplace=True, ) - df["agent_display_name"] = self._a.get_agent(self.agent_id).display_name - # Validate input schema - df = df[INPUT_SCHEMA_COLUMNS] + try: + df = df[INPUT_SCHEMA_COLUMNS] + except KeyError: + raise UserWarning("Ensure your input data contains the following "\ + f"columns: {INPUT_SCHEMA_COLUMNS}") + + df["agent_display_name"] = self._a.get_agent(self.agent_id).display_name return df @@ -126,84 +128,19 @@ def process_input_csv(self, input_file_path: str): """Process the input data in CSV format.""" df = pd.read_csv(input_file_path) df = df.fillna('') - df["input_source"] = input_file_path df = self._clean_dataframe(df) + df["input_source"] = input_file_path return df def process_input_google_sheet(self, gsheet_name: str, gsheet_tab: str): """Process the input data in Google Sheets format.""" df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) - df["input_source"] = gsheet_tab df = self._clean_dataframe(df) + df["input_source"] = gsheet_tab return df - # def format_preprocessed_conversation_logs( - # self, - # input_format: str = "gsheet", - # gsheet_name: str = None, - # gsheet_tab: str = None, - # file_path: str = None, - # ) -> pd.DataFrame: - # """Transforms preprocssed data to dataframe for eval testing. - - # The input for this method should be a Google Sheet that contains the - # following columns: - # flow_display_name: The name of the Dialogflow CX Flow - # utterance: The user utterance to test - # page_display_name: The display name of the Dialogflow CX page that - # the eval test should start on. 
If not provided, START_PAGE is - # assumed. - # expected_intent: The Intent Display Name that is expected to trigger - # for the given eval test. - # expected_parameters: Optional parameters expected to be collected - # for the given eval test. - # source: Optional source of the eval dataa. - - # Args: - # input_format: The input format of the file. ONEOF: `csv`, `gsheet` - # gsheet_name: Title of the Google Sheet where the data lives - # gsheet_tab: Title of the Tab on the Sheet where the data lives - # file_path: Optional file path if `csv` format is used - # start_page_flow: In the case of a special page like START_PAGE, when no - # additional flow information is provided, the script will default to - # this Flow Display Name. Default value is Default Start Flow. - - # Returns: - # A formatted DataFrame ready to be used for multithreaded testing - # """ - # if input_format == "csv": - # if not file_path: - # raise ValueError( - # "Must provide file_path with `csv` format." - # ) - # df = pd.read_csv( - # file_path, - # usecols=[ - # "flow_display_name", - # "utterance", - # "page_display_name", - # "expected_intent", - # "expected_parameters", - # "source" - # ], - # ) - - # elif input_format == "gsheet": - # if not gsheet_name and not gsheet_tab: - # raise ValueError( - # "Must provide `gsheet_name` and `gsheet_tab` with `gsheet` " - # "format." 
- # ) - - # df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) - - # df["input_source"] = gsheet_tab - # df = self._clean_dataframe(df) - - # return df - def get_flow_display_name_mapping( self, df: pd.DataFrame, From b479d3f2a24bcebc1e22c318e51a0084feb0ed0c Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 10 Sep 2023 20:26:23 -0500 Subject: [PATCH 135/151] chore: cleanup unused code --- src/dfcx_scrapi/tools/nlu_evals.py | 42 ------------------------------ 1 file changed, 42 deletions(-) diff --git a/src/dfcx_scrapi/tools/nlu_evals.py b/src/dfcx_scrapi/tools/nlu_evals.py index 1e84956d..9e7e5cc6 100644 --- a/src/dfcx_scrapi/tools/nlu_evals.py +++ b/src/dfcx_scrapi/tools/nlu_evals.py @@ -141,48 +141,6 @@ def process_input_google_sheet(self, gsheet_name: str, gsheet_tab: str): return df - def get_flow_display_name_mapping( - self, - df: pd.DataFrame, - agent_id: str, - start_page_flow: str = "Default Start Flow", - ) -> pd.DataFrame: - """Retrieve Page/Flow Display Name Map. - - If a Flow Display Name is not provided, this method will attempt to - infer the correct Flow Display Name basd on the provided Page Display - Name. If a Flow Display Name is already provided, the method will honor - this user input. - """ - - flows_map = self._f.get_flows_map(agent_id) - - all_pages = {} - all_pages[ - "START_PAGE" - ] = start_page_flow # Case where source_page is the first turn - for flow in flows_map: - temp_pages = list(self._p.get_pages_map(flow, reverse=True).keys()) - for page in temp_pages: - all_pages[page] = flows_map[flow] - - # Fill blank flow names with the inferred one from the list of pages - # Otherwise use the user specified flow name - # NOTE: If multiple pages with the same name exist across different - # flows and Flow Display Name is not provided, the inferred Flow could - # be incorrect as the map will pick the first Flow encountered. 
- df["flow_display_name"] = df.apply( - lambda row: row["flow_display_name"] - if all([ - row["flow_display_name"] != "", - row["flow_display_name"] is not None - ]) - else all_pages[row["page_display_name"]], - axis=1, - ) - - return df - def generate_report( self, results: pd.DataFrame, From a8022a0ce2a436ba6e030703059f3369b9c259d3 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 10 Sep 2023 21:46:17 -0500 Subject: [PATCH 136/151] feat: add support for CSV as input; refactor code --- src/dfcx_scrapi/tools/nlu_evals.py | 303 ++++++++++++++--------------- 1 file changed, 146 insertions(+), 157 deletions(-) diff --git a/src/dfcx_scrapi/tools/nlu_evals.py b/src/dfcx_scrapi/tools/nlu_evals.py index 9e7e5cc6..33496e97 100644 --- a/src/dfcx_scrapi/tools/nlu_evals.py +++ b/src/dfcx_scrapi/tools/nlu_evals.py @@ -15,6 +15,7 @@ # limitations under the License. from typing import Dict +from dataclasses import dataclass import logging import datetime @@ -60,6 +61,18 @@ "input_source" ] +SUMMARY_SCHEMA_COLUMNS = [ + "test_run_timestamp", + "total_tests", + "pass_count", + "pass_rate", + "no_match_count", + "no_match_rate", + "test_agent", + "flow_display_name", + "data_source" + ] + # logging config logging.basicConfig( level=logging.INFO, @@ -68,6 +81,18 @@ ) +@dataclass +class Stats: + """Dataclass for the summary stats.""" + no_match_count: int = 0 + no_match_rate: float = 0.0 + pass_count: int = 0 + pass_rate: float = 0.0 + test_agent: str = None + flow_display_name: str = None + data_source: str = None + + class NluEvals(scrapi_base.ScrapiBase): """NLU Evaluation Class for Dialogflow CX Testing.""" def __init__( @@ -86,6 +111,7 @@ def __init__( ) self.agent_id = agent_id + self._sheets_client = self._build_sheets_client() self._a = agents.Agents(creds=self.creds) self._i = intents.Intents(creds=self.creds) @@ -96,150 +122,40 @@ def __init__( ) self._dffx = dataframe_functions.DataframeFunctions(creds=self.creds) - - def _clean_dataframe(self, df): - """Various 
Dataframe cleaning functions.""" - df.columns = df.columns.str.lower() - df = df.replace("Start Page", "START_PAGE") - df.rename( - columns={ - "source": "description", - }, - inplace=True, - ) - - # Validate input schema - try: - df = df[INPUT_SCHEMA_COLUMNS] - except KeyError: - raise UserWarning("Ensure your input data contains the following "\ - f"columns: {INPUT_SCHEMA_COLUMNS}") - - df["agent_display_name"] = self._a.get_agent(self.agent_id).display_name - - return df - def _build_sheets_client(self): client = gspread.authorize(self.creds) return client - def process_input_csv(self, input_file_path: str): - """Process the input data in CSV format.""" - df = pd.read_csv(input_file_path) - df = df.fillna('') - df = self._clean_dataframe(df) - df["input_source"] = input_file_path - - return df - - def process_input_google_sheet(self, gsheet_name: str, gsheet_tab: str): - """Process the input data in Google Sheets format.""" - df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) - df = self._clean_dataframe(df) - df["input_source"] = gsheet_tab - - return df - - def generate_report( - self, - results: pd.DataFrame, - report_timestamp: datetime.datetime, - ): - """Generates a printable report and dataframe. - - Args: - results: Input dataframe of testing results from run_tests method. - - Returns: - A dataframe with report summary stats. 
- """ - # Calc fields - failed_df = results[results.detected_intent != results.expected_intent] - no_match_count = ( - results[results.detected_intent == "NO_MATCH"] + def _calculate_stats(self, df: pd.DataFrame): + """Calculate all the stats needed for the summary report.""" + stats = Stats() + stats.no_match_count = ( + df[df.detected_intent == "NO_MATCH"] .groupby("detected_intent") .size() .sum() ) - no_match_df = results[results.detected_intent == "NO_MATCH"] - no_match_rate = no_match_count / results.shape[0] - pass_count = ( - results[results.detected_intent == results.expected_intent] + stats.no_match_rate = stats.no_match_count / df.shape[0] + stats.pass_count = ( + df[df.detected_intent == df.expected_intent] .groupby("detected_intent") .size() .sum() ) - pass_rate = pass_count / results.shape[0] - timestamp = report_timestamp - test_agent = results.agent_display_name.unique()[0] - flow_display_name = results.flow_display_name.unique()[0] - data_source = results.input_source.unique()[0] - - # Get Failure list of Utterance / Page pairs - failure_list = [] - for _, row in failed_df.iterrows(): - failure_list.append( - [ - row["utterance"], - row["flow_display_name"], - row["page_display_name"], - row["expected_intent"], - row["detected_intent"], - row["expected_parameters"], - row["parameters_set"], - ] - ) + stats.pass_rate = stats.pass_count / df.shape[0] + stats.test_agent = df.agent_display_name.unique()[0] + stats.flow_display_name = df.flow_display_name.unique()[0] + stats.data_source = df.input_source.unique()[0] - # Generate Dataframe format - df_report = pd.DataFrame( - columns=[ - "test_run_timestamp", - "total_tests", - "pass_count", - "pass_rate", - "no_match_count", - "no_match_rate", - "test_agent", - "flow_display_name", - "data_source", - ], - data=[ - [ - timestamp, - results.shape[0], - pass_count, - pass_rate, - no_match_count, - no_match_rate, - test_agent, - flow_display_name, - data_source, - ] - ], - ) + return stats - # Printable 
Report Format - print("---------- RESULTS ----------") - print(f"Test Agent: {test_agent}") - print(f"Total Tests: {results.shape[0]}") - print(f"Pass Count: {pass_count}") - print(f"Pass Rate: {pass_rate:.2%}") - print(f"No Match Count: {no_match_count}") - print(f"No Match Rate: {no_match_rate:.2%}") - print(f"Test Run Timestamp: {timestamp}") - print(f"Test Set Data Source: {data_source}") - print("\n") - - return df_report - - def write_report_summary_to_log( + def _write_report_summary_to_sheets( self, df: pd.DataFrame, sheet_name: str, sheet_tab: str ): """Writes the output report summary to Google Sheets.""" - client = self._build_sheets_client() - gsheet = client.open(sheet_name) + gsheet = self._sheets_client.open(sheet_name) sheet = gsheet.worksheet(sheet_tab) df["test_run_timestamp"] = df.test_run_timestamp.astype("str") @@ -248,26 +164,12 @@ def write_report_summary_to_log( df.values.flatten().tolist(), value_input_option="USER_ENTERED" ) - def write_test_results_to_sheets( - self, results: pd.DataFrame, sheet_name: str, sheet_tab: str - ): - """Writes the output result details to Google Sheets.""" - - client = self._build_sheets_client() - gsheet = client.open(sheet_name) - sheet = gsheet.worksheet(sheet_tab) - - sheet.clear() - - self._dffx.dataframe_to_sheets(sheet_name, sheet_tab, results) - - def append_test_results_to_sheets( + def _append_test_results_to_sheets( self, results: pd.DataFrame, sheet_name: str, sheet_tab: str ): """Adds results to an existing Google Sheet collection.""" - client = self._build_sheets_client() - gsheet = client.open(sheet_name) + gsheet = self._sheets_client.open(sheet_name) sheet = gsheet.worksheet(sheet_tab) # Fixes an error that sometimes happens when trying to write parameters @@ -277,27 +179,57 @@ def append_test_results_to_sheets( sheet.append_rows(result_list, value_input_option="USER_ENTERED") - def generate_report(google_sheet_name: str, google_sheet_tab: str, - google_sheet_output_tab: str, 
google_sheet_summary_tab: str, - eval_run_display_name: str = "Evals", append=False): - """""" - report_timestamp = datetime.datetime.now() - df_report = self.generate_report(df_results, report_timestamp) + def _write_test_results_to_sheets( + self, results: pd.DataFrame, sheet_name: str, sheet_tab: str + ): + """Writes the output result details to Google Sheets.""" - self.write_report_summary_to_log( - df_report, google_sheet_name, google_sheet_summary_tab - ) + gsheet = self._sheets_client.open(sheet_name) + sheet = gsheet.worksheet(sheet_tab) - if append: - self.append_test_results_to_sheets( - df_results, google_sheet_name, google_sheet_output_tab - ) + sheet.clear() - else: - self.write_test_results_to_sheets( - df_results, google_sheet_name, google_sheet_output_tab + self._dffx.dataframe_to_sheets(sheet_name, sheet_tab, results) + + def _clean_dataframe(self, df): + """Various Dataframe cleaning functions.""" + df.columns = df.columns.str.lower() + df = df.replace("Start Page", "START_PAGE") + df.rename( + columns={ + "source": "description", + }, + inplace=True, ) + # Validate input schema + try: + df = df[INPUT_SCHEMA_COLUMNS] + except KeyError: + raise UserWarning("Ensure your input data contains the following "\ + f"columns: {INPUT_SCHEMA_COLUMNS}") + + df["agent_display_name"] = self._a.get_agent(self.agent_id).display_name + + return df + + def process_input_csv(self, input_file_path: str): + """Process the input data in CSV format.""" + df = pd.read_csv(input_file_path) + df = df.fillna('') + df = self._clean_dataframe(df) + df["input_source"] = input_file_path + + return df + + def process_input_google_sheet(self, gsheet_name: str, gsheet_tab: str): + """Process the input data in Google Sheets format.""" + df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) + df = self._clean_dataframe(df) + df["input_source"] = gsheet_tab + + return df + def run_evals(self, df: pd.DataFrame, chunk_size: int = 300, rate_limit: float = 10.0, 
eval_run_display_name: str = "Evals"): @@ -321,3 +253,60 @@ def run_evals(self, df: pd.DataFrame, chunk_size: int = 300, return results + def generate_report(self, df: pd.DataFrame, + report_timestamp: datetime.datetime + ): + """Generates a summary stats report for most recent NLU Eval tests.""" + # Calc fields + stats = self._calculate_stats(df) + + # Generate Dataframe format + df_report = pd.DataFrame( + columns=SUMMARY_SCHEMA_COLUMNS, + data=[ + [ + report_timestamp, + df.shape[0], + stats.pass_count, + stats.pass_rate, + stats.no_match_count, + stats.no_match_rate, + stats.test_agent, + stats.flow_display_name, + stats.data_source, + ] + ], + ) + + return df_report + + def write_summary_to_file(self, df: pd.DataFrame, output_file: str): + """Write summary output to a local CSV file.""" + report_timestamp = datetime.datetime.now() + df_report = self.generate_report(df, report_timestamp) + df_report.to_csv(output_file, index=False) + + def write_results_to_file(self, df: pd.DataFrame, output_file: str): + df.to_csv(output_file, index=False) + + def write_results_to_sheets(self, df: pd.DataFrame, google_sheet_name: str, + full_output_tab: str, + summary_tab: str, + append=False): + """Write summary and detailed output to Google Sheets.""" + report_timestamp = datetime.datetime.now() + df_report = self.generate_report(df, report_timestamp) + + self._write_report_summary_to_sheets( + df_report, google_sheet_name, summary_tab + ) + + if append: + self._append_test_results_to_sheets( + df, google_sheet_name, full_output_tab + ) + + else: + self._write_test_results_to_sheets( + df, google_sheet_name, full_output_tab + ) From ee5a3575cf10602e70920c9200bd70f2639cfaeb Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Sun, 10 Sep 2023 21:50:06 -0500 Subject: [PATCH 137/151] fix: lint fixes --- src/dfcx_scrapi/tools/nlu_evals.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dfcx_scrapi/tools/nlu_evals.py 
b/src/dfcx_scrapi/tools/nlu_evals.py index 33496e97..d74639c7 100644 --- a/src/dfcx_scrapi/tools/nlu_evals.py +++ b/src/dfcx_scrapi/tools/nlu_evals.py @@ -205,9 +205,9 @@ def _clean_dataframe(self, df): # Validate input schema try: df = df[INPUT_SCHEMA_COLUMNS] - except KeyError: + except KeyError as err: raise UserWarning("Ensure your input data contains the following "\ - f"columns: {INPUT_SCHEMA_COLUMNS}") + f"columns: {INPUT_SCHEMA_COLUMNS}") from err df["agent_display_name"] = self._a.get_agent(self.agent_id).display_name @@ -216,7 +216,7 @@ def _clean_dataframe(self, df): def process_input_csv(self, input_file_path: str): """Process the input data in CSV format.""" df = pd.read_csv(input_file_path) - df = df.fillna('') + df = df.fillna("") df = self._clean_dataframe(df) df["input_source"] = input_file_path @@ -247,7 +247,7 @@ def run_evals(self, df: pd.DataFrame, chunk_size: int = 300, # When a NO_MATCH occurs, the detected_intent field will be blank # this replaces with NO_MATCH string, which will allow for easier stats # calculation downstream - results.detected_intent.replace({'': 'NO_MATCH'}, inplace=True) + results.detected_intent.replace({"": "NO_MATCH"}, inplace=True) logging.info(f"{logsx} {eval_run_display_name} COMPLETE {logsx}") From e5503949907512bacf5d5706317b491e19f5b88f Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 11 Sep 2023 11:43:55 -0500 Subject: [PATCH 138/151] feat: add notebook for demonstrating NLU pipeline --- .../nlu_evaluation_testing.ipynb | 419 ++++++++++++++++++ 1 file changed, 419 insertions(+) create mode 100644 examples/nlu_analysis_series/nlu_evaluation_testing.ipynb diff --git a/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb b/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb new file mode 100644 index 00000000..e8839d50 --- /dev/null +++ b/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb @@ -0,0 +1,419 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2023 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NLU Evaluation Testing\n", + "In this notebook, we will show you how to perform bulk NLU testing by providing a large input corpus and receiving the predicted Intent and Parameter extraction results from your agent.\n", + "\n", + "## Prerequisites\n", + "- Ensure you have a GCP Service Account key with the Dialogflow API Admin privileges assigned to it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If you haven't already, make sure you install the `dfcx-scrapi` library\n", + "\n", + "!pip install dfcx-scrapi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from dfcx_scrapi.tools.nlu_evals import NluEvals" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# (Option 1) Google Sheet as Input\n", + "The primary option for running the NLU Eval pipeline revolves around using Google Sheets as a source for your data. 
\n", + "This method provides a simple, streamlined system that can pull and push data all into a single Google Sheet.\n", + "\n", + "In order to run the full NLU evaluation test, the following inputs are needed:\n", + "- `agent_id`, The Dialogflow CX Agent ID.\n", + "- `input_google_sheet`, the Display Name of the Google Sheet.\n", + "- `input_google_sheet_tab`, the Display Name of the tab on the Google Sheet where your input data lives.\n", + "- `output_google_sheet_results`, the Display Name of the tab on the Google Sheet where you want the full output results to be written.\n", + "- `output_google_sheet_summary`, the Display Name of the tab on the Google Sheet where you want the report summary to be written.\n", + "\n", + "_**NOTE** - In order for your Service Account to access your Google Sheet (read / write) you need to share the Google Sheet with your Service Account email address._" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "agent_id = ''\n", + "creds_path = ''\n", + "\n", + "# Sample Inputs\n", + "input_google_sheet = 'Dialogflow CX SCRAPI - NLU Eval Sample Dataset'\n", + "input_google_sheet_tab = 'input_dataset'\n", + "output_google_sheet_results = 'results'\n", + "output_google_sheet_summary = 'summary'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run NLU Evals\n", + "There are 3 main stages that happen for the Eval Pipeline:\n", + "1. Process and validate the input data\n", + "2. Run the Eval Tests\n", + "3. Write the output summary and details to a report." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-11 11:40:31 INFO ---------- STARTING Evals ----------\n", + "2023-09-11 11:40:37 WARNING Text input is too long. 
Truncating to 256 characters.\n", + "2023-09-11 11:40:37 WARNING TRUNCATED TEXT: Why can't I get my bill paid at all online? Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all onli\n", + "2023-09-11 11:40:41 INFO Progress(0/15)[> ] 0.00%\n", + "2023-09-11 11:40:51 INFO Progress(15/15)[------------------------------------------------->] 100.00%\n", + "2023-09-11 11:40:51 INFO ---------- Evals COMPLETE ----------\n" + ] + } + ], + "source": [ + "nlu = NluEvals(agent_id, creds_path=creds_path)\n", + "\n", + "df = nlu.process_input_google_sheet(input_google_sheet, input_google_sheet_tab)\n", + "df = nlu.run_evals(df)\n", + "nlu.write_results_to_sheets(df, input_google_sheet, output_google_sheet_results, output_google_sheet_summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect Results Locally\n", + "You can also inspect and filter the results of your tests locally as needed." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
flow_display_namepage_display_nameutteranceexpected_intentexpected_parameterstarget_pagematch_typeconfidenceparameters_setdetected_intentagent_display_namedescriptioninput_source
0Default Start FlowSTART_PAGEI need to get my order statushead_intent.order_statussentiment_routerINTENT1.000000head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
1Default Start FlowSTART_PAGETrying to check the status of my orderhead_intent.order_statussentiment_routerINTENT0.947959head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
2Default Start FlowSTART_PAGEI hate this order status agent!head_intent.order_statussentiment_routerINTENT0.955709head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
3Default Start FlowSTART_PAGEWha'ts the point of ordering anything?NO_MATCHsentiment_routerINTENT0.841712head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
4Default Start FlowSTART_PAGEI was looking at the order of operations yesterday but couldn't figure it outNO_MATCHsentiment_routerINTENT0.790275head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
\n", + "
" + ], + "text/plain": [ + " flow_display_name page_display_name \\\n", + "0 Default Start Flow START_PAGE \n", + "1 Default Start Flow START_PAGE \n", + "2 Default Start Flow START_PAGE \n", + "3 Default Start Flow START_PAGE \n", + "4 Default Start Flow START_PAGE \n", + "\n", + " utterance \\\n", + "0 I need to get my order status \n", + "1 Trying to check the status of my order \n", + "2 I hate this order status agent! \n", + "3 Wha'ts the point of ordering anything? \n", + "4 I was looking at the order of operations yesterday but couldn't figure it out \n", + "\n", + " expected_intent expected_parameters target_page match_type \\\n", + "0 head_intent.order_status sentiment_router INTENT \n", + "1 head_intent.order_status sentiment_router INTENT \n", + "2 head_intent.order_status sentiment_router INTENT \n", + "3 NO_MATCH sentiment_router INTENT \n", + "4 NO_MATCH sentiment_router INTENT \n", + "\n", + " confidence parameters_set detected_intent \\\n", + "0 1.000000 head_intent.order_status \n", + "1 0.947959 head_intent.order_status \n", + "2 0.955709 head_intent.order_status \n", + "3 0.841712 head_intent.order_status \n", + "4 0.790275 head_intent.order_status \n", + "\n", + " agent_display_name description \\\n", + "0 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "1 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "2 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "3 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "4 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "\n", + " input_source \n", + "0 input_dataset \n", + "1 input_dataset \n", + "2 input_dataset \n", + "3 input_dataset \n", + "4 input_dataset " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# (Option 2) CSV as Input\n", + 
"Similar to the above pipeline, except we will process the input data from a CSV file.\n", + "\n", + "For the output to local files, you will need to define 2 output destinations:\n", + "1. An output file for the full detailed results\n", + "2. An output file for the report summary\n", + "\n", + "- `agent_id`, The Dialogflow CX Agent ID.\n", + "- `input_path`, The local path where your input data lives\n", + "- `output_summary_path`, The local path where you want the report summary written\n", + "- `output_results_path`, The local path where you want the full results written\n", + "\n", + "You can find a [Sample CSV Dataset here.](https://github.com/GoogleCloudPlatform/dfcx-scrapi/blob/main/data/nlu_evals_sample.csv)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "agent_id = ''\n", + "\n", + "input_path = '/path/to/your/input/data.csv'\n", + "output_summary_path = '/path/to/your/output/summary.csv'\n", + "output_results_path = '/path/to/your/output/results.csv'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run NLU Evals\n", + "\n", + "There are 3 main stages that happen for the Eval Pipeline:\n", + "1. Process and validate the input data\n", + "2. Run the Eval Tests\n", + "3. Write the output summary and details to a report." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-11 11:31:51 INFO ---------- STARTING Evals ----------\n", + "2023-09-11 11:31:56 WARNING Text input is too long. Truncating to 256 characters.\n", + "2023-09-11 11:31:56 WARNING TRUNCATED TEXT: Why can't I get my bill paid at all online? 
Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all onli\n", + "2023-09-11 11:31:58 INFO Progress(0/15)[> ] 0.00%\n", + "2023-09-11 11:32:08 INFO Progress(15/15)[------------------------------------------------->] 100.00%\n", + "2023-09-11 11:32:08 INFO ---------- Evals COMPLETE ----------\n" + ] + } + ], + "source": [ + "nlu = NluEvals(agent_id, creds_path=creds_path)\n", + "\n", + "df = nlu.process_input_csv(input_path)\n", + "df = nlu.run_evals(df)\n", + "nlu.write_summary_to_file(df, output_summary_path)\n", + "nlu.write_results_to_file(df, output_results_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "scrapi-local", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 88c7fbee2f3a538f4e6cf2afd5a0b8a9aa2ba0fd Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 11 Sep 2023 11:58:57 -0500 Subject: [PATCH 139/151] fix: adjust output columns --- src/dfcx_scrapi/tools/nlu_evals.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/dfcx_scrapi/tools/nlu_evals.py b/src/dfcx_scrapi/tools/nlu_evals.py index d74639c7..d8c84977 100644 --- a/src/dfcx_scrapi/tools/nlu_evals.py +++ b/src/dfcx_scrapi/tools/nlu_evals.py @@ -69,7 +69,6 @@ "no_match_count", "no_match_rate", "test_agent", - "flow_display_name", "data_source" ] @@ -89,7 +88,6 @@ class Stats: pass_count: int = 0 pass_rate: float = 0.0 test_agent: str = None - flow_display_name: str = None data_source: str = None @@ -145,7 +143,6 @@ def 
_calculate_stats(self, df: pd.DataFrame): ) stats.pass_rate = stats.pass_count / df.shape[0] stats.test_agent = df.agent_display_name.unique()[0] - stats.flow_display_name = df.flow_display_name.unique()[0] stats.data_source = df.input_source.unique()[0] return stats @@ -272,7 +269,6 @@ def generate_report(self, df: pd.DataFrame, stats.no_match_count, stats.no_match_rate, stats.test_agent, - stats.flow_display_name, stats.data_source, ] ], From 95c6fff4b82cfae74b8c7d73fd44fa6c5c4c4d86 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 11 Sep 2023 12:03:26 -0500 Subject: [PATCH 140/151] fix: update sample link --- examples/nlu_analysis_series/nlu_evaluation_testing.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb b/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb index e8839d50..c9660c9a 100644 --- a/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb +++ b/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb @@ -74,7 +74,9 @@ "- `output_google_sheet_results`, the Display Name of the tab on the Google Sheet where you want the full output results to be written.\n", "- `output_google_sheet_summary`, the Display Name of the tab on the Google Sheet where you want the report summary to be written.\n", "\n", - "_**NOTE** - In order for your Service Account to access your Google Sheet (read / write) you need to share the Google Sheet with your Service Account email address._" + "_**NOTE** - In order for your Service Account to access your Google Sheet (read / write) you need to share the Google Sheet with your Service Account email address._\n", + "\n", + "You can find a [Sample Google Sheet dataset](https://docs.google.com/spreadsheets/d/e/2PACX-1vREvsZAktNvRr78KjUBlZl2PVUHKJru8hRCgmuDi9kn_oDT_weFKkGmyoQwRPdj0JcxK1kNzgceAPA5/pubhtml#) here." 
] }, { From 0e81f49d34db434d958ffa3da7c2a25d77335692 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 11 Sep 2023 12:56:08 -0500 Subject: [PATCH 141/151] chore: update v1.8.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 595c8ce5..159197df 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='dfcx-scrapi', - version='1.7.0', + version='1.8.0', description='A high level scripting API for bot builders, developers, and\ maintainers.', long_description=long_description, From 3c147519b96a3bdef0d1bcf0bf832e4229234abc Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 11 Sep 2023 14:31:05 -0500 Subject: [PATCH 142/151] fix: typo on local path --- src/dfcx_scrapi/agent_extract/agents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/agent_extract/agents.py b/src/dfcx_scrapi/agent_extract/agents.py index eaa4153d..0bcf8879 100644 --- a/src/dfcx_scrapi/agent_extract/agents.py +++ b/src/dfcx_scrapi/agent_extract/agents.py @@ -125,7 +125,7 @@ def download_and_extract(self, agent_local_path: str, gcs_bucket_uri: str): def process_agent(self, agent_id: str, gcs_bucket_uri: str, environment_display_name: str = None): """Process the specified Agent for offline data gathering.""" - agent_local_path = "tmp/agent" + agent_local_path = "/tmp/agent" self.prep_local_dir(agent_local_path) self.export_agent(agent_id, gcs_bucket_uri, environment_display_name) self.download_and_extract(agent_local_path, gcs_bucket_uri) From 13fead6b053048e50406e3c38133c72e93f1ec32 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Mon, 11 Sep 2023 14:31:29 -0500 Subject: [PATCH 143/151] fix: bump version to 1.8.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 159197df..22d9bc21 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='dfcx-scrapi', - version='1.8.0', + version='1.8.1', description='A high level 
scripting API for bot builders, developers, and\ maintainers.', long_description=long_description, From 40cb7fce6b10703adb132a973687000d89af78f0 Mon Sep 17 00:00:00 2001 From: Diego Alonso Date: Sun, 8 Oct 2023 22:58:39 +0200 Subject: [PATCH 144/151] Add Evaluation Tool for generative conversations Add new ipynb to evaluate the quality of Vertex AI Conversation agents. This tool doesn't rely on GSheet for input and output of data --- ...rtex_ai_conversation_evaluation_tool.ipynb | 335 ++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 examples/vertex_ai_conversation_evaluation_tool.ipynb diff --git a/examples/vertex_ai_conversation_evaluation_tool.ipynb b/examples/vertex_ai_conversation_evaluation_tool.ipynb new file mode 100644 index 00000000..909341d3 --- /dev/null +++ b/examples/vertex_ai_conversation_evaluation_tool.ipynb @@ -0,0 +1,335 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Vertex AI Conversation - Evaluation Tool\n", + "\n", + "This tool requieres user's input in several steps. Please run the cells one by one (Shift+Enter) to ensure all the steps are succesfully completed.\n", + "\n", + "## Instructions:\n", + "\n", + "1. **Set-up**\n", + " 1. First cell: install and import dependencies\n", + " 2. Second cell: authentication - it requieres following the steps in the pop-up window. Alternatively, it can be replaced by other [supported authentication method](https://github.com/GoogleCloudPlatform/dfcx-scrapi#authentication)\n", + " 3. Third cell: introduce values for project, location and agent in the right panel; then run the cell.\n", + " 4. Fourth cell: run examples to validate set-up is correct\n", + "2. **Generate Questions & Answer**\n", + " 1. 
First cell: save a sample csv file with correct format\n", + " 2. Second cell: upload csv file with the fields `user_query` and an `ideal_answer` for all examples\n", + " 3. Third cell: bulk generation of `agent_answer` that includes the text and link\n", + "3. **Rating**\n", + " 1. First cell: download csv and add the ratings offline\n", + " 2. Second cell: upload csv file with the ratings\n", + "4. **Results**\n", + " 1. First cell: visualize distribution of ratings\n", + "\n", + "This notebook calls `DetectIntent` using [dfcx-scrapi library](https://github.com/GoogleCloudPlatform/dfcx-scrapi) for Dialogflow CX.\n", + "\n", + "\n", + "## Rating guidance:\n", + "\n", + "For each sample (aka row), the rater should evaluate each answer (including ythe link) that was generated by the agent. The answer will be evaluated with a integer number (escalar) from -1 to 3 as following:\n", + "* **+3** : Perfect answer > fully addresses the question with correct information and polite tone\n", + "* **+2** : Good answer > may contain unnecessary info, may miss some info, or may not be perfectly articulated\n", + "* **+1** : Slightly good answer > some truth to the answer\n", + "* **0** : Neutral answer > no answer or answer contains irrelevant info\n", + "* **-1** : Hurtful answer > wrong or misleading info, or inappropriate tone\n", + "\n" + ], + "metadata": { + "id": "WpkyirmC-F33" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Set-up\n" + ], + "metadata": { + "id": "Afvsuux0zaWZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PPJYRHN83bHg" + }, + "outputs": [], + "source": [ + "# Dependencies\n", + "!pip install dfcx-scrapi --quiet\n", + "\n", + "import io\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from dfcx_scrapi.core.sessions import Sessions\n", + "from google.auth import default\n", + "from google.colab import auth\n", + "from google.colab import 
files\n", + "from google.protobuf.json_format import MessageToDict\n", + "\n", + "\n", + "def get_agent_answer(user_query):\n", + " s = Sessions()\n", + " session_id = s.build_session_id(agent_id)\n", + " res = MessageToDict(s.detect_intent(agent_id, session_id, user_query)._pb)\n", + "\n", + " answer_text = res['responseMessages'][0]['text']['text'][0]\n", + " answer_link = res['responseMessages'][1]['payload']['richContent'][0][0]['actionLink'] if len(res['responseMessages']) > 1 else ''\n", + "\n", + " return f\"{answer_text} ({answer_link})\"\n" + ] + }, + { + "cell_type": "code", + "source": [ + "# Authentication\n", + "\n", + "auth.authenticate_user()\n", + "creds, _ = default()\n" + ], + "metadata": { + "id": "sztyBjNlIGAw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Agent config\n", + "project_id = '' #@param{type: 'string'}\n", + "location = 'global' #@param{type: 'string'}\n", + "agent_id = '' #@param{type: 'string'}\n", + "\n", + "agent_id = f\"projects/{project_id}/locations/{location}/agents/{agent_id}\"\n", + "print(agent_id)\n" + ], + "metadata": { + "id": "mRUB0Uf-3uzS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Test\n", + "user_query = 'Hello World!'\n", + "agent_answer = get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n", + "\n", + "user_query = 'Which is the cheapest plan?'\n", + "agent_answer = get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n" + ], + "metadata": { + "id": "OChJbblt3dt7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Generate Questions & Answer" + ], + "metadata": { + "id": "L2WQime-8-Dw" + } + }, + { + "cell_type": "code", + "source": [ + "# Create sample csv\n", + "\n", + "sample_df = pd.DataFrame({\n", + " \"user_query\": [],\n", + " \"ideal_answer\": [],\n", + " \"agent_answer\": 
[],\n", + " \"rating\": [],\n", + " \"comment\": []\n", + "})\n", + "\n", + "sample_df.loc[0] = [\"Who are you?\", \"I am an assistant\", \"\", 0, \"\"]\n", + "sample_df.loc[1] = [\"Which is the cheapest plan?\", \"Basic plan\", \"\", 0, \"\"]\n", + "sample_df.loc[2] = [\"My device is not working\", \"Call 888-555\", \"\", 0, \"\"]\n", + "\n", + "# Export to local drive as csv file\n", + "file_name = 'data_sample.csv'\n", + "sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", + "files.download(file_name)\n" + ], + "metadata": { + "id": "q3II66B04F0j" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "input(f\"In your local drive, you can find the csv file '{file_name}' Add the user_query and ideal_answer per example \\nWhen done, click 'Enter'\")\n", + "print('done')\n", + "\n", + "# Import from local drive the csv file with the user_query and ideal_answer per examples\n", + "uploaded = files.upload()\n", + "file_name2 = next(iter(uploaded))\n", + "df = pd.read_csv(io.BytesIO(uploaded[file_name2]))\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ], + "metadata": { + "id": "OYr4Dy77KbfL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Generate answers for each query\n", + "df['agent_answer'] = df.apply(lambda row: get_agent_answer(row[\"user_query\"]), axis=1)\n", + "\n", + "df" + ], + "metadata": { + "id": "RmJcxpFI881j" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Rating" + ], + "metadata": { + "id": "yO2x7lc2BRDR" + } + }, + { + "cell_type": "code", + "source": [ + "# Export to local drive as csv file\n", + "file_name = 'output.csv'\n", + "df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", + "files.download(file_name)\n" + 
], + "metadata": { + "id": "ZfAMlQbS8qsy" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "input(f\"In your local drive, you can find the csv file '{file_name}' Rate each agent_answer using ideal_answer as reference. Rating from -1 to 3. \\nWhen done, click 'Enter'\")\n", + "print('done')\n", + "\n", + "# Import from local drive the csv file with the ratings\n", + "uploaded = files.upload()\n", + "file_name2 = next(iter(uploaded))\n", + "df = pd.read_csv(io.BytesIO(uploaded[file_name2]))\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ], + "metadata": { + "id": "SEU44Mcy9mBU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Results\n" + ], + "metadata": { + "id": "W5j9yAewRmNO" + } + }, + { + "cell_type": "code", + "source": [ + "# Rating distribution\n", + "#df[\"rating\"].describe()\n", + "\n", + "# Histogram\n", + "ratings_set = [-1, 0, 1, 2, 3]\n", + "ratings_values = df['rating'].values\n", + "ratings_count = len(ratings_values)\n", + "\n", + "bar_centers = np.linspace(min(ratings_set), max(ratings_set), len(ratings_set))\n", + "bar_edges = np.linspace(min(ratings_set)-0.5, max(ratings_set)+0.5, len(ratings_set)+1)\n", + "bar_heights, _ = np.histogram(ratings_values, bins=bar_edges, density=True)\n", + "\n", + "for center, _h in zip(bar_centers, bar_heights):\n", + " print(f\"{center}: count={round(_h*ratings_count):.0f}, percentage={_h*100:.2f}%\")\n", + "\n", + "# Plot\n", + "height_sum = 100 # for percentage, use 100\n", + "fig, axs = plt.subplots(1, 1, figsize=(6, 4), tight_layout=True)\n", + "\n", + "plt.bar(bar_centers, height_sum*bar_heights, width=0.8)\n", + "ratings_mean = np.mean(ratings_values)\n", + "plt.plot([ratings_mean, ratings_mean], [0, height_sum], '--', 
label=f\"mean={ratings_mean:.2f}\", color='red')\n", + "ratings_median = np.median(ratings_values)\n", + "plt.plot([ratings_median, ratings_median], [0, height_sum], '--', label=f\"median={ratings_median:.2f}\", color='green')\n", + "\n", + "plt.axis((min(bar_edges), max(bar_edges), 0, round(1.2*max(height_sum*bar_heights), 1)))\n", + "plt.legend(loc='upper left')\n", + "plt.gca().grid(axis='y')\n", + "plt.xlabel('Rating')\n", + "plt.ylabel('Percentage [%]')\n", + "plt.title(f\"Rating distribution (count={ratings_count})\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "fig.savefig('ratings_distribution.png', dpi=fig.dpi)\n", + "\n" + ], + "metadata": { + "id": "I5209MB7VS1q" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "rYwsIZ0Ej-v9" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 743875d147a214a08f247a1fd72875c93967f042 Mon Sep 17 00:00:00 2001 From: ADiegoCAlonso Date: Tue, 17 Oct 2023 23:52:58 +0200 Subject: [PATCH 145/151] Move notebook to its own folder --- ...rtex_ai_conversation_evaluation_tool.ipynb | 335 ------------------ 1 file changed, 335 deletions(-) delete mode 100644 examples/vertex_ai_conversation_evaluation_tool.ipynb diff --git a/examples/vertex_ai_conversation_evaluation_tool.ipynb b/examples/vertex_ai_conversation_evaluation_tool.ipynb deleted file mode 100644 index 909341d3..00000000 --- a/examples/vertex_ai_conversation_evaluation_tool.ipynb +++ /dev/null @@ -1,335 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Vertex AI Conversation - Evaluation Tool\n", - "\n", - "This tool requieres user's input in several steps. 
Please run the cells one by one (Shift+Enter) to ensure all the steps are succesfully completed.\n", - "\n", - "## Instructions:\n", - "\n", - "1. **Set-up**\n", - " 1. First cell: install and import dependencies\n", - " 2. Second cell: authentication - it requieres following the steps in the pop-up window. Alternatively, it can be replaced by other [supported authentication method](https://github.com/GoogleCloudPlatform/dfcx-scrapi#authentication)\n", - " 3. Third cell: introduce values for project, location and agent in the right panel; then run the cell.\n", - " 4. Fourth cell: run examples to validate set-up is correct\n", - "2. **Generate Questions & Answer**\n", - " 1. First cell: save a sample csv file with correct format\n", - " 2. Second cell: upload csv file with the fields `user_query` and an `ideal_answer` for all examples\n", - " 3. Third cell: bulk generation of `agent_answer` that includes the text and link\n", - "3. **Rating**\n", - " 1. First cell: download csv and add the ratings offline\n", - " 2. Second cell: upload csv file with the ratings\n", - "4. **Results**\n", - " 1. First cell: visualize distribution of ratings\n", - "\n", - "This notebook calls `DetectIntent` using [dfcx-scrapi library](https://github.com/GoogleCloudPlatform/dfcx-scrapi) for Dialogflow CX.\n", - "\n", - "\n", - "## Rating guidance:\n", - "\n", - "For each sample (aka row), the rater should evaluate each answer (including ythe link) that was generated by the agent. 
The answer will be evaluated with a integer number (escalar) from -1 to 3 as following:\n", - "* **+3** : Perfect answer > fully addresses the question with correct information and polite tone\n", - "* **+2** : Good answer > may contain unnecessary info, may miss some info, or may not be perfectly articulated\n", - "* **+1** : Slightly good answer > some truth to the answer\n", - "* **0** : Neutral answer > no answer or answer contains irrelevant info\n", - "* **-1** : Hurtful answer > wrong or misleading info, or inappropriate tone\n", - "\n" - ], - "metadata": { - "id": "WpkyirmC-F33" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Set-up\n" - ], - "metadata": { - "id": "Afvsuux0zaWZ" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PPJYRHN83bHg" - }, - "outputs": [], - "source": [ - "# Dependencies\n", - "!pip install dfcx-scrapi --quiet\n", - "\n", - "import io\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from dfcx_scrapi.core.sessions import Sessions\n", - "from google.auth import default\n", - "from google.colab import auth\n", - "from google.colab import files\n", - "from google.protobuf.json_format import MessageToDict\n", - "\n", - "\n", - "def get_agent_answer(user_query):\n", - " s = Sessions()\n", - " session_id = s.build_session_id(agent_id)\n", - " res = MessageToDict(s.detect_intent(agent_id, session_id, user_query)._pb)\n", - "\n", - " answer_text = res['responseMessages'][0]['text']['text'][0]\n", - " answer_link = res['responseMessages'][1]['payload']['richContent'][0][0]['actionLink'] if len(res['responseMessages']) > 1 else ''\n", - "\n", - " return f\"{answer_text} ({answer_link})\"\n" - ] - }, - { - "cell_type": "code", - "source": [ - "# Authentication\n", - "\n", - "auth.authenticate_user()\n", - "creds, _ = default()\n" - ], - "metadata": { - "id": "sztyBjNlIGAw" - }, - "execution_count": null, - "outputs": [] - }, - { - 
"cell_type": "code", - "source": [ - "# Agent config\n", - "project_id = '' #@param{type: 'string'}\n", - "location = 'global' #@param{type: 'string'}\n", - "agent_id = '' #@param{type: 'string'}\n", - "\n", - "agent_id = f\"projects/{project_id}/locations/{location}/agents/{agent_id}\"\n", - "print(agent_id)\n" - ], - "metadata": { - "id": "mRUB0Uf-3uzS" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Test\n", - "user_query = 'Hello World!'\n", - "agent_answer = get_agent_answer(user_query)\n", - "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n", - "\n", - "user_query = 'Which is the cheapest plan?'\n", - "agent_answer = get_agent_answer(user_query)\n", - "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n" - ], - "metadata": { - "id": "OChJbblt3dt7" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Generate Questions & Answer" - ], - "metadata": { - "id": "L2WQime-8-Dw" - } - }, - { - "cell_type": "code", - "source": [ - "# Create sample csv\n", - "\n", - "sample_df = pd.DataFrame({\n", - " \"user_query\": [],\n", - " \"ideal_answer\": [],\n", - " \"agent_answer\": [],\n", - " \"rating\": [],\n", - " \"comment\": []\n", - "})\n", - "\n", - "sample_df.loc[0] = [\"Who are you?\", \"I am an assistant\", \"\", 0, \"\"]\n", - "sample_df.loc[1] = [\"Which is the cheapest plan?\", \"Basic plan\", \"\", 0, \"\"]\n", - "sample_df.loc[2] = [\"My device is not working\", \"Call 888-555\", \"\", 0, \"\"]\n", - "\n", - "# Export to local drive as csv file\n", - "file_name = 'data_sample.csv'\n", - "sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", - "files.download(file_name)\n" - ], - "metadata": { - "id": "q3II66B04F0j" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "input(f\"In your local drive, you can find the csv file '{file_name}' Add the user_query and ideal_answer per example \\nWhen done, 
click 'Enter'\")\n", - "print('done')\n", - "\n", - "# Import from local drive the csv file with the user_query and ideal_answer per examples\n", - "uploaded = files.upload()\n", - "file_name2 = next(iter(uploaded))\n", - "df = pd.read_csv(io.BytesIO(uploaded[file_name2]))\n", - "\n", - "assert df.shape[0] > 0, \"The csv has zero rows\"\n", - "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", - "\n", - "df" - ], - "metadata": { - "id": "OYr4Dy77KbfL" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Generate answers for each query\n", - "df['agent_answer'] = df.apply(lambda row: get_agent_answer(row[\"user_query\"]), axis=1)\n", - "\n", - "df" - ], - "metadata": { - "id": "RmJcxpFI881j" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Rating" - ], - "metadata": { - "id": "yO2x7lc2BRDR" - } - }, - { - "cell_type": "code", - "source": [ - "# Export to local drive as csv file\n", - "file_name = 'output.csv'\n", - "df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", - "files.download(file_name)\n" - ], - "metadata": { - "id": "ZfAMlQbS8qsy" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "input(f\"In your local drive, you can find the csv file '{file_name}' Rate each agent_answer using ideal_answer as reference. Rating from -1 to 3. 
\\nWhen done, click 'Enter'\")\n", - "print('done')\n", - "\n", - "# Import from local drive the csv file with the ratings\n", - "uploaded = files.upload()\n", - "file_name2 = next(iter(uploaded))\n", - "df = pd.read_csv(io.BytesIO(uploaded[file_name2]))\n", - "\n", - "assert df.shape[0] > 0, \"The csv has zero rows\"\n", - "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", - "\n", - "df" - ], - "metadata": { - "id": "SEU44Mcy9mBU" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Results\n" - ], - "metadata": { - "id": "W5j9yAewRmNO" - } - }, - { - "cell_type": "code", - "source": [ - "# Rating distribution\n", - "#df[\"rating\"].describe()\n", - "\n", - "# Histogram\n", - "ratings_set = [-1, 0, 1, 2, 3]\n", - "ratings_values = df['rating'].values\n", - "ratings_count = len(ratings_values)\n", - "\n", - "bar_centers = np.linspace(min(ratings_set), max(ratings_set), len(ratings_set))\n", - "bar_edges = np.linspace(min(ratings_set)-0.5, max(ratings_set)+0.5, len(ratings_set)+1)\n", - "bar_heights, _ = np.histogram(ratings_values, bins=bar_edges, density=True)\n", - "\n", - "for center, _h in zip(bar_centers, bar_heights):\n", - " print(f\"{center}: count={round(_h*ratings_count):.0f}, percentage={_h*100:.2f}%\")\n", - "\n", - "# Plot\n", - "height_sum = 100 # for percentage, use 100\n", - "fig, axs = plt.subplots(1, 1, figsize=(6, 4), tight_layout=True)\n", - "\n", - "plt.bar(bar_centers, height_sum*bar_heights, width=0.8)\n", - "ratings_mean = np.mean(ratings_values)\n", - "plt.plot([ratings_mean, ratings_mean], [0, height_sum], '--', label=f\"mean={ratings_mean:.2f}\", color='red')\n", - "ratings_median = np.median(ratings_values)\n", - "plt.plot([ratings_median, ratings_median], [0, height_sum], '--', label=f\"median={ratings_median:.2f}\", color='green')\n", - "\n", - "plt.axis((min(bar_edges), max(bar_edges), 0, 
round(1.2*max(height_sum*bar_heights), 1)))\n", - "plt.legend(loc='upper left')\n", - "plt.gca().grid(axis='y')\n", - "plt.xlabel('Rating')\n", - "plt.ylabel('Percentage [%]')\n", - "plt.title(f\"Rating distribution (count={ratings_count})\")\n", - "\n", - "plt.tight_layout()\n", - "plt.show()\n", - "\n", - "fig.savefig('ratings_distribution.png', dpi=fig.dpi)\n", - "\n" - ], - "metadata": { - "id": "I5209MB7VS1q" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "rYwsIZ0Ej-v9" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file From a9809df8992ecbaabd299810cf2adbdc73269eb4 Mon Sep 17 00:00:00 2001 From: ADiegoCAlonso Date: Wed, 18 Oct 2023 07:17:33 +0200 Subject: [PATCH 146/151] Add notebook without colab dependencies --- ...valuation_tool__numeric_score__colab.ipynb | 334 ++++++++++++++++ ...luation_tool__numeric_score__nocolab.ipynb | 369 ++++++++++++++++++ 2 files changed, 703 insertions(+) create mode 100644 examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb create mode 100644 examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb diff --git a/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb new file mode 100644 index 00000000..8700ff78 --- /dev/null +++ b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb @@ -0,0 +1,334 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Vertex AI Conversation - Evaluation Tool\n", + "\n", + "This tool requieres user's input in several steps. 
Please run the cells one by one (Shift+Enter) to ensure all the steps are succesfully completed.\n", + "\n", + "## Instructions:\n", + "\n", + "1. **Set-up**\n", + " 1. First cell: install and import dependencies\n", + " 2. Second cell: authentication - it requieres following the steps in the pop-up window. Alternatively, it can be replaced by other [supported authentication method](https://github.com/GoogleCloudPlatform/dfcx-scrapi#authentication)\n", + " 3. Third cell: introduce values for project, location and agent in the right panel; then run the cell.\n", + " 4. Fourth cell: run examples to validate set-up is correct\n", + "2. **Generate Questions & Answer**\n", + " 1. First cell: save a sample csv file with correct format\n", + " 2. Second cell: upload csv file with the fields `user_query` and an `ideal_answer` for all examples\n", + " 3. Third cell: bulk generation of `agent_answer` that includes the text and link\n", + "3. **Rating**\n", + " 1. First cell: download csv and add the ratings offline\n", + " 2. Second cell: upload csv file with the ratings\n", + "4. **Results**\n", + " 1. First cell: visualize distribution of ratings\n", + "\n", + "This notebook calls `DetectIntent` using [dfcx-scrapi library](https://github.com/GoogleCloudPlatform/dfcx-scrapi) for Dialogflow CX.\n", + "\n", + "\n", + "## Rating guidance:\n", + "\n", + "For each sample (aka row), the rater should evaluate each answer (including ythe link) that was generated by the agent. 
The answer will be evaluated with a integer number (escalar) from -1 to 3 as following:\n", + "* **+3** : Perfect answer > fully addresses the question with correct information and polite tone\n", + "* **+2** : Good answer > may contain unnecessary info, may miss some info, or may not be perfectly articulated\n", + "* **+1** : Slightly good answer > some truth to the answer\n", + "* **0** : Neutral answer > no answer or answer contains irrelevant info\n", + "* **-1** : Hurtful answer > wrong or misleading info, or inappropriate tone\n", + "\n" + ], + "metadata": { + "id": "WpkyirmC-F33" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Set-up\n" + ], + "metadata": { + "id": "Afvsuux0zaWZ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PPJYRHN83bHg" + }, + "outputs": [], + "source": [ + "# Dependencies\n", + "!pip install dfcx-scrapi --quiet\n", + "\n", + "import io\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from dfcx_scrapi.core.sessions import Sessions\n", + "from google.auth import default\n", + "from google.colab import auth\n", + "from google.colab import files\n", + "from google.protobuf.json_format import MessageToDict\n", + "\n", + "\n", + "def get_agent_answer(user_query):\n", + " s = Sessions()\n", + " session_id = s.build_session_id(agent_id)\n", + " res = MessageToDict(s.detect_intent(agent_id, session_id, user_query)._pb)\n", + "\n", + " answer_text = res['responseMessages'][0]['text']['text'][0]\n", + " answer_link = res['responseMessages'][1]['payload']['richContent'][0][0]['actionLink'] if len(res['responseMessages']) > 1 else ''\n", + "\n", + " return f\"{answer_text} ({answer_link})\"\n" + ] + }, + { + "cell_type": "code", + "source": [ + "# Authentication\n", + "\n", + "auth.authenticate_user()\n", + "creds, _ = default()\n" + ], + "metadata": { + "id": "sztyBjNlIGAw" + }, + "execution_count": null, + "outputs": [] + }, + { + 
"cell_type": "code", + "source": [ + "# Agent config\n", + "project_id = '' #@param{type: 'string'}\n", + "location = 'global' #@param{type: 'string'}\n", + "agent_id = '' #@param{type: 'string'}\n", + "\n", + "agent_id = f\"projects/{project_id}/locations/{location}/agents/{agent_id}\"\n", + "print(agent_id)\n" + ], + "metadata": { + "id": "mRUB0Uf-3uzS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Test\n", + "user_query = 'Hello World!'\n", + "agent_answer = get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n", + "\n", + "user_query = 'Which is the cheapest plan?'\n", + "agent_answer = get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n" + ], + "metadata": { + "id": "OChJbblt3dt7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Generate Questions & Answer" + ], + "metadata": { + "id": "L2WQime-8-Dw" + } + }, + { + "cell_type": "code", + "source": [ + "# Create sample csv\n", + "\n", + "sample_df = pd.DataFrame({\n", + " \"user_query\": [],\n", + " \"ideal_answer\": [],\n", + " \"agent_answer\": [],\n", + " \"rating\": [],\n", + " \"comment\": []\n", + "})\n", + "\n", + "sample_df.loc[0] = [\"Who are you?\", \"I am an assistant\", \"\", 0, \"\"]\n", + "sample_df.loc[1] = [\"Which is the cheapest plan?\", \"Basic plan\", \"\", 0, \"\"]\n", + "sample_df.loc[2] = [\"My device is not working\", \"Call 888-555\", \"\", 0, \"\"]\n", + "\n", + "# Export to local drive as csv file\n", + "file_name = 'data_sample.csv'\n", + "sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", + "files.download(file_name)\n" + ], + "metadata": { + "id": "q3II66B04F0j" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "input(f\"In your local drive, you can find the csv file '{file_name}' Add the user_query and ideal_answer per example \\nWhen done, 
click 'Enter'\")\n", + "print('done')\n", + "\n", + "# Import from local drive the csv file with the user_query and ideal_answer per examples\n", + "uploaded = files.upload()\n", + "file_name2 = next(iter(uploaded))\n", + "df = pd.read_csv(io.BytesIO(uploaded[file_name2]))\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ], + "metadata": { + "id": "OYr4Dy77KbfL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Generate answers for each query\n", + "df['agent_answer'] = df.apply(lambda row: get_agent_answer(row[\"user_query\"]), axis=1)\n", + "\n", + "df" + ], + "metadata": { + "id": "RmJcxpFI881j" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Rating" + ], + "metadata": { + "id": "yO2x7lc2BRDR" + } + }, + { + "cell_type": "code", + "source": [ + "# Export to local drive as csv file\n", + "file_name = 'output.csv'\n", + "df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", + "files.download(file_name)\n" + ], + "metadata": { + "id": "ZfAMlQbS8qsy" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "input(f\"In your local drive, you can find the csv file '{file_name}' Rate each agent_answer using ideal_answer as reference. Rating from -1 to 3. 
\\nWhen done, click 'Enter'\")\n", + "print('done')\n", + "\n", + "# Import from local drive the csv file with the ratings\n", + "uploaded = files.upload()\n", + "file_name2 = next(iter(uploaded))\n", + "df = pd.read_csv(io.BytesIO(uploaded[file_name2]))\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ], + "metadata": { + "id": "SEU44Mcy9mBU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Results\n" + ], + "metadata": { + "id": "W5j9yAewRmNO" + } + }, + { + "cell_type": "code", + "source": [ + "# Rating distribution\n", + "#df[\"rating\"].describe()\n", + "\n", + "# Histogram\n", + "ratings_set = [-1, 0, 1, 2, 3]\n", + "ratings_values = df['rating'].values\n", + "ratings_count = len(ratings_values)\n", + "\n", + "bar_centers = np.linspace(min(ratings_set), max(ratings_set), len(ratings_set))\n", + "bar_edges = np.linspace(min(ratings_set)-0.5, max(ratings_set)+0.5, len(ratings_set)+1)\n", + "bar_heights, _ = np.histogram(ratings_values, bins=bar_edges, density=True)\n", + "\n", + "for center, _h in zip(bar_centers, bar_heights):\n", + " print(f\"{center}: count={round(_h*ratings_count):.0f}, percentage={_h*100:.2f}%\")\n", + "\n", + "# Plot\n", + "height_sum = 100 # for percentage, use 100\n", + "fig, axs = plt.subplots(1, 1, figsize=(6, 4), tight_layout=True)\n", + "\n", + "plt.bar(bar_centers, height_sum*bar_heights, width=0.8)\n", + "ratings_mean = np.mean(ratings_values)\n", + "plt.plot([ratings_mean, ratings_mean], [0, height_sum], '--', label=f\"mean={ratings_mean:.2f}\", color='red')\n", + "ratings_median = np.median(ratings_values)\n", + "plt.plot([ratings_median, ratings_median], [0, height_sum], '--', label=f\"median={ratings_median:.2f}\", color='green')\n", + "\n", + "plt.axis((min(bar_edges), max(bar_edges), 0, 
round(1.2*max(height_sum*bar_heights), 1)))\n", + "plt.legend(loc='upper left')\n", + "plt.gca().grid(axis='y')\n", + "plt.xlabel('Rating')\n", + "plt.ylabel('Percentage [%]')\n", + "plt.title(f\"Rating distribution (count={ratings_count})\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "fig.savefig('ratings_distribution.png', dpi=fig.dpi)\n", + "\n" + ], + "metadata": { + "id": "I5209MB7VS1q" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "rYwsIZ0Ej-v9" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb new file mode 100644 index 00000000..32af6763 --- /dev/null +++ b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WpkyirmC-F33" + }, + "source": [ + "# Infobot Eval\n", + "\n", + "This tool requieres user's input in several steps. Please run the cells one by one (Shift+Enter) to ensure all the steps are succesfully completed.\n", + "\n", + "## Instructions:\n", + "\n", + "1. **Set-up**\n", + " 1. First cell: install and import dependencies\n", + " 2. Second cell: authentication - it requieres following the steps in the pop-up window. Alternatively, it can be replaced by other [supported authentication method](https://github.com/GoogleCloudPlatform/dfcx-scrapi#authentication)\n", + " 3. Third cell: introduce values for project, location and agent in the right panel; then run the cell.\n", + " 4. Fourth cell: run examples to validate set-up is correct\n", + "2. **Generate Questions & Answer**\n", + " 1. First cell: save a sample csv file with correct format\n", + " 2. 
Second cell: upload csv file with the fields `user_query` and an `ideal_answer` for all examples\n", + " 3. Third cell: bulk generation of `agent_answer` that includes the text and link\n", + "3. **Rating**\n", + " 1. First cell: download csv and add the ratings offline\n", + " 2. Second cell: upload csv file with the ratings\n", + "4. **Results**\n", + " 1. First cell: visualize distribution of ratings\n", + "\n", + "This notebook calls `DetectIntent` using [dfcx-scrapi library](https://github.com/GoogleCloudPlatform/dfcx-scrapi) for Dialogflow CX.\n", + "\n", + "\n", + "## Rating guidance:\n", + "\n", + "For each sample (aka row), the rater should evaluate each answer (including ythe link) that was generated by the agent. The answer will be evaluated with a integer number (escalar) from -1 to 3 as following:\n", + "* **+3** : Perfect answer > fully addresses the question with correct information and polite tone\n", + "* **+2** : Good answer > may contain unnecessary info, may miss some info, or may not be perfectly articulated\n", + "* **+1** : Slightly good answer > some truth to the answer\n", + "* **0** : Neutral answer > no answer or answer contains irrelevant info\n", + "* **-1** : Hurtful answer > wrong or misleading info, or inappropriate tone\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Afvsuux0zaWZ" + }, + "source": [ + "## Set-up\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PPJYRHN83bHg" + }, + "outputs": [], + "source": [ + "# Dependencies\n", + "!pip install dfcx-scrapi --quiet\n", + "\n", + "import io\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from dfcx_scrapi.core.sessions import Sessions\n", + "from google.auth import default\n", + "from google.protobuf.json_format import MessageToDict\n", + "\n", + "\n", + "def get_agent_answer(user_query):\n", + " s = Sessions()\n", + " session_id = 
s.build_session_id(agent_id)\n", + " res = MessageToDict(s.detect_intent(agent_id, session_id, user_query)._pb)\n", + "\n", + " answer_text = res['responseMessages'][0]['text']['text'][0]\n", + " answer_link = res['responseMessages'][1]['payload']['richContent'][0][0]['actionLink'] if len(res['responseMessages']) > 1 else ''\n", + "\n", + " return f\"{answer_text} ({answer_link})\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ldbOTqPVnXRj" + }, + "source": [ + "**ATTENTION: MANUAL STEP**\n", + "\n", + "Instruction: Run the following commands one by one in the Terminal in order to authenticate the notebook\n", + "```\n", + "gcloud auth login\n", + "gcloud auth application-default login\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OegHGMr_nXRj" + }, + "source": [ + "**ATTENTION: MANUAL STEP**\n", + "\n", + "Instruction: In the next cell, edit the values of the Agent config, then run the cell\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mRUB0Uf-3uzS" + }, + "outputs": [], + "source": [ + "\n", + "# Agent config\n", + "project_id = '' #@param{type: 'string'}\n", + "location = 'global' #@param{type: 'string'}\n", + "agent_id = '' #@param{type: 'string'}\n", + "\n", + "agent_id = f\"projects/{project_id}/locations/{location}/agents/{agent_id}\"\n", + "print(agent_id)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OChJbblt3dt7" + }, + "outputs": [], + "source": [ + "# Test\n", + "user_query = 'Hello World!'\n", + "agent_answer = get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n", + "\n", + "user_query = 'Which is the cheapest plan?'\n", + "agent_answer = get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L2WQime-8-Dw" + }, + "source": [ + "## Generate Questions & Answer" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q3II66B04F0j" + }, + "outputs": [], + "source": [ + "# Create sample csv\n", + "\n", + "sample_df = pd.DataFrame({\n", + " \"user_query\": [],\n", + " \"ideal_answer\": [],\n", + " \"agent_answer\": [],\n", + " \"rating\": [],\n", + " \"comment\": []\n", + "})\n", + "\n", + "sample_df.loc[0] = [\"Who are you?\", \"I am an assistant\", \"\", 0, \"\"]\n", + "sample_df.loc[1] = [\"Which is the cheapest plan?\", \"Basic plan\", \"\", 0, \"\"]\n", + "sample_df.loc[2] = [\"My device is not working\", \"Call 888-555\", \"\", 0, \"\"]\n", + "\n", + "# Export to local drive as csv file\n", + "file_name = 'data_sample.csv'\n", + "sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", + "\n", + "df\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4n-dGQLonXRm" + }, + "source": [ + "**ATTENTION: MANUAL STEP**\n", + "\n", + "Instructions:\n", + "\n", + "1. Download the file `data_sample.csv` to your local drive by right-clicking in the file\n", + "2. Open the csv file `data_sample.csv` and add the `user_query` and `ideal_answer` per example\n", + "3. 
Upload the updated file from your local drive to the Jupyter File system by clicking 'Upload File'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OYr4Dy77KbfL" + }, + "outputs": [], + "source": [ + "\n", + "file_name2 = file_name\n", + "df = pd.read_csv(file_name2)\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmJcxpFI881j" + }, + "outputs": [], + "source": [ + "# Generate answers for each query\n", + "df['agent_answer'] = df.apply(lambda row: get_agent_answer(row[\"user_query\"]), axis=1)\n", + "\n", + "# Export to local drive as csv file\n", + "file_name3 = file_name2\n", + "df.to_csv(file_name3, encoding='utf-8-sig', index=False)\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yO2x7lc2BRDR" + }, + "source": [ + "# Rating" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uB0txK4QnXRn" + }, + "source": [ + "**ATTENTION: MANUAL STEP**\n", + "\n", + "Instructions:\n", + "\n", + "1. Download the file `data_sample.csv` to your local drive by right-clicking in the file\n", + "2. Open the csv file `data_sample.csv` and add the `rating` and `comment` (optionally) per example\n", + "3. 
Upload the updated file from your local drive to the Jupyter File system by clicking 'Upload File'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SEU44Mcy9mBU" + }, + "outputs": [], + "source": [ + "\n", + "df = pd.read_csv(file_name3)\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W5j9yAewRmNO" + }, + "source": [ + "# Results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I5209MB7VS1q" + }, + "outputs": [], + "source": [ + "# Rating distribution\n", + "#df[\"rating\"].describe()\n", + "\n", + "# Histogram\n", + "ratings_set = [-1, 0, 1, 2, 3]\n", + "ratings_values = df['rating'].values\n", + "ratings_count = len(ratings_values)\n", + "\n", + "bar_centers = np.linspace(min(ratings_set), max(ratings_set), len(ratings_set))\n", + "bar_edges = np.linspace(min(ratings_set)-0.5, max(ratings_set)+0.5, len(ratings_set)+1)\n", + "bar_heights, _ = np.histogram(ratings_values, bins=bar_edges, density=True)\n", + "\n", + "for center, _h in zip(bar_centers, bar_heights):\n", + " print(f\"{center}: count={round(_h*ratings_count):.0f}, percentage={_h*100:.2f}%\")\n", + "\n", + "# Plot\n", + "height_sum = 100 # for percentage, use 100\n", + "fig, axs = plt.subplots(1, 1, figsize=(6, 4), tight_layout=True)\n", + "\n", + "plt.bar(bar_centers, height_sum*bar_heights, width=0.8)\n", + "ratings_mean = np.mean(ratings_values)\n", + "plt.plot([ratings_mean, ratings_mean], [0, height_sum], '--', label=f\"mean={ratings_mean:.2f}\", color='red')\n", + "ratings_median = np.median(ratings_values)\n", + "plt.plot([ratings_median, ratings_median], [0, height_sum], '--', label=f\"median={ratings_median:.2f}\", color='green')\n", + "\n", + "plt.axis((min(bar_edges), max(bar_edges), 
0, round(1.2*max(height_sum*bar_heights), 1)))\n", + "plt.legend(loc='upper left')\n", + "plt.gca().grid(axis='y')\n", + "plt.xlabel('Rating')\n", + "plt.ylabel('Percentage [%]')\n", + "plt.title(f\"Rating distribution (count={ratings_count})\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYwsIZ0Ej-v9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "environment": { + "kernel": "python3", + "name": "tf2-cpu.2-11.m112", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-cpu.2-11:m112" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 6680c14f425c92ce137ae512054c0b8123d5a855 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Wed, 18 Oct 2023 09:59:01 -0500 Subject: [PATCH 147/151] fix: Move get_agent_answer to Sessions class --- src/dfcx_scrapi/core/sessions.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/dfcx_scrapi/core/sessions.py b/src/dfcx_scrapi/core/sessions.py index c6e0a665..12bab554 100644 --- a/src/dfcx_scrapi/core/sessions.py +++ b/src/dfcx_scrapi/core/sessions.py @@ -19,6 +19,7 @@ from typing import Dict, List from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types +from google.protobuf.json_format import MessageToDict from dfcx_scrapi.core import scrapi_base @@ -285,3 +286,17 @@ def preset_parameters( response = session_client.detect_intent(request=request) return response + + def get_agent_answer(self, user_query: str) -> str: + 
"""Extract the answer/citation from a Vertex Conversation response.""" + + session_id = self.build_session_id(self.agent_id) + res = MessageToDict(self.detect_intent( # pylint: disable=W0212 + self.agent_id, session_id, user_query)._pb) + + answer_text = res["responseMessages"][0]["text"]["text"][0] + answer_link = res["responseMessages"][1]["payload"][ + "richContent"][0][0]["actionLink"] if len( + res["responseMessages"]) > 1 else "" + + return f"{answer_text} ({answer_link})" From 2e177ee628a21e164a29190d2ca6df8abff53496 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Wed, 18 Oct 2023 09:59:50 -0500 Subject: [PATCH 148/151] fix: remove unused code; updates for Sessions --- ...luation_tool__numeric_score__nocolab.ipynb | 36 ++++++------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb index 32af6763..30507310 100644 --- a/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb +++ b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb @@ -66,20 +66,7 @@ "import numpy as np\n", "import pandas as pd\n", "\n", - "from dfcx_scrapi.core.sessions import Sessions\n", - "from google.auth import default\n", - "from google.protobuf.json_format import MessageToDict\n", - "\n", - "\n", - "def get_agent_answer(user_query):\n", - " s = Sessions()\n", - " session_id = s.build_session_id(agent_id)\n", - " res = MessageToDict(s.detect_intent(agent_id, session_id, user_query)._pb)\n", - "\n", - " answer_text = res['responseMessages'][0]['text']['text'][0]\n", - " answer_link = res['responseMessages'][1]['payload']['richContent'][0][0]['actionLink'] if len(res['responseMessages']) > 1 else ''\n", - "\n", - " return f\"{answer_text} ({answer_link})\"\n" + "from dfcx_scrapi.core.sessions import Sessions" ] }, { @@ -116,14 +103,15 @@ }, "outputs": [], 
"source": [ - "\n", "# Agent config\n", "project_id = '' #@param{type: 'string'}\n", "location = 'global' #@param{type: 'string'}\n", "agent_id = '' #@param{type: 'string'}\n", "\n", "agent_id = f\"projects/{project_id}/locations/{location}/agents/{agent_id}\"\n", - "print(agent_id)\n" + "print(agent_id)\n", + "\n", + "s = Sessions(agent_id=agent_id)" ] }, { @@ -136,12 +124,12 @@ "source": [ "# Test\n", "user_query = 'Hello World!'\n", - "agent_answer = get_agent_answer(user_query)\n", + "agent_answer = s.get_agent_answer(user_query)\n", "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n", "\n", "user_query = 'Which is the cheapest plan?'\n", - "agent_answer = get_agent_answer(user_query)\n", - "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n" + "agent_answer = s.get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")" ] }, { @@ -177,9 +165,7 @@ "\n", "# Export to local drive as csv file\n", "file_name = 'data_sample.csv'\n", - "sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", - "\n", - "df\n" + "sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)" ] }, { @@ -224,7 +210,7 @@ "outputs": [], "source": [ "# Generate answers for each query\n", - "df['agent_answer'] = df.apply(lambda row: get_agent_answer(row[\"user_query\"]), axis=1)\n", + "df['agent_answer'] = df.apply(lambda row: s.get_agent_answer(row[\"user_query\"]), axis=1)\n", "\n", "# Export to local drive as csv file\n", "file_name3 = file_name2\n", @@ -361,9 +347,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From a277f556b91566df42905d56ac2e309bcba9ad43 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Wed, 18 Oct 2023 10:01:27 -0500 Subject: [PATCH 149/151] fix: remove unused code; add Sessions updates --- ...valuation_tool__numeric_score__colab.ipynb | 219 +++++++++--------- 1 file 
changed, 109 insertions(+), 110 deletions(-) diff --git a/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb index 8700ff78..21eb3d10 100644 --- a/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb +++ b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb @@ -1,21 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", + "metadata": { + "id": "WpkyirmC-F33" + }, "source": [ "# Vertex AI Conversation - Evaluation Tool\n", "\n", @@ -50,19 +39,16 @@ "* **0** : Neutral answer > no answer or answer contains irrelevant info\n", "* **-1** : Hurtful answer > wrong or misleading info, or inappropriate tone\n", "\n" - ], - "metadata": { - "id": "WpkyirmC-F33" - } + ] }, { "cell_type": "markdown", - "source": [ - "## Set-up\n" - ], "metadata": { "id": "Afvsuux0zaWZ" - } + }, + "source": [ + "## Set-up\n" + ] }, { "cell_type": "code", @@ -83,37 +69,30 @@ "from dfcx_scrapi.core.sessions import Sessions\n", "from google.auth import default\n", "from google.colab import auth\n", - "from google.colab import files\n", - "from google.protobuf.json_format import MessageToDict\n", - "\n", - "\n", - "def get_agent_answer(user_query):\n", - " s = Sessions()\n", - " session_id = s.build_session_id(agent_id)\n", - " res = MessageToDict(s.detect_intent(agent_id, session_id, user_query)._pb)\n", - "\n", - " answer_text = res['responseMessages'][0]['text']['text'][0]\n", - " answer_link = res['responseMessages'][1]['payload']['richContent'][0][0]['actionLink'] if len(res['responseMessages']) > 1 else ''\n", - "\n", - " return f\"{answer_text} ({answer_link})\"\n" + "from google.colab import files" ] }, { "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "sztyBjNlIGAw" + }, + "outputs": [], "source": [ "# Authentication\n", "\n", "auth.authenticate_user()\n", "creds, _ = default()\n" - ], - "metadata": { - "id": "sztyBjNlIGAw" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mRUB0Uf-3uzS" + }, + "outputs": [], "source": [ "# Agent config\n", "project_id = '' #@param{type: 'string'}\n", @@ -121,43 +100,45 @@ "agent_id = '' #@param{type: 'string'}\n", "\n", "agent_id = f\"projects/{project_id}/locations/{location}/agents/{agent_id}\"\n", - "print(agent_id)\n" - ], - "metadata": { - "id": "mRUB0Uf-3uzS" - }, - "execution_count": null, - "outputs": [] + "print(agent_id)\n", + "\n", + "s = Sessions(agent_id=agent_id)" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OChJbblt3dt7" + }, + "outputs": [], "source": [ "# Test\n", "user_query = 'Hello World!'\n", - "agent_answer = get_agent_answer(user_query)\n", + "agent_answer = s.get_agent_answer(user_query)\n", "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n", "\n", "user_query = 'Which is the cheapest plan?'\n", - "agent_answer = get_agent_answer(user_query)\n", + "agent_answer = s.get_agent_answer(user_query)\n", "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n" - ], - "metadata": { - "id": "OChJbblt3dt7" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "## Generate Questions & Answer" - ], "metadata": { "id": "L2WQime-8-Dw" - } + }, + "source": [ + "## Generate Questions & Answer" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q3II66B04F0j" + }, + "outputs": [], "source": [ "# Create sample csv\n", "\n", @@ -177,15 +158,15 @@ "file_name = 'data_sample.csv'\n", "sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", "files.download(file_name)\n" - ], - "metadata": { - "id": "q3II66B04F0j" - }, - 
"execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OYr4Dy77KbfL" + }, + "outputs": [], "source": [ "input(f\"In your local drive, you can find the csv file '{file_name}' Add the user_query and ideal_answer per example \\nWhen done, click 'Enter'\")\n", "print('done')\n", @@ -199,52 +180,52 @@ "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", "\n", "df" - ], - "metadata": { - "id": "OYr4Dy77KbfL" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmJcxpFI881j" + }, + "outputs": [], "source": [ "# Generate answers for each query\n", - "df['agent_answer'] = df.apply(lambda row: get_agent_answer(row[\"user_query\"]), axis=1)\n", + "df['agent_answer'] = df.apply(lambda row: s.get_agent_answer(row[\"user_query\"]), axis=1)\n", "\n", "df" - ], - "metadata": { - "id": "RmJcxpFI881j" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "# Rating" - ], "metadata": { "id": "yO2x7lc2BRDR" - } + }, + "source": [ + "# Rating" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZfAMlQbS8qsy" + }, + "outputs": [], "source": [ "# Export to local drive as csv file\n", "file_name = 'output.csv'\n", "df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", "files.download(file_name)\n" - ], - "metadata": { - "id": "ZfAMlQbS8qsy" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SEU44Mcy9mBU" + }, + "outputs": [], "source": [ "input(f\"In your local drive, you can find the csv file '{file_name}' Rate each agent_answer using ideal_answer as reference. Rating from -1 to 3. 
\\nWhen done, click 'Enter'\")\n", "print('done')\n", @@ -258,24 +239,24 @@ "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", "\n", "df" - ], - "metadata": { - "id": "SEU44Mcy9mBU" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "# Results\n" - ], "metadata": { "id": "W5j9yAewRmNO" - } + }, + "source": [ + "# Results\n" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I5209MB7VS1q" + }, + "outputs": [], "source": [ "# Rating distribution\n", "#df[\"rating\"].describe()\n", @@ -314,21 +295,39 @@ "\n", "fig.savefig('ratings_distribution.png', dpi=fig.dpi)\n", "\n" - ], - "metadata": { - "id": "I5209MB7VS1q" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [], + "execution_count": null, "metadata": { "id": "rYwsIZ0Ej-v9" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 5bbc49d609e7d9a914fcc17a8769fdecce6b5759 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Wed, 18 Oct 2023 19:51:08 -0500 Subject: [PATCH 150/151] feat: add new match_type for LLM --- src/dfcx_scrapi/core/conversation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dfcx_scrapi/core/conversation.py b/src/dfcx_scrapi/core/conversation.py index 5e8a9da1..ce354e7b 100644 --- a/src/dfcx_scrapi/core/conversation.py +++ b/src/dfcx_scrapi/core/conversation.py @@ -132,7 +132,8 @@ def 
_get_match_type_from_map(match_type: int): 4: "NO_MATCH", 5: "NO_INPUT", 6: "EVENT", - 8: "KNOWLEDGE_CONNECTOR" + 8: "KNOWLEDGE_CONNECTOR", + 9: "LLM" } return match_type_map[match_type] From 37cf8cf7b2013a377740f68d8dcb7355632161e0 Mon Sep 17 00:00:00 2001 From: Patrick Marlow Date: Wed, 18 Oct 2023 20:01:33 -0500 Subject: [PATCH 151/151] feat: update to v1.9.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 22d9bc21..ae21a6a1 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='dfcx-scrapi', - version='1.8.1', + version='1.9.0', description='A high level scripting API for bot builders, developers, and\ maintainers.', long_description=long_description,