Merge pull request #1183 from ansible/ttakamiy/prometheus-metrics-for-wca-playbook-expgen

Prometheus metrics for WCA playbook expgen APIs
ansible · Jul 8, 2024 · 4eb5c00
2 parents dfdf6b4 + 64b977b
commit 4eb5c00
Show file tree

Hide file tree

Showing 2 changed files with 95 additions and 8 deletions.
diff --git a/ansible_ai_connect/ai/api/model_client/tests/test_wca_client.py b/ansible_ai_connect/ai/api/model_client/tests/test_wca_client.py
@@ -54,9 +54,13 @@
     ibm_cloud_identity_token_hist,
     ibm_cloud_identity_token_retry_counter,
     wca_codegen_hist,
+    wca_codegen_playbook_hist,
+    wca_codegen_playbook_retry_counter,
     wca_codegen_retry_counter,
     wca_codematch_hist,
     wca_codematch_retry_counter,
+    wca_explain_playbook_hist,
+    wca_explain_playbook_retry_counter,
 )
 from ansible_ai_connect.test_utils import (
     WisdomAppsBackendMocking,
@@ -265,6 +269,7 @@ def setUp(self):
         wca_client.session.post.return_value = response
         self.wca_client = wca_client
 
+    @assert_call_count_metrics(metric=wca_codegen_playbook_hist)
     def test_playbook_gen(self):
         request = Mock()
         playbook, outline = self.wca_client.generate_playbook(
@@ -273,23 +278,53 @@ def test_playbook_gen(self):
         self.assertEqual(playbook, "Oh!")
         self.assertEqual(outline, "Ahh!")
 
+    @assert_call_count_metrics(metric=wca_codegen_playbook_hist)
+    @assert_call_count_metrics(metric=wca_codegen_playbook_retry_counter)
+    def test_playbook_gen_error(self):
+        request = Mock()
+        model_client = WCAClient(inference_url="http://example.com/")
+        model_client.get_api_key = Mock(return_value="some-key")
+        model_client.get_token = Mock(return_value={"access_token": "a-token"})
+        model_client.get_model_id = Mock(return_value="a-random-model")
+        model_client.session = Mock()
+        model_client.session.post = Mock(side_effect=HTTPError(500))
+        with self.assertRaises(HTTPError):
+            model_client.generate_playbook(request, text="Install Wordpress", create_outline=True)
+
+    @assert_call_count_metrics(metric=wca_explain_playbook_hist)
     def test_playbook_exp(self):
         request = Mock()
         explanation = self.wca_client.explain_playbook(request, content="Some playbook")
         self.assertEqual(explanation, "!Óh¡")
 
+    @assert_call_count_metrics(metric=wca_explain_playbook_hist)
+    @assert_call_count_metrics(metric=wca_explain_playbook_retry_counter)
+    def test_playbook_exp_error(self):
+        request = Mock()
+        model_client = WCAClient(inference_url="http://example.com/")
+        model_client.get_api_key = Mock(return_value="some-key")
+        model_client.get_token = Mock(return_value={"access_token": "a-token"})
+        model_client.get_model_id = Mock(return_value="a-random-model")
+        model_client.session = Mock()
+        model_client.session.post = Mock(side_effect=HTTPError(500))
+        with self.assertRaises(HTTPError):
+            model_client.explain_playbook(request, content="Some playbook")
+
+    @assert_call_count_metrics(metric=wca_codegen_playbook_hist)
     def test_playbook_gen_no_org(self):
         request = Mock()
         request.user.organization = None
         self.wca_client.generate_playbook(request, text="Install Wordpress")
         self.wca_client.get_api_key.assert_called_with(None)
 
+    @assert_call_count_metrics(metric=wca_explain_playbook_hist)
     def test_playbook_exp_no_org(self):
         request = Mock()
         request.user.organization = None
         self.wca_client.explain_playbook(request, content="Some playbook")
         self.wca_client.get_api_key.assert_called_with(None)
 
+    @assert_call_count_metrics(metric=wca_codegen_playbook_hist)
     @override_settings(ENABLE_ANSIBLE_LINT_POSTPROCESS=True)
     def test_playbook_gen_with_lint(self):
         fake_linter = Mock()
@@ -301,6 +336,7 @@ def test_playbook_gen_with_lint(self):
         self.assertEqual(playbook, "I'm super fake!")
         self.assertEqual(outline, "Ahh!")
 
+    @assert_call_count_metrics(metric=wca_codegen_playbook_hist)
     @override_settings(ENABLE_ANSIBLE_LINT_POSTPROCESS=True)
     def test_playbook_gen_when_is_not_initialized(self):
         self.mock_ansible_lint_caller_with(None)

diff --git a/ansible_ai_connect/ai/api/model_client/wca_client.py b/ansible_ai_connect/ai/api/model_client/wca_client.py
@@ -79,6 +79,16 @@
     "Histogram of WCA codematch API processing time",
     namespace=NAMESPACE,
 )
+wca_codegen_playbook_hist = Histogram(
+    "wca_codegen_playbook_latency_seconds",
+    "Histogram of WCA codegen-playbook API processing time",
+    namespace=NAMESPACE,
+)
+wca_explain_playbook_hist = Histogram(
+    "wca_explain_playbook_latency_seconds",
+    "Histogram of WCA explain-playbook API processing time",
+    namespace=NAMESPACE,
+)
 ibm_cloud_identity_token_hist = Histogram(
     "wca_ibm_identity_token_latency_seconds",
     "Histogram of IBM Cloud identity token API processing time",
@@ -94,6 +104,16 @@
     "Counter of WCA codematch API invocation retries",
     namespace=NAMESPACE,
 )
+wca_codegen_playbook_retry_counter = Counter(
+    "wca_codegen_playbook_retries",
+    "Counter of WCA codegen-playbook API invocation retries",
+    namespace=NAMESPACE,
+)
+wca_explain_playbook_retry_counter = Counter(
+    "wca_explain_playbook_retries",
+    "Counter of WCA explain-playbook API invocation retries",
+    namespace=NAMESPACE,
+)
 ibm_cloud_identity_token_retry_counter = Counter(
     "ibm_cloud_identity_token_retries",
     "Counter of IBM Cloud identity token API invocation retries",
@@ -162,6 +182,14 @@ def on_backoff_inference(details):
     def on_backoff_codematch(details):
         wca_codematch_retry_counter.inc()
 
+    @staticmethod
+    def on_backoff_codegen_playbook(details):
+        wca_codegen_playbook_retry_counter.inc()
+
+    @staticmethod
+    def on_backoff_explain_playbook(details):
+        wca_explain_playbook_retry_counter.inc()
+
     @staticmethod
     def on_backoff_ibm_cloud_identity_token(details):
         ibm_cloud_identity_token_retry_counter.inc()
@@ -486,11 +514,22 @@ def generate_playbook(
         if outline:
             data["outline"] = outline
 
-        result = self.session.post(
-            f"{self._inference_url}/v1/wca/codegen/ansible/playbook",
-            headers=headers,
-            json=data,
+        @backoff.on_exception(
+            backoff.expo,
+            Exception,
+            max_tries=self.retries + 1,
+            giveup=self.fatal_exception,
+            on_backoff=self.on_backoff_codegen_playbook,
         )
+        @wca_codegen_playbook_hist.time()
+        def post_request():
+            return self.session.post(
+                f"{self._inference_url}/v1/wca/codegen/ansible/playbook",
+                headers=headers,
+                json=data,
+            )
+
+        result = post_request()
 
         context = Context(model_id, result, False)
         InferenceResponseChecks().run_checks(context)
@@ -516,11 +555,23 @@ def explain_playbook(self, request, content: str) -> str:
             "model_id": model_id,
             "playbook": content,
         }
-        result = self.session.post(
-            f"{self._inference_url}/v1/wca/explain/ansible/playbook",
-            headers=headers,
-            json=data,
+
+        @backoff.on_exception(
+            backoff.expo,
+            Exception,
+            max_tries=self.retries + 1,
+            giveup=self.fatal_exception,
+            on_backoff=self.on_backoff_explain_playbook,
         )
+        @wca_explain_playbook_hist.time()
+        def post_request():
+            return self.session.post(
+                f"{self._inference_url}/v1/wca/explain/ansible/playbook",
+                headers=headers,
+                json=data,
+            )
+
+        result = post_request()
 
         context = Context(model_id, result, False)
         InferenceResponseChecks().run_checks(context)