From 64b977b7bd9690e1ef2715b41455a46aa6843eab Mon Sep 17 00:00:00 2001 From: Tami Takamiya Date: Mon, 8 Jul 2024 13:22:51 -0400 Subject: [PATCH] Prometheus metrics for WCA playbook exp/gen APIs --- .../api/model_client/tests/test_wca_client.py | 36 ++++++++++ .../ai/api/model_client/wca_client.py | 67 ++++++++++++++++--- 2 files changed, 95 insertions(+), 8 deletions(-) diff --git a/ansible_ai_connect/ai/api/model_client/tests/test_wca_client.py b/ansible_ai_connect/ai/api/model_client/tests/test_wca_client.py index 0c3488794..a0ae67c27 100644 --- a/ansible_ai_connect/ai/api/model_client/tests/test_wca_client.py +++ b/ansible_ai_connect/ai/api/model_client/tests/test_wca_client.py @@ -54,9 +54,13 @@ ibm_cloud_identity_token_hist, ibm_cloud_identity_token_retry_counter, wca_codegen_hist, + wca_codegen_playbook_hist, + wca_codegen_playbook_retry_counter, wca_codegen_retry_counter, wca_codematch_hist, wca_codematch_retry_counter, + wca_explain_playbook_hist, + wca_explain_playbook_retry_counter, ) from ansible_ai_connect.test_utils import ( WisdomAppsBackendMocking, @@ -265,6 +269,7 @@ def setUp(self): wca_client.session.post.return_value = response self.wca_client = wca_client + @assert_call_count_metrics(metric=wca_codegen_playbook_hist) def test_playbook_gen(self): request = Mock() playbook, outline = self.wca_client.generate_playbook( @@ -273,23 +278,53 @@ def test_playbook_gen(self): self.assertEqual(playbook, "Oh!") self.assertEqual(outline, "Ahh!") + @assert_call_count_metrics(metric=wca_codegen_playbook_hist) + @assert_call_count_metrics(metric=wca_codegen_playbook_retry_counter) + def test_playbook_gen_error(self): + request = Mock() + model_client = WCAClient(inference_url="http://example.com/") + model_client.get_api_key = Mock(return_value="some-key") + model_client.get_token = Mock(return_value={"access_token": "a-token"}) + model_client.get_model_id = Mock(return_value="a-random-model") + model_client.session = Mock() + model_client.session.post = Mock(side_effect=HTTPError(500)) + with self.assertRaises(HTTPError): + model_client.generate_playbook(request, text="Install Wordpress", create_outline=True) + + @assert_call_count_metrics(metric=wca_explain_playbook_hist) def test_playbook_exp(self): request = Mock() explanation = self.wca_client.explain_playbook(request, content="Some playbook") self.assertEqual(explanation, "!Óh¡") + @assert_call_count_metrics(metric=wca_explain_playbook_hist) + @assert_call_count_metrics(metric=wca_explain_playbook_retry_counter) + def test_playbook_exp_error(self): + request = Mock() + model_client = WCAClient(inference_url="http://example.com/") + model_client.get_api_key = Mock(return_value="some-key") + model_client.get_token = Mock(return_value={"access_token": "a-token"}) + model_client.get_model_id = Mock(return_value="a-random-model") + model_client.session = Mock() + model_client.session.post = Mock(side_effect=HTTPError(500)) + with self.assertRaises(HTTPError): + model_client.explain_playbook(request, content="Some playbook") + + @assert_call_count_metrics(metric=wca_codegen_playbook_hist) def test_playbook_gen_no_org(self): request = Mock() request.user.organization = None self.wca_client.generate_playbook(request, text="Install Wordpress") self.wca_client.get_api_key.assert_called_with(None) + @assert_call_count_metrics(metric=wca_explain_playbook_hist) def test_playbook_exp_no_org(self): request = Mock() request.user.organization = None self.wca_client.explain_playbook(request, content="Some playbook") self.wca_client.get_api_key.assert_called_with(None) + @assert_call_count_metrics(metric=wca_codegen_playbook_hist) @override_settings(ENABLE_ANSIBLE_LINT_POSTPROCESS=True) def test_playbook_gen_with_lint(self): fake_linter = Mock() @@ -301,6 +336,7 @@ def test_playbook_gen_with_lint(self): self.assertEqual(playbook, "I'm super fake!") self.assertEqual(outline, "Ahh!") + @assert_call_count_metrics(metric=wca_codegen_playbook_hist) @override_settings(ENABLE_ANSIBLE_LINT_POSTPROCESS=True) def test_playbook_gen_when_is_not_initialized(self): self.mock_ansible_lint_caller_with(None) diff --git a/ansible_ai_connect/ai/api/model_client/wca_client.py b/ansible_ai_connect/ai/api/model_client/wca_client.py index eda549a09..78f8056df 100644 --- a/ansible_ai_connect/ai/api/model_client/wca_client.py +++ b/ansible_ai_connect/ai/api/model_client/wca_client.py @@ -79,6 +79,16 @@ "Histogram of WCA codematch API processing time", namespace=NAMESPACE, ) +wca_codegen_playbook_hist = Histogram( + "wca_codegen_playbook_latency_seconds", + "Histogram of WCA codegen-playbook API processing time", + namespace=NAMESPACE, +) +wca_explain_playbook_hist = Histogram( + "wca_explain_playbook_latency_seconds", + "Histogram of WCA explain-playbook API processing time", + namespace=NAMESPACE, +) ibm_cloud_identity_token_hist = Histogram( "wca_ibm_identity_token_latency_seconds", "Histogram of IBM Cloud identity token API processing time", @@ -94,6 +104,16 @@ "Counter of WCA codematch API invocation retries", namespace=NAMESPACE, ) +wca_codegen_playbook_retry_counter = Counter( + "wca_codegen_playbook_retries", + "Counter of WCA codegen-playbook API invocation retries", + namespace=NAMESPACE, +) +wca_explain_playbook_retry_counter = Counter( + "wca_explain_playbook_retries", + "Counter of WCA explain-playbook API invocation retries", + namespace=NAMESPACE, +) ibm_cloud_identity_token_retry_counter = Counter( "ibm_cloud_identity_token_retries", "Counter of IBM Cloud identity token API invocation retries", @@ -162,6 +182,14 @@ def on_backoff_inference(details): def on_backoff_codematch(details): wca_codematch_retry_counter.inc() + @staticmethod + def on_backoff_codegen_playbook(details): + wca_codegen_playbook_retry_counter.inc() + + @staticmethod + def on_backoff_explain_playbook(details): + wca_explain_playbook_retry_counter.inc() + @staticmethod def on_backoff_ibm_cloud_identity_token(details): ibm_cloud_identity_token_retry_counter.inc() @@ -486,11 +514,22 @@ def generate_playbook( if outline: data["outline"] = outline - result = self.session.post( - f"{self._inference_url}/v1/wca/codegen/ansible/playbook", - headers=headers, - json=data, + @backoff.on_exception( + backoff.expo, + Exception, + max_tries=self.retries + 1, + giveup=self.fatal_exception, + on_backoff=self.on_backoff_codegen_playbook, ) + @wca_codegen_playbook_hist.time() + def post_request(): + return self.session.post( + f"{self._inference_url}/v1/wca/codegen/ansible/playbook", + headers=headers, + json=data, + ) + + result = post_request() context = Context(model_id, result, False) InferenceResponseChecks().run_checks(context) @@ -516,11 +555,23 @@ def explain_playbook(self, request, content: str) -> str: "model_id": model_id, "playbook": content, } - result = self.session.post( - f"{self._inference_url}/v1/wca/explain/ansible/playbook", - headers=headers, - json=data, + + @backoff.on_exception( + backoff.expo, + Exception, + max_tries=self.retries + 1, + giveup=self.fatal_exception, + on_backoff=self.on_backoff_explain_playbook, ) + @wca_explain_playbook_hist.time() + def post_request(): + return self.session.post( + f"{self._inference_url}/v1/wca/explain/ansible/playbook", + headers=headers, + json=data, + ) + + result = post_request() context = Context(model_id, result, False) InferenceResponseChecks().run_checks(context)