Skip to content

Commit

Permalink
Add CLI argument for server metrics url
Browse files Browse the repository at this point in the history
  • Loading branch information
lkomali committed Aug 13, 2024
1 parent 80c9487 commit c325234
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 4 deletions.
94 changes: 91 additions & 3 deletions genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import json
import os
import sys
from enum import Enum, auto
from pathlib import Path
from typing import Tuple
from urllib.parse import urlparse

import genai_perf.logging as logging
import genai_perf.utils as utils
Expand Down Expand Up @@ -245,6 +245,85 @@ def _check_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
args.concurrency = 1
return args

def is_valid_url(url):
    """
    Checks if a URL is valid. It must use 'http' or 'https', have a valid
    netloc (a non-empty host with an optional, well-formed port), and
    contain '/metrics' in the path.

    Args:
        url: The candidate URL string.

    Returns:
        True if every check passes; False otherwise, including when the
        input is not a string or cannot be parsed.
    """
    # Only str input can match the str schemes checked below, so anything
    # else is rejected without being parsed.
    if not isinstance(url, str):
        return False

    try:
        result = urlparse(url)
        hostname = result.hostname
        # Reading .port raises ValueError when the port is not numeric or is
        # out of range; the value itself is not needed.
        _ = result.port
    except ValueError:
        return False

    # Check scheme
    if result.scheme not in ("http", "https"):
        return False

    # Check netloc: a bare ":8002" has a netloc but no host.
    if not hostname:
        return False

    # Check path contains '/metrics'
    return "/metrics" in result.path


def _check_server_metrics_url(parser: argparse.ArgumentParser, args: argparse.Namespace) -> argparse.Namespace:
"""
Checks if the server metrics URL is valid based on the backend and service kind.
"""

# Check if the URL is valid and contains the expected path
if args.service_kind == 'triton' and args.server_metrics_url and not is_valid_url(args.server_metrics_url):
parser.error("The --server-metrics-url option contains an invalid URL format.")

return args


def is_valid_url(url):
    """
    Checks if a URL is valid. It must use 'http' or 'https', have a valid
    netloc (a non-empty host with an optional, well-formed port), and
    contain '/metrics' in the path.

    Args:
        url: The candidate URL string.

    Returns:
        True if every check passes; False otherwise, including when the
        input is not a string or cannot be parsed.
    """
    # Non-string input cannot match the str schemes checked below, so it is
    # rejected before any parsing is attempted.
    if not isinstance(url, str):
        return False

    try:
        result = urlparse(url)
        hostname = result.hostname
        # Touching .port forces validation: it raises ValueError for a
        # non-numeric or out-of-range port. The value itself is unused.
        _ = result.port
    except ValueError:
        return False

    # Check scheme
    if result.scheme not in ("http", "https"):
        return False

    # Check netloc: require an actual host, not just a ":port" remainder.
    if not hostname:
        return False

    # Check path contains '/metrics'
    return "/metrics" in result.path


def _check_server_metrics_url(
parser: argparse.ArgumentParser, args: argparse.Namespace
) -> argparse.Namespace:
"""
Checks if the server metrics URL is valid based on the backend and service kind.
"""

# Check if the URL is valid and contains the expected path
if (
args.service_kind == "triton"
and args.server_metrics_url
and not is_valid_url(args.server_metrics_url)
):
parser.error("The --server-metrics-url option contains an invalid URL format.")

return args


def _set_artifact_paths(args: argparse.Namespace) -> argparse.Namespace:
"""
Expand Down Expand Up @@ -604,6 +683,14 @@ def _add_endpoint_args(parser):
help="URL of the endpoint to target for benchmarking.",
)

endpoint_group.add_argument(
"--server-metrics-url",
type=str,
default=None,
required=False,
help="URL of the server metrics endpoint. Required for 'openai' service kind. Defaults to the default URL if 'service_kind' is 'triton'."
)


def _add_output_args(parser):
output_group = parser.add_argument_group("Output")
Expand Down Expand Up @@ -768,9 +855,9 @@ def profile_handler(args, extra_args):

telemetry_data_collector = None
if args.service_kind == "triton":
# TPA-275: pass server url as a CLI option in non-default case
server_metrics_url=args.server_metrics_url or DEFAULT_TRITON_METRICS_URL
telemetry_data_collector = TritonTelemetryDataCollector(
server_metrics_url=DEFAULT_TRITON_METRICS_URL
server_metrics_url=server_metrics_url
)

Profiler.run(telemetry_data_collector, args=args, extra_args=extra_args)
Expand Down Expand Up @@ -822,6 +909,7 @@ def refine_args(
args = _check_conditional_args(parser, args)
args = _check_image_input_args(parser, args)
args = _check_load_manager_args(args)
args = _check_server_metrics_url(parser, args)
args = _set_artifact_paths(args)
elif args.subcommand == Subcommand.COMPARE.to_lowercase():
args = _check_compare_args(parser, args)
Expand Down
15 changes: 15 additions & 0 deletions genai-perf/genai_perf/telemetry_data/telemetry_data_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@ def stop(self) -> None:
self._stop_event.set()
self._thread.join()

def check_url_reachability(self) -> bool:
    """Report whether the configured server metrics URL answers with HTTP 200.

    Returns True when no URL is configured. Otherwise a GET request with a
    5-second timeout is issued: the result is True only for a 200 status
    code, and False for any other status or when the request raises
    requests.RequestException.
    """
    target_url = self._server_metrics_url
    if not target_url:
        # Nothing to probe.
        return True

    try:
        return requests.get(target_url, timeout=5).status_code == 200
    except requests.RequestException:
        return False

def _fetch_metrics(self) -> str:
"""Fetch metrics from the metrics endpoint"""
response = requests.get(self._server_metrics_url)
Expand All @@ -81,3 +91,8 @@ def _collect_metrics(self) -> None:
def metrics(self) -> TelemetryMetrics:
"""Return the collected metrics."""
return self._metrics

@property
def metrics_url(self) -> str:
    """Expose the server metrics URL stored on this collector."""
    server_metrics_url = self._server_metrics_url
    return server_metrics_url
7 changes: 6 additions & 1 deletion genai-perf/genai_perf/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
"image_height_mean",
"image_height_stddev",
"image_format",
"server_metrics_url",
]

utils.remove_file(args.profile_export_file)
Expand Down Expand Up @@ -148,7 +149,10 @@ def run(
) -> None:
try:
if telemetry_data_collector is not None:
telemetry_data_collector.start()
if telemetry_data_collector.check_url_reachability():
telemetry_data_collector.start()
else:
logger.debug(f"The server-metrics-url provided ({telemetry_data_collector.metrics_url}) is unreachable, cannot collect telemetry data")
cmd = Profiler.build_cmd(args, extra_args)
logger.info(f"Running Perf Analyzer : '{' '.join(cmd)}'")
if args and args.verbose:
Expand All @@ -158,3 +162,4 @@ def run(
finally:
if telemetry_data_collector is not None:
telemetry_data_collector.stop()

70 changes: 70 additions & 0 deletions genai-perf/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def test_help_version_arguments_output_and_exit(
"embeddings",
"--service-kind",
"openai",
],
{"batch_size": 5},
),
Expand Down Expand Up @@ -841,3 +842,72 @@ def test_get_extra_inputs_as_dict(self, extra_inputs_list, expected_dict):
namespace.extra_inputs = extra_inputs_list
actual_dict = parser.get_extra_inputs_as_dict(namespace)
assert actual_dict == expected_dict

# Well-formed metrics endpoint used by the custom-URL test case.
TEST_TRITON_METRICS_URL = "http://custom-metrics-url:8002/metrics"
# Value without a scheme or host, used by the invalid-URL test case.
INVALID_URL = "invalid_url"
# Text the invalid-URL test case expects to find on stderr.
INVALID_URL_ERROR_MESSAGE = (
    "The --server-metrics-url option "
    "contains an invalid URL format."
)

@pytest.mark.parametrize(
    "args_list, expected_url, expected_error",
    [
        # A custom URL is stored on the parsed args as given.
        (
            [
                "genai-perf",
                "profile",
                "--model",
                "test_model",
                "--service-kind",
                "triton",
                "--server-metrics-url",
                TEST_TRITON_METRICS_URL,
            ],
            TEST_TRITON_METRICS_URL,
            None,
        ),
        # Without the option the parsed value stays None.
        (
            [
                "genai-perf",
                "profile",
                "--model",
                "test_model",
                "--service-kind",
                "triton",
            ],
            None,
            None,
        ),
        # An invalid URL makes the parser exit with an error message.
        (
            [
                "genai-perf",
                "profile",
                "--model",
                "test_model",
                "--service-kind",
                "triton",
                "--server-metrics-url",
                INVALID_URL,
            ],
            None,
            INVALID_URL_ERROR_MESSAGE,
        ),
    ],
)
def test_server_metrics_url_for_triton(
    self, args_list, expected_url, expected_error, monkeypatch, capsys
):
    """Check --server-metrics-url parsing for the 'triton' service kind.

    Cases without an expected error assert the parsed URL value. Cases with
    one assert that parsing raises SystemExit with a non-zero code and that
    the expected message appears on stderr.
    """
    monkeypatch.setattr("sys.argv", args_list)

    if not expected_error:
        parsed_args, _ = parser.parse_args()
        assert parsed_args.server_metrics_url == expected_url
        return

    with pytest.raises(SystemExit) as exit_info:
        parser.parse_args()

    stderr_text = capsys.readouterr().err
    assert expected_error in stderr_text
    assert exit_info.value.code != 0

0 comments on commit c325234

Please sign in to comment.