From b23389caacc940518c556cb8c84486e2bf88a148 Mon Sep 17 00:00:00 2001
From: braf
Date: Thu, 17 Oct 2024 17:11:46 +0000
Subject: [PATCH] Minor refactor and cleanup

---
 genai-perf/genai_perf/demo_for_visualize.py | 75 +++++++--------
 1 file changed, 23 insertions(+), 52 deletions(-)

diff --git a/genai-perf/genai_perf/demo_for_visualize.py b/genai-perf/genai_perf/demo_for_visualize.py
index 0f58922b..21e317bc 100644
--- a/genai-perf/genai_perf/demo_for_visualize.py
+++ b/genai-perf/genai_perf/demo_for_visualize.py
@@ -19,6 +19,7 @@
 from genai_perf.config.generate.sweep_objective_generator import SweepObjectiveGenerator
 from genai_perf.config.input.config_command import ConfigCommand
 from genai_perf.config.run.results import Results
+from genai_perf.config.run.run_config import RunConfig
 from genai_perf.measurements.model_constraints import ModelConstraints
 from genai_perf.measurements.run_constraints import RunConstraints
 from genai_perf.record.types.input_sequence_length import InputSequenceLength
@@ -28,8 +29,22 @@
 from tests.test_utils import create_run_config
 
 
+def print_run_config(run_config: RunConfig) -> None:
+    throughput = run_config.get_model_perf_metric_value(
+        "test_model", PerfThroughput.tag
+    )
+    latency = run_config.get_model_perf_metric_value("test_model", PerfLatencyP99.tag)
+    isl = run_config.get_model_perf_metric_value("test_model", InputSequenceLength.tag)
+    pa_parameters = run_config.perf_analyzer_config.get_parameters()
+    concurrency = pa_parameters["concurrency"]
+
+    print(
+        f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
+    )
+
+
 def main():
-    random.seed(0)
+    random.seed(10)
 
     # This is a demonstration of how sweep/analyze would run in
     # GenAI-Perf and how the output (Results class) can be used
@@ -89,72 +104,28 @@ def main():
     # Results is a list of RunConfigs sorted by objective - for my "fake" config I've
     # set the default to be throughput. Results is always sorted based on objective with
     # the first entry being the best
-    print("Example 1 - Throughput:")
+    print("\nExample 1 - Objective is highest throughput:")
     for run_config in results.run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
-    print("")
+        print_run_config(run_config)
 
     # Now lets change the objective to latency
     results.set_perf_metric_objectives({"test_model": {PerfLatencyP99.tag: 1}})
 
-    print("Example 2 - Latency:")
+    print("\nExample 2 - Objective is lowest latency:")
     for run_config in results.run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
-    print("")
+        print_run_config(run_config)
 
     # Now lets set the objective back to throughput, but place a constraint that latency has to
     # be below a certain value
     results.set_perf_metric_objectives({"test_model": {PerfThroughput.tag: 1}})
-    model_constraints = ModelConstraints({PerfLatencyP99.tag: 80})
+    model_constraints = ModelConstraints({PerfLatencyP99.tag: 70})
     run_constraints = RunConstraints({"test_model": model_constraints})
     results.set_constraints(run_constraints)
 
-    print("Example 3 - Throughput w/ a latency constraint:")
+    print("\nExample 3 - Objective is throughput w/ a latency constraint of 70 ms:")
     for run_config in results.get_results_passing_constraints().run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
+        print_run_config(run_config)
 
 
 if __name__ == "__main__":
     main()
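Reviewer note (not part of the patch): the three examples above all follow one flow:
set a perf-metric objective, optionally attach constraints, then walk the sorted
run_configs. Below is a minimal standalone sketch of that flow. It assumes a
"results" object already produced by the sweep as in the demo, and the import
paths for PerfThroughput / PerfLatencyP99 are assumptions patterned on the
InputSequenceLength import visible in the first hunk.

    # Sketch only: "results" is assumed to come from the demo's sweep step.
    from genai_perf.measurements.model_constraints import ModelConstraints
    from genai_perf.measurements.run_constraints import RunConstraints
    from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99  # assumed path
    from genai_perf.record.types.perf_throughput import PerfThroughput  # assumed path


    def best_config_under_latency(results, max_latency_p99):
        # Rank configs by throughput (the value 1 is the objective's weight,
        # matching how the demo passes objectives)...
        results.set_perf_metric_objectives({"test_model": {PerfThroughput.tag: 1}})
        # ...then keep only configs whose p99 latency satisfies the constraint.
        results.set_constraints(
            RunConstraints(
                {"test_model": ModelConstraints({PerfLatencyP99.tag: max_latency_p99})}
            )
        )
        passing = results.get_results_passing_constraints().run_configs
        return passing[0] if passing else None  # run_configs is sorted best-first

Calling best_config_under_latency(results, 70) would reproduce the top entry of
Example 3.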