Commit

Minor refactor and cleanup
nv-braf committed Oct 17, 2024
1 parent 8322e58 commit b23389c
Showing 1 changed file with 23 additions and 52 deletions.
75 changes: 23 additions & 52 deletions genai-perf/genai_perf/demo_for_visualize.py
@@ -19,6 +19,7 @@
 from genai_perf.config.generate.sweep_objective_generator import SweepObjectiveGenerator
 from genai_perf.config.input.config_command import ConfigCommand
 from genai_perf.config.run.results import Results
+from genai_perf.config.run.run_config import RunConfig
 from genai_perf.measurements.model_constraints import ModelConstraints
 from genai_perf.measurements.run_constraints import RunConstraints
 from genai_perf.record.types.input_sequence_length import InputSequenceLength
@@ -28,8 +29,22 @@
 from tests.test_utils import create_run_config


+def print_run_config(run_config: RunConfig) -> None:
+    throughput = run_config.get_model_perf_metric_value(
+        "test_model", PerfThroughput.tag
+    )
+    latency = run_config.get_model_perf_metric_value("test_model", PerfLatencyP99.tag)
+    isl = run_config.get_model_perf_metric_value("test_model", InputSequenceLength.tag)
+    pa_parameters = run_config.perf_analyzer_config.get_parameters()
+    concurrency = pa_parameters["concurrency"]
+
+    print(
+        f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
+    )
+
+
 def main():
-    random.seed(0)
+    random.seed(10)

     # This is a demonstration of how sweep/analyze would run in
     # GenAI-Perf and how the output (Results class) can be used
@@ -89,72 +104,28 @@ def main():
     # Results is a list of RunConfigs sorted by objective - for my "fake" config I've
     # set the default to be throughput. Results is always sorted based on objective with
     # the first entry being the best
-    print("Example 1 - Throughput:")
+    print("\nExample 1 - Objective is highest throughput:")
     for run_config in results.run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
-    print("")
+        print_run_config(run_config)

     # Now lets change the objective to latency
     results.set_perf_metric_objectives({"test_model": {PerfLatencyP99.tag: 1}})

-    print("Example 2 - Latency:")
+    print("\nExample 2 - Objective is lowest latency:")
     for run_config in results.run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
-    print("")
+        print_run_config(run_config)

     # Now lets set the objective back to throughput, but place a constraint that latency has to
     # be below a certain value
     results.set_perf_metric_objectives({"test_model": {PerfThroughput.tag: 1}})

-    model_constraints = ModelConstraints({PerfLatencyP99.tag: 80})
+    model_constraints = ModelConstraints({PerfLatencyP99.tag: 70})
     run_constraints = RunConstraints({"test_model": model_constraints})
     results.set_constraints(run_constraints)

-    print("Example 3 - Throughput w/ a latency constraint:")
+    print("\nExample 3 - Objective is throughput w/ a latency constraint of 70 ms:")
     for run_config in results.get_results_passing_constraints().run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
+        print_run_config(run_config)


 if __name__ == "__main__":
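
For readers skimming the diff, the substance of the change is small: the per-RunConfig reporting that was duplicated in all three example loops now lives in the new print_run_config helper, and each loop body collapses to a single call. Below is a minimal sketch of the resulting flow as a reading aid, not the file itself: it uses only the Results/RunConfig calls visible in the diff, the import path for print_run_config is inferred from the file's location, and the metric tags are passed in as parameters to avoid assuming where PerfThroughput and PerfLatencyP99 live in the package.

# Sketch only: `results` would come from the sweep/analyze step that the diff
# elides, and every method call below mirrors one visible in the changed file.
from genai_perf.config.run.results import Results
from genai_perf.measurements.model_constraints import ModelConstraints
from genai_perf.measurements.run_constraints import RunConstraints

# Assumed import path, based on the file genai-perf/genai_perf/demo_for_visualize.py.
from genai_perf.demo_for_visualize import print_run_config


def report(results: Results, throughput_tag, latency_tag) -> None:
    # throughput_tag / latency_tag stand in for PerfThroughput.tag and
    # PerfLatencyP99.tag from the demo; they are parameters here only to keep
    # the sketch free of guessed import paths.

    # Default ordering: the demo config's objective is highest throughput,
    # and results.run_configs is always sorted best-first.
    print("\nObjective: highest throughput")
    for run_config in results.run_configs:
        print_run_config(run_config)

    # Re-sort the same results by p99 latency instead.
    results.set_perf_metric_objectives({"test_model": {latency_tag: 1}})
    print("\nObjective: lowest latency")
    for run_config in results.run_configs:
        print_run_config(run_config)

    # Back to throughput, but only keep configs whose p99 latency stays under 70 ms.
    results.set_perf_metric_objectives({"test_model": {throughput_tag: 1}})
    results.set_constraints(
        RunConstraints({"test_model": ModelConstraints({latency_tag: 70})})
    )
    print("\nObjective: throughput with a p99 latency constraint of 70 ms")
    for run_config in results.get_results_passing_constraints().run_configs:
        print_run_config(run_config)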
