Commit

Minor refactor and cleanup
nv-braf committed Oct 17, 2024
1 parent 8322e58 commit b23389c
Showing 1 changed file with 23 additions and 52 deletions.
75 changes: 23 additions & 52 deletions genai-perf/genai_perf/demo_for_visualize.py
@@ -19,6 +19,7 @@
 from genai_perf.config.generate.sweep_objective_generator import SweepObjectiveGenerator
 from genai_perf.config.input.config_command import ConfigCommand
 from genai_perf.config.run.results import Results
+from genai_perf.config.run.run_config import RunConfig
 from genai_perf.measurements.model_constraints import ModelConstraints
 from genai_perf.measurements.run_constraints import RunConstraints
 from genai_perf.record.types.input_sequence_length import InputSequenceLength
@@ -28,8 +29,22 @@
 from tests.test_utils import create_run_config


+def print_run_config(run_config: RunConfig) -> None:
+    throughput = run_config.get_model_perf_metric_value(
+        "test_model", PerfThroughput.tag
+    )
+    latency = run_config.get_model_perf_metric_value("test_model", PerfLatencyP99.tag)
+    isl = run_config.get_model_perf_metric_value("test_model", InputSequenceLength.tag)
+    pa_parameters = run_config.perf_analyzer_config.get_parameters()
+    concurrency = pa_parameters["concurrency"]
+
+    print(
+        f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
+    )
+
+
 def main():
-    random.seed(0)
+    random.seed(10)

     # This is a demonstration of how sweep/analyze would run in
     # GenAI-Perf and how the output (Results class) can be used
@@ -89,72 +104,28 @@ def main():
     # Results is a list of RunConfigs sorted by objective - for my "fake" config I've
     # set the default to be throughput. Results is always sorted based on objective with
     # the first entry being the best
-    print("Example 1 - Throughput:")
+    print("\nExample 1 - Objective is highest throughput:")
     for run_config in results.run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
-    print("")
+        print_run_config(run_config)

     # Now lets change the objective to latency
     results.set_perf_metric_objectives({"test_model": {PerfLatencyP99.tag: 1}})

-    print("Example 2 - Latency:")
+    print("\nExample 2 - Objective is lowest latency:")
     for run_config in results.run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
-    print("")
+        print_run_config(run_config)

     # Now lets set the objective back to throughput, but place a constraint that latency has to
     # be below a certain value
     results.set_perf_metric_objectives({"test_model": {PerfThroughput.tag: 1}})

-    model_constraints = ModelConstraints({PerfLatencyP99.tag: 80})
+    model_constraints = ModelConstraints({PerfLatencyP99.tag: 70})
     run_constraints = RunConstraints({"test_model": model_constraints})
     results.set_constraints(run_constraints)

-    print("Example 3 - Throughput w/ a latency constraint:")
+    print("\nExample 3 - Objective is throughput w/ a latency constraint of 70 ms:")
     for run_config in results.get_results_passing_constraints().run_configs:
-        throughput = run_config.get_model_perf_metric_value(
-            "test_model", PerfThroughput.tag
-        )
-        latency = run_config.get_model_perf_metric_value(
-            "test_model", PerfLatencyP99.tag
-        )
-        isl = run_config.get_model_perf_metric_value(
-            "test_model", InputSequenceLength.tag
-        )
-        pa_parameters = run_config.perf_analyzer_config.get_parameters()
-        concurrency = pa_parameters["concurrency"]
-
-        print(
-            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
-        )
+        print_run_config(run_config)


 if __name__ == "__main__":
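
For readers skimming the diff, the substance of the change is small: the per-RunConfig reporting that was duplicated in all three example loops now lives in the new print_run_config helper, and each loop body collapses to a single call. Below is a minimal sketch of the resulting flow as a reading aid, not the file itself: it uses only the Results/RunConfig calls visible in the diff, the import path for print_run_config is inferred from the file's location, and the metric tags are passed in as parameters to avoid assuming where PerfThroughput and PerfLatencyP99 live in the package.

# Sketch only: `results` would come from the sweep/analyze step that the diff
# elides, and every method call below mirrors one visible in the changed file.
from genai_perf.config.run.results import Results
from genai_perf.measurements.model_constraints import ModelConstraints
from genai_perf.measurements.run_constraints import RunConstraints

# Assumed import path, based on the file genai-perf/genai_perf/demo_for_visualize.py.
from genai_perf.demo_for_visualize import print_run_config


def report(results: Results, throughput_tag, latency_tag) -> None:
    # throughput_tag / latency_tag stand in for PerfThroughput.tag and
    # PerfLatencyP99.tag from the demo; they are parameters here only to keep
    # the sketch free of guessed import paths.

    # Default ordering: the demo config's objective is highest throughput,
    # and results.run_configs is always sorted best-first.
    print("\nObjective: highest throughput")
    for run_config in results.run_configs:
        print_run_config(run_config)

    # Re-sort the same results by p99 latency instead.
    results.set_perf_metric_objectives({"test_model": {latency_tag: 1}})
    print("\nObjective: lowest latency")
    for run_config in results.run_configs:
        print_run_config(run_config)

    # Back to throughput, but only keep configs whose p99 latency stays under 70 ms.
    results.set_perf_metric_objectives({"test_model": {throughput_tag: 1}})
    results.set_constraints(
        RunConstraints({"test_model": ModelConstraints({latency_tag: 70})})
    )
    print("\nObjective: throughput with a p99 latency constraint of 70 ms")
    for run_config in results.get_results_passing_constraints().run_configs:
        print_run_config(run_config)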
