Commit

Initial demo
nv-braf committed Oct 17, 2024
1 parent 53d5b27 commit 8322e58
Showing 2 changed files with 162 additions and 1 deletion.
161 changes: 161 additions & 0 deletions genai-perf/genai_perf/demo_for_visualize.py
@@ -0,0 +1,161 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import sys

from genai_perf.config.generate.search_parameters import SearchParameters
from genai_perf.config.generate.sweep_objective_generator import SweepObjectiveGenerator
from genai_perf.config.input.config_command import ConfigCommand
from genai_perf.config.run.results import Results
from genai_perf.measurements.model_constraints import ModelConstraints
from genai_perf.measurements.run_constraints import RunConstraints
from genai_perf.record.types.input_sequence_length import InputSequenceLength
from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99
from genai_perf.record.types.perf_throughput import PerfThroughput
from genai_perf.types import ModelSearchParameters
from tests.test_utils import create_run_config


def main():
    random.seed(0)

    # This is a demonstration of how sweep/analyze would run in
    # GenAI-Perf and how the output (the Results class) can be used
    # by visualization

    # We don't have a new Config/CLI interface yet, so for now I've created
    # a dataclass that allows you to set values. For this first example,
    # we will use all default values (these will not be our actual defaults
    # in the product)
    config = ConfigCommand(model_names=["test_model"])

    # In this next section we determine the search space. By default this
    # currently sweeps over concurrency from 1 to 1024 (by powers of 2)
    model_search_parameters = {"test_model": SearchParameters(config.analyze)}
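    # Purely illustrative, not consumed below: the default sweep described above
    # works out to concurrencies of 1, 2, 4, ..., 1024, i.e. [2**i for i in range(11)].
    # The real values come from SearchParameters, not from this comment.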

    # Now we instantiate the Sweep Objective Generator, which will create GenAI-Perf & PA
    # configs based on the user config and the model's search parameters
    sweep_objective_generator = SweepObjectiveGenerator(config, model_search_parameters)

    # Next we iterate through the generator - in the real world we would call PA
    # to find the metrics for each config profiled. For this example I will use a
    # test utility created to generate metrics

    # Each profile (or iteration) creates a RunConfig instance, and a list of these
    # is stored in the Results class
    results = Results()
    for count, objective in enumerate(sweep_objective_generator.get_objectives()):

        # A RunConfig consists of a unique name, the GenAI-Perf config, the PA config,
        # and GPU + performance metrics. The test utility uses the information provided
        # here to create one
        run_config = create_run_config(
            # These values are used to set the GAP/PA config
            run_config_name="test_model_run_config_" + str(count),
            model_objective_parameters=objective,
            config=config,
            # Telemetry metrics
            gpu_power=random.randint(400, 500),
            gpu_utilization=random.randint(1, 100),
            # Performance metrics
            throughput=random.randint(100, 300),
            latency=random.randint(50, 100),
            input_seq_length=random.randint(20, 80),
            output_seq_length=random.randint(30, 60),
        )

        # Now we add the RunConfig to the Results class
        results.add_run_config(run_config)

    # At this point Analyze would be complete and the Results would be saved to a
    # checkpoint file. When visualize is called, the checkpoint file would be read and
    # the Results class would be restored. I am omitting these steps as they are not
    # relevant to the visualize work, and you can assume that when visualize is called
    # the Results class will be passed in

    # Now I will demonstrate how Results and RunConfig can be utilized via the APIs

    # Results holds a list of RunConfigs sorted by objective - for my "fake" config I've
    # set the default objective to be throughput. Results is always sorted by objective,
    # with the first entry being the best
print("Example 1 - Throughput:")
for run_config in results.run_configs:
throughput = run_config.get_model_perf_metric_value(
"test_model", PerfThroughput.tag
)
latency = run_config.get_model_perf_metric_value(
"test_model", PerfLatencyP99.tag
)
isl = run_config.get_model_perf_metric_value(
"test_model", InputSequenceLength.tag
)
pa_parameters = run_config.perf_analyzer_config.get_parameters()
concurrency = pa_parameters["concurrency"]

print(
f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
)
print("")

    # Now let's change the objective to latency
    results.set_perf_metric_objectives({"test_model": {PerfLatencyP99.tag: 1}})

    print("Example 2 - Latency:")
    for run_config in results.run_configs:
        throughput = run_config.get_model_perf_metric_value(
            "test_model", PerfThroughput.tag
        )
        latency = run_config.get_model_perf_metric_value(
            "test_model", PerfLatencyP99.tag
        )
        isl = run_config.get_model_perf_metric_value(
            "test_model", InputSequenceLength.tag
        )
        pa_parameters = run_config.perf_analyzer_config.get_parameters()
        concurrency = pa_parameters["concurrency"]

        print(
            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
        )
    print("")

    # Now let's set the objective back to throughput, but place a constraint that
    # latency has to be below a certain value
    results.set_perf_metric_objectives({"test_model": {PerfThroughput.tag: 1}})

    model_constraints = ModelConstraints({PerfLatencyP99.tag: 80})
    run_constraints = RunConstraints({"test_model": model_constraints})
    results.set_constraints(run_constraints)

    print("Example 3 - Throughput w/ a latency constraint:")
    for run_config in results.get_results_passing_constraints().run_configs:
        throughput = run_config.get_model_perf_metric_value(
            "test_model", PerfThroughput.tag
        )
        latency = run_config.get_model_perf_metric_value(
            "test_model", PerfLatencyP99.tag
        )
        isl = run_config.get_model_perf_metric_value(
            "test_model", InputSequenceLength.tag
        )
        pa_parameters = run_config.perf_analyzer_config.get_parameters()
        concurrency = pa_parameters["concurrency"]

        print(
            f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}"
        )
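
    # Illustrative sketch only: one way a visualize step could collect plot-ready
    # values from the sorted results, using the same accessors shown above. The
    # variable name below is arbitrary and is not used elsewhere in this demo.
    throughput_vs_concurrency = [
        (
            run_config.perf_analyzer_config.get_parameters()["concurrency"],
            run_config.get_model_perf_metric_value("test_model", PerfThroughput.tag),
        )
        for run_config in results.run_configs
    ]
    print(f"\nPlot-ready (concurrency, throughput) pairs: {throughput_vs_concurrency}")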


if __name__ == "__main__":
    sys.exit(main())
2 changes: 1 addition & 1 deletion genai-perf/tests/test_utils.py
@@ -101,6 +101,7 @@ def create_run_config_measurement(
 # RunConfig Constructor
 ###########################################################################
 def create_run_config(
+    config: ConfigCommand,
     run_config_name: str,
     model_name: str = "test_model",
     model_objective_parameters: ModelObjectiveParameters = {},
@@ -112,7 +113,6 @@ def create_run_config(
     input_seq_length: int = 0,
     output_seq_length: int = 0,
 ) -> RunConfig:
-    config = ConfigCommand([model_name])
     genai_perf_config = GenAIPerfConfig(
         config=config, model_objective_parameters=model_objective_parameters
     )
