From eb88f8ee558b6d59f1ab3e04c4375eed35a8dc7a Mon Sep 17 00:00:00 2001 From: braf Date: Mon, 4 Nov 2024 22:23:03 +0000 Subject: [PATCH 01/16] Changing record names to match GAP and adding some missing type checking --- genai-perf/genai_perf/record/record.py | 18 +----- .../record/types/perf_latency_avg.py | 58 ------------------- .../record/types/perf_latency_p90.py | 58 ------------------- .../record/types/perf_latency_p95.py | 58 ------------------- .../record/types/perf_latency_p99.py | 58 ------------------- .../record/types/request_latency_avg.py | 33 +++++++++++ ...atency_base.py => request_latency_base.py} | 35 +++-------- .../record/types/request_latency_max.py | 33 +++++++++++ .../record/types/request_latency_min.py | 33 +++++++++++ .../record/types/request_latency_p25.py | 33 +++++++++++ .../record/types/request_latency_p50.py | 33 +++++++++++ .../record/types/request_latency_p75.py | 33 +++++++++++ .../record/types/request_latency_p90.py | 33 +++++++++++ .../record/types/request_latency_p95.py | 33 +++++++++++ .../record/types/request_latency_p99.py | 33 +++++++++++ .../record/types/request_latency_std.py | 33 +++++++++++ genai-perf/genai_perf/types.py | 1 + genai-perf/tests/test_record.py | 16 +++-- 18 files changed, 354 insertions(+), 278 deletions(-) delete mode 100644 genai-perf/genai_perf/record/types/perf_latency_avg.py delete mode 100644 genai-perf/genai_perf/record/types/perf_latency_p90.py delete mode 100644 genai-perf/genai_perf/record/types/perf_latency_p95.py delete mode 100644 genai-perf/genai_perf/record/types/perf_latency_p99.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_avg.py rename genai-perf/genai_perf/record/types/{perf_latency_base.py => request_latency_base.py} (67%) create mode 100644 genai-perf/genai_perf/record/types/request_latency_max.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_min.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p25.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p50.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p75.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p90.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p95.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_p99.py create mode 100644 genai-perf/genai_perf/record/types/request_latency_std.py diff --git a/genai-perf/genai_perf/record/record.py b/genai-perf/genai_perf/record/record.py index 122ed4c1..ca19be15 100644 --- a/genai-perf/genai_perf/record/record.py +++ b/genai-perf/genai_perf/record/record.py @@ -19,6 +19,7 @@ from typing import Dict, Union from genai_perf.exceptions import GenAIPerfException +from genai_perf.types import RecordValue class RecordType(ABCMeta): @@ -90,16 +91,7 @@ class Record(metaclass=RecordType): records """ - def __init__(self, value: Union[float, int], timestamp: int): - """ - Parameters - ---------- - value : float or int - The value of the GPU metric - timestamp : int - The timestamp for the record in nanoseconds - """ - + def __init__(self, value: RecordValue, timestamp: int): assert type(value) is float or type(value) is int assert type(timestamp) is int @@ -124,16 +116,12 @@ def aggregation_function(): def value_function() -> float: """ Returns the average value from a list - - Returns - ------- - Average value of the list """ return mean # type: ignore @staticmethod @abstractmethod - def header(aggregation_tag=False): + def 
header(aggregation_tag=False) -> str: """ Parameters ---------- diff --git a/genai-perf/genai_perf/record/types/perf_latency_avg.py b/genai-perf/genai_perf/record/types/perf_latency_avg.py deleted file mode 100644 index 7b40ce14..00000000 --- a/genai-perf/genai_perf/record/types/perf_latency_avg.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.types.perf_latency_base import PerfLatencyBase - - -@total_ordering -class PerfLatencyAvg(PerfLatencyBase): - """ - A record for perf_analyzer latency metric - """ - - tag = "perf_latency_avg" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Avg Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_p90.py b/genai-perf/genai_perf/record/types/perf_latency_p90.py deleted file mode 100644 index e05b7632..00000000 --- a/genai-perf/genai_perf/record/types/perf_latency_p90.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.types.perf_latency_base import PerfLatencyBase - - -@total_ordering -class PerfLatencyP90(PerfLatencyBase): - """ - A record for perf_analyzer latency metric - """ - - tag = "perf_latency_p90" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. 
- - Returns - ------- - str - The full name of the - metric. - """ - - return "p90 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_p95.py b/genai-perf/genai_perf/record/types/perf_latency_p95.py deleted file mode 100644 index 53877cad..00000000 --- a/genai-perf/genai_perf/record/types/perf_latency_p95.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.types.perf_latency_base import PerfLatencyBase - - -@total_ordering -class PerfLatencyP95(PerfLatencyBase): - """ - A record for perf_analyzer latency metric - """ - - tag = "perf_latency_p95" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "p95 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_p99.py b/genai-perf/genai_perf/record/types/perf_latency_p99.py deleted file mode 100644 index ba568a90..00000000 --- a/genai-perf/genai_perf/record/types/perf_latency_p99.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.types.perf_latency_base import PerfLatencyBase - - -@total_ordering -class PerfLatencyP99(PerfLatencyBase): - """ - A record for perf_analyzer latency metric - """ - - tag = "perf_latency_p99" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. 
- - Returns - ------- - str - The full name of the - metric. - """ - - return "p99 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_avg.py b/genai-perf/genai_perf/record/types/request_latency_avg.py new file mode 100644 index 00000000..2f7a0bb5 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_avg.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyAvg(RequestLatencyBase): + """ + A record for avg request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Avg. Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_base.py b/genai-perf/genai_perf/record/types/request_latency_base.py similarity index 67% rename from genai-perf/genai_perf/record/types/perf_latency_base.py rename to genai-perf/genai_perf/record/types/request_latency_base.py index b7d6f5f5..37926a1e 100644 --- a/genai-perf/genai_perf/record/types/perf_latency_base.py +++ b/genai-perf/genai_perf/record/types/request_latency_base.py @@ -15,44 +15,27 @@ from functools import total_ordering from genai_perf.record.record import DecreasingRecord +from genai_perf.types import RecordValue @total_ordering -class PerfLatencyBase(DecreasingRecord): +class RequestLatencyBase(DecreasingRecord): """ - A base class for perf_analyzer latency metric + A base class for the request latency metric """ - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ + base_tag = "request_latency" + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: super().__init__(value, timestamp) - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "RequestLatencyBase") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "RequestLatencyBase") -> bool: return self.value() > other.value() - def __add__(self, other): + def __add__(self, other: "RequestLatencyBase") -> "RequestLatencyBase": """ Allows adding two records together to produce a brand new record. @@ -60,7 +43,7 @@ def __add__(self, other): return self.__class__(value=(self.value() + other.value())) - def __sub__(self, other): + def __sub__(self, other: "RequestLatencyBase") -> "RequestLatencyBase": """ Allows subbing two records together to produce a brand new record. 
diff --git a/genai-perf/genai_perf/record/types/request_latency_max.py b/genai-perf/genai_perf/record/types/request_latency_max.py
new file mode 100644
index 00000000..70ec40e4
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/request_latency_max.py
@@ -0,0 +1,33 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+
+from genai_perf.record.types.request_latency_base import RequestLatencyBase
+
+
+@total_ordering
+class RequestLatencyMax(RequestLatencyBase):
+    """
+    A record for max request latency metric
+    """
+
+    tag = RequestLatencyBase.base_tag + "_max"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "Max Request Latency (ms)"
diff --git a/genai-perf/genai_perf/record/types/request_latency_min.py b/genai-perf/genai_perf/record/types/request_latency_min.py
new file mode 100644
index 00000000..ac84e3d8
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/request_latency_min.py
@@ -0,0 +1,33 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+
+from genai_perf.record.types.request_latency_base import RequestLatencyBase
+
+
+@total_ordering
+class RequestLatencyMin(RequestLatencyBase):
+    """
+    A record for min request latency metric
+    """
+
+    tag = RequestLatencyBase.base_tag + "_min"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "Min Request Latency (ms)"
diff --git a/genai-perf/genai_perf/record/types/request_latency_p25.py b/genai-perf/genai_perf/record/types/request_latency_p25.py
new file mode 100644
index 00000000..cd011a65
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/request_latency_p25.py
@@ -0,0 +1,33 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+
+from genai_perf.record.types.request_latency_base import RequestLatencyBase
+
+
+@total_ordering
+class RequestLatencyP25(RequestLatencyBase):
+    """
+    A record for p25 request latency metric
+    """
+
+    tag = RequestLatencyBase.base_tag + "_p25"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "p25 Request Latency (ms)"
diff --git a/genai-perf/genai_perf/record/types/request_latency_p50.py b/genai-perf/genai_perf/record/types/request_latency_p50.py
new file mode 100644
index 00000000..9de0c760
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/request_latency_p50.py
@@ -0,0 +1,33 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+
+from genai_perf.record.types.request_latency_base import RequestLatencyBase
+
+
+@total_ordering
+class RequestLatencyP50(RequestLatencyBase):
+    """
+    A record for p50 request latency metric
+    """
+
+    tag = RequestLatencyBase.base_tag + "_p50"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "p50 Request Latency (ms)"
diff --git a/genai-perf/genai_perf/record/types/request_latency_p75.py b/genai-perf/genai_perf/record/types/request_latency_p75.py
new file mode 100644
index 00000000..adb79f6f
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/request_latency_p75.py
@@ -0,0 +1,33 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP75(RequestLatencyBase): + """ + A record for p75 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p75" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p75 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p90.py b/genai-perf/genai_perf/record/types/request_latency_p90.py new file mode 100644 index 00000000..901d55b2 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p90.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP90(RequestLatencyBase): + """ + A record for p90 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p90" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p90 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p95.py b/genai-perf/genai_perf/record/types/request_latency_p95.py new file mode 100644 index 00000000..b1ef4061 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p95.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP95(RequestLatencyBase): + """ + A record for p95 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p95" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p95 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_p99.py b/genai-perf/genai_perf/record/types/request_latency_p99.py new file mode 100644 index 00000000..41415a96 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_p99.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyP99(RequestLatencyBase): + """ + A record for p99 request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_p99" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p99 Request Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/request_latency_std.py b/genai-perf/genai_perf/record/types/request_latency_std.py new file mode 100644 index 00000000..a79e7b48 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_latency_std.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.request_latency_base import RequestLatencyBase + + +@total_ordering +class RequestLatencyStd(RequestLatencyBase): + """ + A record for std request latency metric + """ + + tag = RequestLatencyBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std. 
Request Latency (ms)" diff --git a/genai-perf/genai_perf/types.py b/genai-perf/genai_perf/types.py index 578da98a..b5e1f205 100644 --- a/genai-perf/genai_perf/types.py +++ b/genai-perf/genai_perf/types.py @@ -38,6 +38,7 @@ GpuRecords: TypeAlias = Dict[GpuId, TelemetryRecords] PerfRecords: TypeAlias = Dict[str, "Record"] # type: ignore PerfMetricName: TypeAlias = str +RecordValue = Union[float, int] ########################################################################### # Constraints diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py index d2b398f4..498664a2 100644 --- a/genai-perf/tests/test_record.py +++ b/genai-perf/tests/test_record.py @@ -45,10 +45,16 @@ def setUp(self): self.less_is_better_types = { record_types[t] for t in [ - "perf_latency_avg", - "perf_latency_p90", - "perf_latency_p95", - "perf_latency_p99", + "request_latency_min", + "request_latency_max", + "request_latency_avg", + "request_latency_std", + "request_latency_p25", + "request_latency_p50", + "request_latency_p75", + "request_latency_p90", + "request_latency_p95", + "request_latency_p99", "inter_token_latency_min", "inter_token_latency_max", "inter_token_latency_avg", @@ -200,7 +206,7 @@ def test_value(self): Test the value method """ avg_value = RecordType.get_all_record_types()[ - "perf_latency_p99" + "request_latency_p99" ].value_function()([10, 50, 100, 40]) total_value = RecordType.get_all_record_types()[ From 17e301ad361edcaf5adf92c400aece3596fde5ec Mon Sep 17 00:00:00 2001 From: braf Date: Mon, 4 Nov 2024 22:29:13 +0000 Subject: [PATCH 02/16] Fixing other unit tests --- genai-perf/genai_perf/demo_for_visualize.py | 132 ++++++++++++++++++ .../tests/test_model_config_measurement.py | 40 +++--- genai-perf/tests/test_results.py | 4 +- .../tests/test_run_config_measurement.py | 24 ++-- genai-perf/tests/test_utils.py | 4 +- 5 files changed, 172 insertions(+), 32 deletions(-) create mode 100644 genai-perf/genai_perf/demo_for_visualize.py diff --git a/genai-perf/genai_perf/demo_for_visualize.py b/genai-perf/genai_perf/demo_for_visualize.py new file mode 100644 index 00000000..21e317bc --- /dev/null +++ b/genai-perf/genai_perf/demo_for_visualize.py @@ -0,0 +1,132 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import random +import sys + +from genai_perf.config.generate.search_parameters import SearchParameters +from genai_perf.config.generate.sweep_objective_generator import SweepObjectiveGenerator +from genai_perf.config.input.config_command import ConfigCommand +from genai_perf.config.run.results import Results +from genai_perf.config.run.run_config import RunConfig +from genai_perf.measurements.model_constraints import ModelConstraints +from genai_perf.measurements.run_constraints import RunConstraints +from genai_perf.record.types.input_sequence_length import InputSequenceLength +from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 +from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.types import ModelSearchParameters +from tests.test_utils import create_run_config + + +def print_run_config(run_config: RunConfig) -> None: + throughput = run_config.get_model_perf_metric_value( + "test_model", PerfThroughput.tag + ) + latency = run_config.get_model_perf_metric_value("test_model", PerfLatencyP99.tag) + isl = run_config.get_model_perf_metric_value("test_model", InputSequenceLength.tag) + pa_parameters = run_config.perf_analyzer_config.get_parameters() + concurrency = pa_parameters["concurrency"] + + print( + f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}" + ) + + +def main(): + random.seed(10) + + # This is a demonstration of how sweep/analyze would run in + # GenAI-Perf and how the output (Results class) can be used + # by visualization + + # We don't have a new Config/CLI interface yet, so for now I've created + # a dataclass that allows you to set values. For this first example, + # we will use all default values (these will not be our actual defaults + # in product) + config = ConfigCommand(model_names=["test_model"]) + + # In this next section we will determine what the search space is + # by default right now this sweeps over concurrency from 1 to 1024 (by powers of 2) + model_search_parameters = {"test_model": SearchParameters(config.analyze)} + + # Now we instance the Sweep Objective Generator which will create GenAI-Perf & PA + # configs based on the user config and the model's search parameters + sweep_objective_generator = SweepObjectiveGenerator(config, model_search_parameters) + + # Next we iterate through the generator - in the real world we would call PA + # to find the metrics for each config profiled. For this example I will use a + # test utility created to generate metrics + + # Each profile (or iteration) creates a RunConfig instance and the list of these + # are stored in the Results class + results = Results() + for count, objective in enumerate(sweep_objective_generator.get_objectives()): + + # A RunConfig consists of a unique name, the GenAI-Perf config, the PA config + # and GPU + Performance metrics. 
This test utility uses the information provided + # here to create this + run_config = create_run_config( + # These values are used to set the GAP/PA config + run_config_name="test_model_run_config_" + str(count), + model_objective_parameters=objective, + config=config, + # Telemetry metrics + gpu_power=random.randint(400, 500), + gpu_utilization=random.randint(1, 100), + # Performance metrics + throughput=random.randint(100, 300), + latency=random.randint(50, 100), + input_seq_length=random.randint(20, 80), + output_seq_length=random.randint(30, 60), + ) + + # Now we add the RunConfig to the Results class + results.add_run_config(run_config) + + # At this point Analyze would be complete and the Results would be saved to a checkpoint file/ + # When visualize is called the checkpoint file would be read and the Results class would be + # restored. I am omitting these steps as they are not relevant to the visualize work and + # you can assume that when visualize is called the Results class will be passed in + + # Now I will demonstrate how Results and RunConfig can be utilized via the APIs + + # Results is a list of RunConfigs sorted by objective - for my "fake" config I've + # set the default to be throughput. Results is always sorted based on objective with + # the first entry being the best + print("\nExample 1 - Objective is highest throughput:") + for run_config in results.run_configs: + print_run_config(run_config) + + # Now lets change the objective to latency + results.set_perf_metric_objectives({"test_model": {PerfLatencyP99.tag: 1}}) + + print("\nExample 2 - Objective is lowest latency:") + for run_config in results.run_configs: + print_run_config(run_config) + + # Now lets set the objective back to throughput, but place a constraint that latency has to + # be below a certain value + results.set_perf_metric_objectives({"test_model": {PerfThroughput.tag: 1}}) + + model_constraints = ModelConstraints({PerfLatencyP99.tag: 70}) + run_constraints = RunConstraints({"test_model": model_constraints}) + results.set_constraints(run_constraints) + + print("\nExample 3 - Objective is throughput w/ a latency constraint of 70 ms:") + for run_config in results.get_results_passing_constraints().run_configs: + print_run_config(run_config) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/genai-perf/tests/test_model_config_measurement.py b/genai-perf/tests/test_model_config_measurement.py index 5f22b45e..ee73e77f 100644 --- a/genai-perf/tests/test_model_config_measurement.py +++ b/genai-perf/tests/test_model_config_measurement.py @@ -21,8 +21,8 @@ ModelConfigMeasurement, ModelConfigMeasurementDefaults, ) -from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 from genai_perf.record.types.time_to_first_token_avg import TimeToFirstTokenAvg @@ -33,21 +33,21 @@ class TestModelConfigMeasurement(unittest.TestCase): def setUp(self): self.throughput_recordA = PerfThroughput(1000) - self.latency_recordA = PerfLatencyP99(20) + self.latency_recordA = RequestLatencyP99(20) self.perf_metricsA = { PerfThroughput.tag: self.throughput_recordA, - PerfLatencyP99.tag: self.latency_recordA, + RequestLatencyP99.tag: self.latency_recordA, } self.mcmA = ModelConfigMeasurement(self.perf_metricsA) self.throughput_recordB = PerfThroughput(500) - self.latency_recordB = PerfLatencyP99(10) + self.latency_recordB = RequestLatencyP99(10) self.perf_metricsB = { 
PerfThroughput.tag: self.throughput_recordB, - PerfLatencyP99.tag: self.latency_recordB, + RequestLatencyP99.tag: self.latency_recordB, } self.mcmB = ModelConfigMeasurement(self.perf_metricsB) @@ -64,7 +64,7 @@ def test_basic_accessor_methods(self): """ self.assertEqual(self.mcmA.get_perf_metrics(), self.perf_metricsA) self.assertEqual( - self.mcmA.get_perf_metric(PerfLatencyP99.tag), self.latency_recordA + self.mcmA.get_perf_metric(RequestLatencyP99.tag), self.latency_recordA ) self.assertEqual( self.mcmA.get_perf_metric_value(PerfThroughput.tag, return_value=-1), @@ -85,8 +85,10 @@ def test_set_metric_objective(self): self.mcmA._metric_objectives, ) - self.mcmA.set_metric_objectives({PerfThroughput.tag: 2, PerfLatencyP99.tag: 3}) - expected_mw = {PerfThroughput.tag: 2 / 5, PerfLatencyP99.tag: 3 / 5} + self.mcmA.set_metric_objectives( + {PerfThroughput.tag: 2, RequestLatencyP99.tag: 3} + ) + expected_mw = {PerfThroughput.tag: 2 / 5, RequestLatencyP99.tag: 3 / 5} self.assertEqual(expected_mw, self.mcmA._metric_objectives) def test_get_weighted_score(self): @@ -104,8 +106,8 @@ def test_get_weighted_score(self): # In this case we will change the objective to be latency, with mcmA = 20, mcmB = 5 # since latency is a decreasing record (lower is better), scoreB will be positive - self.mcmA.set_metric_objectives({PerfLatencyP99.tag: 1}) - self.mcmB.set_metric_objectives({PerfLatencyP99.tag: 1}) + self.mcmA.set_metric_objectives({RequestLatencyP99.tag: 1}) + self.mcmB.set_metric_objectives({RequestLatencyP99.tag: 1}) scoreA = self.mcmA.get_weighted_score(self.mcmB) scoreB = self.mcmB.get_weighted_score(self.mcmA) @@ -144,8 +146,8 @@ def test_calculate_weighted_percentage_gain(self): self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), 100) self.assertEqual(self.mcmB.calculate_weighted_percentage_gain(self.mcmA), -50) - self.mcmA.set_metric_objectives({PerfLatencyP99.tag: 1}) - self.mcmB.set_metric_objectives({PerfLatencyP99.tag: 1}) + self.mcmA.set_metric_objectives({RequestLatencyP99.tag: 1}) + self.mcmB.set_metric_objectives({RequestLatencyP99.tag: 1}) # latency: mcmA: 20, mcmB: 10 self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), -50) @@ -157,8 +159,12 @@ def test_calculate_weighted_percentage_gain(self): # # mcmA has 50% worse throughput, but 100% better latency # mcmB has 100% better latency, but 50% worse throughput - self.mcmA.set_metric_objectives({PerfThroughput.tag: 1, PerfLatencyP99.tag: 1}) - self.mcmB.set_metric_objectives({PerfThroughput.tag: 1, PerfLatencyP99.tag: 1}) + self.mcmA.set_metric_objectives( + {PerfThroughput.tag: 1, RequestLatencyP99.tag: 1} + ) + self.mcmB.set_metric_objectives( + {PerfThroughput.tag: 1, RequestLatencyP99.tag: 1} + ) self.assertEqual(self.mcmA, self.mcmB) self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), 25) self.assertEqual(self.mcmB.calculate_weighted_percentage_gain(self.mcmA), 25) @@ -176,7 +182,7 @@ def test_is_better_than(self): self.assertTrue(self.mcmA.is_better_than(self.mcmB)) self.assertGreater(self.mcmA, self.mcmB) - self.mcmA.set_metric_objectives({PerfLatencyP99.tag: 1}) + self.mcmA.set_metric_objectives({RequestLatencyP99.tag: 1}) # latency: 20 is worse than 10 self.assertFalse(self.mcmA.is_better_than(self.mcmB)) @@ -188,7 +194,9 @@ def test_is_better_than_combo(self): """ # throuhput: 2000 vs. 1000 (better), latency: 20 vs. 
10 (worse) # with latency bias mcmB is better - self.mcmA.set_metric_objectives({PerfThroughput.tag: 1, PerfLatencyP99.tag: 3}) + self.mcmA.set_metric_objectives( + {PerfThroughput.tag: 1, RequestLatencyP99.tag: 3} + ) self.assertFalse(self.mcmA.is_better_than(self.mcmB)) diff --git a/genai-perf/tests/test_results.py b/genai-perf/tests/test_results.py index 8136303c..9ee419d9 100644 --- a/genai-perf/tests/test_results.py +++ b/genai-perf/tests/test_results.py @@ -20,7 +20,7 @@ from genai_perf.config.run.results import Results from genai_perf.measurements.run_constraints import ModelConstraints, RunConstraints from genai_perf.record.types.gpu_power_usage import GPUPowerUsage -from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 +from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 from tests.test_utils import create_run_config @@ -73,7 +73,7 @@ def test_objective_setting(self): # Changing the objective to latency will result in config_9 being best self._results.set_perf_metric_objectives( - {"test_model": {PerfLatencyP99.tag: 1}} + {"test_model": {RequestLatencyP99.tag: 1}} ) self.assertEqual("test_run_config_9", self._results.run_configs[0].name) diff --git a/genai-perf/tests/test_run_config_measurement.py b/genai-perf/tests/test_run_config_measurement.py index ca356617..94a3c810 100644 --- a/genai-perf/tests/test_run_config_measurement.py +++ b/genai-perf/tests/test_run_config_measurement.py @@ -23,8 +23,8 @@ from genai_perf.measurements.run_constraints import RunConstraints from genai_perf.record.types.gpu_power_usage import GPUPowerUsage from genai_perf.record.types.gpu_utilization import GPUUtilization -from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 class TestRunConfigMeasurement(unittest.TestCase): @@ -79,39 +79,39 @@ def _create_perf_metrics(self) -> None: # # Record A self.throughput_recordA = PerfThroughput(1000) - self.latency_recordA = PerfLatencyP99(40) + self.latency_recordA = RequestLatencyP99(40) self.perf_metricsA = { PerfThroughput.tag: self.throughput_recordA, - PerfLatencyP99.tag: self.latency_recordA, + RequestLatencyP99.tag: self.latency_recordA, } # # Record B self.throughput_recordB = PerfThroughput(500) - self.latency_recordB = PerfLatencyP99(30) + self.latency_recordB = RequestLatencyP99(30) self.perf_metricsB = { PerfThroughput.tag: self.throughput_recordB, - PerfLatencyP99.tag: self.latency_recordB, + RequestLatencyP99.tag: self.latency_recordB, } # # Record MM self.throughput_recordMM_0 = PerfThroughput(1000) - self.latency_recordMM_0 = PerfLatencyP99(20) + self.latency_recordMM_0 = RequestLatencyP99(20) self.throughput_recordMM_1 = PerfThroughput(2000) - self.latency_recordMM_1 = PerfLatencyP99(30) + self.latency_recordMM_1 = RequestLatencyP99(30) self.perf_metricsMM_0 = { PerfThroughput.tag: self.throughput_recordMM_0, - PerfLatencyP99.tag: self.latency_recordMM_0, + RequestLatencyP99.tag: self.latency_recordMM_0, } self.perf_metricsMM_1 = { PerfThroughput.tag: self.throughput_recordMM_1, - PerfLatencyP99.tag: self.latency_recordMM_1, + RequestLatencyP99.tag: self.latency_recordMM_1, } def _create_rcmA(self) -> RunConfigMeasurement: @@ -259,7 +259,7 @@ def test_is_better_than_perf_metric(self): # Changing the metric objectives to bias latency # this tips the scale in the favor of RCMB latency_bias_objectives = { - "test_model": {PerfThroughput.tag: 1, 
PerfLatencyP99.tag: 4} + "test_model": {PerfThroughput.tag: 1, RequestLatencyP99.tag: 4} } rcmA.set_perf_metric_objectives(latency_bias_objectives) rcmB.set_perf_metric_objectives(latency_bias_objectives) @@ -350,12 +350,12 @@ def test_is_passing_perf_constraints(self): rcmA = self._create_rcmA() # RCMA's latency is 40 - model_constraints = ModelConstraints({PerfLatencyP99.tag: 50}) + model_constraints = ModelConstraints({RequestLatencyP99.tag: 50}) run_constraints = RunConstraints({"test_model": model_constraints}) rcmA.set_constraints(run_constraints) self.assertTrue(rcmA.is_passing_constraints()) - model_constraints = ModelConstraints({PerfLatencyP99.tag: 20}) + model_constraints = ModelConstraints({RequestLatencyP99.tag: 20}) run_constraints = RunConstraints({"test_model": model_constraints}) rcmA.set_constraints(run_constraints) self.assertFalse(rcmA.is_passing_constraints()) diff --git a/genai-perf/tests/test_utils.py b/genai-perf/tests/test_utils.py index 24cf79ad..5caa702e 100644 --- a/genai-perf/tests/test_utils.py +++ b/genai-perf/tests/test_utils.py @@ -37,8 +37,8 @@ from genai_perf.record.types.gpu_utilization import GPUUtilization from genai_perf.record.types.input_sequence_length import InputSequenceLength from genai_perf.record.types.output_sequence_length import OutputSequenceLength -from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 from genai_perf.types import GpuId, ModelObjectiveParameters, PerfRecords @@ -69,7 +69,7 @@ def create_perf_metrics( if throughput: perf_metrics[PerfThroughput.tag] = PerfThroughput(throughput) if latency: - perf_metrics[PerfLatencyP99.tag] = PerfLatencyP99(latency) + perf_metrics[RequestLatencyP99.tag] = RequestLatencyP99(latency) if input_seq_length: perf_metrics[InputSequenceLength.tag] = InputSequenceLength(input_seq_length) if output_seq_length: From 776550db30ed0e2afaf8e96c48963fd0e320a9b8 Mon Sep 17 00:00:00 2001 From: braf Date: Mon, 4 Nov 2024 22:57:22 +0000 Subject: [PATCH 03/16] Updating time to first token records --- .../record/types/time_to_first_token_avg.py | 31 ++--------------- .../record/types/time_to_first_token_base.py | 33 +++++-------------- .../record/types/time_to_first_token_max.py | 29 ++-------------- .../record/types/time_to_first_token_min.py | 31 ++--------------- .../record/types/time_to_first_token_p25.py | 31 ++--------------- .../record/types/time_to_first_token_p50.py | 31 ++--------------- .../record/types/time_to_first_token_p75.py | 31 ++--------------- .../record/types/time_to_first_token_p90.py | 31 ++--------------- .../record/types/time_to_first_token_p95.py | 31 ++--------------- .../record/types/time_to_first_token_p99.py | 31 ++--------------- .../record/types/time_to_first_token_std.py | 33 +++++++++++++++++++ genai-perf/tests/test_record.py | 1 + 12 files changed, 68 insertions(+), 276 deletions(-) create mode 100644 genai-perf/genai_perf/record/types/time_to_first_token_std.py diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_avg.py b/genai-perf/genai_perf/record/types/time_to_first_token_avg.py index 3a9b861b..8f2f095f 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_avg.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_avg.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenAvg(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for avg 
Time to first token metric """ - tag = "time_to_first_token_avg" + tag = TimeToFirstTokenBase.base_tag + "_avg" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "Avg Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_base.py b/genai-perf/genai_perf/record/types/time_to_first_token_base.py index f7216f3f..04f77354 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_base.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_base.py @@ -15,44 +15,27 @@ from functools import total_ordering from genai_perf.record.record import DecreasingRecord +from genai_perf.types import RecordValue @total_ordering class TimeToFirstTokenBase(DecreasingRecord): """ - A base class record for perf_analyzer time to first token metric + A base class record for the time to first token metric """ - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ + base_tag = "time_to_first_token" + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: super().__init__(value, timestamp) - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "TimeToFirstTokenBase") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "TimeToFirstTokenBase") -> bool: return self.value() > other.value() - def __add__(self, other): + def __add__(self, other: "TimeToFirstTokenBase") -> "TimeToFirstTokenBase": """ Allows adding two records together to produce a brand new record. @@ -60,7 +43,7 @@ def __add__(self, other): return self.__class__(value=(self.value() + other.value())) - def __sub__(self, other): + def __sub__(self, other: "TimeToFirstTokenBase") -> "TimeToFirstTokenBase": """ Allows subbing two records together to produce a brand new record. 
diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_max.py b/genai-perf/genai_perf/record/types/time_to_first_token_max.py index eaba2b75..97ac5778 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_max.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_max.py @@ -23,36 +23,11 @@ class TimeToFirstTokenMax(TimeToFirstTokenBase): A record for perf_analyzer Time to first token metric """ - tag = "time_to_first_token_max" + tag = TimeToFirstTokenBase.base_tag + "_max" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "Max Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_min.py b/genai-perf/genai_perf/record/types/time_to_first_token_min.py index 15612bee..6b42e58c 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_min.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_min.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenMin(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for min Time to first token metric """ - tag = "time_to_first_token_min" + tag = TimeToFirstTokenBase.base_tag + "_min" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "Min Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p25.py b/genai-perf/genai_perf/record/types/time_to_first_token_p25.py index cd472f67..070d3c17 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p25.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p25.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP25(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p25 Time to first token metric """ - tag = "time_to_first_token_p25" + tag = TimeToFirstTokenBase.base_tag + "_p25" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p25 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p50.py b/genai-perf/genai_perf/record/types/time_to_first_token_p50.py index 68b8a5a8..cbc3eb4f 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p50.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p50.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP50(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p50 Time to first token metric """ - tag = "time_to_first_token_p50" + tag = TimeToFirstTokenBase.base_tag + "_p50" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "p50 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p75.py b/genai-perf/genai_perf/record/types/time_to_first_token_p75.py index 6c22469b..a55f3b26 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p75.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p75.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP75(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p75 Time to first token metric """ - tag = "time_to_first_token_p75" + tag = TimeToFirstTokenBase.base_tag + "_p75" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p75 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p90.py b/genai-perf/genai_perf/record/types/time_to_first_token_p90.py index 431ce6eb..c44e2766 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p90.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p90.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP90(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p90 Time to first token metric """ - tag = "time_to_first_token_p90" + tag = TimeToFirstTokenBase.base_tag + "_p90" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "p90 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p95.py b/genai-perf/genai_perf/record/types/time_to_first_token_p95.py index 5b118301..74905500 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p95.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p95.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP95(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p95 Time to first token metric """ - tag = "time_to_first_token_p95" + tag = TimeToFirstTokenBase.base_tag + "_p95" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p95 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p99.py b/genai-perf/genai_perf/record/types/time_to_first_token_p99.py index 68125f2e..dd5a16ed 100644 --- a/genai-perf/genai_perf/record/types/time_to_first_token_p99.py +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p99.py @@ -20,39 +20,14 @@ @total_ordering class TimeToFirstTokenP99(TimeToFirstTokenBase): """ - A record for perf_analyzer Time to first token metric + A record for p99 Time to first token metric """ - tag = "time_to_first_token_p99" + tag = TimeToFirstTokenBase.base_tag + "_p99" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "p99 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_std.py b/genai-perf/genai_perf/record/types/time_to_first_token_std.py new file mode 100644 index 00000000..148deb89 --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_std.py @@ -0,0 +1,33 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from functools import total_ordering
+
+from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase
+
+
+@total_ordering
+class TimeToFirstTokenStd(TimeToFirstTokenBase):
+    """
+    A record for std Time to first token metric
+    """
+
+    tag = TimeToFirstTokenBase.base_tag + "_std"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "Std Time To First Token (ms)"
diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py
index 498664a2..83b04ef8 100644
--- a/genai-perf/tests/test_record.py
+++ b/genai-perf/tests/test_record.py
@@ -67,6 +67,7 @@ def setUp(self):
             "time_to_first_token_min",
             "time_to_first_token_max",
             "time_to_first_token_avg",
+            "time_to_first_token_std",
             "time_to_first_token_p25",
             "time_to_first_token_p50",
             "time_to_first_token_p75",

From 00ba0947b898cc1760dd11416a7ef17aa0af7509 Mon Sep 17 00:00:00 2001
From: braf
Date: Mon, 4 Nov 2024 23:08:29 +0000
Subject: [PATCH 04/16] Updating inter token latency records

---
 .../record/types/inter_token_latency_avg.py   | 31 ++---------------
 .../record/types/inter_token_latency_base.py  | 33 +++++--------------
 .../record/types/inter_token_latency_max.py   | 31 ++---------------
 .../record/types/inter_token_latency_min.py   | 31 ++---------------
 .../record/types/inter_token_latency_p25.py   | 31 ++---------------
 .../record/types/inter_token_latency_p50.py   | 31 ++---------------
 .../record/types/inter_token_latency_p75.py   | 31 ++---------------
 .../record/types/inter_token_latency_p90.py   | 31 ++---------------
 .../record/types/inter_token_latency_p95.py   | 31 ++---------------
 .../record/types/inter_token_latency_p99.py   | 31 ++---------------
 .../record/types/inter_token_latency_std.py   | 33 +++++++++++++++++++
 .../record/types/time_to_first_token_max.py   |  2 +-
 genai-perf/tests/test_record.py               |  1 +
 13 files changed, 70 insertions(+), 278 deletions(-)
 create mode 100644 genai-perf/genai_perf/record/types/inter_token_latency_std.py

diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_avg.py b/genai-perf/genai_perf/record/types/inter_token_latency_avg.py
index d6a6f947..0c72abbf 100644
--- a/genai-perf/genai_perf/record/types/inter_token_latency_avg.py
+++ b/genai-perf/genai_perf/record/types/inter_token_latency_avg.py
@@ -20,39 +20,14 @@
 @total_ordering
 class InterTokenLatencyAvg(InterTokenLatencyBase):
     """
-    A record for perf_analyzer Inter token latency metric
+    A record for avg Inter token latency metric
     """
 
-    tag = "inter_token_latency_avg"
+    tag = InterTokenLatencyBase.base_tag + "_avg"
 
     def __init__(self, value, timestamp=0):
-        """
-        Parameters
-        ----------
-        value : float
-            the latency extracted from the perf analyzer output
-        timestamp : float
-            Elapsed time from start of program
-        """
-
         super().__init__(value, timestamp)
 
     @classmethod
-    def header(cls, aggregation_tag=False):
-        """
-        Parameters
-        ----------
-        aggregation_tag: bool
-            An optional tag that may be displayed
-            as part of the header indicating that
-            this record has been aggregated using
-            max, min or average etc.
-
-        Returns
-        -------
-        str
-            The full name of the
-            metric.
- """ - + def header(cls, aggregation_tag=False) -> str: return "Avg Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_base.py b/genai-perf/genai_perf/record/types/inter_token_latency_base.py index f267969c..37aafc98 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_base.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_base.py @@ -15,44 +15,27 @@ from functools import total_ordering from genai_perf.record.record import DecreasingRecord +from genai_perf.types import RecordValue @total_ordering class InterTokenLatencyBase(DecreasingRecord): """ - A record for perf_analyzer Inter token latency metric + A base class record for the inter-token latency metric """ - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ + base_tag = "inter_token_latency" + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: super().__init__(value, timestamp) - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "InterTokenLatencyBase") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "InterTokenLatencyBase") -> bool: return self.value() > other.value() - def __add__(self, other): + def __add__(self, other: "InterTokenLatencyBase") -> "InterTokenLatencyBase": """ Allows adding two records together to produce a brand new record. @@ -60,7 +43,7 @@ def __add__(self, other): return self.__class__(value=(self.value() + other.value())) - def __sub__(self, other): + def __sub__(self, other: "InterTokenLatencyBase") -> "InterTokenLatencyBase": """ Allows subbing two records together to produce a brand new record. diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_max.py b/genai-perf/genai_perf/record/types/inter_token_latency_max.py index d50d8cd1..f926ca2c 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_max.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_max.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyMax(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for max Inter token latency metric """ - tag = "inter_token_latency_max" + tag = InterTokenLatencyBase.base_tag + "_max" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "Max Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_min.py b/genai-perf/genai_perf/record/types/inter_token_latency_min.py index 4a848480..6152977a 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_min.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_min.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyMin(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for min Inter token latency metric """ - tag = "inter_token_latency_min" + tag = InterTokenLatencyBase.base_tag + "_min" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "Min Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p25.py b/genai-perf/genai_perf/record/types/inter_token_latency_p25.py index 261caae6..86dec41b 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p25.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p25.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP25(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p25 Inter token latency metric """ - tag = "inter_token_latency_p25" + tag = InterTokenLatencyBase.base_tag + "_p25" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p25 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p50.py b/genai-perf/genai_perf/record/types/inter_token_latency_p50.py index 4f277996..7b047c9a 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p50.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p50.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP50(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p50 Inter token latency metric """ - tag = "inter_token_latency_p50" + tag = InterTokenLatencyBase.base_tag + "_p50" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "p50 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p75.py b/genai-perf/genai_perf/record/types/inter_token_latency_p75.py index f95a938a..73ddeea6 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p75.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p75.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP75(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p75 Inter token latency metric """ - tag = "inter_token_latency_p75" + tag = InterTokenLatencyBase.base_tag + "_p75" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p75 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p90.py b/genai-perf/genai_perf/record/types/inter_token_latency_p90.py index 14b9e489..b316f9bd 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p90.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p90.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP90(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p90 Inter token latency metric """ - tag = "inter_token_latency_p90" + tag = InterTokenLatencyBase.base_tag + "_p90" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "p90 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p95.py b/genai-perf/genai_perf/record/types/inter_token_latency_p95.py index 685d25b6..ef350d82 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p95.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p95.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP95(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p95 Inter token latency metric """ - tag = "inter_token_latency_p95" + tag = InterTokenLatencyBase.base_tag + "_p95" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - + def header(cls, aggregation_tag=False) -> str: return "p95 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p99.py b/genai-perf/genai_perf/record/types/inter_token_latency_p99.py index 73354707..61952c61 100644 --- a/genai-perf/genai_perf/record/types/inter_token_latency_p99.py +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p99.py @@ -20,39 +20,14 @@ @total_ordering class InterTokenLatencyP99(InterTokenLatencyBase): """ - A record for perf_analyzer Inter token latency metric + A record for p99 Inter token latency metric """ - tag = "inter_token_latency_p99" + tag = InterTokenLatencyBase.base_tag + "_p99" def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - super().__init__(value, timestamp) @classmethod - def header(cls, aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(cls, aggregation_tag=False) -> str: return "p99 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_std.py b/genai-perf/genai_perf/record/types/inter_token_latency_std.py new file mode 100644 index 00000000..9aa86721 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_std.py @@ -0,0 +1,33 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from functools import total_ordering
+
+from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase
+
+
+@total_ordering
+class InterTokenLatencyStd(InterTokenLatencyBase):
+    """
+    A record for std Inter token latency metric
+    """
+
+    tag = InterTokenLatencyBase.base_tag + "_std"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "Std Inter Token Latency (ms)"
diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_max.py b/genai-perf/genai_perf/record/types/time_to_first_token_max.py
index 97ac5778..e4c35fea 100644
--- a/genai-perf/genai_perf/record/types/time_to_first_token_max.py
+++ b/genai-perf/genai_perf/record/types/time_to_first_token_max.py
@@ -20,7 +20,7 @@
 @total_ordering
 class TimeToFirstTokenMax(TimeToFirstTokenBase):
     """
-    A record for perf_analyzer Time to first token metric
+    A record for max Time to first token metric
     """
 
     tag = TimeToFirstTokenBase.base_tag + "_max"
diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py
index 83b04ef8..416b18a1 100644
--- a/genai-perf/tests/test_record.py
+++ b/genai-perf/tests/test_record.py
@@ -58,6 +58,7 @@ def setUp(self):
             "inter_token_latency_min",
             "inter_token_latency_max",
             "inter_token_latency_avg",
+            "inter_token_latency_std",
             "inter_token_latency_p25",
             "inter_token_latency_p50",
             "inter_token_latency_p75",

From 3c4894eb46f8932e38e06e82dd14c4c641d806bf Mon Sep 17 00:00:00 2001
From: braf
Date: Mon, 4 Nov 2024 23:19:31 +0000
Subject: [PATCH 05/16] Updating output token throughput record

---
 genai-perf/genai_perf/record/record.py        |  2 +-
 .../record/types/output_token_throughput.py   | 52 +++----------------
 2 files changed, 9 insertions(+), 45 deletions(-)

diff --git a/genai-perf/genai_perf/record/record.py b/genai-perf/genai_perf/record/record.py
index ca19be15..1c2788d7 100644
--- a/genai-perf/genai_perf/record/record.py
+++ b/genai-perf/genai_perf/record/record.py
@@ -158,7 +158,7 @@ def create_class_from_checkpoint(cls, record_dict) -> "Record":
             setattr(record, key, record_dict[key])
         return record
 
-    def value(self) -> Union[float, int]:
+    def value(self) -> RecordValue:
         """
         This method returns the value of recorded metric
 
diff --git a/genai-perf/genai_perf/record/types/output_token_throughput.py b/genai-perf/genai_perf/record/types/output_token_throughput.py
index d5635491..084a5e32 100644
--- a/genai-perf/genai_perf/record/types/output_token_throughput.py
+++ b/genai-perf/genai_perf/record/types/output_token_throughput.py
@@ -15,27 +15,18 @@
 from functools import total_ordering
 
 from genai_perf.record.record import IncreasingRecord
+from genai_perf.types import RecordValue
 
 
 @total_ordering
 class OutputTokenThroughput(IncreasingRecord):
     """
-    A record for perf_analyzer
-    metric 'Output Token Throughput'
+    A record for Output token throughput
     """
 
     tag = "output_token_throughput"
 
-    def __init__(self, value, timestamp=0):
-        """
-        Parameters
-        ----------
-        value : float
-            The throughput from the perf analyzer output
-        timestamp : float
-            Elapsed time from start of program
-        """
-
+    def __init__(self, value: RecordValue, timestamp: int = 0) -> None:
         super().__init__(value, timestamp)
 
     @staticmethod
@@ -50,43 +41,16 @@ def value_function():
         return sum
 
     @staticmethod
-    def header(aggregation_tag=False):
-        """
-        Parameters
-        ----------
-        aggregation_tag: bool
-            An optional tag that may be displayed
-            as part of the header indicating that
-            this record has been aggregated using
-            max,
min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - + def header(aggregation_tag=False) -> str: return "Output Token Throughput (infer/sec)" - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "OutputTokenThroughput") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "OutputTokenThroughput") -> bool: return self.value() < other.value() - def __add__(self, other): + def __add__(self, other: "OutputTokenThroughput") -> "OutputTokenThroughput": """ Allows adding two records together to produce a brand new record. @@ -94,7 +58,7 @@ def __add__(self, other): return self.__class__(value=(self.value() + other.value())) - def __sub__(self, other): + def __sub__(self, other: "OutputTokenThroughput") -> "OutputTokenThroughput": """ Allows subtracting two records together to produce a brand new record. From 48116910ad9da9e134c09cbf04e58ce2c3f66ad4 Mon Sep 17 00:00:00 2001 From: braf Date: Mon, 4 Nov 2024 23:36:17 +0000 Subject: [PATCH 06/16] Adding output token throughput per request records --- .../record/types/output_token_throughput.py | 2 +- .../output_token_throughput_per_request.py | 103 ------------------ ...output_token_throughput_per_request_avg.py | 35 ++++++ ...utput_token_throughput_per_request_base.py | 56 ++++++++++ ...output_token_throughput_per_request_max.py | 35 ++++++ ...output_token_throughput_per_request_min.py | 35 ++++++ ...output_token_throughput_per_request_p25.py | 35 ++++++ ...output_token_throughput_per_request_p50.py | 35 ++++++ ...output_token_throughput_per_request_p75.py | 35 ++++++ ...output_token_throughput_per_request_p90.py | 35 ++++++ ...output_token_throughput_per_request_p95.py | 35 ++++++ ...output_token_throughput_per_request_p99.py | 35 ++++++ ...output_token_throughput_per_request_std.py | 35 ++++++ genai-perf/tests/test_record.py | 11 +- 14 files changed, 417 insertions(+), 105 deletions(-) delete mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_avg.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_base.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_max.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_min.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p25.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p50.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p75.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p90.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p95.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_p99.py create mode 100644 genai-perf/genai_perf/record/types/output_token_throughput_per_request_std.py diff --git a/genai-perf/genai_perf/record/types/output_token_throughput.py b/genai-perf/genai_perf/record/types/output_token_throughput.py index 084a5e32..d773da85 100644 --- a/genai-perf/genai_perf/record/types/output_token_throughput.py +++ 
b/genai-perf/genai_perf/record/types/output_token_throughput.py @@ -42,7 +42,7 @@ def value_function(): @staticmethod def header(aggregation_tag=False) -> str: - return "Output Token Throughput (infer/sec)" + return "Output Token Throughput (tokens/sec)" def __eq__(self, other: "OutputTokenThroughput") -> bool: # type: ignore return self.value() == other.value() diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request.py deleted file mode 100644 index 04065ef3..00000000 --- a/genai-perf/genai_perf/record/types/output_token_throughput_per_request.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class OutputTokenThroughputPerRequest(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Output Token Throughput Per Request' - """ - - tag = "output_token_throughput_per_request" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Output Token Throughput Per Request (infer/sec)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_avg.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_avg.py new file mode 100644 index 00000000..97dbd020 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_avg.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestAvg(OutputTokenThroughputPerRequestBase): + """ + A record for avg output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Avg. Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_base.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_base.py new file mode 100644 index 00000000..321aad55 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_base.py @@ -0,0 +1,56 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord +from genai_perf.types import RecordValue + + +@total_ordering +class OutputTokenThroughputPerRequestBase(IncreasingRecord): + """ + A base class for the output token throughput per request metric + """ + + base_tag = "output_token_throughput_per_request" + + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: + super().__init__(value, timestamp) + + def __eq__(self, other: "OutputTokenThroughputPerRequestBase") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "OutputTokenThroughputPerRequestBase") -> bool: + return self.value() < other.value() + + def __add__( + self, other: "OutputTokenThroughputPerRequestBase" + ) -> "OutputTokenThroughputPerRequestBase": + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__( + self, other: "OutputTokenThroughputPerRequestBase" + ) -> "OutputTokenThroughputPerRequestBase": + """ + Allows subbing two records together + to produce a brand new record. 
+ """ + + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_max.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_max.py new file mode 100644 index 00000000..c15e7794 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_max.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestMax(OutputTokenThroughputPerRequestBase): + """ + A record for max output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_max" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Max Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_min.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_min.py new file mode 100644 index 00000000..b84ad598 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_min.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestMin(OutputTokenThroughputPerRequestBase): + """ + A record for min output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_min" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Min Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p25.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p25.py new file mode 100644 index 00000000..dcbd3e79 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p25.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP25(OutputTokenThroughputPerRequestBase): + """ + A record for p25 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p25" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p25 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p50.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p50.py new file mode 100644 index 00000000..70636297 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p50.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP50(OutputTokenThroughputPerRequestBase): + """ + A record for p50 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p50" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p50 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p75.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p75.py new file mode 100644 index 00000000..be01cb8a --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p75.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP75(OutputTokenThroughputPerRequestBase): + """ + A record for p75 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p75" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p75 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p90.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p90.py new file mode 100644 index 00000000..175ce97e --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p90.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP90(OutputTokenThroughputPerRequestBase): + """ + A record for p90 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p90" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p90 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p95.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p95.py new file mode 100644 index 00000000..5d1f2d83 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p95.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP95(OutputTokenThroughputPerRequestBase): + """ + A record for p95 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p95" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p95 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p99.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p99.py new file mode 100644 index 00000000..68261ab5 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_p99.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestP99(OutputTokenThroughputPerRequestBase): + """ + A record for P99 output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_p99" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p99 Output Token Per Request (tokens/sec)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput_per_request_std.py b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_std.py new file mode 100644 index 00000000..22a5dc5c --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput_per_request_std.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_token_throughput_per_request_base import ( + OutputTokenThroughputPerRequestBase, +) + + +@total_ordering +class OutputTokenThroughputPerRequestStd(OutputTokenThroughputPerRequestBase): + """ + A record for std output token per request metric + """ + + tag = OutputTokenThroughputPerRequestBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std Output Token Per Request (tokens/sec)" diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py index 416b18a1..704446b4 100644 --- a/genai-perf/tests/test_record.py +++ b/genai-perf/tests/test_record.py @@ -94,7 +94,16 @@ def setUp(self): "input_sequence_length", "output_sequence_length", "output_token_throughput", - "output_token_throughput_per_request", + "output_token_throughput_per_request_min", + "output_token_throughput_per_request_max", + "output_token_throughput_per_request_avg", + "output_token_throughput_per_request_std", + "output_token_throughput_per_request_p25", + "output_token_throughput_per_request_p50", + "output_token_throughput_per_request_p75", + "output_token_throughput_per_request_p90", + "output_token_throughput_per_request_p95", + "output_token_throughput_per_request_p99", "gpu_free_memory", "gpu_utilization", "cpu_available_ram", From 05e4f76ff468b4b6a0da7fe9c3e3075fd5b88187 Mon Sep 17 00:00:00 2001 From: braf Date: Tue, 5 Nov 2024 00:01:22 +0000 Subject: [PATCH 07/16] Adding output sequence length (OSL) records --- .../record/types/output_sequence_length.py | 103 ------------------ .../types/output_sequence_length_avg.py | 33 ++++++ .../types/output_sequence_length_base.py | 52 +++++++++ .../types/output_sequence_length_max.py | 33 ++++++ .../types/output_sequence_length_min.py | 33 ++++++ .../types/output_sequence_length_p25.py | 33 ++++++ 
.../types/output_sequence_length_p50.py | 33 ++++++ .../types/output_sequence_length_p75.py | 33 ++++++ .../types/output_sequence_length_p90.py | 33 ++++++ .../types/output_sequence_length_p95.py | 33 ++++++ .../types/output_sequence_length_p99.py | 33 ++++++ .../types/output_sequence_length_std.py | 33 ++++++ genai-perf/tests/test_record.py | 11 +- 13 files changed, 392 insertions(+), 104 deletions(-) delete mode 100644 genai-perf/genai_perf/record/types/output_sequence_length.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_avg.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_base.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_max.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_min.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p25.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p50.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p75.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p90.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p95.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_p99.py create mode 100644 genai-perf/genai_perf/record/types/output_sequence_length_std.py diff --git a/genai-perf/genai_perf/record/types/output_sequence_length.py b/genai-perf/genai_perf/record/types/output_sequence_length.py deleted file mode 100644 index f42f0f18..00000000 --- a/genai-perf/genai_perf/record/types/output_sequence_length.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class OutputSequenceLength(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Output Sequence Length' - """ - - tag = "output_sequence_length" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "Output Sequence Length" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_avg.py b/genai-perf/genai_perf/record/types/output_sequence_length_avg.py new file mode 100644 index 00000000..d6613151 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_avg.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthAvg(OutputSequenceLengthBase): + """ + A record for avg output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Avg. Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_base.py b/genai-perf/genai_perf/record/types/output_sequence_length_base.py new file mode 100644 index 00000000..5c92427d --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_base.py @@ -0,0 +1,52 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord +from genai_perf.types import RecordValue + + +@total_ordering +class OutputSequenceLengthBase(IncreasingRecord): + """ + A base class for the output sequence length (OSL) metric + """ + + base_tag = "output_sequence_length" + + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: + super().__init__(value, timestamp) + + def __eq__(self, other: "OutputSequenceLengthBase") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "OutputSequenceLengthBase") -> bool: + return self.value() < other.value() + + def __add__(self, other: "OutputSequenceLengthBase") -> "OutputSequenceLengthBase": + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "OutputSequenceLengthBase") -> "OutputSequenceLengthBase": + """ + Allows subbing two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_max.py b/genai-perf/genai_perf/record/types/output_sequence_length_max.py new file mode 100644 index 00000000..cafcb7b4 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_max.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthMax(OutputSequenceLengthBase): + """ + A record for max output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_max" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Max Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_min.py b/genai-perf/genai_perf/record/types/output_sequence_length_min.py new file mode 100644 index 00000000..2dbfb94e --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_min.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthMin(OutputSequenceLengthBase): + """ + A record for min output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_min" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Min Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p25.py b/genai-perf/genai_perf/record/types/output_sequence_length_p25.py new file mode 100644 index 00000000..1a199072 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p25.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP25(OutputSequenceLengthBase): + """ + A record for p25 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p25" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p25 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p50.py b/genai-perf/genai_perf/record/types/output_sequence_length_p50.py new file mode 100644 index 00000000..930152ed --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p50.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP50(OutputSequenceLengthBase): + """ + A record for p50 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p50" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p50 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p75.py b/genai-perf/genai_perf/record/types/output_sequence_length_p75.py new file mode 100644 index 00000000..5758051e --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p75.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP75(OutputSequenceLengthBase): + """ + A record for p75 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p75" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p75 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p90.py b/genai-perf/genai_perf/record/types/output_sequence_length_p90.py new file mode 100644 index 00000000..d2fb719a --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p90.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP90(OutputSequenceLengthBase): + """ + A record for p90 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p90" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p90 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p95.py b/genai-perf/genai_perf/record/types/output_sequence_length_p95.py new file mode 100644 index 00000000..cce6c7a3 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p95.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP95(OutputSequenceLengthBase): + """ + A record for p95 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p95" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p95 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_p99.py b/genai-perf/genai_perf/record/types/output_sequence_length_p99.py new file mode 100644 index 00000000..e419fc73 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_p99.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthP99(OutputSequenceLengthBase): + """ + A record for p99 output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_p99" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p99 Output Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/output_sequence_length_std.py b/genai-perf/genai_perf/record/types/output_sequence_length_std.py new file mode 100644 index 00000000..0ef8038f --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_sequence_length_std.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.output_sequence_length_base import OutputSequenceLengthBase + + +@total_ordering +class OutputSequenceLengthStd(OutputSequenceLengthBase): + """ + A record for std output sequence length (OSL) metric + """ + + tag = OutputSequenceLengthBase.base_tag + "_std" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Std Output Sequence Length (tokens)" diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py index 704446b4..75bd30a3 100644 --- a/genai-perf/tests/test_record.py +++ b/genai-perf/tests/test_record.py @@ -92,7 +92,6 @@ def setUp(self): for t in [ "perf_throughput", "input_sequence_length", - "output_sequence_length", "output_token_throughput", "output_token_throughput_per_request_min", "output_token_throughput_per_request_max", @@ -104,6 +103,16 @@ def setUp(self): "output_token_throughput_per_request_p90", "output_token_throughput_per_request_p95", "output_token_throughput_per_request_p99", + "output_sequence_length_min", + "output_sequence_length_max", + "output_sequence_length_avg", + "output_sequence_length_std", + "output_sequence_length_p25", + "output_sequence_length_p50", + "output_sequence_length_p75", + "output_sequence_length_p90", + "output_sequence_length_p95", + "output_sequence_length_p99", "gpu_free_memory", "gpu_utilization", "cpu_available_ram", From b53e8f568e26c45d7118c93d19c243895b320f29 Mon Sep 17 00:00:00 2001 From: braf Date: Tue, 5 Nov 2024 00:16:03 +0000 Subject: [PATCH 08/16] Adding Input sequence length (ISL) records --- .../record/types/input_sequence_length.py | 103 ------------------ .../record/types/input_sequence_length_avg.py | 33 ++++++ .../types/input_sequence_length_base.py | 52 +++++++++ .../record/types/input_sequence_length_max.py | 33 ++++++ .../record/types/input_sequence_length_min.py | 33 ++++++ .../record/types/input_sequence_length_p25.py | 33 ++++++ .../record/types/input_sequence_length_p50.py | 33 ++++++ 
.../record/types/input_sequence_length_p75.py | 33 ++++++ .../record/types/input_sequence_length_p90.py | 33 ++++++ .../record/types/input_sequence_length_p95.py | 33 ++++++ .../record/types/input_sequence_length_p99.py | 33 ++++++ .../record/types/input_sequence_length_std.py | 33 ++++++ genai-perf/tests/test_record.py | 11 +- 13 files changed, 392 insertions(+), 104 deletions(-) delete mode 100644 genai-perf/genai_perf/record/types/input_sequence_length.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_avg.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_base.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_max.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_min.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p25.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p50.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p75.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p90.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p95.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_p99.py create mode 100644 genai-perf/genai_perf/record/types/input_sequence_length_std.py diff --git a/genai-perf/genai_perf/record/types/input_sequence_length.py b/genai-perf/genai_perf/record/types/input_sequence_length.py deleted file mode 100644 index 8d7d4065..00000000 --- a/genai-perf/genai_perf/record/types/input_sequence_length.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class InputSequenceLength(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Input Sequence Length' - """ - - tag = "input_sequence_length" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "Input Sequence Length" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_avg.py b/genai-perf/genai_perf/record/types/input_sequence_length_avg.py new file mode 100644 index 00000000..2fc71b62 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_avg.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthAvg(InputSequenceLengthBase): + """ + A record for avg input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Avg. Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_base.py b/genai-perf/genai_perf/record/types/input_sequence_length_base.py new file mode 100644 index 00000000..df15df5f --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_base.py @@ -0,0 +1,52 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord +from genai_perf.types import RecordValue + + +@total_ordering +class InputSequenceLengthBase(IncreasingRecord): + """ + A base class for the input sequence length (ISL) metric + """ + + base_tag = "input_sequence_length" + + def __init__(self, value: RecordValue, timestamp: int = 0) -> None: + super().__init__(value, timestamp) + + def __eq__(self, other: "InputSequenceLengthBase") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "InputSequenceLengthBase") -> bool: + return self.value() < other.value() + + def __add__(self, other: "InputSequenceLengthBase") -> "InputSequenceLengthBase": + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "InputSequenceLengthBase") -> "InputSequenceLengthBase": + """ + Allows subbing two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_max.py b/genai-perf/genai_perf/record/types/input_sequence_length_max.py new file mode 100644 index 00000000..84e3964e --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_max.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthMax(InputSequenceLengthBase): + """ + A record for max input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_max" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Max Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_min.py b/genai-perf/genai_perf/record/types/input_sequence_length_min.py new file mode 100644 index 00000000..a0950d55 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_min.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthMin(InputSequenceLengthBase): + """ + A record for min input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_min" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "Min Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p25.py b/genai-perf/genai_perf/record/types/input_sequence_length_p25.py new file mode 100644 index 00000000..4637c661 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p25.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP25(InputSequenceLengthBase): + """ + A record for p25 Input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p25" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p25 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p50.py b/genai-perf/genai_perf/record/types/input_sequence_length_p50.py new file mode 100644 index 00000000..a5e305b4 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p50.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP50(InputSequenceLengthBase): + """ + A record for p50 input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p50" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p50 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p75.py b/genai-perf/genai_perf/record/types/input_sequence_length_p75.py new file mode 100644 index 00000000..9b7e7d0c --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p75.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase + + +@total_ordering +class InputSequenceLengthP75(InputSequenceLengthBase): + """ + A record for p75 input sequence length (ISL) metric + """ + + tag = InputSequenceLengthBase.base_tag + "_p75" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False) -> str: + return "p75 Input Sequence Length (tokens)" diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p90.py b/genai-perf/genai_perf/record/types/input_sequence_length_p90.py new file mode 100644 index 00000000..90701d55 --- /dev/null +++ b/genai-perf/genai_perf/record/types/input_sequence_length_p90.py @@ -0,0 +1,33 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from functools import total_ordering
+
+from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase
+
+
+@total_ordering
+class InputSequenceLengthP90(InputSequenceLengthBase):
+    """
+    A record for p90 Input sequence length (ISL) metric
+    """
+
+    tag = InputSequenceLengthBase.base_tag + "_p90"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "p90 Input Sequence Length (tokens)"
diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p95.py b/genai-perf/genai_perf/record/types/input_sequence_length_p95.py
new file mode 100644
index 00000000..1a4a6492
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/input_sequence_length_p95.py
@@ -0,0 +1,33 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+
+from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase
+
+
+@total_ordering
+class InputSequenceLengthP95(InputSequenceLengthBase):
+    """
+    A record for p95 input sequence length (ISL) metric
+    """
+
+    tag = InputSequenceLengthBase.base_tag + "_p95"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "p95 Input Sequence Length (tokens)"
diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_p99.py b/genai-perf/genai_perf/record/types/input_sequence_length_p99.py
new file mode 100644
index 00000000..fbb39b08
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/input_sequence_length_p99.py
@@ -0,0 +1,33 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+
+from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase
+
+
+@total_ordering
+class InputSequenceLengthP99(InputSequenceLengthBase):
+    """
+    A record for p99 input sequence length (ISL) metric
+    """
+
+    tag = InputSequenceLengthBase.base_tag + "_p99"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "p99 Input Sequence Length (tokens)"
diff --git a/genai-perf/genai_perf/record/types/input_sequence_length_std.py b/genai-perf/genai_perf/record/types/input_sequence_length_std.py
new file mode 100644
index 00000000..1d69f839
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/input_sequence_length_std.py
@@ -0,0 +1,33 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+
+from genai_perf.record.types.input_sequence_length_base import InputSequenceLengthBase
+
+
+@total_ordering
+class InputSequenceLengthStd(InputSequenceLengthBase):
+    """
+    A record for std input sequence length (ISL) metric
+    """
+
+    tag = InputSequenceLengthBase.base_tag + "_std"
+
+    def __init__(self, value, timestamp=0):
+        super().__init__(value, timestamp)
+
+    @classmethod
+    def header(cls, aggregation_tag=False) -> str:
+        return "Std Input Sequence Length (tokens)"
diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py
index 75bd30a3..05513445 100644
--- a/genai-perf/tests/test_record.py
+++ b/genai-perf/tests/test_record.py
@@ -91,7 +91,6 @@ def setUp(self):
             record_types[t]
             for t in [
                 "perf_throughput",
-                "input_sequence_length",
                 "output_token_throughput",
                 "output_token_throughput_per_request_min",
                 "output_token_throughput_per_request_max",
@@ -113,6 +112,16 @@ def setUp(self):
                 "output_sequence_length_p90",
                 "output_sequence_length_p95",
                 "output_sequence_length_p99",
+                "input_sequence_length_min",
+                "input_sequence_length_max",
+                "input_sequence_length_avg",
+                "input_sequence_length_std",
+                "input_sequence_length_p25",
+                "input_sequence_length_p50",
+                "input_sequence_length_p75",
+                "input_sequence_length_p90",
+                "input_sequence_length_p95",
+                "input_sequence_length_p99",
                 "gpu_free_memory",
                 "gpu_utilization",
                 "cpu_available_ram",

From 187159b1a7f03ace3e3c67da5b00f13f801ad866 Mon Sep 17 00:00:00 2001
From: braf
Date: Tue, 5 Nov 2024 00:17:31 +0000
Subject: [PATCH 09/16] Removing non-GAP records

---
 .../record/types/perf_client_response_wait.py | 95 -------------------
 .../record/types/perf_client_send_recv.py     | 95 -------------------
 .../record/types/perf_server_compute_infer.py | 95 -------------------
 .../record/types/perf_server_compute_input.py | 95 -------------------
 .../types/perf_server_compute_output.py       | 95 -------------------
 .../record/types/perf_server_queue.py         | 95 -------------------
 genai-perf/tests/test_record.py               |  6 --
 7 files changed, 576 deletions(-)
 delete mode 100644
genai-perf/genai_perf/record/types/perf_client_response_wait.py delete mode 100644 genai-perf/genai_perf/record/types/perf_client_send_recv.py delete mode 100644 genai-perf/genai_perf/record/types/perf_server_compute_infer.py delete mode 100644 genai-perf/genai_perf/record/types/perf_server_compute_input.py delete mode 100644 genai-perf/genai_perf/record/types/perf_server_compute_output.py delete mode 100644 genai-perf/genai_perf/record/types/perf_server_queue.py diff --git a/genai-perf/genai_perf/record/types/perf_client_response_wait.py b/genai-perf/genai_perf/record/types/perf_client_response_wait.py deleted file mode 100644 index 0204634f..00000000 --- a/genai-perf/genai_perf/record/types/perf_client_response_wait.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfClientResponseWait(DecreasingRecord): - """ - A record for perf_analyzer - metric 'Client response wait' - """ - - tag = "perf_client_response_wait" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Response Wait Time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfClientResponseWait(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfClientResponseWait(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_client_send_recv.py b/genai-perf/genai_perf/record/types/perf_client_send_recv.py deleted file mode 100644 index e6eca6a2..00000000 --- a/genai-perf/genai_perf/record/types/perf_client_send_recv.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfClientSendRecv(DecreasingRecord): - """ - A record for perf_analyzer - metric 'Client send/recv' - """ - - tag = "perf_client_send_recv" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Client Send/Recv (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfClientSendRecv(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfClientSendRecv(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_infer.py b/genai-perf/genai_perf/record/types/perf_server_compute_infer.py deleted file mode 100644 index 5e65dd9e..00000000 --- a/genai-perf/genai_perf/record/types/perf_server_compute_infer.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfServerComputeInfer(DecreasingRecord): - """ - A record for Server compute infer time - from the perf analyzer - """ - - tag = "perf_server_compute_infer" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Server Compute Infer time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfServerComputeInfer(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfServerComputeInfer(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_input.py b/genai-perf/genai_perf/record/types/perf_server_compute_input.py deleted file mode 100644 index 313f7a44..00000000 --- a/genai-perf/genai_perf/record/types/perf_server_compute_input.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfServerComputeInput(DecreasingRecord): - """ - A record for Server compute input time - from the perf analyzer - """ - - tag = "perf_server_compute_input" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "Server Compute Input time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfServerComputeInput(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfServerComputeInput(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_output.py b/genai-perf/genai_perf/record/types/perf_server_compute_output.py deleted file mode 100644 index 914e3cf5..00000000 --- a/genai-perf/genai_perf/record/types/perf_server_compute_output.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfServerComputeOutput(DecreasingRecord): - """ - A record for Server compute output time - from the perf analyzer - """ - - tag = "perf_server_compute_output" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Server Compute Output time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfServerComputeOutput(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfServerComputeOutput(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_queue.py b/genai-perf/genai_perf/record/types/perf_server_queue.py deleted file mode 100644 index 3f1c7144..00000000 --- a/genai-perf/genai_perf/record/types/perf_server_queue.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class PerfServerQueue(DecreasingRecord): - """ - A record for Server queue time - from the perf analyzer - """ - - tag = "perf_server_queue" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - the latency extracted from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return "Server Queue time (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfServerQueue(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return PerfServerQueue(value=(other.value() - self.value())) diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py index 05513445..90a8bc2a 100644 --- a/genai-perf/tests/test_record.py +++ b/genai-perf/tests/test_record.py @@ -77,12 +77,6 @@ def setUp(self): "time_to_first_token_p99", "gpu_used_memory", "cpu_used_ram", - "perf_server_compute_infer", - "perf_server_queue", - "perf_client_response_wait", - "perf_server_compute_output", - "perf_client_send_recv", - "perf_server_compute_input", "gpu_power_usage", ] } From cc4b1aa72ae301dfc73057e5a373859eb8aa0c9c Mon Sep 17 00:00:00 2001 From: braf Date: Tue, 5 Nov 2024 14:37:26 +0000 Subject: [PATCH 10/16] Adding telemetry records --- .../record/types/cpu_available_ram.py | 91 ------------------ .../genai_perf/record/types/cpu_used_ram.py | 91 ------------------ .../record/types/gpu_energy_consumption.py | 56 +++++++++++ .../record/types/gpu_free_memory.py | 94 ------------------- .../record/types/gpu_memory_used.py | 45 +++++++++ .../record/types/gpu_power_limit.py | 52 ++++++++++ .../record/types/gpu_power_usage.py | 63 +------------ .../record/types/gpu_total_memory.py | 65 ++----------- .../record/types/gpu_used_memory.py | 94 ------------------- .../record/types/gpu_utilization.py | 64 +------------ genai-perf/tests/test_record.py | 9 +- 11 files changed, 175 insertions(+), 549 deletions(-) delete mode 100644 genai-perf/genai_perf/record/types/cpu_available_ram.py delete mode 100644 genai-perf/genai_perf/record/types/cpu_used_ram.py create mode 100644 genai-perf/genai_perf/record/types/gpu_energy_consumption.py delete mode 100644 genai-perf/genai_perf/record/types/gpu_free_memory.py create mode 100644 genai-perf/genai_perf/record/types/gpu_memory_used.py create mode 100644 genai-perf/genai_perf/record/types/gpu_power_limit.py delete mode 100644 genai-perf/genai_perf/record/types/gpu_used_memory.py diff --git a/genai-perf/genai_perf/record/types/cpu_available_ram.py b/genai-perf/genai_perf/record/types/cpu_available_ram.py deleted file mode 100644 index b4e069ac..00000000 --- a/genai-perf/genai_perf/record/types/cpu_available_ram.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class CPUAvailableRAM(IncreasingRecord): - """ - The Available CPU memory - """ - - tag = "cpu_available_ram" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - CPU free memory - timestamp : int - The timestamp for the record in nanoseconds - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return ("Max " if aggregation_tag else "") + "RAM Available (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return CPUAvailableRAM(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return CPUAvailableRAM(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/cpu_used_ram.py b/genai-perf/genai_perf/record/types/cpu_used_ram.py deleted file mode 100644 index 7ee7f1d4..00000000 --- a/genai-perf/genai_perf/record/types/cpu_used_ram.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import DecreasingRecord - - -@total_ordering -class CPUUsedRAM(DecreasingRecord): - """ - The CPU memory usage record - """ - - tag = "cpu_used_ram" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - CPU used memory - timestamp : int - The timestamp for the record in nanoseconds - """ - - super().__init__(value, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
-        """
-
-        return ("Max " if aggregation_tag else "") + "RAM Usage (MB)"
-
-    def __eq__(self, other):
-        """
-        Allows checking for
-        equality between two records
-        """
-
-        return self.value() == other.value()
-
-    def __lt__(self, other):
-        """
-        Allows checking if
-        this record is better than
-        the other
-        """
-
-        return self.value() > other.value()
-
-    def __add__(self, other):
-        """
-        Allows adding two records together
-        to produce a brand new record.
-        """
-
-        return CPUUsedRAM(value=(self.value() + other.value()))
-
-    def __sub__(self, other):
-        """
-        Allows subtracting two records together
-        to produce a brand new record.
-        """
-
-        return CPUUsedRAM(value=(other.value() - self.value()))
diff --git a/genai-perf/genai_perf/record/types/gpu_energy_consumption.py b/genai-perf/genai_perf/record/types/gpu_energy_consumption.py
new file mode 100644
index 00000000..15db4fb2
--- /dev/null
+++ b/genai-perf/genai_perf/record/types/gpu_energy_consumption.py
@@ -0,0 +1,56 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import total_ordering
+
+from genai_perf.record.gpu_record import DecreasingGPURecord
+
+
+@total_ordering
+class GPUEnergyConsumption(DecreasingGPURecord):
+    """
+    GPU's energy consumption metric
+    """
+
+    tag = "energy_consumption"
+
+    def __init__(self, value, device_uuid=None, timestamp=0):
+        super().__init__(value, device_uuid, timestamp)
+
+    @staticmethod
+    def aggregation_function():
+        def average(seq):
+            return sum(seq[1:], start=seq[0]) / len(seq)
+
+        return average
+
+    @staticmethod
+    def header(aggregation_tag=False):
+        return ("Average " if aggregation_tag else "") + "GPU Energy Consumption (MJ)"
+
+    def __eq__(self, other: "GPUEnergyConsumption") -> bool:  # type: ignore
+        return self.value() == other.value()
+
+    def __lt__(self, other: "GPUEnergyConsumption") -> bool:
+        return other.value() < self.value()
+
+    def __add__(self, other: "GPUEnergyConsumption") -> "GPUEnergyConsumption":
+        return GPUEnergyConsumption(
+            device_uuid=None, value=(self.value() + other.value())
+        )
+
+    def __sub__(self, other: "GPUEnergyConsumption") -> "GPUEnergyConsumption":
+        return GPUEnergyConsumption(
+            device_uuid=None, value=(other.value() - self.value())
+        )
diff --git a/genai-perf/genai_perf/record/types/gpu_free_memory.py b/genai-perf/genai_perf/record/types/gpu_free_memory.py
deleted file mode 100644
index 21bf3f58..00000000
--- a/genai-perf/genai_perf/record/types/gpu_free_memory.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
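
One detail of the new GPUEnergyConsumption record worth noting is its custom aggregation_function: seeding sum() with the first record makes Python fold Record.__add__ across the sequence instead of adding records to the default integer start of 0, and because this is a DecreasingGPURecord, the inverted __lt__ means a record with lower consumption ranks higher. A rough sketch of the same averaging on the record values (the readings are placeholders; the helper in the patch divides the summed record itself, which assumes the record supports division):

    from genai_perf.record.types.gpu_energy_consumption import GPUEnergyConsumption

    readings = [1.2, 1.4, 1.0]  # placeholder per-interval energy values, in the header's MJ units
    records = [GPUEnergyConsumption(value=v) for v in readings]

    # Seed sum() with the first record so Record.__add__ is used throughout.
    total = sum(records[1:], start=records[0])
    print(total.value() / len(records))  # ~1.2, the average that the "Average" header refers to
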
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.gpu_record import IncreasingGPURecord - - -@total_ordering -class GPUFreeMemory(IncreasingGPURecord): - """ - The free memory in the GPU. - """ - - tag = "gpu_free_memory" - - def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. - timestamp : int - The timestamp for the record in nanoseconds - """ - - super().__init__(value, device_uuid, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return GPUFreeMemory(device_uuid=None, value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return GPUFreeMemory(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_memory_used.py b/genai-perf/genai_perf/record/types/gpu_memory_used.py new file mode 100644 index 00000000..6f1ac81e --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_memory_used.py @@ -0,0 +1,45 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.gpu_record import IncreasingGPURecord + + +@total_ordering +class GPUMemoryUsed(IncreasingGPURecord): + """ + GPU's used memory metric + """ + + tag = "gpu_memory_used" + + def __init__(self, value, device_uuid=None, timestamp=0): + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def header(aggregation_tag=False) -> str: + return ("Max " if aggregation_tag else "") + "GPU Memory Used (GB)" + + def __eq__(self, other: "GPUMemoryUsed") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "GPUMemoryUsed") -> bool: + return self.value() < other.value() + + def __add__(self, other: "GPUMemoryUsed") -> "GPUMemoryUsed": + return GPUMemoryUsed(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other: "GPUMemoryUsed") -> "GPUMemoryUsed": + return GPUMemoryUsed(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_power_limit.py b/genai-perf/genai_perf/record/types/gpu_power_limit.py new file mode 100644 index 00000000..7e5cce2a --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_power_limit.py @@ -0,0 +1,52 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.gpu_record import IncreasingGPURecord + + +@total_ordering +class GPUPowerLimit(IncreasingGPURecord): + """ + GPU's power limit metric + """ + + tag = "gpu_power_limit" + + def __init__(self, value, device_uuid=None, timestamp=0): + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def aggregation_function(): + def average(seq): + return sum(seq[1:], start=seq[0]) / len(seq) + + return average + + @staticmethod + def header(aggregation_tag=False): + return ("Average " if aggregation_tag else "") + "GPU Power Limit (W)" + + def __eq__(self, other: "GPUPowerLimit") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "GPUPowerLimit") -> bool: + return self.value() < other.value() + + def __add__(self, other: "GPUPowerLimit") -> "GPUPowerLimit": + return GPUPowerLimit(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other: "GPUPowerLimit") -> "GPUPowerLimit": + return GPUPowerLimit(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_power_usage.py b/genai-perf/genai_perf/record/types/gpu_power_usage.py index 5f1fc49d..31ec2ac2 100644 --- a/genai-perf/genai_perf/record/types/gpu_power_usage.py +++ b/genai-perf/genai_perf/record/types/gpu_power_usage.py @@ -20,33 +20,16 @@ @total_ordering class GPUPowerUsage(DecreasingGPURecord): """ - GPU Power Usage + GPU's power usage metric """ tag = "gpu_power_usage" def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. - timestamp : int - The timestamp for the record in nanoseconds - """ - super().__init__(value, device_uuid, timestamp) @staticmethod def aggregation_function(): - """ - The function that is used to aggregate - this type of record - """ - def average(seq): return sum(seq[1:], start=seq[0]) / len(seq) @@ -54,52 +37,16 @@ def average(seq): @staticmethod def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed as part of the header - indicating that this record has been aggregated using max, min or - average etc. - - Returns - ------- - str - The full name of the - metric. - """ - return ("Average " if aggregation_tag else "") + "GPU Power Usage (W)" - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "GPUPowerUsage") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "GPUPowerUsage") -> bool: return other.value() < self.value() - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - + def __add__(self, other: "GPUPowerUsage") -> "GPUPowerUsage": return GPUPowerUsage(device_uuid=None, value=(self.value() + other.value())) - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. 
- """ - + def __sub__(self, other: "GPUPowerUsage") -> "GPUPowerUsage": return GPUPowerUsage(device_uuid=None, value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_total_memory.py b/genai-perf/genai_perf/record/types/gpu_total_memory.py index 45fdc300..fd43030b 100644 --- a/genai-perf/genai_perf/record/types/gpu_total_memory.py +++ b/genai-perf/genai_perf/record/types/gpu_total_memory.py @@ -20,75 +20,26 @@ @total_ordering class GPUTotalMemory(IncreasingGPURecord): """ - The total memory in the GPU. + GPU's total memory metric """ - tag = "gpu_total_memory" + tag = "total_gpu_memory" def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. - timestamp : int - The timestamp for the record in nanoseconds - """ - super().__init__(value, device_uuid, timestamp) @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ + def header(aggregation_tag=False) -> str: + return ("Max " if aggregation_tag else "") + "GPU Memory Available (GB)" + def __eq__(self, other: "GPUTotalMemory") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "GPUTotalMemory") -> bool: return self.value() < other.value() - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - + def __add__(self, other: "GPUTotalMemory") -> "GPUTotalMemory": return GPUTotalMemory(device_uuid=None, value=(self.value() + other.value())) - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - + def __sub__(self, other: "GPUTotalMemory") -> "GPUTotalMemory": return GPUTotalMemory(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_used_memory.py b/genai-perf/genai_perf/record/types/gpu_used_memory.py deleted file mode 100644 index c4b1cc66..00000000 --- a/genai-perf/genai_perf/record/types/gpu_used_memory.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.gpu_record import DecreasingGPURecord - - -@total_ordering -class GPUUsedMemory(DecreasingGPURecord): - """ - The used memory in the GPU. 
- """ - - tag = "gpu_used_memory" - - def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. - timestamp : int - The timestamp for the record in nanoseconds - """ - - super().__init__(value, device_uuid, timestamp) - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - - return ("Max " if aggregation_tag else "") + "GPU Memory Usage (MB)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return GPUUsedMemory(device_uuid=None, value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return GPUUsedMemory(device_uuid=None, value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_utilization.py b/genai-perf/genai_perf/record/types/gpu_utilization.py index 67a71f64..0c6e6f65 100644 --- a/genai-perf/genai_perf/record/types/gpu_utilization.py +++ b/genai-perf/genai_perf/record/types/gpu_utilization.py @@ -20,33 +20,16 @@ @total_ordering class GPUUtilization(IncreasingGPURecord): """ - GPU utilization record + GPU's utilization percentage """ tag = "gpu_utilization" def __init__(self, value, device_uuid=None, timestamp=0): - """ - Parameters - ---------- - value : float - The value of the GPU metrtic - device_uuid : str - The GPU device uuid this metric is associated - with. - timestamp : int - The timestamp for the record in nanoseconds - """ - super().__init__(value, device_uuid, timestamp) @staticmethod def aggregation_function(): - """ - The function that is used to aggregate - this type of record - """ - def average(seq): return sum(seq[1:], start=seq[0]) / len(seq) @@ -54,53 +37,16 @@ def average(seq): @staticmethod def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. - """ - return ("Average " if aggregation_tag else "") + "GPU Utilization (%)" - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - + def __eq__(self, other: "GPUUtilization") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - + def __lt__(self, other: "GPUUtilization") -> bool: return self.value() < other.value() - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. 
- """ - + def __add__(self, other: "GPUUtilization") -> "GPUUtilization": return GPUUtilization(device_uuid=None, value=(self.value() + other.value())) - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - + def __sub__(self, other: "GPUUtilization") -> "GPUUtilization": return GPUUtilization(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py index 90a8bc2a..ba4d0a8b 100644 --- a/genai-perf/tests/test_record.py +++ b/genai-perf/tests/test_record.py @@ -75,9 +75,8 @@ def setUp(self): "time_to_first_token_p90", "time_to_first_token_p95", "time_to_first_token_p99", - "gpu_used_memory", - "cpu_used_ram", "gpu_power_usage", + "energy_consumption", ] } @@ -116,10 +115,10 @@ def setUp(self): "input_sequence_length_p90", "input_sequence_length_p95", "input_sequence_length_p99", - "gpu_free_memory", + "gpu_power_limit", "gpu_utilization", - "cpu_available_ram", - "gpu_total_memory", + "total_gpu_memory", + "gpu_memory_used", ] } From 58f0dd07817c9246ceeacb51824114e1221dc9c3 Mon Sep 17 00:00:00 2001 From: braf Date: Tue, 5 Nov 2024 15:10:24 +0000 Subject: [PATCH 11/16] Fixing unit testing --- .../measurements/model_config_measurement.py | 3 +- ...hput.py => output_token_throughput_avg.py} | 33 ++---- .../record/types/perf_throughput.py | 103 ------------------ .../record/types/request_throughput_avg.py | 49 +++++++++ .../tests/test_model_config_measurement.py | 24 ++-- genai-perf/tests/test_record.py | 6 +- .../tests/test_run_config_measurement.py | 34 +++--- genai-perf/tests/test_utils.py | 16 ++- 8 files changed, 101 insertions(+), 167 deletions(-) rename genai-perf/genai_perf/record/types/{output_token_throughput.py => output_token_throughput_avg.py} (60%) delete mode 100644 genai-perf/genai_perf/record/types/perf_throughput.py create mode 100644 genai-perf/genai_perf/record/types/request_throughput_avg.py diff --git a/genai-perf/genai_perf/measurements/model_config_measurement.py b/genai-perf/genai_perf/measurements/model_config_measurement.py index aecedf0f..7fb6d75f 100644 --- a/genai-perf/genai_perf/measurements/model_config_measurement.py +++ b/genai-perf/genai_perf/measurements/model_config_measurement.py @@ -20,6 +20,7 @@ from typing import Any, Dict, Optional, TypeAlias from genai_perf.record.record import Record +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg from genai_perf.types import ( CheckpointObject, MetricObjectives, @@ -39,7 +40,7 @@ ########################################################################### @dataclass(frozen=True) class ModelConfigMeasurementDefaults: - METRIC_OBJECTIVE = {"perf_throughput": 1.0} + METRIC_OBJECTIVE = {RequestThroughputAvg.tag: 1.0} SELF_IS_BETTER = 1 OTHER_IS_BETTER = -1 diff --git a/genai-perf/genai_perf/record/types/output_token_throughput.py b/genai-perf/genai_perf/record/types/output_token_throughput_avg.py similarity index 60% rename from genai-perf/genai_perf/record/types/output_token_throughput.py rename to genai-perf/genai_perf/record/types/output_token_throughput_avg.py index d773da85..c8b83960 100644 --- a/genai-perf/genai_perf/record/types/output_token_throughput.py +++ b/genai-perf/genai_perf/record/types/output_token_throughput_avg.py @@ -19,49 +19,32 @@ @total_ordering -class OutputTokenThroughput(IncreasingRecord): +class OutputTokenThroughputAvg(IncreasingRecord): """ - A record for Output token throughput + A record for avg output token throughput 
metric """ - tag = "output_token_throughput" + tag = "output_token_throughput_avg" def __init__(self, value: RecordValue, timestamp: int = 0) -> None: super().__init__(value, timestamp) @staticmethod def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ return sum @staticmethod def header(aggregation_tag=False) -> str: - return "Output Token Throughput (tokens/sec)" + return "Avg. Output Token Throughput (tokens/sec)" - def __eq__(self, other: "OutputTokenThroughput") -> bool: # type: ignore + def __eq__(self, other: "OutputTokenThroughputAvg") -> bool: # type: ignore return self.value() == other.value() - def __lt__(self, other: "OutputTokenThroughput") -> bool: + def __lt__(self, other: "OutputTokenThroughputAvg") -> bool: return self.value() < other.value() - def __add__(self, other: "OutputTokenThroughput") -> "OutputTokenThroughput": - """ - Allows adding two records together - to produce a brand new record. - """ - + def __add__(self, other: "OutputTokenThroughputAvg") -> "OutputTokenThroughputAvg": return self.__class__(value=(self.value() + other.value())) - def __sub__(self, other: "OutputTokenThroughput") -> "OutputTokenThroughput": - """ - Allows subtracting two records together - to produce a brand new record. - """ - + def __sub__(self, other: "OutputTokenThroughputAvg") -> "OutputTokenThroughputAvg": return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/perf_throughput.py b/genai-perf/genai_perf/record/types/perf_throughput.py deleted file mode 100644 index 4d52d342..00000000 --- a/genai-perf/genai_perf/record/types/perf_throughput.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from functools import total_ordering - -from genai_perf.record.record import IncreasingRecord - - -@total_ordering -class PerfThroughput(IncreasingRecord): - """ - A record for perf_analyzer - metric 'Throughput' - """ - - tag = "perf_throughput" - - def __init__(self, value, timestamp=0): - """ - Parameters - ---------- - value : float - The throughput from the perf analyzer output - timestamp : float - Elapsed time from start of program - """ - - super().__init__(value, timestamp) - - @staticmethod - def value_function(): - """ - Returns the total value from a list - - Returns - ------- - Total value of the list - """ - return sum - - @staticmethod - def header(aggregation_tag=False): - """ - Parameters - ---------- - aggregation_tag: bool - An optional tag that may be displayed - as part of the header indicating that - this record has been aggregated using - max, min or average etc. - - Returns - ------- - str - The full name of the - metric. 
- """ - - return "Throughput (infer/sec)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() < other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return PerfThroughput(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subtracting two records together - to produce a brand new record. - """ - - return PerfThroughput(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/request_throughput_avg.py b/genai-perf/genai_perf/record/types/request_throughput_avg.py new file mode 100644 index 00000000..96caf8f3 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_throughput_avg.py @@ -0,0 +1,49 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord + + +@total_ordering +class RequestThroughputAvg(IncreasingRecord): + """ + A record avg request throughput metric + """ + + tag = "request_throughput_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @staticmethod + def value_function(): + return sum + + @staticmethod + def header(aggregation_tag=False) -> str: + return "Throughput (requests/sec)" + + def __eq__(self, other: "RequestThroughputAvg") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "RequestThroughputAvg") -> bool: + return self.value() < other.value() + + def __add__(self, other: "RequestThroughputAvg") -> "RequestThroughputAvg": + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "RequestThroughputAvg") -> "RequestThroughputAvg": + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/tests/test_model_config_measurement.py b/genai-perf/tests/test_model_config_measurement.py index ee73e77f..ba6fd27f 100644 --- a/genai-perf/tests/test_model_config_measurement.py +++ b/genai-perf/tests/test_model_config_measurement.py @@ -21,8 +21,8 @@ ModelConfigMeasurement, ModelConfigMeasurementDefaults, ) -from genai_perf.record.types.perf_throughput import PerfThroughput from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg from genai_perf.record.types.time_to_first_token_avg import TimeToFirstTokenAvg @@ -32,21 +32,21 @@ class TestModelConfigMeasurement(unittest.TestCase): ########################################################################### def setUp(self): - self.throughput_recordA = PerfThroughput(1000) + self.throughput_recordA = RequestThroughputAvg(1000) self.latency_recordA = RequestLatencyP99(20) self.perf_metricsA = { - 
PerfThroughput.tag: self.throughput_recordA, + RequestThroughputAvg.tag: self.throughput_recordA, RequestLatencyP99.tag: self.latency_recordA, } self.mcmA = ModelConfigMeasurement(self.perf_metricsA) - self.throughput_recordB = PerfThroughput(500) + self.throughput_recordB = RequestThroughputAvg(500) self.latency_recordB = RequestLatencyP99(10) self.perf_metricsB = { - PerfThroughput.tag: self.throughput_recordB, + RequestThroughputAvg.tag: self.throughput_recordB, RequestLatencyP99.tag: self.latency_recordB, } @@ -67,7 +67,7 @@ def test_basic_accessor_methods(self): self.mcmA.get_perf_metric(RequestLatencyP99.tag), self.latency_recordA ) self.assertEqual( - self.mcmA.get_perf_metric_value(PerfThroughput.tag, return_value=-1), + self.mcmA.get_perf_metric_value(RequestThroughputAvg.tag, return_value=-1), self.throughput_recordA.value(), ) self.assertEqual( @@ -86,9 +86,9 @@ def test_set_metric_objective(self): ) self.mcmA.set_metric_objectives( - {PerfThroughput.tag: 2, RequestLatencyP99.tag: 3} + {RequestThroughputAvg.tag: 2, RequestLatencyP99.tag: 3} ) - expected_mw = {PerfThroughput.tag: 2 / 5, RequestLatencyP99.tag: 3 / 5} + expected_mw = {RequestThroughputAvg.tag: 2 / 5, RequestLatencyP99.tag: 3 / 5} self.assertEqual(expected_mw, self.mcmA._metric_objectives) def test_get_weighted_score(self): @@ -160,10 +160,10 @@ def test_calculate_weighted_percentage_gain(self): # mcmA has 50% worse throughput, but 100% better latency # mcmB has 100% better latency, but 50% worse throughput self.mcmA.set_metric_objectives( - {PerfThroughput.tag: 1, RequestLatencyP99.tag: 1} + {RequestThroughputAvg.tag: 1, RequestLatencyP99.tag: 1} ) self.mcmB.set_metric_objectives( - {PerfThroughput.tag: 1, RequestLatencyP99.tag: 1} + {RequestThroughputAvg.tag: 1, RequestLatencyP99.tag: 1} ) self.assertEqual(self.mcmA, self.mcmB) self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), 25) @@ -176,7 +176,7 @@ def test_is_better_than(self): """ Test that individual metric comparison works as expected """ - self.mcmA.set_metric_objectives({PerfThroughput.tag: 1}) + self.mcmA.set_metric_objectives({RequestThroughputAvg.tag: 1}) # throughput: 1000 is better than 500 self.assertTrue(self.mcmA.is_better_than(self.mcmB)) @@ -195,7 +195,7 @@ def test_is_better_than_combo(self): # throuhput: 2000 vs. 1000 (better), latency: 20 vs. 
10 (worse) # with latency bias mcmB is better self.mcmA.set_metric_objectives( - {PerfThroughput.tag: 1, RequestLatencyP99.tag: 3} + {RequestThroughputAvg.tag: 1, RequestLatencyP99.tag: 3} ) self.assertFalse(self.mcmA.is_better_than(self.mcmB)) diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py index ba4d0a8b..62bf0215 100644 --- a/genai-perf/tests/test_record.py +++ b/genai-perf/tests/test_record.py @@ -83,8 +83,8 @@ def setUp(self): self.more_is_better_types = { record_types[t] for t in [ - "perf_throughput", - "output_token_throughput", + "request_throughput_avg", + "output_token_throughput_avg", "output_token_throughput_per_request_min", "output_token_throughput_per_request_max", "output_token_throughput_per_request_avg", @@ -232,7 +232,7 @@ def test_value(self): ].value_function()([10, 50, 100, 40]) total_value = RecordType.get_all_record_types()[ - "perf_throughput" + "request_throughput_avg" ].value_function()([10, 50, 100, 40]) self.assertEqual(avg_value, 50) diff --git a/genai-perf/tests/test_run_config_measurement.py b/genai-perf/tests/test_run_config_measurement.py index 94a3c810..9f88c5b7 100644 --- a/genai-perf/tests/test_run_config_measurement.py +++ b/genai-perf/tests/test_run_config_measurement.py @@ -23,8 +23,8 @@ from genai_perf.measurements.run_constraints import RunConstraints from genai_perf.record.types.gpu_power_usage import GPUPowerUsage from genai_perf.record.types.gpu_utilization import GPUUtilization -from genai_perf.record.types.perf_throughput import PerfThroughput from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg class TestRunConfigMeasurement(unittest.TestCase): @@ -78,39 +78,39 @@ def _create_gpu_metrics(self) -> None: def _create_perf_metrics(self) -> None: # # Record A - self.throughput_recordA = PerfThroughput(1000) + self.throughput_recordA = RequestThroughputAvg(1000) self.latency_recordA = RequestLatencyP99(40) self.perf_metricsA = { - PerfThroughput.tag: self.throughput_recordA, + RequestThroughputAvg.tag: self.throughput_recordA, RequestLatencyP99.tag: self.latency_recordA, } # # Record B - self.throughput_recordB = PerfThroughput(500) + self.throughput_recordB = RequestThroughputAvg(500) self.latency_recordB = RequestLatencyP99(30) self.perf_metricsB = { - PerfThroughput.tag: self.throughput_recordB, + RequestThroughputAvg.tag: self.throughput_recordB, RequestLatencyP99.tag: self.latency_recordB, } # # Record MM - self.throughput_recordMM_0 = PerfThroughput(1000) + self.throughput_recordMM_0 = RequestThroughputAvg(1000) self.latency_recordMM_0 = RequestLatencyP99(20) - self.throughput_recordMM_1 = PerfThroughput(2000) + self.throughput_recordMM_1 = RequestThroughputAvg(2000) self.latency_recordMM_1 = RequestLatencyP99(30) self.perf_metricsMM_0 = { - PerfThroughput.tag: self.throughput_recordMM_0, + RequestThroughputAvg.tag: self.throughput_recordMM_0, RequestLatencyP99.tag: self.latency_recordMM_0, } self.perf_metricsMM_1 = { - PerfThroughput.tag: self.throughput_recordMM_1, + RequestThroughputAvg.tag: self.throughput_recordMM_1, RequestLatencyP99.tag: self.latency_recordMM_1, } @@ -170,12 +170,12 @@ def test_basic_accessor_methods(self): self.assertEqual(expected_all_perf_metrics_dict, rcmA.get_all_perf_metrics()) self.assertEqual(self.perf_metricsA, rcmA.get_model_perf_metrics("test_model")) self.assertEqual( - self.perf_metricsA[PerfThroughput.tag], - rcmA.get_model_perf_metric("test_model", PerfThroughput.tag), + 
self.perf_metricsA[RequestThroughputAvg.tag], + rcmA.get_model_perf_metric("test_model", RequestThroughputAvg.tag), ) self.assertEqual( - self.perf_metricsA[PerfThroughput.tag].value(), - rcmA.get_model_perf_metric_value("test_model", PerfThroughput.tag), + self.perf_metricsA[RequestThroughputAvg.tag].value(), + rcmA.get_model_perf_metric_value("test_model", RequestThroughputAvg.tag), ) self.assertEqual( 10, @@ -191,13 +191,13 @@ def test_basic_accessor_methods(self): rcmMM.set_model_weighting(model_weights) expected_weighted_perf_metric_values = { - "modelMM_0": self.perf_metricsMM_0[PerfThroughput.tag].value() * 0.8, - "modelMM_1": self.perf_metricsMM_1[PerfThroughput.tag].value() * 0.2, + "modelMM_0": self.perf_metricsMM_0[RequestThroughputAvg.tag].value() * 0.8, + "modelMM_1": self.perf_metricsMM_1[RequestThroughputAvg.tag].value() * 0.2, } self.assertEqual( expected_weighted_perf_metric_values, - rcmMM.get_weighted_perf_metric_values(PerfThroughput.tag), + rcmMM.get_weighted_perf_metric_values(RequestThroughputAvg.tag), ) def test_set_gpu_metric_objectives(self): @@ -259,7 +259,7 @@ def test_is_better_than_perf_metric(self): # Changing the metric objectives to bias latency # this tips the scale in the favor of RCMB latency_bias_objectives = { - "test_model": {PerfThroughput.tag: 1, RequestLatencyP99.tag: 4} + "test_model": {RequestThroughputAvg.tag: 1, RequestLatencyP99.tag: 4} } rcmA.set_perf_metric_objectives(latency_bias_objectives) rcmB.set_perf_metric_objectives(latency_bias_objectives) diff --git a/genai-perf/tests/test_utils.py b/genai-perf/tests/test_utils.py index 5caa702e..4278408b 100644 --- a/genai-perf/tests/test_utils.py +++ b/genai-perf/tests/test_utils.py @@ -35,10 +35,10 @@ from genai_perf.metrics.statistics import Statistics from genai_perf.record.types.gpu_power_usage import GPUPowerUsage from genai_perf.record.types.gpu_utilization import GPUUtilization -from genai_perf.record.types.input_sequence_length import InputSequenceLength -from genai_perf.record.types.output_sequence_length import OutputSequenceLength -from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.record.types.input_sequence_length_p99 import InputSequenceLengthP99 +from genai_perf.record.types.output_sequence_length_p99 import OutputSequenceLengthP99 from genai_perf.record.types.request_latency_p99 import RequestLatencyP99 +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg from genai_perf.types import GpuId, ModelObjectiveParameters, PerfRecords @@ -67,13 +67,17 @@ def create_perf_metrics( ) -> PerfRecords: perf_metrics: PerfRecords = {} if throughput: - perf_metrics[PerfThroughput.tag] = PerfThroughput(throughput) + perf_metrics[RequestThroughputAvg.tag] = RequestThroughputAvg(throughput) if latency: perf_metrics[RequestLatencyP99.tag] = RequestLatencyP99(latency) if input_seq_length: - perf_metrics[InputSequenceLength.tag] = InputSequenceLength(input_seq_length) + perf_metrics[InputSequenceLengthP99.tag] = InputSequenceLengthP99( + input_seq_length + ) if output_seq_length: - perf_metrics[OutputSequenceLength.tag] = OutputSequenceLength(output_seq_length) + perf_metrics[OutputSequenceLengthP99.tag] = OutputSequenceLengthP99( + output_seq_length + ) return perf_metrics From da5d4768da24e1f3fb4a8d23454a930d59c199cc Mon Sep 17 00:00:00 2001 From: braf Date: Tue, 5 Nov 2024 15:24:08 +0000 Subject: [PATCH 12/16] Adding request goodput record --- .../record/types/request_goodput_avg.py | 49 +++++++++++++++++++ 
.../record/types/request_throughput_avg.py | 2 +- genai-perf/tests/test_record.py | 1 + 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 genai-perf/genai_perf/record/types/request_goodput_avg.py diff --git a/genai-perf/genai_perf/record/types/request_goodput_avg.py b/genai-perf/genai_perf/record/types/request_goodput_avg.py new file mode 100644 index 00000000..3d9c2394 --- /dev/null +++ b/genai-perf/genai_perf/record/types/request_goodput_avg.py @@ -0,0 +1,49 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord + + +@total_ordering +class RequestGoodputAvg(IncreasingRecord): + """ + A record avg request goodput metric + """ + + tag = "request_goodput_avg" + + def __init__(self, value, timestamp=0): + super().__init__(value, timestamp) + + @staticmethod + def value_function(): + return sum + + @staticmethod + def header(aggregation_tag=False) -> str: + return "Request Goodput (requests/sec)" + + def __eq__(self, other: "RequestGoodputAvg") -> bool: # type: ignore + return self.value() == other.value() + + def __lt__(self, other: "RequestGoodputAvg") -> bool: + return self.value() < other.value() + + def __add__(self, other: "RequestGoodputAvg") -> "RequestGoodputAvg": + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other: "RequestGoodputAvg") -> "RequestGoodputAvg": + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/request_throughput_avg.py b/genai-perf/genai_perf/record/types/request_throughput_avg.py index 96caf8f3..15b8b453 100644 --- a/genai-perf/genai_perf/record/types/request_throughput_avg.py +++ b/genai-perf/genai_perf/record/types/request_throughput_avg.py @@ -34,7 +34,7 @@ def value_function(): @staticmethod def header(aggregation_tag=False) -> str: - return "Throughput (requests/sec)" + return "Request Throughput (requests/sec)" def __eq__(self, other: "RequestThroughputAvg") -> bool: # type: ignore return self.value() == other.value() diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py index 62bf0215..baf493a0 100644 --- a/genai-perf/tests/test_record.py +++ b/genai-perf/tests/test_record.py @@ -84,6 +84,7 @@ def setUp(self): record_types[t] for t in [ "request_throughput_avg", + "request_goodput_avg", "output_token_throughput_avg", "output_token_throughput_per_request_min", "output_token_throughput_per_request_max", From f6176c7eb12c9cc4f0ec45b1d7873e17d104c197 Mon Sep 17 00:00:00 2001 From: braf Date: Tue, 5 Nov 2024 19:53:38 +0000 Subject: [PATCH 13/16] Adding method to create records from statistics --- genai-perf/genai_perf/metrics/statistics.py | 25 +++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/genai-perf/genai_perf/metrics/statistics.py b/genai-perf/genai_perf/metrics/statistics.py index 71f7c3ea..a7c03247 100755 --- 
a/genai-perf/genai_perf/metrics/statistics.py +++ b/genai-perf/genai_perf/metrics/statistics.py @@ -32,8 +32,10 @@ import numpy as np import pandas as pd +from genai_perf.exceptions import GenAIPerfException from genai_perf.metrics.metrics import Metrics from genai_perf.metrics.telemetry_metrics import TelemetryMetrics +from genai_perf.record.record import Record, RecordType class Statistics: @@ -192,3 +194,26 @@ def export_parquet(self, artifact_dir: Path, filename: str) -> None: filepath = artifact_dir / f"{filename}.gzip" df.to_parquet(filepath, compression="gzip") + + def create_records(self) -> List[Record]: + """ + Populates and returns a list of Records + """ + statistic_records = [] + for metric_base_name, metric_info in self.stats_dict.items(): + for metric_post_name, metric_value in metric_info.items(): + if metric_post_name == "unit": + continue + + metric_name = metric_base_name + "_" + metric_post_name + + try: + new_record = RecordType.get_all_record_types()[metric_name]( + metric_value + ) + except KeyError: + GenAIPerfException(f"{metric_name} is not a valid Record tag.") + + statistic_records.append(new_record) + + return statistic_records From e1049ce0d69518403d401a60e3a85d51acc6f45b Mon Sep 17 00:00:00 2001 From: braf Date: Tue, 5 Nov 2024 21:45:43 +0000 Subject: [PATCH 14/16] Added very basic unit testing --- genai-perf/tests/test_llm_profile_data_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/genai-perf/tests/test_llm_profile_data_parser.py b/genai-perf/tests/test_llm_profile_data_parser.py index d3f213cb..01b03b0d 100644 --- a/genai-perf/tests/test_llm_profile_data_parser.py +++ b/genai-perf/tests/test_llm_profile_data_parser.py @@ -32,6 +32,7 @@ from genai_perf.metrics import LLMMetrics from genai_perf.metrics.statistics import Statistics from genai_perf.profile_data_parser import LLMProfileDataParser +from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer from .test_utils import check_statistics, ns_to_sec @@ -216,6 +217,11 @@ def test_triton_llm_profile_data( check_llm_metrics(metrics, expected_metrics) check_statistics(statistics, expected_statistics) + # Check that Records can be created + records = statistics.create_records() + assert records is not None + assert records[0].tag == RequestThroughputAvg.tag + # check non-existing profile data with pytest.raises(KeyError): pd.get_statistics(infer_mode="concurrency", load_level="30") From 79248fa08964570cd0d94c7455a30cc6510ef00d Mon Sep 17 00:00:00 2001 From: braf Date: Wed, 6 Nov 2024 16:22:17 +0000 Subject: [PATCH 15/16] Remove demo file (accidental commit) --- genai-perf/genai_perf/demo_for_visualize.py | 132 -------------------- 1 file changed, 132 deletions(-) delete mode 100644 genai-perf/genai_perf/demo_for_visualize.py diff --git a/genai-perf/genai_perf/demo_for_visualize.py b/genai-perf/genai_perf/demo_for_visualize.py deleted file mode 100644 index 21e317bc..00000000 --- a/genai-perf/genai_perf/demo_for_visualize.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -import sys - -from genai_perf.config.generate.search_parameters import SearchParameters -from genai_perf.config.generate.sweep_objective_generator import SweepObjectiveGenerator -from genai_perf.config.input.config_command import ConfigCommand -from genai_perf.config.run.results import Results -from genai_perf.config.run.run_config import RunConfig -from genai_perf.measurements.model_constraints import ModelConstraints -from genai_perf.measurements.run_constraints import RunConstraints -from genai_perf.record.types.input_sequence_length import InputSequenceLength -from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 -from genai_perf.record.types.perf_throughput import PerfThroughput -from genai_perf.types import ModelSearchParameters -from tests.test_utils import create_run_config - - -def print_run_config(run_config: RunConfig) -> None: - throughput = run_config.get_model_perf_metric_value( - "test_model", PerfThroughput.tag - ) - latency = run_config.get_model_perf_metric_value("test_model", PerfLatencyP99.tag) - isl = run_config.get_model_perf_metric_value("test_model", InputSequenceLength.tag) - pa_parameters = run_config.perf_analyzer_config.get_parameters() - concurrency = pa_parameters["concurrency"] - - print( - f"\t{run_config.name} \t concurrency: {concurrency} \t ISL: {isl} \t throughput: {throughput} \t latency: {latency}" - ) - - -def main(): - random.seed(10) - - # This is a demonstration of how sweep/analyze would run in - # GenAI-Perf and how the output (Results class) can be used - # by visualization - - # We don't have a new Config/CLI interface yet, so for now I've created - # a dataclass that allows you to set values. For this first example, - # we will use all default values (these will not be our actual defaults - # in product) - config = ConfigCommand(model_names=["test_model"]) - - # In this next section we will determine what the search space is - # by default right now this sweeps over concurrency from 1 to 1024 (by powers of 2) - model_search_parameters = {"test_model": SearchParameters(config.analyze)} - - # Now we instance the Sweep Objective Generator which will create GenAI-Perf & PA - # configs based on the user config and the model's search parameters - sweep_objective_generator = SweepObjectiveGenerator(config, model_search_parameters) - - # Next we iterate through the generator - in the real world we would call PA - # to find the metrics for each config profiled. For this example I will use a - # test utility created to generate metrics - - # Each profile (or iteration) creates a RunConfig instance and the list of these - # are stored in the Results class - results = Results() - for count, objective in enumerate(sweep_objective_generator.get_objectives()): - - # A RunConfig consists of a unique name, the GenAI-Perf config, the PA config - # and GPU + Performance metrics. 
This test utility uses the information provided - # here to create this - run_config = create_run_config( - # These values are used to set the GAP/PA config - run_config_name="test_model_run_config_" + str(count), - model_objective_parameters=objective, - config=config, - # Telemetry metrics - gpu_power=random.randint(400, 500), - gpu_utilization=random.randint(1, 100), - # Performance metrics - throughput=random.randint(100, 300), - latency=random.randint(50, 100), - input_seq_length=random.randint(20, 80), - output_seq_length=random.randint(30, 60), - ) - - # Now we add the RunConfig to the Results class - results.add_run_config(run_config) - - # At this point Analyze would be complete and the Results would be saved to a checkpoint file/ - # When visualize is called the checkpoint file would be read and the Results class would be - # restored. I am omitting these steps as they are not relevant to the visualize work and - # you can assume that when visualize is called the Results class will be passed in - - # Now I will demonstrate how Results and RunConfig can be utilized via the APIs - - # Results is a list of RunConfigs sorted by objective - for my "fake" config I've - # set the default to be throughput. Results is always sorted based on objective with - # the first entry being the best - print("\nExample 1 - Objective is highest throughput:") - for run_config in results.run_configs: - print_run_config(run_config) - - # Now lets change the objective to latency - results.set_perf_metric_objectives({"test_model": {PerfLatencyP99.tag: 1}}) - - print("\nExample 2 - Objective is lowest latency:") - for run_config in results.run_configs: - print_run_config(run_config) - - # Now lets set the objective back to throughput, but place a constraint that latency has to - # be below a certain value - results.set_perf_metric_objectives({"test_model": {PerfThroughput.tag: 1}}) - - model_constraints = ModelConstraints({PerfLatencyP99.tag: 70}) - run_constraints = RunConstraints({"test_model": model_constraints}) - results.set_constraints(run_constraints) - - print("\nExample 3 - Objective is throughput w/ a latency constraint of 70 ms:") - for run_config in results.get_results_passing_constraints().run_configs: - print_run_config(run_config) - - -if __name__ == "__main__": - sys.exit(main()) From 6a8ae32968a1b61b9dde1301fd4960b5444f8bc0 Mon Sep 17 00:00:00 2001 From: braf Date: Wed, 6 Nov 2024 18:00:54 +0000 Subject: [PATCH 16/16] Fix codeql error --- genai-perf/genai_perf/metrics/statistics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/genai-perf/genai_perf/metrics/statistics.py b/genai-perf/genai_perf/metrics/statistics.py index a7c03247..c2e4ffb6 100755 --- a/genai-perf/genai_perf/metrics/statistics.py +++ b/genai-perf/genai_perf/metrics/statistics.py @@ -212,7 +212,9 @@ def create_records(self) -> List[Record]: metric_value ) except KeyError: - GenAIPerfException(f"{metric_name} is not a valid Record tag.") + raise GenAIPerfException( + f"{metric_name} is not a valid Record tag." + ) statistic_records.append(new_record)
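
A minimal usage sketch of the renamed request-level records and the tag registry they share. This is illustrative only and not part of the patch series; it assumes the genai-perf package layout shown in the diffs above, and the metric values are made up.

    # Illustrative sketch only; assumes the genai-perf package from the diffs above.
    from genai_perf.record.record import RecordType
    from genai_perf.record.types.request_latency_p99 import RequestLatencyP99
    from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg

    # Records wrap a single metric value and are keyed by their tag.
    throughput = RequestThroughputAvg(1000)
    latency = RequestLatencyP99(40)
    perf_metrics = {
        RequestThroughputAvg.tag: throughput,  # "request_throughput_avg"
        RequestLatencyP99.tag: latency,
    }

    # The registry maps a tag back to its record class, which is what
    # Statistics.create_records() relies on when building records from stats.
    record_cls = RecordType.get_all_record_types()[RequestThroughputAvg.tag]
    rebuilt = record_cls(1500)

    # Records of the same type combine by value; for an increasing record,
    # a larger value compares as greater (i.e. better).
    combined = throughput + RequestThroughputAvg(500)
    assert combined.value() == 1500
    assert RequestThroughputAvg(500) < throughput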