-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Dev] Improve benchmark scripts (#99)
* Refactor BatchMatMulEmitter and BatchMatMulSelector for improved readability and maintainability * Refactor import statements for improved readability and maintainability * Refactor import statements for improved readability and maintainability * disable failure email for ci * remove email notifications. * move relax pass from testing to mlc_llm * Refactor scripts with se check_eual_ref_scripts_with_emitter function * Lint Fix * Refactor scripts with se check_eual_ref_scripts_with_emitter function * bug fix in test * lint fix. * test cuda i4 kernel * Refactor copyright notice in i4matmul.hpp * Refactor BitBLASLinear test module for improved readability and maintainability * refactor test as version below python 3.9 cannot handle int32 overflow. * format lint for test * Refactor test_int4b_fp16_convert.py for improved readability and maintainability * remove unused design file * move tile device from package to base * dummy impl for codegen * Refactor file structure for ladder_permutate module * Refactor backend class and fix typos in comments * Deep refactor Lib related code. * remove ci pull. * LintFix * refactor builder for whl build * Refactor TIRWrapper.wrap() method to include an assertion for the optimized module * Refactor lib_generator to set library and source paths * lint fix * BitNet vllm integration * chore: update codespell to version 2.3.0 * Lintfix * Bump version to 0.0.1.dev13 * lint fix * disable fast decoding [u]int4xint8 by default. * optimize from dict design in Hint * Implement SplitK * bitnet benchmark generation. * Add benchmark script for BitNet integration * AtomicAdd Support * LintFix * ci fix when 3rdparty tvm is initialized. * bug fix for setup * fix a bug in block reduce * typo fix * BUG Fix for block reduce. 
* Lint fix * Refactor block reduce schedule template * transform branch from bitblas to bitblas_tl * Fix subproject commit reference in 3rdparty/tvm * chore: update submodule branch from bitblas to bitblas_tl * force update config.cmake * Bug fix * Fix subproject commit reference in 3rdparty/cutlass * chore: Add submodule for cutlass library * update tl cutlass path * Refactor BitBLASLinear test module for improved readability and maintainability * format fix * Copy CUTLASS to the package directory * Refactor setup.py to include additional TVM header files * lint fix * bug fix * Refactor BitBLASLinear test module for improved readability and maintainability * Implement Matmul Benchmark Design * chore: Update BitBLAS Matmul benchmark script * lint fix * Refactor BitBLASMatmulOpsBenchmark for improved readability and maintainability * Refactor BitBLASMatmulOpsBenchmark to disable tuning during benchmark run * lint fix * Benchmark bot test * Refactor BitBLASMatmulOpsBenchmark to disable tuning during benchmark run * Refactor BitBLASMatmulOpsBenchmark to disable tuning during benchmark run * Refactor BitBLASMatmulOpsBenchmark to disable tuning during benchmark run * Refactor BitBLASMatmulOpsBenchmark to disable tuning during benchmark run * Refactor BitBLASMatmulOpsBenchmark to disable tuning during benchmark run
- Loading branch information
1 parent
a2d3bb0
commit 75ce23e
Showing
4 changed files
with
213 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
|
||
import argparse | ||
from benchmark_ops_matmul import BitblasMatmulOpsBenchmark, HELPER_MESSAGE | ||
from tabulate import tabulate | ||
from typing import Tuple | ||
|
||
|
||
def _format_delta(base, head):
    """Return the trend suffix appended to a head measurement.

    Args:
        base: The baseline value, or None when no baseline is available.
        head: The new value, or None when it was not measured.

    Returns:
        "<symbol>(<ratio>)" where the symbol is "↑", "↓" or "=" and the ratio
        is the relative change in percent. "(N/A)" is returned when either
        value is missing, and the ratio is "N/A" when the baseline is zero.
    """
    # Guard before comparing: ordering None against a number raises TypeError.
    if base is None or head is None:
        return "(N/A)"
    if head > base:
        symbol = "↑"
    elif head < base:
        symbol = "↓"
    else:
        symbol = "="
    # A zero baseline has no meaningful relative change (and would divide by zero).
    ratio = f"{((head - base) / base) * 100:.2f}%" if base else "N/A"
    return f"{symbol}({ratio})"


def _legalize_shape(M, N, K, dyn_prof_shape):
    """Generate a string representation of the operator shape.

    Args:
        M: The M dimension (an int, or a sequence of candidate values).
        N: The N dimension.
        K: The K dimension.
        dyn_prof_shape: The dynamic profiling shape (mapping with an 'M' key
            if M is dynamic); may be empty or None.

    Returns:
        A string representing the shape in either 'M-N-K' or 'N-K_M' format.
    """
    if isinstance(M, int):
        return f"{M}-{N}-{K}"
    if dyn_prof_shape and "M" in dyn_prof_shape:
        return f"{N}-{K}_{dyn_prof_shape['M']}"
    # No profiled M available: fall back to the average of the candidates.
    opt_m = sum(M) / len(M)
    return f"{N}-{K}_{opt_m}"


def _throughput_tflops(m, n, k, latency_ms):
    """Return the matmul throughput in TFLOPS, or None without a usable latency."""
    if not latency_ms:
        return None
    return 2 * m * n * k / (latency_ms * 1e-3) / 1e12


def _with_delta(value, base_value):
    """Format `value` with three decimals plus its trend suffix, or "N/A" if missing."""
    if value is None:
        return "N/A"
    return f"{value:.3f}{_format_delta(base_value, value)}"


def compare(base: BitblasMatmulOpsBenchmark, head: BitblasMatmulOpsBenchmark):
    """Generate and print a report comparing head benchmark results to base.

    One table is printed per tag found in ``head.benchmark_results``. Each row
    shows the head latency, throughput and tuning time, each followed by a
    suffix describing the change relative to the matching base entry.

    Args:
        base: Benchmark whose results are used as the reference values.
        head: Benchmark whose results are reported.

    Missing measurements (None) are rendered as "N/A", and entries that have
    no counterpart in ``base`` get an "(N/A)" suffix instead of raising.
    """
    for name, results in head.benchmark_results.items():
        table_data = [
            ["TAG:", name, "Device:", head.benchmark_target],
            [
                "Shape (M-N-K / N-K_M)",
                "Time (ms)",
                "Throughput (TFLOPS)",
                "Tune Time (s)",
            ],
        ]

        for i, (latency, tuning_time) in enumerate(results):
            op_config = head.benchmark_sets[name][i][1]
            dyn_prof_shape = head.benchmark_sets[name][i][2]
            shape = _legalize_shape(op_config.M, op_config.N, op_config.K, dyn_prof_shape)

            # Same rule as _legalize_shape: any non-int M is averaged.
            benchmark_M = (
                op_config.M if isinstance(op_config.M, int) else sum(op_config.M) /
                len(op_config.M))

            # The base run may lack this tag or this entry; report it as missing.
            try:
                base_entry = base.benchmark_results[name][i]
                base_latency, base_tuning_time = base_entry[0], base_entry[1]
            except (KeyError, IndexError):
                base_latency = base_tuning_time = None

            throughput = _throughput_tflops(benchmark_M, op_config.N, op_config.K, latency)
            base_throughput = _throughput_tflops(benchmark_M, op_config.N, op_config.K,
                                                 base_latency)

            table_data.append([
                shape,
                _with_delta(latency, base_latency),
                _with_delta(throughput, base_throughput),
                _with_delta(tuning_time, base_tuning_time),
            ])

        print(tabulate(table_data, headers="firstrow", tablefmt="fancy_grid"))
        # NOTE(review): helper message is emitted after every table — confirm
        # this matches the intended placement.
        print(HELPER_MESSAGE)
|
||
|
||
if __name__ == '__main__':
    # Command-line entry point: load two serialized benchmark runs, selected
    # by the values of --base and --head, and print the comparison report.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--base", "the base commit id"),
        ("--head", "the head commit id"),
    ):
        cli.add_argument(flag, type=str, help=description)
    options = cli.parse_args()

    # Base is loaded before head, matching the order of the arguments.
    reference_run = BitblasMatmulOpsBenchmark.deserialize_from_logs(options.base)
    candidate_run = BitblasMatmulOpsBenchmark.deserialize_from_logs(options.head)

    compare(reference_run, candidate_run)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters