diff --git a/benchmarks/DeepLearning/Ops/CMakeLists.txt b/benchmarks/DeepLearning/Ops/CMakeLists.txt
index 2978cc80..d989d8d4 100644
--- a/benchmarks/DeepLearning/Ops/CMakeLists.txt
+++ b/benchmarks/DeepLearning/Ops/CMakeLists.txt
@@ -14,4 +14,5 @@ add_subdirectory(BatchMatMulOp)
 add_subdirectory(MathExpOp)
 add_subdirectory(ReduceAddfOp)
 add_subdirectory(ReduceMaxfOp)
-add_subdirectory(SoftmaxExpSumDivOp)
\ No newline at end of file
+add_subdirectory(SoftmaxExpSumDivOp)
+add_subdirectory(RFFTOp)
diff --git a/benchmarks/DeepLearning/Ops/RFFTOp/CMakeLists.txt b/benchmarks/DeepLearning/Ops/RFFTOp/CMakeLists.txt
new file mode 100644
index 00000000..35a81578
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/RFFTOp/CMakeLists.txt
@@ -0,0 +1,52 @@
+cmake_minimum_required(VERSION 3.10)
+project(RFFTBenchmark)
+
+# Generate the NumPy reference output that the benchmark's correctness check
+# reads. RFFT.py receives the directory to write result.txt into.
+add_custom_command(
+  OUTPUT
+    ${CMAKE_CURRENT_SOURCE_DIR}/result.txt
+  COMMAND
+    python3 ${CMAKE_CURRENT_SOURCE_DIR}/RFFT.py ${CMAKE_CURRENT_SOURCE_DIR}
+  DEPENDS
+    ${CMAKE_CURRENT_SOURCE_DIR}/RFFT.py
+  COMMENT "Generating result.txt"
+  VERBATIM
+)
+
+add_custom_target(generate_result_txt ALL
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/result.txt
+  COMMENT "Ensure result.txt is generated"
+)
+
+# Define variables for the cross-compilation toolchain and options.
+if (CROSS_COMPILE_RVV)
+  set(RISCV_GNU_TOOLCHAIN ${BUDDY_MLIR_BUILD_DIR}/thirdparty/riscv-gnu-toolchain)
+  set(RISCV_GNU_TOOLCHAIN_SYSROOT ${RISCV_GNU_TOOLCHAIN}/sysroot)
+  set(BUDDY_OPT_TRIPLE riscv64)
+  set(BUDDY_OPT_ATTR +v,+m)
+endif()
+
+# Add the executable for the benchmark
+add_executable(dl-op-linalg-RFFT-benchmark
+  GoogleBenchmarkMain.cpp
+)
+
+# Define the benchmark tool
+set(BenchmarkTool GoogleBenchmark)
+
+target_link_directories(dl-op-linalg-RFFT-benchmark PRIVATE
+  ${BUDDY_MLIR_LIB_DIR}
+  ${LLVM_MLIR_LIBRARY_DIR}
+)
+
+# Link the benchmark executable with the necessary libraries.
+# ${BenchmarkTool} already names GoogleBenchmark, so it is listed only once.
+target_link_libraries(dl-op-linalg-RFFT-benchmark PRIVATE
+  ${BenchmarkTool}
+  BuddyLibDAP
+  mlir_c_runner_utils
+)
+
+# Build the reference data before the benchmark binary.
+add_dependencies(dl-op-linalg-RFFT-benchmark generate_result_txt)
diff --git a/benchmarks/DeepLearning/Ops/RFFTOp/GoogleBenchmarkMain.cpp b/benchmarks/DeepLearning/Ops/RFFTOp/GoogleBenchmarkMain.cpp
new file mode 100644
index 00000000..0811fc83
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/RFFTOp/GoogleBenchmarkMain.cpp
@@ -0,0 +1,120 @@
+//===- GoogleBenchmarkMain.cpp---------------------------------------------===//
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the benchmark for RFFT operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <buddy/Core/Container.h>
+#include <buddy/DAP/DAP.h>
+#include <cmath>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#define testLength 20
+
+// Helper functions and variables.
+namespace {
+const std::string PASS = "\033[32mPASS\033[0m";
+const std::string FAIL = "\033[31mFAIL\033[0m";
+
+// Input shared by all benchmark runs; filled by initializeInputAlign0().
+double *inputAlign0 = new double[testLength];
+intptr_t inputSizes0[1] = {testLength};
+
+// Fills inputAlign0 with the ramp 0, 1, ..., testLength - 1.
+void initializeInputAlign0() {
+  for (int i = 0; i < testLength; ++i) {
+    inputAlign0[i] = static_cast<double>(i);
+  }
+}
+
+} // namespace
+
+// Times state.range(0) RFFT calls on one MemRef per benchmark iteration.
+static void BUDDY_RFFT(benchmark::State &state) {
+  MemRef<double, 1> inputMemRef0(inputAlign0, inputSizes0);
+  for (auto _ : state) {
+    for (int i = 0; i < state.range(0); ++i) {
+      dap::RFFT(&inputMemRef0);
+    }
+  }
+}
+
+BENCHMARK(BUDDY_RFFT)->Arg(1)->Unit(benchmark::kMillisecond);
+
+// Compares one RFFT of the ramp input with the NumPy reference in result.txt.
+void verification() {
+  const std::string referencePath =
+      "../../benchmarks/DeepLearning/Ops/RFFTOp/result.txt";
+  std::vector<double> fileData;
+  std::ifstream inputFile(referencePath);
+  double value;
+  while (inputFile >> value) {
+    fileData.push_back(value);
+  }
+
+  std::vector<double> inputAlign(testLength);
+  for (int i = 0; i < testLength; ++i) {
+    inputAlign[i] = static_cast<double>(i);
+  }
+  intptr_t inputSizes[1] = {testLength};
+  MemRef<double, 1> inputMemRef(inputAlign.data(), inputSizes);
+
+  dap::RFFT(&inputMemRef);
+
+  // Get the result array.
+  auto resultRFFT = inputMemRef.getData();
+
+  std::cout << "Length : " << fileData.size() << std::endl;
+
+  // A missing or empty reference must fail rather than pass vacuously.
+  bool isEqual = !fileData.empty();
+  if (!isEqual) {
+    std::cerr << "No reference data in " << referencePath << std::endl;
+  }
+  const double tolerance = 1e-2;
+  // Clamp: result.txt may list more values than the MemRef was created with.
+  const size_t minSize = std::min<size_t>(fileData.size(), testLength);
+  for (size_t i = 0; i < minSize; ++i) {
+    // Negated "<=" so that a NaN result also counts as a mismatch.
+    if (!(std::abs(resultRFFT[i] - fileData[i]) <= tolerance)) {
+      isEqual = false;
+    }
+  }
+
+  std::cout << "-----------------------------------------------------------"
+            << std::endl;
+  std::cout << "Correctness Verification: " << (isEqual ? PASS : FAIL)
+            << std::endl;
+  std::cout << "-----------------------------------------------------------"
+            << std::endl;
+}
+
+int main(int argc, char **argv) {
+  // Run benchmark.
+  initializeInputAlign0();
+  ::benchmark::Initialize(&argc, argv);
+  ::benchmark::RunSpecifiedBenchmarks();
+
+  // Run correctness verification.
+  verification();
+  return 0;
+}
diff --git a/benchmarks/DeepLearning/Ops/RFFTOp/RFFT.py b/benchmarks/DeepLearning/Ops/RFFTOp/RFFT.py
new file mode 100644
index 00000000..ee7b1454
--- /dev/null
+++ b/benchmarks/DeepLearning/Ops/RFFTOp/RFFT.py
@@ -0,0 +1,57 @@
+import os
+import sys
+import timeit
+
+import numpy as np
+
+# Number of input samples; must match `testLength` in GoogleBenchmarkMain.cpp.
+SIGNAL_LENGTH = 20
+# Number of timed RFFT calls; the reported time is the per-call average.
+TIMING_RUNS = 1000
+
+
+def compute_rfft():
+    """Return the RFFT of the ramp 0..SIGNAL_LENGTH-1 as a list of text lines.
+
+    The first line is the real part of bin 0. Each later bin contributes its
+    real part followed by its imaginary part, one value per line.
+    """
+    rfft_result = np.fft.rfft(np.arange(0, SIGNAL_LENGTH))
+    # Six decimals keep the rounding error far below the 1e-2 tolerance of the
+    # C++ check; rounding real parts to integers would not in general.
+    output_lines = [f"{rfft_result[0].real:.6f}"]
+    for c in rfft_result[1:]:
+        output_lines.append(f"{c.real:.6f}")
+        output_lines.append(f"{c.imag:.6f}")
+    return output_lines
+
+
+def main():
+    if len(sys.argv) != 2:
+        print("need file path")
+        sys.exit(1)
+
+    output_path = sys.argv[1]
+    output_file = os.path.join(output_path, 'result.txt')
+    time_output_file = os.path.join(output_path, 'time_result.txt')
+
+    # Time only the transform (not the text formatting) and average over many
+    # calls, because a single call is too noisy to be meaningful.
+    signal = np.arange(0, SIGNAL_LENGTH)
+    total_seconds = timeit.timeit(lambda: np.fft.rfft(signal), number=TIMING_RUNS)
+    execution_time = total_seconds / TIMING_RUNS * 1000
+
+    # Mode 'w' truncates an existing file, so no explicit removal is needed.
+    with open(output_file, 'w') as f:
+        for line in compute_rfft():
+            f.write(line + '\n')
+
+    with open(time_output_file, 'w') as f_time:
+        f_time.write(f"Execution time for RFFT: {execution_time:.6f} milliseconds\n")
+
+    print(f"RFFT result saved to '{output_file}'")
+    print(f"Execution time saved to '{time_output_file}'")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/DeepLearning/README.md b/benchmarks/DeepLearning/README.md
index 8c643e71..0e5a25d1 100644
---
a/benchmarks/DeepLearning/README.md +++ b/benchmarks/DeepLearning/README.md @@ -39,6 +39,7 @@ The table below lists the benchmark cases at the operation level. | Reduce Addf | `ninja dl-op-reduce-addf-benchmark` | This benchmark evaluates optimization strategies for the `reduce.addf` operation. The benchmark size can be adjusted in [this file](./Ops/ReduceAddfOp/GoogleBenchmarkMain.cpp). | | Reduce Maxf | `ninja dl-op-reduce-maxf-benchmark` | This benchmark evaluates optimization strategies for the `reduce.maxf` operation. The benchmark size can be adjusted in [this file](./Ops/ReduceMaxfOp/GoogleBenchmarkMain.cpp). | | Softmax Exp Sum Div | `ninja dl-op-softmax-exp-sum-div-benchmark` | This benchmark evaluates optimization strategies for the `softmax.exp_sum_div` operation. The benchmark size can be adjusted in [this file](./Ops/SoftmaxExpSumDivOp/GoogleBenchmarkMain.cpp). | +| RFFT | `ninja dl-op-linalg-RFFT-benchmark` | This benchmark measures the performance of the RFFT operator and verifies its correctness by comparing its output with that of NumPy's `numpy.fft.rfft`. The benchmark size can be adjusted in [this file](./Ops/RFFTOp/GoogleBenchmarkMain.cpp). The NumPy execution time is written to `time_result.txt` so that the two can be compared. | ### Enter Python virtual environment We recommend you to use anaconda3 to create python virtual environment. You should install python packages as buddy-mlir/requirements.