-
Notifications
You must be signed in to change notification settings - Fork 144
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* added working example for 5D input using 1D kernel * example with 5D input tensor and 2d kernel - not working: issues with arguments * added updated version of 3d device op - changed descriptors/dims * added example file to check kernel * fixed descriptor and isSupportedArgument stride problem * added and modified kernel for 3d - updated tids/loop * adding some more 5d example files * fixed some issues * changes made for testing * working version: fixed error in stride for A, still a bit inefficient * cleaned up formatting/comments * updating formatting * more formatting fixes * fixing cmake, adding back gpu targets in cmake script * adding client example * added instances for client example * fixed errors in client example * implemented client ex with device_elementwise.hpp and device_elementwise_3d_impl.hpp * removed extra files * minor formatting and naming fixes * adding test files and profiler * fixing minor error * minor fix * removed unnecessary comments, renamed files * updated instance list for client example, added different layout example * removing instances * fixed error in instance generation * remove comments * update profiler and client example tensor layouts * fixed errors in test/profiler * updated vector dim access to enable vector load * updated test/profiler files * updated example with 1d kernel * updating profiler * renamed files * disabled device op for MI300 * skip elementwise_permute_2d on gfx94x * Update CMakeLists.txt * fixing CMake - disabling some GPU targets * added transpose profiler to CMake * fixed transpose profiler errors * fixed instances for tests/profiler * cleaned up code in transpose profiler source code * added some comments, updated copyright * made function arguments const where possible --------- Co-authored-by: Jing Zhang <[email protected]> Co-authored-by: Jing Zhang <[email protected]> Co-authored-by: zjing14 <[email protected]>
- Loading branch information
1 parent
fbf31a2
commit aa3e2d7
Showing
8 changed files
with
151 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include <iostream> | ||
#include <numeric> | ||
#include <initializer_list> | ||
#include <cstdlib> | ||
|
||
#include "profiler/profile_transpose_impl.hpp" | ||
#include "profiler_operation_registry.hpp" | ||
|
||
/// Element-type combination selected on the command line. The numeric value of
/// each enumerator is the integer the user passes as the data-type argument
/// (0: fp32, 1: fp16).
enum class DataType
{
    F32_F32_F32_F32_F32 = 0,
    F16_F16_F16_F16_F16 = 1,
};
|
||
#define OP_NAME "transpose" | ||
#define OP_DESC "Transpose" | ||
|
||
/// Minimal parser for "--<key> v0 v1 ..." style options holding integer lists.
///
/// Every key present in `long_opts` is searched for on the command line; the
/// integers following the first occurrence of "--<key>" are appended to the
/// vector stored under that key.
struct TransposeArgParser
{
    /// Recognised long options and the integer values collected for each.
    std::unordered_map<std::string, std::vector<int>> long_opts = {{"lengths", {}}};

    /// Tries to read option `key` at position `i` of `argv`.
    ///
    /// @return false when argv[i] is not exactly "--<key>"; true otherwise, after
    ///         appending every following argument (up to the end of argv or the
    ///         next argument starting with '-') to long_opts[key].
    /// std::stoi performs the conversion, so a non-numeric value throws.
    bool parse_opt(const int argc, char* argv[], const std::string& key, int i)
    {
        const std::string flag = "--" + key;
        if(flag != argv[i])
        {
            return false;
        }

        // Values run from the argument after the flag up to the next option.
        for(int idx = i + 1; idx < argc && argv[idx][0] != '-'; ++idx)
        {
            long_opts[key].push_back(std::stoi(argv[idx]));
        }
        return true;
    }

    /// Scans argv[1..argc) once per registered option and stops at the first
    /// position where that option is found.
    void operator()(int argc, char* argv[])
    {
        for(auto& entry : long_opts)
        {
            int i = 1;
            while(i < argc && !parse_opt(argc, argv, entry.first, i))
            {
                ++i;
            }
        }
    }
};
|
||
static void print_helper_msg() | ||
{ | ||
printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); | ||
printf("arg2: data type (0: fp32; 1: fp16)\n"); | ||
printf("arg3: verification (0: no; 1: yes)\n"); | ||
printf("arg4: initialization (0: no init; 1: integer value; 2: decimal value)\n"); | ||
printf("arg5: print tensor value (0: no; 1: yes)\n"); | ||
printf("arg6: time kernel (0=no, 1=yes)\n"); | ||
printf("arg7: --lengths: N, C, D, H, W\n"); | ||
} | ||
|
||
int profile_transpose(int argc, char* argv[]) | ||
{ | ||
if(argc != 7) | ||
{ | ||
print_helper_msg(); | ||
exit(1); | ||
} | ||
TransposeArgParser arg_parser; | ||
|
||
const auto data_type = static_cast<DataType>(std::stoi(argv[2])); | ||
const bool do_verification = std::stoi(argv[3]); | ||
const int init_method = std::stoi(argv[4]); | ||
const bool do_log = std::stoi(argv[5]); | ||
const bool time_kernel = std::stoi(argv[6]); | ||
arg_parser(argc, argv); | ||
const std::vector<ck::index_t> lengths = arg_parser.long_opts["lengths"]; | ||
|
||
using F32 = float; | ||
using F16 = ck::half_t; | ||
|
||
auto profile = [&](auto a_type, auto b_type) { | ||
using ADataType = decltype(a_type); | ||
using BDataType = decltype(b_type); | ||
constexpr ck::index_t NumDim = 5; | ||
|
||
bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType, NumDim>( | ||
do_verification, init_method, do_log, time_kernel, lengths); | ||
|
||
return pass ? 0 : 1; | ||
}; | ||
|
||
if(data_type == DataType::F32_F32_F32_F32_F32) | ||
{ | ||
return profile(F32{}, F32{}); | ||
} | ||
else if(data_type == DataType::F16_F16_F16_F16_F16) | ||
{ | ||
return profile(F16{}, F16{}); | ||
} | ||
else | ||
{ | ||
std::cout << "this data_type & layout is not implemented" << std::endl; | ||
|
||
return 1; | ||
} | ||
} | ||
|
||
REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_transpose); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,35 @@ | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
#include <tuple> | ||
|
||
#include "gtest/gtest.h" | ||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" | ||
#include "test_transpose_util.hpp" | ||
#include "profiler/profile_transpose_impl.hpp" | ||
|
||
using F16 = ck::half_t; | ||
using F32 = float; | ||
using ck::index_t; | ||
|
||
template <typename Tuple> | ||
class TestTranspose : public ::testing::Test | ||
{ | ||
protected: | ||
using ADataType = std::tuple_element_t<0, Tuple>; | ||
using BDataType = std::tuple_element_t<1, Tuple>; | ||
|
||
void Run() | ||
{ | ||
std::vector<std::vector<ck::index_t>> lengths = { | ||
{4, 16, 16, 32, 5}, {8, 16, 16, 32, 8} /**{32, 16, 16, 32, 8},**/}; | ||
|
||
for(auto length : lengths) | ||
{ | ||
bool success = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>( | ||
true, 2, false, false, length); | ||
EXPECT_TRUE(success); | ||
} | ||
} | ||
}; | ||
|
||
// NOTE(review): this diff view has lost its +/- markers; the multi-line alias below and the one-line alias after it are presumably the removed and added forms of the same declaration -- confirm against the repository. | ||
// clang-format off | ||
using KernelTypes = ::testing::Types< | ||
std::tuple< F16, F16>, | ||
std::tuple< F32, F32> | ||
>; | ||
// clang-format on | ||
// Type list the fixture is instantiated with: (F16, F16) and (F32, F32). | ||
using KernelTypes = ::testing::Types<std::tuple<F16, F16>, std::tuple<F32, F32>>; | ||
|
||
// Binds the TestTranspose fixture to the KernelTypes list. | ||
TYPED_TEST_SUITE(TestTranspose, KernelTypes); | ||
|
||
//#include "test_transpose_ut_cases.inc" | ||
// NOTE(review): googletest instantiates a TYPED_TEST once per type in the suite's type list, so Test_FP16 and Test_FP32 each call Run() for both tuples -- confirm the duplication is intended. | ||
TYPED_TEST(TestTranspose, Test_FP16) { this->Run(); } | ||
TYPED_TEST(TestTranspose, Test_FP32) { this->Run(); } |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.