-
Notifications
You must be signed in to change notification settings - Fork 498
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[XLA:GPU] Create cuda-specific api for the runtime to populate the tensor map parameter.
See child cl for how this is called. PiperOrigin-RevId: 715377639
- Loading branch information
1 parent
f80d088
commit 2940811
Showing
8 changed files
with
312 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/* Copyright 2025 The OpenXLA Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#include "xla/stream_executor/cuda/tma_util.h" | ||
|
||
#include "absl/status/status.h" | ||
#include "absl/status/statusor.h" | ||
#include "absl/strings/str_format.h" | ||
#include "third_party/gpus/cuda/include/cuda.h" | ||
#include "xla/stream_executor/gpu/tma_metadata.h" | ||
|
||
namespace stream_executor::gpu { | ||
|
||
// Picks the CUDA tensor map data type for a given element size. Sizes 1, 2, 4
// and 8 select the UINT8, UINT16, UINT32 and UINT64 data types respectively;
// any other size produces an InvalidArgument error that names the rejected
// value.
absl::StatusOr<CUtensorMapDataType> GetTensorMapDataType(int element_size) {
  if (element_size == 1) {
    return CU_TENSOR_MAP_DATA_TYPE_UINT8;
  }
  if (element_size == 2) {
    return CU_TENSOR_MAP_DATA_TYPE_UINT16;
  }
  if (element_size == 4) {
    return CU_TENSOR_MAP_DATA_TYPE_UINT32;
  }
  if (element_size == 8) {
    return CU_TENSOR_MAP_DATA_TYPE_UINT64;
  }
  // Nothing matched: report the offending size to the caller.
  return absl::InvalidArgumentError(
      absl::StrFormat("unsupported element size: %d", element_size));
}
|
||
// Translates a TmaDescriptor swizzle setting into the CUtensorMapSwizzle
// enumerator of the same name (kNone, k32B, k64B, k128B).
//
// The switch has no default label, so a value outside the listed enumerators
// falls off the end of the function.
// NOTE(review): confirm callers only ever pass one of the listed enumerators.
CUtensorMapSwizzle GetTensorMapSwizzle(TmaDescriptor::TmaSwizzle swizzle) {
  using Swizzle = TmaDescriptor::TmaSwizzle;
  switch (swizzle) {
    case Swizzle::kNone:
      return CU_TENSOR_MAP_SWIZZLE_NONE;
    case Swizzle::k32B:
      return CU_TENSOR_MAP_SWIZZLE_32B;
    case Swizzle::k64B:
      return CU_TENSOR_MAP_SWIZZLE_64B;
    case Swizzle::k128B:
      return CU_TENSOR_MAP_SWIZZLE_128B;
  }
}
|
||
// Translates a TmaDescriptor L2 promotion setting into the matching
// CUtensorMapL2promotion enumerator: kNone -> NONE, k64B -> L2_64B,
// k128B -> L2_128B, k256B -> L2_256B.
//
// The switch has no default label, so a value outside the listed enumerators
// falls off the end of the function.
// NOTE(review): confirm callers only ever pass one of the listed enumerators.
CUtensorMapL2promotion GetTensorMapL2Promotion(
    TmaDescriptor::TmaL2Promotion l2_promotion) {
  using L2Promotion = TmaDescriptor::TmaL2Promotion;
  switch (l2_promotion) {
    case L2Promotion::kNone:
      return CU_TENSOR_MAP_L2_PROMOTION_NONE;
    case L2Promotion::k64B:
      return CU_TENSOR_MAP_L2_PROMOTION_L2_64B;
    case L2Promotion::k128B:
      return CU_TENSOR_MAP_L2_PROMOTION_L2_128B;
    case L2Promotion::k256B:
      return CU_TENSOR_MAP_L2_PROMOTION_L2_256B;
  }
}
|
||
// Translates a TmaDescriptor float out-of-bounds fill setting into the
// matching CUtensorMapFloatOOBfill enumerator: kNone -> NONE,
// kNanRequestZeroFma -> NAN_REQUEST_ZERO_FMA.
//
// The switch has no default label, so a value outside the listed enumerators
// falls off the end of the function.
// NOTE(review): confirm callers only ever pass one of the listed enumerators.
CUtensorMapFloatOOBfill GetTensorMapFloatOOBFill(
    TmaDescriptor::TmaFloatOobFill oob_fill) {
  using OobFill = TmaDescriptor::TmaFloatOobFill;
  switch (oob_fill) {
    case OobFill::kNone:
      return CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE;
    case OobFill::kNanRequestZeroFma:
      return CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA;
  }
}
|
||
// Translates a TmaDescriptor interleave setting into the CUtensorMapInterleave
// enumerator of the same name (kNone, k16B, k32B).
//
// The switch has no default label, so a value outside the listed enumerators
// falls off the end of the function.
// NOTE(review): confirm callers only ever pass one of the listed enumerators.
CUtensorMapInterleave GetTensorMapInterleave(
    TmaDescriptor::TmaInterleave interleave) {
  using Interleave = TmaDescriptor::TmaInterleave;
  switch (interleave) {
    case Interleave::kNone:
      return CU_TENSOR_MAP_INTERLEAVE_NONE;
    case Interleave::k16B:
      return CU_TENSOR_MAP_INTERLEAVE_16B;
    case Interleave::k32B:
      return CU_TENSOR_MAP_INTERLEAVE_32B;
  }
}
|
||
} // namespace stream_executor::gpu |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
/* Copyright 2025 The OpenXLA Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#ifndef XLA_STREAM_EXECUTOR_CUDA_TMA_UTIL_H_ | ||
#define XLA_STREAM_EXECUTOR_CUDA_TMA_UTIL_H_ | ||
|
||
#include "absl/status/statusor.h" | ||
#include "third_party/gpus/cuda/include/cuda.h" | ||
#include "xla/stream_executor/gpu/tma_metadata.h" | ||
|
||
namespace stream_executor::gpu { | ||
|
||
// Returns the CUDA tensor map data type for `element_size`: sizes 1, 2, 4 and 8 | ||
// yield UINT8, UINT16, UINT32 and UINT64. Any other size returns an | ||
// InvalidArgument error. | ||
absl::StatusOr<CUtensorMapDataType> GetTensorMapDataType(int element_size); | ||
|
||
// Converts a TmaDescriptor swizzle setting (kNone, k32B, k64B, k128B) to the | ||
// CUtensorMapSwizzle enumerator of the same name. | ||
CUtensorMapSwizzle GetTensorMapSwizzle(TmaDescriptor::TmaSwizzle swizzle); | ||
|
||
// Converts a TmaDescriptor L2 promotion setting (kNone, k64B, k128B, k256B) to | ||
// the corresponding CUtensorMapL2promotion enumerator. | ||
CUtensorMapL2promotion GetTensorMapL2Promotion( | ||
TmaDescriptor::TmaL2Promotion l2_promotion); | ||
|
||
// Converts a TmaDescriptor float out-of-bounds fill setting (kNone, | ||
// kNanRequestZeroFma) to the corresponding CUtensorMapFloatOOBfill enumerator. | ||
CUtensorMapFloatOOBfill GetTensorMapFloatOOBFill( | ||
TmaDescriptor::TmaFloatOobFill oob_fill); | ||
|
||
// Converts a TmaDescriptor interleave setting (kNone, k16B, k32B) to the | ||
// CUtensorMapInterleave enumerator of the same name. | ||
CUtensorMapInterleave GetTensorMapInterleave( | ||
TmaDescriptor::TmaInterleave interleave); | ||
|
||
} // namespace stream_executor::gpu | ||
|
||
#endif // XLA_STREAM_EXECUTOR_CUDA_TMA_UTIL_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
/* Copyright 2025 The OpenXLA Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#include "xla/stream_executor/cuda/tma_util.h" | ||
|
||
#include <gmock/gmock.h> | ||
#include <gtest/gtest.h> | ||
#include "absl/status/status.h" | ||
#include "third_party/gpus/cuda/include/cuda.h" | ||
#include "xla/stream_executor/gpu/tma_metadata.h" | ||
#include "xla/tsl/platform/status_matchers.h" | ||
|
||
namespace stream_executor::gpu { | ||
namespace { | ||
|
||
using ::tsl::testing::IsOkAndHolds; | ||
using ::tsl::testing::StatusIs; | ||
|
||
// Each accepted element size (1, 2, 4, 8) must resolve to the UINT8, UINT16,
// UINT32 and UINT64 tensor map data type respectively.
TEST(TmaUtilTest, GetTensorMapDataTypeReturnsCorrectDataType) {
  struct Case {
    int element_size;
    CUtensorMapDataType expected;
  };
  const Case cases[] = {
      {1, CU_TENSOR_MAP_DATA_TYPE_UINT8},
      {2, CU_TENSOR_MAP_DATA_TYPE_UINT16},
      {4, CU_TENSOR_MAP_DATA_TYPE_UINT32},
      {8, CU_TENSOR_MAP_DATA_TYPE_UINT64},
  };
  for (const Case& test_case : cases) {
    EXPECT_THAT(GetTensorMapDataType(test_case.element_size),
                IsOkAndHolds(test_case.expected));
  }
}
|
||
// Element sizes that are not accepted (here 0 and 16) must come back as an
// InvalidArgument status.
TEST(TmaUtilTest, GetTensorMapDataTypeFailsGracefully) {
  const int unsupported_sizes[] = {0, 16};
  for (const int element_size : unsupported_sizes) {
    EXPECT_THAT(GetTensorMapDataType(element_size),
                StatusIs(absl::StatusCode::kInvalidArgument));
  }
}
|
||
// Every TmaSwizzle enumerator must convert to the CUtensorMapSwizzle value of
// the same name.
TEST(TmaUtilTest, GetTensorMapSwizzleReturnsCorrectSwizzle) {
  struct Case {
    TmaDescriptor::TmaSwizzle input;
    CUtensorMapSwizzle expected;
  };
  const Case cases[] = {
      {TmaDescriptor::TmaSwizzle::kNone, CU_TENSOR_MAP_SWIZZLE_NONE},
      {TmaDescriptor::TmaSwizzle::k32B, CU_TENSOR_MAP_SWIZZLE_32B},
      {TmaDescriptor::TmaSwizzle::k64B, CU_TENSOR_MAP_SWIZZLE_64B},
      {TmaDescriptor::TmaSwizzle::k128B, CU_TENSOR_MAP_SWIZZLE_128B},
  };
  for (const Case& test_case : cases) {
    EXPECT_EQ(GetTensorMapSwizzle(test_case.input), test_case.expected);
  }
}
|
||
// Every TmaL2Promotion enumerator must convert to its CUtensorMapL2promotion
// counterpart.
TEST(TmaUtilTest, GetTensorMapL2PromotionReturnsCorrectL2Promotion) {
  struct Case {
    TmaDescriptor::TmaL2Promotion input;
    CUtensorMapL2promotion expected;
  };
  const Case cases[] = {
      {TmaDescriptor::TmaL2Promotion::kNone, CU_TENSOR_MAP_L2_PROMOTION_NONE},
      {TmaDescriptor::TmaL2Promotion::k64B, CU_TENSOR_MAP_L2_PROMOTION_L2_64B},
      {TmaDescriptor::TmaL2Promotion::k128B,
       CU_TENSOR_MAP_L2_PROMOTION_L2_128B},
      {TmaDescriptor::TmaL2Promotion::k256B,
       CU_TENSOR_MAP_L2_PROMOTION_L2_256B},
  };
  for (const Case& test_case : cases) {
    EXPECT_EQ(GetTensorMapL2Promotion(test_case.input), test_case.expected);
  }
}
|
||
// Both TmaFloatOobFill enumerators must convert to their
// CUtensorMapFloatOOBfill counterparts.
TEST(TmaUtilTest, GetTensorMapFloatOobFillReturnsCorrectFloatOobFill) {
  using OobFill = TmaDescriptor::TmaFloatOobFill;

  const CUtensorMapFloatOOBfill none_fill =
      GetTensorMapFloatOOBFill(OobFill::kNone);
  EXPECT_EQ(none_fill, CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE);

  const CUtensorMapFloatOOBfill nan_fill =
      GetTensorMapFloatOOBFill(OobFill::kNanRequestZeroFma);
  EXPECT_EQ(nan_fill, CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA);
}
|
||
// Every TmaInterleave enumerator must convert to the CUtensorMapInterleave
// value of the same name.
TEST(TmaUtilTest, GetTensorMapInterleaveReturnsCorrectInterleave) {
  struct Case {
    TmaDescriptor::TmaInterleave input;
    CUtensorMapInterleave expected;
  };
  const Case cases[] = {
      {TmaDescriptor::TmaInterleave::kNone, CU_TENSOR_MAP_INTERLEAVE_NONE},
      {TmaDescriptor::TmaInterleave::k16B, CU_TENSOR_MAP_INTERLEAVE_16B},
      {TmaDescriptor::TmaInterleave::k32B, CU_TENSOR_MAP_INTERLEAVE_32B},
  };
  for (const Case& test_case : cases) {
    EXPECT_EQ(GetTensorMapInterleave(test_case.input), test_case.expected);
  }
}
|
||
} // namespace | ||
} // namespace stream_executor::gpu |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters