From ba9c6035c193a6d273a83a4b1369eca813e3cbeb Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Tue, 28 Jan 2025 17:32:15 -0800 Subject: [PATCH] Remove inline default transposeHelper and ensure we use the proper check via CanUse_hipBlasTransposeHelper_MLFloat16 --- onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h b/onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h index 39d5306b15b7e..9d32fcb65d0d5 100644 --- a/onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h +++ b/onnxruntime/core/providers/rocm/shared_inc/fpgeneric.h @@ -501,7 +501,6 @@ inline hipblasStatus_t hipblasTransposeHelper(hipStream_t /*stream*/, hipblasHan return hipblasDgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } -inline bool CanUse_hipblasTransposeHelper_MLFloat16(int /*m*/, int /*n*/) { return true; } // CUDA has a limited grid size of 65536, ROCm has higher limits. hipblasStatus_t hipblasTransposeHelper(hipStream_t stream, hipblasHandle_t, hipblasOperation_t, hipblasOperation_t, int m, int n, const half*, const half* A, int, const half*, const half*, int, half* C, int); // copy