From 888894f3e424e298817cc8919edd052c4fdcc26c Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Thu, 12 Dec 2024 12:40:40 +0000 Subject: [PATCH 1/5] fix naive conv kernel for large tensors --- src/solver/conv/conv_direct_naive_conv.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/solver/conv/conv_direct_naive_conv.cpp b/src/solver/conv/conv_direct_naive_conv.cpp index 505f5c9376..fd9b5b1a48 100644 --- a/src/solver/conv/conv_direct_naive_conv.cpp +++ b/src/solver/conv/conv_direct_naive_conv.cpp @@ -357,11 +357,12 @@ GetConv2DFWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 256; + size_t block_size = 512; size_t grid_size = 1; if(problem.IsLayoutDefault()) { - grid_size = static_cast(n) * k; + // grid_size = static_cast(n) * k; + grid_size = (static_cast(n) * k + block_size - 1) / block_size; } else if(problem.IsLayoutNHWC()) { From 64a278fb5f117110ca100b810622b0c172294bed Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Thu, 12 Dec 2024 15:11:46 +0000 Subject: [PATCH 2/5] hand nhwc case for conv reference kernel --- src/solver/conv/conv_direct_naive_conv.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/solver/conv/conv_direct_naive_conv.cpp b/src/solver/conv/conv_direct_naive_conv.cpp index fd9b5b1a48..e22b73bcf6 100644 --- a/src/solver/conv/conv_direct_naive_conv.cpp +++ b/src/solver/conv/conv_direct_naive_conv.cpp @@ -361,12 +361,11 @@ GetConv2DFWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD size_t grid_size = 1; if(problem.IsLayoutDefault()) { - // grid_size = static_cast(n) * k; grid_size = (static_cast(n) * k + block_size - 1) / block_size; } else if(problem.IsLayoutNHWC()) { - grid_size = static_cast(group) * n * ho; + grid_size = (static_cast(group) * n * ho + block_size - 1) / block_size; } else MIOPEN_THROW("Unsupported layout"); From 166572671fca9a672387ba1b21a95adf7d448f7d Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Thu, 12 Dec 2024 15:37:06 +0000 Subject: [PATCH 3/5] fix bwd 2D and 3D --- src/solver/conv/conv_direct_naive_conv.cpp | 26 +++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/solver/conv/conv_direct_naive_conv.cpp b/src/solver/conv/conv_direct_naive_conv.cpp index e22b73bcf6..aeae185dc4 100644 --- a/src/solver/conv/conv_direct_naive_conv.cpp +++ b/src/solver/conv/conv_direct_naive_conv.cpp @@ -505,15 +505,17 @@ GetConv3DFWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 256; + size_t block_size = 512; size_t grid_size = 1; if(problem.IsLayoutDefault()) { - grid_size = static_cast(n) * k; + grid_size = (static_cast(n) * k + block_size - 1) / block_size; + ; } else if(problem.IsLayoutNHWC()) { - grid_size = static_cast(group) * n * do_; + grid_size = (static_cast(group) * n * do_ + block_size - 1) / block_size; + ; } else MIOPEN_THROW("Unsupported layout"); @@ -621,7 +623,7 @@ GetConv2DWRWSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 256; + size_t block_size = 512; size_t grid_size = static_cast(k); KernelInfo kernel; @@ -758,7 +760,7 @@ GetConv3DWRWSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 256; + size_t block_size = 512; size_t grid_size = static_cast(k); KernelInfo kernel; @@ -863,15 +865,15 @@ GetConv2DBWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 256; + size_t block_size = 512; size_t grid_size = 1; if(problem.IsLayoutDefault()) { - grid_size = static_cast(n) * c; + grid_size = (static_cast(n) * c + block_size - 1) / block_size; } else if(problem.IsLayoutNHWC()) { - grid_size = static_cast(group) * n * hi; + grid_size = (static_cast(group) * n * hi + block_size - 1) / block_size; } else { @@ -1013,15 +1015,17 @@ GetConv3DBWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 256; + size_t block_size = 512; size_t grid_size = 1; if(problem.IsLayoutDefault()) { - grid_size = static_cast(n) * c; + grid_size = (static_cast(n) * c + block_size - 1) / block_size; + ; } else if(problem.IsLayoutNHWC()) { - grid_size = static_cast(group) * n * di; + grid_size = (static_cast(group) * n * di + block_size - 1) / block_size; + ; } else { From 36fd522d6fb3f33a2d366abd429ab95ccdfe48d9 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Thu, 12 Dec 2024 18:07:53 +0000 Subject: [PATCH 4/5] handle smaller case too --- src/solver/conv/conv_direct_naive_conv.cpp | 87 ++++++++++++++++++---- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/src/solver/conv/conv_direct_naive_conv.cpp b/src/solver/conv/conv_direct_naive_conv.cpp index aeae185dc4..ea9f75ce87 100644 --- a/src/solver/conv/conv_direct_naive_conv.cpp +++ b/src/solver/conv/conv_direct_naive_conv.cpp @@ -357,15 +357,31 @@ GetConv2DFWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 512; + size_t block_size = 256; size_t grid_size = 1; if(problem.IsLayoutDefault()) { - grid_size = (static_cast(n) * k + block_size - 1) / block_size; + size_t all_workload = static_cast(n) * k; + if(all_workload <= block_size) + { + grid_size = all_workload; + } + else + { + grid_size = (all_workload + block_size - 1) / block_size; + } } else if(problem.IsLayoutNHWC()) { - grid_size = (static_cast(group) * n * ho + block_size - 1) / block_size; + size_t all_workload = static_cast(group) * n * ho; + if(all_workload <= block_size) + { + grid_size = all_workload; + } + else + { + grid_size = (all_workload + block_size - 1) / block_size; + } } else MIOPEN_THROW("Unsupported layout"); @@ -507,15 +523,30 @@ GetConv3DFWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD size_t block_size = 512; size_t grid_size = 1; + if(problem.IsLayoutDefault()) { - grid_size = (static_cast(n) * k + block_size - 1) / block_size; - ; + size_t all_workload = static_cast(n) * k; + if(all_workload <= block_size) + { + grid_size = all_workload; + } + else + { + grid_size = (all_workload + block_size - 1) / block_size; + } } else if(problem.IsLayoutNHWC()) { - grid_size = (static_cast(group) * n * do_ + block_size - 1) / block_size; - ; + size_t all_workload = static_cast(group) * n * do_; + if(all_workload <= block_size) + { + grid_size = all_workload; + } + else + { + grid_size = (all_workload + block_size - 1) / block_size; + } } else MIOPEN_THROW("Unsupported layout"); @@ -869,11 +900,27 @@ GetConv2DBWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD size_t grid_size = 1; if(problem.IsLayoutDefault()) { - grid_size = (static_cast(n) * c + block_size - 1) / block_size; + size_t all_workload = static_cast(n) * c; + if(all_workload <= block_size) + { + grid_size = all_workload; + } + else + { + grid_size = (all_workload + block_size - 1) / block_size; + } } else if(problem.IsLayoutNHWC()) { - grid_size = (static_cast(group) * n * hi + block_size - 1) / block_size; + size_t all_workload = static_cast(group) * n * hi; + if(all_workload <= block_size) + { + grid_size = all_workload; + } + else + { + grid_size = (all_workload + block_size - 1) / block_size; + } } else { @@ -1019,13 +1066,27 @@ GetConv3DBWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD size_t grid_size = 1; if(problem.IsLayoutDefault()) { - grid_size = (static_cast(n) * c + block_size - 1) / block_size; - ; + size_t all_workload = static_cast(n) * c; + if(all_workload <= block_size) + { + grid_size = all_workload; + } + else + { + grid_size = (all_workload + block_size - 1) / block_size; + } } else if(problem.IsLayoutNHWC()) { - grid_size = (static_cast(group) * n * di + block_size - 1) / block_size; - ; + size_t all_workload = static_cast(group) * n * di; + if(all_workload <= block_size) + { + grid_size = all_workload; + } + else + { + grid_size = (all_workload + block_size - 1) / block_size; + } } else { From 3554b20b20c76711bb1772a491ccde46fd004b01 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Thu, 12 Dec 2024 18:12:01 +0000 Subject: [PATCH 5/5] revert bloc_size --- src/solver/conv/conv_direct_naive_conv.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/solver/conv/conv_direct_naive_conv.cpp b/src/solver/conv/conv_direct_naive_conv.cpp index ea9f75ce87..bb591e3159 100644 --- a/src/solver/conv/conv_direct_naive_conv.cpp +++ b/src/solver/conv/conv_direct_naive_conv.cpp @@ -521,7 +521,7 @@ GetConv3DFWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 512; + size_t block_size = 256; size_t grid_size = 1; if(problem.IsLayoutDefault()) @@ -654,7 +654,7 @@ GetConv2DWRWSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 512; + size_t block_size = 256; size_t grid_size = static_cast(k); KernelInfo kernel; @@ -791,7 +791,7 @@ GetConv3DWRWSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 512; + size_t block_size = 256; size_t grid_size = static_cast(k); KernelInfo kernel; @@ -896,7 +896,7 @@ GetConv2DBWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 512; + size_t block_size = 256; size_t grid_size = 1; if(problem.IsLayoutDefault()) { @@ -1062,7 +1062,7 @@ GetConv3DBWDSolution(const ExecutionContext& ctx, const ::miopen::conv::ProblemD int c_per_group = c / group; int k_per_group = k / group; - size_t block_size = 512; + size_t block_size = 256; size_t grid_size = 1; if(problem.IsLayoutDefault()) {