[onert] Use permute type on permute task and coordinate conversion (#13805)

This commit changes the appendPermuteTasks() and convertCoordinates() functions to use the permute type.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
hseok-oh authored Aug 28, 2024
1 parent 6b763ab commit 9b6e7d9
Showing 5 changed files with 64 additions and 61 deletions.
9 changes: 7 additions & 2 deletions runtime/onert/core/include/ir/Coordinates.h
@@ -119,8 +119,13 @@ class Coordinates final
std::vector<int32_t> _coordinates;
};

Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
Layout to_layout);
/**
* @brief Convert coordinate for layout change
* @param[in] coord Coordinates to be converted
* @param[in] type Permutation type to be applied to coordinates
* @return Converted coordinates based on permutation type
*/
Coordinates convertCoordinates(const Coordinates &coords, const PermuteType &type);

} // namespace ir
} // namespace onert
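For reference, the call-site shape before and after this commit, taken from PermuteWorkerTask::Run() in the PermuteLayer.h diff below (other call sites follow the same pattern):

  // before: the permutation was re-derived from a pair of layouts at each call
  dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);

  // after: a permute type computed once up front is passed explicitly
  dst_coords = ir::convertCoordinates(coords, _permute_type);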
12 changes: 7 additions & 5 deletions runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -110,7 +110,7 @@ void PermuteLayer::optimize()
const auto copy_len = loop_shape.dim(copy_axis) * data_size;
loop_shape.dim(copy_axis) = 1;

appendPermuteTasks(src, dst, loop_shape, copy_len);
appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
}
}
else
@@ -121,7 +121,7 @@ void PermuteLayer::optimize()
const auto loop_shape = src_tensor.getShape();
const auto copy_len = data_size;

appendPermuteTasks(src, dst, loop_shape, copy_len);
appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
}
});
};
@@ -136,11 +136,12 @@ void PermuteLayer::optimize()
}

void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
const ir::Shape &loop_shape, size_t size)
const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type)
{
size_t distributed_dim = 0;
auto src_shape = src_tensor->getShape();
if (src_tensor->layout() == dst_tensor->layout())
if (permute_type == ir::PermuteType::COPY)
{
for (int i = 1; i < src_shape.rank() - 1; ++i)
{
@@ -165,7 +166,8 @@ void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_te
start_coords.set(distributed_dim, start);
int end = start + (distributed_dim_val - start) / (thread_count - i);
one_thread_loop_shape.dim(distributed_dim) = end - start;
tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size,
permute_type);
start = end;
}
assert(tasks.size() >= 1);
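The splitting arithmetic above is unchanged by this commit; only the trailing permute_type argument is new. A minimal self-contained sketch of how one loop dimension is divided across worker tasks (illustrative names and values, not the real onert code):

#include <cassert>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
  const int distributed_dim_val = 10; // extent of the dimension being split
  const int thread_count = 3;

  std::vector<std::pair<int, int>> ranges; // one [start, end) range per task
  int start = 0;
  for (int i = 0; i < thread_count; ++i)
  {
    // Same arithmetic as appendPermuteTasks(): the remaining extent is
    // divided evenly over the threads not yet assigned work.
    const int end = start + (distributed_dim_val - start) / (thread_count - i);
    if (end > start)
      ranges.emplace_back(start, end);
    start = end;
  }
  assert(ranges.size() >= 1);
  for (const auto &r : ranges)
    std::cout << "[" << r.first << ", " << r.second << ")\n"; // [0, 3) [3, 6) [6, 10)
  return 0;
}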
21 changes: 9 additions & 12 deletions runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -47,7 +47,8 @@ class PermuteLayer : public onert::exec::IPermuteFunction

private:
void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
const ir::Shape &loop_shape, size_t size);
const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type);

void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);

@@ -56,25 +57,23 @@ class PermuteLayer : public onert::exec::IPermuteFunction
using Strides = ir::Coordinates;

PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type)
: _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
_src_start_offset{src_tensor.calcOffset(start_coords)},
_dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
_loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
_dst_layout{dst_tensor.layout()}, _is_permutation{true}
_loop_shape{loop_shape}, _size{size}, _permute_type{permute_type}
{
// Set strides
setStrides(src_tensor, &_src_strides);
setStrides(dst_tensor, &_dst_strides);

_is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
}
// Constructor for a copy
PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
uint32_t dst_start_offset, size_t size)
: _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
_dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0}, _loop_shape{1},
_size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
_size{size}, _permute_type{ir::PermuteType::COPY}
{
// DO NOTHING
}
@@ -90,9 +89,9 @@ class PermuteLayer : public onert::exec::IPermuteFunction
size_t dst_offset = _dst_start_offset;
assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
ir::Coordinates dst_coords = coords;
if (_is_permutation)
if (_permute_type != ir::PermuteType::COPY && _loop_shape.rank() == 4)
{
dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
dst_coords = ir::convertCoordinates(coords, _permute_type);
}
for (auto i = 0; i < _loop_shape.rank(); ++i)
{
@@ -136,9 +135,7 @@ class PermuteLayer : public onert::exec::IPermuteFunction
Strides _dst_strides;
const ir::Shape _loop_shape;
const size_t _size;
const ir::Layout _src_layout;
const ir::Layout _dst_layout;
bool _is_permutation;
const ir::PermuteType _permute_type;
};
std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
};
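The net effect of this header change: the task no longer stores _src_layout, _dst_layout, and a derived _is_permutation flag; a single _permute_type field carries the same information, with ir::PermuteType::COPY doubling as "no conversion needed". The decision the constructor used to precompute now sits in Run() as a simple predicate; a sketch with a stand-in enum:

enum class PermuteType { COPY, NHWC_TO_NCHW, NCHW_TO_NHWC };

// Mirrors the check in PermuteWorkerTask::Run(): coordinates are converted
// only for an actual layout change, and only for 4D shapes, where the
// NHWC/NCHW axis mapping is defined.
bool needsConversion(PermuteType type, int loop_rank)
{
  return type != PermuteType::COPY && loop_rank == 4;
}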
54 changes: 26 additions & 28 deletions runtime/onert/core/src/exec/IPermuteFunction.cc
@@ -51,7 +51,8 @@ inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)

// Quantize per element
template <typename InputT, typename OutputT>
void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
const auto scale = dst_tensor->data_scale();
const auto zero_point = dst_tensor->data_zero_point();
@@ -60,17 +61,14 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d
int max_val = std::numeric_limits<OutputT>::max();

auto loop_shape = src_tensor->getShape();
const auto src_layout = src_tensor->layout();
const auto dst_layout = dst_tensor->layout();
const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
const InputT *input_data =
reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
int32_t clamped = std::min(std::max(unclamped, min_val), max_val);

ir::Coordinates dst_coords =
is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
OutputT *output_data =
reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
*output_data = clamped;
Expand All @@ -79,10 +77,11 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d

// TODO Optimize the case where tensors has the same layout
template <typename InputT, typename OutputT>
void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
!src_tensor->is_dynamic())
{
assert(!dst_tensor->is_dynamic());

@@ -94,28 +93,26 @@ void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
}
else
{
elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor, type);
}
}

// Dequantize per element
template <typename InputT, typename OutputT>
void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
const auto scale = src_tensor->data_scale();
const auto zero_point = src_tensor->data_zero_point();

auto loop_shape = src_tensor->getShape();
const auto src_layout = src_tensor->layout();
const auto dst_layout = dst_tensor->layout();
const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
const InputT *input_data =
reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));

ir::Coordinates dst_coords =
is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
OutputT *output_data =
reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
*output_data = result;
Expand All @@ -124,10 +121,11 @@ void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor

// TODO Optimize the case where tensors has the same layout
template <typename InputT, typename OutputT>
void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
!src_tensor->is_dynamic())
{
assert(!dst_tensor->is_dynamic());

@@ -139,15 +137,15 @@ void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor
}
else
{
elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor, type);
}
}

template <typename SRC_T, typename DST_T,
std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
std::is_base_of<backend::ITensor, DST_T>::value,
bool> = true>
void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor, const ir::PermuteType &type)
{
// TODO Support other types
if (src_tensor->data_type() == ir::DataType::FLOAT32)
@@ -156,17 +154,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
{
case ir::DataType::QUANT_UINT8_ASYMM:
{
quantize<float, uint8_t>(src_tensor, dst_tensor);
quantize<float, uint8_t>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT8_SYMM:
{
quantize<float, int8_t>(src_tensor, dst_tensor);
quantize<float, int8_t>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT16_SYMM:
{
quantize<float, int16_t>(src_tensor, dst_tensor);
quantize<float, int16_t>(src_tensor, dst_tensor, type);
break;
}
default:
@@ -182,17 +180,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
{
case ir::DataType::QUANT_UINT8_ASYMM:
{
dequantize<uint8_t, float>(src_tensor, dst_tensor);
dequantize<uint8_t, float>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT8_SYMM:
{
dequantize<int8_t, float>(src_tensor, dst_tensor);
dequantize<int8_t, float>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT16_SYMM:
{
dequantize<int16_t, float>(src_tensor, dst_tensor);
dequantize<int16_t, float>(src_tensor, dst_tensor, type);
break;
}
default:
@@ -256,7 +254,7 @@ void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *d
assert(src_tensor != dst_tensor);
if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
{
typeAwareQuantize(src_tensor, dst_tensor);
typeAwareQuantize(src_tensor, dst_tensor, permute_type);
return;
}

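The per-element arithmetic itself is untouched by this commit: elementwiseQuantize() still rounds, shifts by the zero point, and clamps at each (possibly converted) coordinate. A standalone sketch of that single step, with illustrative scale and zero-point values:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

uint8_t quantizeOne(float input, float scale, int32_t zero_point)
{
  constexpr int32_t min_val = std::numeric_limits<uint8_t>::min();
  constexpr int32_t max_val = std::numeric_limits<uint8_t>::max();
  // Round to the nearest quantized level, shift, then clamp to the
  // representable output range, as in elementwiseQuantize().
  const int32_t unclamped =
    static_cast<int32_t>(std::round(input / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(std::max(unclamped, min_val), max_val));
}

int main()
{
  // 100.0f / 0.5f = 200, plus zero point 10 = 210, inside [0, 255].
  std::cout << static_cast<int>(quantizeOne(100.0f, 0.5f, 10)) << "\n";
  return 0;
}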
29 changes: 15 additions & 14 deletions runtime/onert/core/src/ir/Coordinates.cc
@@ -23,24 +23,25 @@ namespace onert
namespace ir
{

Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
Layout to_layout)
Coordinates convertCoordinates(const Coordinates &coords, const PermuteType &type)
{
assert(from_coordinates.size() == 4);
Coordinates to{from_coordinates};
if (from_layout == Layout::NHWC && to_layout == Layout::NCHW)
assert(coords.size() == 4);
Coordinates to{coords};
if (type == PermuteType::COPY)
return to;

if (type == PermuteType::NHWC_TO_NCHW)
{
to.set(0, from_coordinates[0]);
to.set(1, from_coordinates[3]);
to.set(2, from_coordinates[1]);
to.set(3, from_coordinates[2]);
to.set(1, coords[3]);
to.set(2, coords[1]);
to.set(3, coords[2]);
}
else if (from_layout == Layout::NCHW && to_layout == Layout::NHWC)
else
{
to.set(0, from_coordinates[0]);
to.set(1, from_coordinates[2]);
to.set(2, from_coordinates[3]);
to.set(3, from_coordinates[1]);
assert(type == PermuteType::NCHW_TO_NHWC);
to.set(1, coords[2]);
to.set(2, coords[3]);
to.set(3, coords[1]);
}

return to;
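A self-contained model of the rewritten conversion, with a round-trip check (Coords is a stand-in for ir::Coordinates here; the real class lives in ir/Coordinates.h):

#include <array>
#include <cassert>
#include <cstdint>

enum class PermuteType { COPY, NHWC_TO_NCHW, NCHW_TO_NHWC };
using Coords = std::array<int32_t, 4>;

// Same mapping as Coordinates.cc above: the batch axis (index 0) never
// moves; the other three axes rotate according to the permute type.
Coords convert(const Coords &coords, PermuteType type)
{
  Coords to = coords;
  if (type == PermuteType::COPY)
    return to;
  if (type == PermuteType::NHWC_TO_NCHW)
  {
    to[1] = coords[3]; // C
    to[2] = coords[1]; // H
    to[3] = coords[2]; // W
  }
  else // PermuteType::NCHW_TO_NHWC
  {
    to[1] = coords[2]; // H
    to[2] = coords[3]; // W
    to[3] = coords[1]; // C
  }
  return to;
}

int main()
{
  const Coords nhwc{0, 1, 2, 3}; // (n, h, w, c)
  const Coords nchw = convert(nhwc, PermuteType::NHWC_TO_NCHW);
  assert((nchw == Coords{0, 3, 1, 2})); // (n, c, h, w)
  // The two non-COPY types are inverses of each other.
  assert(convert(nchw, PermuteType::NCHW_TO_NHWC) == nhwc);
  return 0;
}

The COPY early-return is what lets callers treat "same layout" and "no permutation needed" uniformly, which is why the layout-pair comparisons elsewhere in this commit collapse into a single check against PermuteType::COPY.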
