diff --git a/runtime/onert/core/include/ir/Coordinates.h b/runtime/onert/core/include/ir/Coordinates.h
index 9963cab4c1b..7ab19822ebf 100644
--- a/runtime/onert/core/include/ir/Coordinates.h
+++ b/runtime/onert/core/include/ir/Coordinates.h
@@ -119,8 +119,13 @@ class Coordinates final
   std::vector<int32_t> _coordinates;
 };
 
-Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
-                               Layout to_layout);
+/**
+ * @brief Convert coordinates for layout change
+ * @param[in] coords Coordinates to be converted
+ * @param[in] type   Permutation type to be applied to coordinates
+ * @return Converted coordinates based on permutation type
+ */
+Coordinates convertCoordinates(const Coordinates &coords, const PermuteType &type);
 
 } // namespace ir
 } // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
index 1dfa20720f4..d22229934b9 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -110,7 +110,7 @@ void PermuteLayer::optimize()
           const auto copy_len = loop_shape.dim(copy_axis) * data_size;
           loop_shape.dim(copy_axis) = 1;
 
-          appendPermuteTasks(src, dst, loop_shape, copy_len);
+          appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
         }
       }
       else
@@ -121,7 +121,7 @@ void PermuteLayer::optimize()
         const auto loop_shape = src_tensor.getShape();
         const auto copy_len = data_size;
 
-        appendPermuteTasks(src, dst, loop_shape, copy_len);
+        appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
       }
     });
   };
@@ -136,11 +136,12 @@ void PermuteLayer::optimize()
 }
 
 void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
-                                      const ir::Shape &loop_shape, size_t size)
+                                      const ir::Shape &loop_shape, size_t size,
+                                      const ir::PermuteType &permute_type)
 {
   size_t distributed_dim = 0;
   auto src_shape = src_tensor->getShape();
-  if (src_tensor->layout() == dst_tensor->layout())
+  if (permute_type == ir::PermuteType::COPY)
   {
     for (int i = 1; i < src_shape.rank() - 1; ++i)
     {
@@ -165,7 +166,8 @@ void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_te
     start_coords.set(distributed_dim, start);
     int end = start + (distributed_dim_val - start) / (thread_count - i);
     one_thread_loop_shape.dim(distributed_dim) = end - start;
-    tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
+    tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size,
+                       permute_type);
     start = end;
   }
   assert(tasks.size() >= 1);
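Note on the hunks above: appendPermuteTasks splits the distributed dimension into one contiguous [start, end) range per worker task, and the new permute_type argument simply rides along into each task. A minimal standalone sketch of the same start/end arithmetic (splitRanges is an illustrative name, not part of the runtime):

#include <cassert>
#include <utility>
#include <vector>

// Sketch: split `len` elements into `thread_count` contiguous [start, end)
// ranges using the same arithmetic as the emplace_back loop above. Floor
// division pushes any remainder onto the later ranges, and every element
// is assigned exactly once.
std::vector<std::pair<int, int>> splitRanges(int len, int thread_count)
{
  std::vector<std::pair<int, int>> ranges;
  int start = 0;
  for (int i = 0; i < thread_count; ++i)
  {
    int end = start + (len - start) / (thread_count - i);
    ranges.emplace_back(start, end);
    start = end;
  }
  assert(start == len);
  return ranges;
}
// e.g. splitRanges(10, 4) yields [0,2), [2,4), [4,7), [7,10)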
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
index 6ae6eb9bd62..59f9cfe8c81 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -47,7 +47,8 @@ class PermuteLayer : public onert::exec::IPermuteFunction
 
 private:
   void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
-                          const ir::Shape &loop_shape, size_t size);
+                          const ir::Shape &loop_shape, size_t size,
+                          const ir::PermuteType &permute_type);
 
   void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);
 
@@ -56,25 +57,23 @@ class PermuteLayer : public onert::exec::IPermuteFunction
     using Strides = ir::Coordinates;
 
     PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
-                      const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
+                      const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size,
+                      const ir::PermuteType &permute_type)
      : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
        _src_start_offset{src_tensor.calcOffset(start_coords)},
        _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
-        _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
-        _dst_layout{dst_tensor.layout()}, _is_permutation{true}
+        _loop_shape{loop_shape}, _size{size}, _permute_type{permute_type}
    {
      // Set strides
      setStrides(src_tensor, &_src_strides);
      setStrides(dst_tensor, &_dst_strides);
-
-      _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
    }
    // Constructor for a copy
    PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
                      uint32_t dst_start_offset, size_t size)
      : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
        _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0}, _loop_shape{1},
-        _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
+        _size{size}, _permute_type{ir::PermuteType::COPY}
    {
      // DO NOTHING
    }
@@ -90,9 +89,9 @@ class PermuteLayer : public onert::exec::IPermuteFunction
      size_t dst_offset = _dst_start_offset;
      assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
      ir::Coordinates dst_coords = coords;
-      if (_is_permutation)
+      if (_permute_type != ir::PermuteType::COPY && _loop_shape.rank() == 4)
      {
-        dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
+        dst_coords = ir::convertCoordinates(coords, _permute_type);
      }
      for (auto i = 0; i < _loop_shape.rank(); ++i)
      {
@@ -136,9 +135,7 @@ class PermuteLayer : public onert::exec::IPermuteFunction
    Strides _dst_strides;
    const ir::Shape _loop_shape;
    const size_t _size;
-    const ir::Layout _src_layout;
-    const ir::Layout _dst_layout;
-    bool _is_permutation;
+    const ir::PermuteType _permute_type;
  };
  std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
};
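Note on the header change above: the task now carries a single ir::PermuteType instead of two ir::Layout fields plus a cached _is_permutation flag, and the rank-4 test moves into run(), where the loop shape is known. A compilable sketch of the equivalent decision, using a stand-in enum (the real definition lives in the runtime's ir headers and is not shown in this diff):

#include <cstdint>

// Stand-in for onert's ir::PermuteType; members assumed from this diff.
enum class PermuteType
{
  NHWC_TO_NCHW,
  NCHW_TO_NHWC,
  COPY
};

// Replaces: _src_layout != _dst_layout && loop_shape.rank() == 4.
// COPY means "no reordering", so only a real permutation of a 4-D loop
// needs per-element coordinate conversion.
bool needsCoordinateConversion(PermuteType type, int32_t loop_rank)
{
  return type != PermuteType::COPY && loop_rank == 4;
}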
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.cc b/runtime/onert/core/src/exec/IPermuteFunction.cc
index 10794dba749..95198373c16 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.cc
+++ b/runtime/onert/core/src/exec/IPermuteFunction.cc
@@ -51,7 +51,8 @@ inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)
 
 // Quantize per element
 template <typename InputT, typename OutputT>
-void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+                         const ir::PermuteType &type)
 {
   const auto scale = dst_tensor->data_scale();
   const auto zero_point = dst_tensor->data_zero_point();
@@ -60,17 +61,14 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d
   int max_val = std::numeric_limits<OutputT>::max();
 
   auto loop_shape = src_tensor->getShape();
-  const auto src_layout = src_tensor->layout();
-  const auto dst_layout = dst_tensor->layout();
-  const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+  const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
   ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
     const InputT *input_data =
       reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
     int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
     int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
 
-    ir::Coordinates dst_coords =
-      is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+    ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
     OutputT *output_data =
       reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
     *output_data = clamped;
@@ -79,10 +77,11 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d
 
 // TODO Optimize the case where tensors has the same layout
 template <typename InputT, typename OutputT>
-void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+              const ir::PermuteType &type)
 {
-  if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
-      src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+  if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
+      !src_tensor->is_dynamic())
   {
     assert(!dst_tensor->is_dynamic());
 
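The elementwise path above quantizes one value at a time: scale to an integer, shift by the zero point, then clamp to the output type's range. A self-contained sketch of that arithmetic (quantizeOne is an illustrative name):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// q = clamp(round(x / scale) + zero_point, [min(OutputT), max(OutputT)]),
// mirroring the unclamped/clamped lines in elementwiseQuantize above.
template <typename OutputT> OutputT quantizeOne(float x, float scale, int32_t zero_point)
{
  const int32_t min_val = std::numeric_limits<OutputT>::min();
  const int32_t max_val = std::numeric_limits<OutputT>::max();
  const int32_t unclamped = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<OutputT>(std::min(std::max(unclamped, min_val), max_val));
}
// e.g. quantizeOne<uint8_t>(300.f, 1.f, 0) clamps to 255;
//      quantizeOne<uint8_t>(0.5f, 1.f / 255.f, 0) rounds 127.5 up to 128.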
@@ -94,28 +93,26 @@ void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
   }
   else
   {
-    elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
+    elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor, type);
   }
 }
 
 // Dequantize per element
 template <typename InputT, typename OutputT>
-void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+                           const ir::PermuteType &type)
 {
   const auto scale = src_tensor->data_scale();
   const auto zero_point = src_tensor->data_zero_point();
 
   auto loop_shape = src_tensor->getShape();
-  const auto src_layout = src_tensor->layout();
-  const auto dst_layout = dst_tensor->layout();
-  const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+  const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
   ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
     const InputT *input_data =
       reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
     const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));
 
-    ir::Coordinates dst_coords =
-      is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+    ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
     OutputT *output_data =
       reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
     *output_data = result;
@@ -124,10 +121,11 @@ void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor
 
 // TODO Optimize the case where tensors has the same layout
 template <typename InputT, typename OutputT>
-void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+                const ir::PermuteType &type)
 {
-  if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
-      src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+  if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
+      !src_tensor->is_dynamic())
   {
     assert(!dst_tensor->is_dynamic());
 
@@ -139,7 +137,7 @@ void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor
   }
   else
   {
-    elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
+    elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor, type);
   }
 }
 
@@ -147,7 +145,7 @@ template <typename SRC_T, typename DST_T,
           std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
                              std::is_base_of<backend::ITensor, DST_T>::value,
                            bool> = true>
-void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
+void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor, const ir::PermuteType &type)
 {
   // TODO Support other types
   if (src_tensor->data_type() == ir::DataType::FLOAT32)
@@ -156,17 +154,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
     {
       case ir::DataType::QUANT_UINT8_ASYMM:
       {
-        quantize<float, uint8_t>(src_tensor, dst_tensor);
+        quantize<float, uint8_t>(src_tensor, dst_tensor, type);
         break;
       }
       case ir::DataType::QUANT_INT8_SYMM:
       {
-        quantize<float, int8_t>(src_tensor, dst_tensor);
+        quantize<float, int8_t>(src_tensor, dst_tensor, type);
         break;
       }
       case ir::DataType::QUANT_INT16_SYMM:
       {
-        quantize<float, int16_t>(src_tensor, dst_tensor);
+        quantize<float, int16_t>(src_tensor, dst_tensor, type);
         break;
       }
       default:
@@ -182,17 +180,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
     {
       case ir::DataType::QUANT_UINT8_ASYMM:
       {
-        dequantize<uint8_t, float>(src_tensor, dst_tensor);
+        dequantize<uint8_t, float>(src_tensor, dst_tensor, type);
         break;
       }
       case ir::DataType::QUANT_INT8_SYMM:
       {
-        dequantize<int8_t, float>(src_tensor, dst_tensor);
+        dequantize<int8_t, float>(src_tensor, dst_tensor, type);
         break;
       }
       case ir::DataType::QUANT_INT16_SYMM:
       {
-        dequantize<int16_t, float>(src_tensor, dst_tensor);
+        dequantize<int16_t, float>(src_tensor, dst_tensor, type);
         break;
       }
       default:
@@ -256,7 +254,7 @@ void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *d
   assert(src_tensor != dst_tensor);
   if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
   {
-    typeAwareQuantize(src_tensor, dst_tensor);
+    typeAwareQuantize(src_tensor, dst_tensor, permute_type);
     return;
   }
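Across this file the rewrite is mechanical: every signature gains a const ir::PermuteType &type, and the old src/dst layout comparison becomes type == ir::PermuteType::COPY. The fast path is taken only when nothing blocks a straight linear walk over the buffer; a sketch of that predicate (PermuteType and TensorInfo are stand-ins, redeclared so the sketch stands alone):

// Mirrors the fast-path condition in quantize()/dequantize() above: a single
// linear pass is valid only with no reordering (COPY), no padding, and a
// static shape.
enum class PermuteType
{
  NHWC_TO_NCHW,
  NCHW_TO_NHWC,
  COPY
};

struct TensorInfo
{
  bool has_padding;
  bool is_dynamic;
};

bool canCopyLinearly(const TensorInfo &src, const TensorInfo &dst, PermuteType type)
{
  return !src.has_padding && !dst.has_padding && type == PermuteType::COPY && !src.is_dynamic;
}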
diff --git a/runtime/onert/core/src/ir/Coordinates.cc b/runtime/onert/core/src/ir/Coordinates.cc
index a02a56567ef..161b706bfd4 100644
--- a/runtime/onert/core/src/ir/Coordinates.cc
+++ b/runtime/onert/core/src/ir/Coordinates.cc
@@ -23,24 +23,25 @@ namespace onert
 namespace ir
 {
 
-Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
-                               Layout to_layout)
+Coordinates convertCoordinates(const Coordinates &coords, const PermuteType &type)
 {
-  assert(from_coordinates.size() == 4);
-  Coordinates to{from_coordinates};
-  if (from_layout == Layout::NHWC && to_layout == Layout::NCHW)
+  assert(coords.size() == 4);
+  Coordinates to{coords};
+  if (type == PermuteType::COPY)
+    return to;
+
+  if (type == PermuteType::NHWC_TO_NCHW)
   {
-    to.set(0, from_coordinates[0]);
-    to.set(1, from_coordinates[3]);
-    to.set(2, from_coordinates[1]);
-    to.set(3, from_coordinates[2]);
+    to.set(1, coords[3]);
+    to.set(2, coords[1]);
+    to.set(3, coords[2]);
   }
-  else if (from_layout == Layout::NCHW && to_layout == Layout::NHWC)
+  else
   {
-    to.set(0, from_coordinates[0]);
-    to.set(1, from_coordinates[2]);
-    to.set(2, from_coordinates[3]);
-    to.set(3, from_coordinates[1]);
+    assert(type == PermuteType::NCHW_TO_NHWC);
+    to.set(1, coords[2]);
+    to.set(2, coords[3]);
+    to.set(3, coords[1]);
   }
 
   return to;
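Worked check of the new branches above: an NHWC coordinate (n, h, w, c) maps to NCHW as (n, c, h, w), and the NCHW_TO_NHWC branch inverts it exactly. A standalone sketch on plain arrays:

#include <array>
#include <cassert>

int main()
{
  const std::array<int, 4> nhwc{0, 2, 3, 1}; // n=0, h=2, w=3, c=1

  // NHWC_TO_NCHW branch: to[1] = coords[3], to[2] = coords[1], to[3] = coords[2]
  const std::array<int, 4> nchw{nhwc[0], nhwc[3], nhwc[1], nhwc[2]};
  assert((nchw == std::array<int, 4>{0, 1, 2, 3})); // n=0, c=1, h=2, w=3

  // NCHW_TO_NHWC branch: to[1] = coords[2], to[2] = coords[3], to[3] = coords[1]
  const std::array<int, 4> back{nchw[0], nchw[2], nchw[3], nchw[1]};
  assert(back == nhwc); // round trip restores the original coordinate
  return 0;
}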