[onert] Use permute type on permute task and coordinate conversion (#13805)

This commit changes the appendPermuteTasks() and convertCoordinates() functions to use the permute type.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
hseok-oh authored Aug 28, 2024
1 parent 6b763ab commit 9b6e7d9
Showing 5 changed files with 64 additions and 61 deletions.
9 changes: 7 additions & 2 deletions runtime/onert/core/include/ir/Coordinates.h
@@ -119,8 +119,13 @@ class Coordinates final
std::vector<int32_t> _coordinates;
};

Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
Layout to_layout);
/**
* @brief Convert coordinate for layout change
* @param[in] coord Coordinates to be converted
* @param[in] type Permutation type to be applied to coordinates
* @return Converted coordinates based on permutation type
*/
Coordinates convertCoordinates(const Coordinates &coords, const PermuteType &type);

} // namespace ir
} // namespace onert
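For reference, the call-site shape before and after this commit, taken from PermuteWorkerTask::Run() in the PermuteLayer.h diff below (other call sites follow the same pattern):

  // before: the permutation was re-derived from a pair of layouts at each call
  dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);

  // after: a permute type computed once up front is passed explicitly
  dst_coords = ir::convertCoordinates(coords, _permute_type);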
12 changes: 7 additions & 5 deletions runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -110,7 +110,7 @@ void PermuteLayer::optimize()
const auto copy_len = loop_shape.dim(copy_axis) * data_size;
loop_shape.dim(copy_axis) = 1;

appendPermuteTasks(src, dst, loop_shape, copy_len);
appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
}
}
else
@@ -121,7 +121,7 @@ void PermuteLayer::optimize()
const auto loop_shape = src_tensor.getShape();
const auto copy_len = data_size;

appendPermuteTasks(src, dst, loop_shape, copy_len);
appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
}
});
};
@@ -136,11 +136,12 @@ void PermuteLayer::optimize()
}

void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
const ir::Shape &loop_shape, size_t size)
const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type)
{
size_t distributed_dim = 0;
auto src_shape = src_tensor->getShape();
if (src_tensor->layout() == dst_tensor->layout())
if (permute_type == ir::PermuteType::COPY)
{
for (int i = 1; i < src_shape.rank() - 1; ++i)
{
@@ -165,7 +166,8 @@ void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_te
start_coords.set(distributed_dim, start);
int end = start + (distributed_dim_val - start) / (thread_count - i);
one_thread_loop_shape.dim(distributed_dim) = end - start;
tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size,
permute_type);
start = end;
}
assert(tasks.size() >= 1);
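The splitting arithmetic above is unchanged by this commit; only the trailing permute_type argument is new. A minimal self-contained sketch of how one loop dimension is divided across worker tasks (illustrative names and values, not the real onert code):

#include <cassert>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
  const int distributed_dim_val = 10; // extent of the dimension being split
  const int thread_count = 3;

  std::vector<std::pair<int, int>> ranges; // one [start, end) range per task
  int start = 0;
  for (int i = 0; i < thread_count; ++i)
  {
    // Same arithmetic as appendPermuteTasks(): the remaining extent is
    // divided evenly over the threads not yet assigned work.
    const int end = start + (distributed_dim_val - start) / (thread_count - i);
    if (end > start)
      ranges.emplace_back(start, end);
    start = end;
  }
  assert(ranges.size() >= 1);
  for (const auto &r : ranges)
    std::cout << "[" << r.first << ", " << r.second << ")\n"; // [0, 3) [3, 6) [6, 10)
  return 0;
}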
21 changes: 9 additions & 12 deletions runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -47,7 +47,8 @@ class PermuteLayer : public onert::exec::IPermuteFunction

private:
void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
const ir::Shape &loop_shape, size_t size);
const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type);

void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);

@@ -56,25 +57,23 @@ class PermuteLayer : public onert::exec::IPermuteFunction
using Strides = ir::Coordinates;

PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type)
: _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
_src_start_offset{src_tensor.calcOffset(start_coords)},
_dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
_loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
_dst_layout{dst_tensor.layout()}, _is_permutation{true}
_loop_shape{loop_shape}, _size{size}, _permute_type{permute_type}
{
// Set strides
setStrides(src_tensor, &_src_strides);
setStrides(dst_tensor, &_dst_strides);

_is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
}
// Constructor for a copy
PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
uint32_t dst_start_offset, size_t size)
: _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
_dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0}, _loop_shape{1},
_size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
_size{size}, _permute_type{ir::PermuteType::COPY}
{
// DO NOTHING
}
@@ -90,9 +89,9 @@ class PermuteLayer : public onert::exec::IPermuteFunction
size_t dst_offset = _dst_start_offset;
assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
ir::Coordinates dst_coords = coords;
if (_is_permutation)
if (_permute_type != ir::PermuteType::COPY && _loop_shape.rank() == 4)
{
dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
dst_coords = ir::convertCoordinates(coords, _permute_type);
}
for (auto i = 0; i < _loop_shape.rank(); ++i)
{
@@ -136,9 +135,7 @@ class PermuteLayer : public onert::exec::IPermuteFunction
Strides _dst_strides;
const ir::Shape _loop_shape;
const size_t _size;
const ir::Layout _src_layout;
const ir::Layout _dst_layout;
bool _is_permutation;
const ir::PermuteType _permute_type;
};
std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
};
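The net effect of this header change: the task no longer stores _src_layout, _dst_layout, and a derived _is_permutation flag; a single _permute_type field carries the same information, with ir::PermuteType::COPY doubling as "no conversion needed". The decision the constructor used to precompute now sits in Run() as a simple predicate; a sketch with a stand-in enum:

enum class PermuteType { COPY, NHWC_TO_NCHW, NCHW_TO_NHWC };

// Mirrors the check in PermuteWorkerTask::Run(): coordinates are converted
// only for an actual layout change, and only for 4D shapes, where the
// NHWC/NCHW axis mapping is defined.
bool needsConversion(PermuteType type, int loop_rank)
{
  return type != PermuteType::COPY && loop_rank == 4;
}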
54 changes: 26 additions & 28 deletions runtime/onert/core/src/exec/IPermuteFunction.cc
@@ -51,7 +51,8 @@ inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)

// Quantize per element
template <typename InputT, typename OutputT>
void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
const auto scale = dst_tensor->data_scale();
const auto zero_point = dst_tensor->data_zero_point();
@@ -60,17 +61,14 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d
int max_val = std::numeric_limits<OutputT>::max();

auto loop_shape = src_tensor->getShape();
const auto src_layout = src_tensor->layout();
const auto dst_layout = dst_tensor->layout();
const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
const InputT *input_data =
reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
int32_t clamped = std::min(std::max(unclamped, min_val), max_val);

ir::Coordinates dst_coords =
is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
OutputT *output_data =
reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
*output_data = clamped;
Expand All @@ -79,10 +77,11 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d

// TODO Optimize the case where tensors has the same layout
template <typename InputT, typename OutputT>
void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
!src_tensor->is_dynamic())
{
assert(!dst_tensor->is_dynamic());

@@ -94,28 +93,26 @@ void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
}
else
{
elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor, type);
}
}

// Dequantize per element
template <typename InputT, typename OutputT>
void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
const auto scale = src_tensor->data_scale();
const auto zero_point = src_tensor->data_zero_point();

auto loop_shape = src_tensor->getShape();
const auto src_layout = src_tensor->layout();
const auto dst_layout = dst_tensor->layout();
const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
const InputT *input_data =
reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));

ir::Coordinates dst_coords =
is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
OutputT *output_data =
reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
*output_data = result;
Expand All @@ -124,10 +121,11 @@ void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor

// TODO Optimize the case where tensors has the same layout
template <typename InputT, typename OutputT>
void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
!src_tensor->is_dynamic())
{
assert(!dst_tensor->is_dynamic());

@@ -139,15 +137,15 @@ void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor
}
else
{
elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor, type);
}
}

template <typename SRC_T, typename DST_T,
std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
std::is_base_of<backend::ITensor, DST_T>::value,
bool> = true>
void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor, const ir::PermuteType &type)
{
// TODO Support other types
if (src_tensor->data_type() == ir::DataType::FLOAT32)
@@ -156,17 +154,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
{
case ir::DataType::QUANT_UINT8_ASYMM:
{
quantize<float, uint8_t>(src_tensor, dst_tensor);
quantize<float, uint8_t>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT8_SYMM:
{
quantize<float, int8_t>(src_tensor, dst_tensor);
quantize<float, int8_t>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT16_SYMM:
{
quantize<float, int16_t>(src_tensor, dst_tensor);
quantize<float, int16_t>(src_tensor, dst_tensor, type);
break;
}
default:
@@ -182,17 +180,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
{
case ir::DataType::QUANT_UINT8_ASYMM:
{
dequantize<uint8_t, float>(src_tensor, dst_tensor);
dequantize<uint8_t, float>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT8_SYMM:
{
dequantize<int8_t, float>(src_tensor, dst_tensor);
dequantize<int8_t, float>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT16_SYMM:
{
dequantize<int16_t, float>(src_tensor, dst_tensor);
dequantize<int16_t, float>(src_tensor, dst_tensor, type);
break;
}
default:
@@ -256,7 +254,7 @@ void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *d
assert(src_tensor != dst_tensor);
if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
{
typeAwareQuantize(src_tensor, dst_tensor);
typeAwareQuantize(src_tensor, dst_tensor, permute_type);
return;
}

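The per-element arithmetic itself is untouched by this commit: elementwiseQuantize() still rounds, shifts by the zero point, and clamps at each (possibly converted) coordinate. A standalone sketch of that single step, with illustrative scale and zero-point values:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

uint8_t quantizeOne(float input, float scale, int32_t zero_point)
{
  constexpr int32_t min_val = std::numeric_limits<uint8_t>::min();
  constexpr int32_t max_val = std::numeric_limits<uint8_t>::max();
  // Round to the nearest quantized level, shift, then clamp to the
  // representable output range, as in elementwiseQuantize().
  const int32_t unclamped =
    static_cast<int32_t>(std::round(input / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(std::max(unclamped, min_val), max_val));
}

int main()
{
  // 100.0f / 0.5f = 200, plus zero point 10 = 210, inside [0, 255].
  std::cout << static_cast<int>(quantizeOne(100.0f, 0.5f, 10)) << "\n";
  return 0;
}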
29 changes: 15 additions & 14 deletions runtime/onert/core/src/ir/Coordinates.cc
@@ -23,24 +23,25 @@ namespace onert
namespace ir
{

Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
Layout to_layout)
Coordinates convertCoordinates(const Coordinates &coords, const PermuteType &type)
{
assert(from_coordinates.size() == 4);
Coordinates to{from_coordinates};
if (from_layout == Layout::NHWC && to_layout == Layout::NCHW)
assert(coords.size() == 4);
Coordinates to{coords};
if (type == PermuteType::COPY)
return to;

if (type == PermuteType::NHWC_TO_NCHW)
{
to.set(0, from_coordinates[0]);
to.set(1, from_coordinates[3]);
to.set(2, from_coordinates[1]);
to.set(3, from_coordinates[2]);
to.set(1, coords[3]);
to.set(2, coords[1]);
to.set(3, coords[2]);
}
else if (from_layout == Layout::NCHW && to_layout == Layout::NHWC)
else
{
to.set(0, from_coordinates[0]);
to.set(1, from_coordinates[2]);
to.set(2, from_coordinates[3]);
to.set(3, from_coordinates[1]);
assert(type == PermuteType::NCHW_TO_NHWC);
to.set(1, coords[2]);
to.set(2, coords[3]);
to.set(3, coords[1]);
}

return to;
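A self-contained model of the rewritten conversion, with a round-trip check (Coords is a stand-in for ir::Coordinates here; the real class lives in ir/Coordinates.h):

#include <array>
#include <cassert>
#include <cstdint>

enum class PermuteType { COPY, NHWC_TO_NCHW, NCHW_TO_NHWC };
using Coords = std::array<int32_t, 4>;

// Same mapping as Coordinates.cc above: the batch axis (index 0) never
// moves; the other three axes rotate according to the permute type.
Coords convert(const Coords &coords, PermuteType type)
{
  Coords to = coords;
  if (type == PermuteType::COPY)
    return to;
  if (type == PermuteType::NHWC_TO_NCHW)
  {
    to[1] = coords[3]; // C
    to[2] = coords[1]; // H
    to[3] = coords[2]; // W
  }
  else // PermuteType::NCHW_TO_NHWC
  {
    to[1] = coords[2]; // H
    to[2] = coords[3]; // W
    to[3] = coords[1]; // C
  }
  return to;
}

int main()
{
  const Coords nhwc{0, 1, 2, 3}; // (n, h, w, c)
  const Coords nchw = convert(nhwc, PermuteType::NHWC_TO_NCHW);
  assert((nchw == Coords{0, 3, 1, 2})); // (n, c, h, w)
  // The two non-COPY types are inverses of each other.
  assert(convert(nchw, PermuteType::NCHW_TO_NHWC) == nhwc);
  return 0;
}

The COPY early-return is what lets callers treat "same layout" and "no permutation needed" uniformly, which is why the layout-pair comparisons elsewhere in this commit collapse into a single check against PermuteType::COPY.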
