
[onert] Use permute type on permute task and coordinate conversion #13805

Merged 1 commit on Aug 28, 2024
9 changes: 7 additions & 2 deletions runtime/onert/core/include/ir/Coordinates.h
@@ -119,8 +119,13 @@ class Coordinates final
std::vector<int32_t> _coordinates;
};

Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
Layout to_layout);
/**
* @brief Convert coordinates for layout change
 * @param[in] coords Coordinates to be converted
* @param[in] type Permutation type to be applied to coordinates
* @return Converted coordinates based on permutation type
*/
Coordinates convertCoordinates(const Coordinates &coords, const PermuteType &type);

} // namespace ir
} // namespace onert
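Note for readers skimming the diff: the new signature keys conversion off a single ir::PermuteType value instead of a source/destination Layout pair. The enum's definition is not part of this diff; below is a minimal reconstruction consistent with the three values the PR uses, plus a hypothetical before/after call.

// Hypothetical reconstruction for illustration only; the real PermuteType
// is defined elsewhere in the onert IR headers.
enum class PermuteType
{
  NHWC_TO_NCHW,
  NCHW_TO_NHWC,
  COPY,
};

// Before: convertCoordinates(coords, Layout::NHWC, Layout::NCHW);
// After:  convertCoordinates(coords, PermuteType::NHWC_TO_NCHW);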
12 changes: 7 additions & 5 deletions runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -110,7 +110,7 @@ void PermuteLayer::optimize()
const auto copy_len = loop_shape.dim(copy_axis) * data_size;
loop_shape.dim(copy_axis) = 1;

appendPermuteTasks(src, dst, loop_shape, copy_len);
appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
}
}
else
@@ -121,7 +121,7 @@ void PermuteLayer::optimize()
const auto loop_shape = src_tensor.getShape();
const auto copy_len = data_size;

appendPermuteTasks(src, dst, loop_shape, copy_len);
appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
}
});
};
@@ -136,11 +136,12 @@ void PermuteLayer::optimize()
}

void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
const ir::Shape &loop_shape, size_t size)
const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type)
{
size_t distributed_dim = 0;
auto src_shape = src_tensor->getShape();
if (src_tensor->layout() == dst_tensor->layout())
if (permute_type == ir::PermuteType::COPY)
{
for (int i = 1; i < src_shape.rank() - 1; ++i)
{
@@ -165,7 +166,8 @@ void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_te
start_coords.set(distributed_dim, start);
int end = start + (distributed_dim_val - start) / (thread_count - i);
one_thread_loop_shape.dim(distributed_dim) = end - start;
tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size,
permute_type);
start = end;
}
assert(tasks.size() >= 1);
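A note on the splitting arithmetic above: end = start + (distributed_dim_val - start) / (thread_count - i) gives each task an even share of the distributed dimension, splitting what remains among the tasks not yet created. A standalone sketch of just that arithmetic (illustrative values; not part of the PR):

#include <cassert>
#include <cstdio>

int main()
{
  const int thread_count = 4;
  const int distributed_dim_val = 10; // stands in for loop_shape.dim(distributed_dim)
  int start = 0;
  for (int i = 0; i < thread_count; ++i)
  {
    // Same formula as appendPermuteTasks: split the remainder evenly
    // among the remaining tasks.
    int end = start + (distributed_dim_val - start) / (thread_count - i);
    std::printf("task %d covers [%d, %d)\n", i, start, end);
    start = end;
  }
  assert(start == distributed_dim_val); // exhaustive, non-overlapping partition
  return 0;
}

With 10 rows and 4 tasks this prints the ranges [0, 2), [2, 4), [4, 7), [7, 10), so every index is covered exactly once.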
21 changes: 9 additions & 12 deletions runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -47,7 +47,8 @@ class PermuteLayer : public onert::exec::IPermuteFunction

private:
void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
const ir::Shape &loop_shape, size_t size);
const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type);

void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);

@@ -56,25 +57,23 @@ class PermuteLayer : public onert::exec::IPermuteFunction
using Strides = ir::Coordinates;

PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size,
const ir::PermuteType &permute_type)
: _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
_src_start_offset{src_tensor.calcOffset(start_coords)},
_dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
_loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
_dst_layout{dst_tensor.layout()}, _is_permutation{true}
_loop_shape{loop_shape}, _size{size}, _permute_type{permute_type}
{
// Set strides
setStrides(src_tensor, &_src_strides);
setStrides(dst_tensor, &_dst_strides);

_is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
}
// Constructor for a copy
PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
uint32_t dst_start_offset, size_t size)
: _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
_dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0}, _loop_shape{1},
_size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
_size{size}, _permute_type{ir::PermuteType::COPY}
{
// DO NOTHING
}
@@ -90,9 +89,9 @@ class PermuteLayer : public onert::exec::IPermuteFunction
size_t dst_offset = _dst_start_offset;
assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
ir::Coordinates dst_coords = coords;
if (_is_permutation)
if (_permute_type != ir::PermuteType::COPY && _loop_shape.rank() == 4)
{
dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
dst_coords = ir::convertCoordinates(coords, _permute_type);
}
for (auto i = 0; i < _loop_shape.rank(); ++i)
{
@@ -136,9 +135,7 @@ class PermuteLayer : public onert::exec::IPermuteFunction
Strides _dst_strides;
const ir::Shape _loop_shape;
const size_t _size;
const ir::Layout _src_layout;
const ir::Layout _dst_layout;
bool _is_permutation;
const ir::PermuteType _permute_type;
};
std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
};
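Net effect of this file's changes: the task's state shrinks from two layouts plus an _is_permutation flag to a single _permute_type, and the rank-4 check moves from construction time into run(). The loop body after the coordinate conversion is collapsed in this view; assuming it accumulates coords[i] * stride[i] byte offsets and then copies one contiguous run, a simplified sketch of one iteration (copyOneRun is an illustrative helper, not PR code):

#include <cstdint>
#include <cstring>
#include <vector>

// Simplified sketch under stated assumptions; not the PR's exact run() body.
void copyOneRun(const uint8_t *src, uint8_t *dst, size_t src_start, size_t dst_start,
                const std::vector<int32_t> &src_coords, const std::vector<int32_t> &dst_coords,
                const std::vector<uint32_t> &src_strides, const std::vector<uint32_t> &dst_strides,
                size_t size)
{
  size_t src_offset = src_start;
  size_t dst_offset = dst_start;
  for (size_t i = 0; i < src_coords.size(); ++i)
  {
    src_offset += src_coords[i] * src_strides[i];
    dst_offset += dst_coords[i] * dst_strides[i]; // dst_coords already permuted when needed
  }
  std::memcpy(dst + dst_offset, src + src_offset, size); // one contiguous run of `size` bytes
}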
54 changes: 26 additions & 28 deletions runtime/onert/core/src/exec/IPermuteFunction.cc
@@ -51,7 +51,8 @@ inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)

// Quantize per element
template <typename InputT, typename OutputT>
void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
const auto scale = dst_tensor->data_scale();
const auto zero_point = dst_tensor->data_zero_point();
@@ -60,17 +61,14 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d
int max_val = std::numeric_limits<OutputT>::max();

auto loop_shape = src_tensor->getShape();
const auto src_layout = src_tensor->layout();
const auto dst_layout = dst_tensor->layout();
const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
const InputT *input_data =
reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
int32_t clamped = std::min(std::max(unclamped, min_val), max_val);

ir::Coordinates dst_coords =
is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
OutputT *output_data =
reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
*output_data = clamped;
@@ -79,10 +77,11 @@ void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *d

// TODO Optimize the case where tensors has the same layout
template <typename InputT, typename OutputT>
void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
!src_tensor->is_dynamic())
{
assert(!dst_tensor->is_dynamic());

@@ -94,28 +93,26 @@ void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
}
else
{
elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor, type);
}
}

// Dequantize per element
template <typename InputT, typename OutputT>
void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
const auto scale = src_tensor->data_scale();
const auto zero_point = src_tensor->data_zero_point();

auto loop_shape = src_tensor->getShape();
const auto src_layout = src_tensor->layout();
const auto dst_layout = dst_tensor->layout();
const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
const bool is_permutation = type != ir::PermuteType::COPY && loop_shape.rank() == 4;
ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
const InputT *input_data =
reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));

ir::Coordinates dst_coords =
is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
ir::Coordinates dst_coords = is_permutation ? ir::convertCoordinates(coords, type) : coords;
OutputT *output_data =
reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
*output_data = result;
@@ -124,10 +121,11 @@ void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor

// TODO Optimize the case where tensors has the same layout
template <typename InputT, typename OutputT>
void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
const ir::PermuteType &type)
{
if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
if (!src_tensor->has_padding() && !dst_tensor->has_padding() && type == ir::PermuteType::COPY &&
!src_tensor->is_dynamic())
{
assert(!dst_tensor->is_dynamic());

@@ -139,15 +137,15 @@ void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor
}
else
{
elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor, type);
}
}

template <typename SRC_T, typename DST_T,
std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
std::is_base_of<backend::ITensor, DST_T>::value,
bool> = true>
void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor, const ir::PermuteType &type)
{
// TODO Support other types
if (src_tensor->data_type() == ir::DataType::FLOAT32)
@@ -156,17 +154,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
{
case ir::DataType::QUANT_UINT8_ASYMM:
{
quantize<float, uint8_t>(src_tensor, dst_tensor);
quantize<float, uint8_t>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT8_SYMM:
{
quantize<float, int8_t>(src_tensor, dst_tensor);
quantize<float, int8_t>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT16_SYMM:
{
quantize<float, int16_t>(src_tensor, dst_tensor);
quantize<float, int16_t>(src_tensor, dst_tensor, type);
break;
}
default:
@@ -182,17 +180,17 @@ void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
{
case ir::DataType::QUANT_UINT8_ASYMM:
{
dequantize<uint8_t, float>(src_tensor, dst_tensor);
dequantize<uint8_t, float>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT8_SYMM:
{
dequantize<int8_t, float>(src_tensor, dst_tensor);
dequantize<int8_t, float>(src_tensor, dst_tensor, type);
break;
}
case ir::DataType::QUANT_INT16_SYMM:
{
dequantize<int16_t, float>(src_tensor, dst_tensor);
dequantize<int16_t, float>(src_tensor, dst_tensor, type);
break;
}
default:
@@ -256,7 +254,7 @@ void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *d
assert(src_tensor != dst_tensor);
if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
{
typeAwareQuantize(src_tensor, dst_tensor);
typeAwareQuantize(src_tensor, dst_tensor, permute_type);
return;
}

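The per-element math in this file is unchanged by the PR; only the permutation decision is. For reference, a self-contained sketch of the two transforms applied above, instantiated for int8 (helper names are illustrative; the source templates over the output type and takes the range from std::numeric_limits):

#include <algorithm>
#include <cmath>
#include <cstdint>

// float -> quantized, as in elementwiseQuantize: scale, round, shift, clamp.
int8_t quantizeOne(float x, float scale, int32_t zero_point)
{
  int32_t unclamped = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<int8_t>(std::clamp(unclamped, -128, 127));
}

// quantized -> float, as in elementwiseDequantize.
float dequantizeOne(int8_t q, float scale, int32_t zero_point)
{
  return scale * (static_cast<int32_t>(q) - zero_point);
}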
29 changes: 15 additions & 14 deletions runtime/onert/core/src/ir/Coordinates.cc
@@ -23,24 +23,25 @@ namespace onert
namespace ir
{

Coordinates convertCoordinates(const Coordinates &from_coordinates, Layout from_layout,
Layout to_layout)
Coordinates convertCoordinates(const Coordinates &coords, const PermuteType &type)
{
assert(from_coordinates.size() == 4);
Coordinates to{from_coordinates};
if (from_layout == Layout::NHWC && to_layout == Layout::NCHW)
assert(coords.size() == 4);
Coordinates to{coords};
if (type == PermuteType::COPY)
return to;

if (type == PermuteType::NHWC_TO_NCHW)
{
to.set(0, from_coordinates[0]);
to.set(1, from_coordinates[3]);
to.set(2, from_coordinates[1]);
to.set(3, from_coordinates[2]);
to.set(1, coords[3]);
to.set(2, coords[1]);
to.set(3, coords[2]);
}
else if (from_layout == Layout::NCHW && to_layout == Layout::NHWC)
else
{
to.set(0, from_coordinates[0]);
to.set(1, from_coordinates[2]);
to.set(2, from_coordinates[3]);
to.set(3, from_coordinates[1]);
assert(type == PermuteType::NCHW_TO_NHWC);
to.set(1, coords[2]);
to.set(2, coords[3]);
to.set(3, coords[1]);
}

return to;
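The two non-copy branches are mutual inverses, which makes a round trip a convenient sanity check. A hypothetical usage sketch (roundTripCheck is not PR code; it assumes Coordinates accepts an initializer list, which its std::vector<int32_t> storage suggests, and uses operator[] and size() as the diff above does):

#include <cassert>
#include "ir/Coordinates.h"

using namespace onert::ir;

void roundTripCheck()
{
  Coordinates nhwc{0, 2, 3, 1}; // (N, H, W, C)
  Coordinates nchw = convertCoordinates(nhwc, PermuteType::NHWC_TO_NCHW);
  // NHWC (0, 2, 3, 1) maps to NCHW (0, 1, 2, 3).
  Coordinates back = convertCoordinates(nchw, PermuteType::NCHW_TO_NHWC);
  for (size_t i = 0; i < back.size(); ++i)
    assert(back[i] == nhwc[i]); // the round trip restores the original coordinates
}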