Skip to content

Commit

Permalink
[onert] Use constructor parameter to set permute type (#13789)
Browse files Browse the repository at this point in the history
This commit updates the PermuteLayer constructor to use a parameter to set the permute type.
With this commit, permute no longer uses the tensor layout.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
Co-authored-by: Jang Jiseob <[email protected]>
  • Loading branch information
hseok-oh and ragmani authored Aug 28, 2024
1 parent 73773e6 commit 109d223
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 40 deletions.
9 changes: 7 additions & 2 deletions runtime/onert/core/src/backend/builtin/KernelGenerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,14 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
// Add PermuteLayer
std::vector<ITensor *> output_tensors{getTensor(output_index)};
std::vector<ITensor *> input_tensors{getTensor(input_index)};
std::vector<ir::PermuteType> permute_types;

auto fn =
std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, _external_context);
// Layout in graph is always NHWC, so layout is not changed
for (uint32_t i = 0; i < input_tensors.size(); i++)
permute_types.emplace_back(ir::PermuteType::COPY);

auto fn = std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, permute_types,
_external_context);
_return_fn = std::move(fn);
}

Expand Down
14 changes: 3 additions & 11 deletions runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,18 @@ namespace kernel

PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
const std::vector<ITensor *> &dst_tensors,
const std::vector<ir::PermuteType> &types,
const std::shared_ptr<ExternalContext> &external_context)
: _external_context{external_context}, _tasks_map{}
{
assert(src_tensors.size() == dst_tensors.size());
assert(src_tensors.size() == types.size());
_src_tensors = src_tensors;
_dst_tensors = dst_tensors;
_permute_types = types;
_src_tensors_offsets.resize(src_tensors.size());
_dst_tensors_offsets.resize(dst_tensors.size());
_permute_types.resize(src_tensors.size());

// TODO Get from constructor parameter
for (uint32_t i = 0; i < src_tensors.size(); i++)
{
if (src_tensors[i]->layout() == dst_tensors[i]->layout())
_permute_types[i] = ir::PermuteType::COPY;
else if (src_tensors[i]->layout() == ir::Layout::NHWC)
_permute_types[i] = ir::PermuteType::NHWC_TO_NCHW;
else
_permute_types[i] = ir::PermuteType::NCHW_TO_NHWC;
}
}

void PermuteLayer::optimize()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class PermuteLayer : public onert::exec::IPermuteFunction
{
public:
PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
const std::vector<ir::PermuteType> &types,
const std::shared_ptr<ExternalContext> &external_context);

void optimize() override;
Expand Down
10 changes: 8 additions & 2 deletions runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,15 @@ void WhileLayer::run()

std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
std::vector<ir::PermuteType> permute_types;
// Layout in graph is always NHWC, so layout is not changed
for (uint32_t i = 0; i < op_outputs.size(); i++)
permute_types.emplace_back(ir::PermuteType::COPY);
// Copying body inputs to outputs when the loop body is never executed
if (!getResultCond(cond_output_tensor.get()))
{
PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, permute_types,
_external_context};
copy_body_inputs_to_op_outputs.run();
return;
}
Expand All @@ -105,7 +110,8 @@ void WhileLayer::run()
}

std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, permute_types,
_external_context};

const auto body_execute_with_op_inputs = [&]() {
VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,17 @@ void KernelGenerator::visit(const ir::train::operation::Permute &node)

std::vector<ITensor *> output_back_prop_tensors;
std::vector<ITensor *> input_back_prop_tensors;
std::vector<ir::PermuteType> permute_types;

auto input_back_prop_tensor = getBackPropTensor(input_index);
auto output_back_prop_tensor = getBackPropTensor(output_index);
output_back_prop_tensors.emplace_back(output_back_prop_tensor);
input_back_prop_tensors.emplace_back(input_back_prop_tensor);

// Layout in graph is always NHWC, so layout is not changed
for (uint32_t i = 0; i < input_tensors.size(); i++)
permute_types.emplace_back(ir::PermuteType::COPY);

// NOTE The output buffers of IOTensors are not essential for training. If there
// is no output buffer provided by the user, permute is not performed.
bool ignore_forward_in_training = false;
Expand All @@ -77,7 +82,7 @@ void KernelGenerator::visit(const ir::train::operation::Permute &node)
}

auto fn = std::make_unique<kernel::PermuteLayer>(
input_tensors, output_tensors, input_back_prop_tensors, output_back_prop_tensors,
input_tensors, output_tensors, input_back_prop_tensors, output_back_prop_tensors, permute_types,
ignore_forward_in_training, _external_context);

_return_fn = std::move(fn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
const std::vector<ITensor *> &dst_tensors,
const std::vector<ITensor *> &input_back_prop_tensors,
const std::vector<ITensor *> &output_back_prop_tensors,
const std::vector<ir::PermuteType> &types,
bool ignore_forward_in_training,
const std::shared_ptr<ExternalContext> &external_context)
: builtin::kernel::PermuteLayer{src_tensors, dst_tensors, external_context},
: builtin::kernel::PermuteLayer{src_tensors, dst_tensors, types, external_context},
_input_back_prop_tensors{input_back_prop_tensors},
_output_back_prop_tensors{output_back_prop_tensors},
_ignore_forward_in_training{ignore_forward_in_training}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class PermuteLayer : public builtin::kernel::PermuteLayer, public exec::train::I
PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
const std::vector<ITensor *> &input_back_prop_tensors,
const std::vector<ITensor *> &output_back_prop_tensors,
bool ignore_forward_in_training,
const std::vector<ir::PermuteType> &types, bool ignore_forward_in_training,
const std::shared_ptr<ExternalContext> &external_context);

void optimize() override;
Expand Down
17 changes: 4 additions & 13 deletions runtime/onert/core/src/exec/IPermuteFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,23 +252,14 @@ class PermuteLayer : public onert::exec::IPermuteFunction
{
public:
PermuteLayer(const std::vector<onert::backend::ITensor *> &inputs,
const std::vector<onert::backend::ITensor *> &outputs)
const std::vector<onert::backend::ITensor *> &outputs,
const std::vector<ir::PermuteType> &types)
{
assert(inputs.size() == outputs.size());
assert(inputs.size() == types.size());
_src_tensors = inputs;
_dst_tensors = outputs;
_permute_types.resize(inputs.size());

// TODO Get from constructor parameter
for (uint32_t i = 0; i < inputs.size(); i++)
{
if (inputs[i]->layout() == outputs[i]->layout())
_permute_types[i] = ir::PermuteType::COPY;
else if (inputs[i]->layout() == ir::Layout::NHWC)
_permute_types[i] = ir::PermuteType::NHWC_TO_NCHW;
else
_permute_types[i] = ir::PermuteType::NCHW_TO_NHWC;
}
_permute_types = types;
}
virtual ~PermuteLayer() {}
void optimize() override {}
Expand Down
28 changes: 23 additions & 5 deletions runtime/onert/core/src/exec/MultiModelExecutors.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ void MultiModelExecutors::createEdgeQuantLayers()

std::vector<backend::ITensor *> inputs;
std::vector<backend::ITensor *> outputs;
std::vector<ir::PermuteType> permute_types;
for (const auto &[from_iodesc, to_list] : _edge_map)
{
if (std::get<ir::ModelIndex>(from_iodesc) == model_index &&
Expand All @@ -212,13 +213,16 @@ void MultiModelExecutors::createEdgeQuantLayers()
auto type_aware_quant_tensor = std::make_unique<EdgeTensor>(to_info, to_layout);
outputs.emplace_back(type_aware_quant_tensor.get());

// No layout change on edge
permute_types.emplace_back(ir::PermuteType::COPY);

_edge_quant_tensors[to_iodesc] = std::move(type_aware_quant_tensor);
}
}
}
}

auto layer = std::make_unique<PermuteLayer>(inputs, outputs);
auto layer = std::make_unique<PermuteLayer>(inputs, outputs, permute_types);
layer->prepare();
_edge_quant_layers[{model_index, subg_index}] = std::move(layer);
}
Expand Down Expand Up @@ -282,6 +286,7 @@ void MultiModelExecutors::createPkgIOQuantLayers(const IODescription &desc)
}
std::vector<backend::ITensor *> src_tensors;
std::vector<backend::ITensor *> dst_tensors;
std::vector<ir::PermuteType> permute_types;
for (const auto &pkg_input : pkg_inputs)
{
const auto &io_index = std::get<ir::IOIndex>(pkg_input);
Expand All @@ -294,19 +299,25 @@ void MultiModelExecutors::createPkgIOQuantLayers(const IODescription &desc)
// Create EdgeTensor for nnpkg input if type is different
const auto &orig_info = executor->inputInfo(io_index.value());
const auto orig_layout = executor->inputLayout(io_index.value());
if (input_desc->info.typeInfo().type() != orig_info.typeInfo().type())
if ((input_desc->info.typeInfo().type() != orig_info.typeInfo().type()) ||
(input_desc->layout == ir::Layout::NCHW))
{
auto pkg_input_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
_pkg_input_quant_tensors[pkg_input] = std::move(pkg_input_edge_tensor);

// Append type-aware quantization layer's inputs/outputs
src_tensors.emplace_back(_pkg_input_tensors[pkg_input].get());
dst_tensors.emplace_back(_pkg_input_quant_tensors[pkg_input].get());

if (input_desc->layout == ir::Layout::NCHW)
permute_types.emplace_back(ir::PermuteType::NCHW_TO_NHWC);
else
permute_types.emplace_back(ir::PermuteType::COPY);
}
}

// Create type-aware quantization layer for nnpkg inputs
auto pkg_input_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
auto pkg_input_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors, permute_types);
pkg_input_layer->prepare();
_pkg_input_quant_layers[{model_index, subg_index}] = std::move(pkg_input_layer);

Expand All @@ -322,6 +333,7 @@ void MultiModelExecutors::createPkgIOQuantLayers(const IODescription &desc)
}
src_tensors.clear();
dst_tensors.clear();
permute_types.clear();
// Create Tensors of nnpkg outputs for type-aware quantization
for (const auto &pkg_output : pkg_outputs)
{
Expand All @@ -335,19 +347,25 @@ void MultiModelExecutors::createPkgIOQuantLayers(const IODescription &desc)
// Create EdgeTensor for nnpkg output if type is different
const auto &orig_info = executor->outputInfo(io_index.value());
const auto orig_layout = executor->outputLayout(io_index.value());
if (output_desc->info.typeInfo().type() != orig_info.typeInfo().type())
if ((output_desc->info.typeInfo().type() != orig_info.typeInfo().type()) ||
(output_desc->layout == ir::Layout::NCHW))
{
auto pkg_output_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
_pkg_output_quant_tensors[pkg_output] = std::move(pkg_output_edge_tensor);

// Append type-aware quantization layer's inputs/outputs
src_tensors.emplace_back(_pkg_output_quant_tensors[pkg_output].get());
dst_tensors.emplace_back(_pkg_output_tensors[pkg_output].get());

if (output_desc->layout == ir::Layout::NCHW)
permute_types.emplace_back(ir::PermuteType::NHWC_TO_NCHW);
else
permute_types.emplace_back(ir::PermuteType::COPY);
}
}

// Create type-aware quantization layer for nnpkg outputs
auto pkg_output_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
auto pkg_output_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors, permute_types);
pkg_output_layer->prepare();
_pkg_output_quant_layers[{model_index, subg_index}] = std::move(pkg_output_layer);
}
Expand Down
21 changes: 17 additions & 4 deletions runtime/onert/core/src/exec/SingleModelExecutors.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,12 @@ void SingleModelExecutors::execute(const ExecutionContext &ctx)
// Vector for input quantization I/O
std::vector<backend::ITensor *> input_tensors;
std::vector<backend::ITensor *> input_qtensors;
std::vector<ir::PermuteType> input_permute_types;

// Vector for output dequantization I/O
std::vector<backend::ITensor *> output_qtensors;
std::vector<backend::ITensor *> output_tensors;
std::vector<ir::PermuteType> output_permute_types;

// Prepare UserTensor and EdgeTensor for input quantization
for (uint32_t i = 0; i < inputs.size(); i++)
Expand All @@ -87,7 +89,8 @@ void SingleModelExecutors::execute(const ExecutionContext &ctx)
auto user_type = desc->info.typeInfo().type();
auto &model_info = entryExecutor()->inputInfo(i).typeInfo();
auto model_type = model_info.type();
if (user_type != model_type && user_type == ir::DataType::FLOAT32)
if ((user_type != model_type && user_type == ir::DataType::FLOAT32) ||
(desc->layout == ir::Layout::NCHW))
{
auto quantized_info = desc->info;
quantized_info.typeInfo(model_info);
Expand All @@ -98,6 +101,10 @@ void SingleModelExecutors::execute(const ExecutionContext &ctx)
input_tensors.push_back(tensorpool.back().get());
input_qtensors.push_back(qtensorpool.back().get());
inputs[i] = qtensorpool.back().get();
if (desc->layout == ir::Layout::NCHW)
input_permute_types.push_back(ir::PermuteType::NCHW_TO_NHWC);
else
input_permute_types.push_back(ir::PermuteType::COPY);
}
else
inputs[i] = tensorpool.back().get();
Expand All @@ -118,7 +125,8 @@ void SingleModelExecutors::execute(const ExecutionContext &ctx)
auto user_type = desc->info.typeInfo().type();
auto &model_info = entryExecutor()->outputInfo(i).typeInfo();
auto model_type = model_info.type();
if (user_type != model_type && user_type == ir::DataType::FLOAT32)
if ((user_type != model_type && user_type == ir::DataType::FLOAT32) ||
(desc->layout == ir::Layout::NCHW))
{
auto quantized_info = desc->info;
quantized_info.typeInfo(model_info);
Expand All @@ -129,6 +137,10 @@ void SingleModelExecutors::execute(const ExecutionContext &ctx)
output_qtensors.push_back(qtensorpool.back().get());
output_tensors.push_back(tensorpool.back().get());
outputs[i] = qtensorpool.back().get();
if (desc->layout == ir::Layout::NCHW)
output_permute_types.push_back(ir::PermuteType::NHWC_TO_NCHW);
else
output_permute_types.push_back(ir::PermuteType::COPY);
}
else
outputs[i] = tensorpool.back().get();
Expand All @@ -137,7 +149,7 @@ void SingleModelExecutors::execute(const ExecutionContext &ctx)
// Run quantization
if (input_tensors.size() > 0)
{
auto input_quantize_layer = PermuteLayer(input_tensors, input_qtensors);
auto input_quantize_layer = PermuteLayer(input_tensors, input_qtensors, input_permute_types);
input_quantize_layer.prepare();
input_quantize_layer.run();
}
Expand All @@ -148,7 +160,8 @@ void SingleModelExecutors::execute(const ExecutionContext &ctx)
// Run dequantization
if (output_tensors.size() != 0)
{
auto output_dequantize_layer = PermuteLayer(output_qtensors, output_tensors);
auto output_dequantize_layer =
PermuteLayer(output_qtensors, output_tensors, output_permute_types);
output_dequantize_layer.prepare();
output_dequantize_layer.run();
}
Expand Down

0 comments on commit 109d223

Please sign in to comment.