From eb5ee58469d73a1406bb1c4b31657ee5db4d8c86 Mon Sep 17 00:00:00 2001 From: Vyacheslav Bazhenov Date: Tue, 9 Jul 2024 15:31:42 +0300 Subject: [PATCH] [onert-micro] Introduce Compressed Conv2D kernel. This commit introduces Conv2D kernel with compressed weights. ONE-DCO-1.0-Signed-off-by: Vyacheslav Bazhenov --- onert-micro/onert-micro/CMakeLists.txt | 2 +- .../onert-micro/include/core/OMKernelData.h | 1 + .../include/pal/mcu/OMHuffmanTranscoder.h | 478 ++++++++++++++++++ .../onert-micro/include/pal/mcu/PALConv2D.h | 66 ++- .../test_models/conv2d/QuantConv2DKernel.h | 103 ++-- .../src/execute/kernels/Conv2D.cpp | 21 +- res/CircleSchema/0.6/circle_schema.fbs | 7 + 7 files changed, 596 insertions(+), 82 deletions(-) create mode 100644 onert-micro/onert-micro/include/pal/mcu/OMHuffmanTranscoder.h diff --git a/onert-micro/onert-micro/CMakeLists.txt b/onert-micro/onert-micro/CMakeLists.txt index a48ad1628fe..80ef8c02ebc 100644 --- a/onert-micro/onert-micro/CMakeLists.txt +++ b/onert-micro/onert-micro/CMakeLists.txt @@ -47,7 +47,7 @@ endif() # TODO move it to specific cmake for platforms add_compile_options(-fno-exceptions) -add_compile_options(-Os) +add_compile_options(-O0) # AFAIK, this will enable leak sanitizer, too if(ENABLE_SANITIZER) diff --git a/onert-micro/onert-micro/include/core/OMKernelData.h b/onert-micro/onert-micro/include/core/OMKernelData.h index 784f68e5053..4f0a9241672 100644 --- a/onert-micro/onert-micro/include/core/OMKernelData.h +++ b/onert-micro/onert-micro/include/core/OMKernelData.h @@ -153,6 +153,7 @@ struct ConvQuant int32_t quantized_activation_min; int32_t quantized_activation_max; int32_t depth_multiplier; + int32_t compressed_weight_size; std::vector per_channel_output_multiplier; std::vector per_channel_output_shift; }; diff --git a/onert-micro/onert-micro/include/pal/mcu/OMHuffmanTranscoder.h b/onert-micro/onert-micro/include/pal/mcu/OMHuffmanTranscoder.h new file mode 100644 index 00000000000..8d9eac61479 --- /dev/null +++ b/onert-micro/onert-micro/include/pal/mcu/OMHuffmanTranscoder.h @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_CORE_OM_HUFFMAN_TRANSCODER_H +#define ONERT_MICRO_CORE_OM_HUFFMAN_TRANSCODER_H + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace onert_micro +{ +namespace core +{ +template struct Node +{ + Node *p_left = nullptr; + Node *p_right = nullptr; + T data; + unsigned int freq; +}; + +template struct CompareNodes +{ + bool operator()(Node *l, Node *r) { return l->freq > r->freq; } +}; + +template class HuffmanTranscoder +{ +private: + Node *root = nullptr; + std::unordered_map huffmanCode; + std::vector encoded_bitset{}; + std::size_t nodes_count = 0; + +private: + Node *allocateNode(T data, unsigned int freq, Node *p_left, Node *p_right) + { + Node *node = new Node; + node->data = data; + node->freq = freq; + node->p_left = p_left; + node->p_right = p_right; + nodes_count++; + return node; + } + + std::unordered_map calculate_frequency_map(const std::vector &input) + { + std::unordered_map out_map; + for (auto &item : input) + out_map[item] = out_map.find(item) != out_map.end() ? out_map[item] + 1 : 1; + return out_map; + } + + std::string exportHuffmanTreeToString(Node *node) + { + if (node == nullptr) + return ""; + if (!node->p_left && !node->p_right) + { + return "0" + std::bitset(node->data).to_string(); + } + std::string tmp = "1"; + tmp += exportHuffmanTreeToString(node->p_left); + tmp += exportHuffmanTreeToString(node->p_right); + return tmp; + } + + Node *importHuffmanTreeFromBoolVec(std::vector &vec, size_t &index) + { + if (vec.empty()) + return nullptr; + if (vec[index]) + { + index++; + Node *p_left = importHuffmanTreeFromBoolVec(vec, index); + Node *p_right = importHuffmanTreeFromBoolVec(vec, index); + return allocateNode(0, 0, p_left, p_right); + } + else if (vec[index] == false) + { + index++; + T tmp = 0; + for (int i = 0; i < sizeof(T) * CHAR_BIT; ++i) + { + if (vec[index++]) + tmp |= (1 << (sizeof(T) * CHAR_BIT - 1)) >> i; + } + + return allocateNode(tmp, 0, nullptr, nullptr); + } + } + + Node *importHuffmanTreeFromString(std::string &str) + { + + if (str.substr(0, 1) == "1") + { + str = str.substr(1); + Node *p_left = importHuffmanTreeFromString(str); + Node *p_right = importHuffmanTreeFromString(str); + return allocateNode(0, 0, p_left, p_right); + } + else if (str.substr(0, 1) == "0") + { + str = str.substr(1); + std::bitset tmp(str.substr(0, sizeof(T) * CHAR_BIT)); + str = str.substr(sizeof(T) * CHAR_BIT); + return allocateNode(static_cast(tmp.to_ullong()), 0, nullptr, nullptr); + } + } + + void buildHuffmanTable(Node *node, const std::string str = "") + { + if (node == nullptr) + return; + + if (!node->p_left && !node->p_right) + { + huffmanCode[node->data] = str; + } + + buildHuffmanTable(node->p_left, str + "0"); + buildHuffmanTable(node->p_right, str + "1"); + } + + void decode(Node *node, std::string &str, std::vector &out_vec, size_t &index) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + out_vec.push_back(node->data); + return; + } + + if (str.size() == index) + return; + if (str[index] == '0') + { + // str = str.substr(0, str.size() - 1); + decode(node->p_left, str, out_vec, ++index); + } + else + { + // str = str.substr(0, str.size() - 1); + decode(node->p_right, str, out_vec, ++index); + } + } + + void buildHuffmanTree(const std::vector &input) + { + auto freq_map = calculate_frequency_map(input); + + std::priority_queue *, std::vector *>, CompareNodes> pq; + + for (auto &item : freq_map) + { + pq.push(allocateNode(item.first, item.second, nullptr, nullptr)); + } + + while (pq.size() != 1) + { + Node *left = pq.top(); + pq.pop(); + Node *right = pq.top(); + pq.pop(); + + unsigned int sum = left->freq + right->freq; + pq.push(allocateNode(0, sum, left, right)); + } + + root = pq.top(); + } + + struct EncodedTreeAndData + { + std::vector tree_vec{}; + std::vector data_vec{}; + }; + + std::vector packEncodedDataToArray(const std::string &tree_str, + const std::string &encoded_data) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + std::vector arr; + const size_t kTreeSizeInBits = tree_str.size(); + const size_t kDataSizeInBits = encoded_data.size(); + for (int i = 0; i < sizeof(size_t); ++i) + { + arr.push_back( + *(static_cast(static_cast(&kTreeSizeInBits)) + i)); + } + for (int i = 0; i < sizeof(size_t); ++i) + { + arr.push_back( + *(static_cast(static_cast(&kDataSizeInBits)) + i)); + } + const auto merged_str = tree_str + encoded_data; + const size_t kMergedSizeInBits = merged_str.size(); + + const auto kMergedSizeInBytes = kMergedSizeInBits % CHAR_BIT ? kMergedSizeInBits / CHAR_BIT + 1 + : kMergedSizeInBits / CHAR_BIT; + for (int i = 0; i < kMergedSizeInBytes; ++i) + { + const auto kNumOfBits = + kMergedSizeInBits - i * CHAR_BIT < CHAR_BIT ? kMergedSizeInBits - i * CHAR_BIT : CHAR_BIT; + std::string tmp_str = merged_str.substr(i * CHAR_BIT, kNumOfBits); + for (int i = 0; i < CHAR_BIT - kNumOfBits; ++i) + tmp_str += "0"; + const std::bitset tmp_bitset(tmp_str); + arr.push_back(static_cast(tmp_bitset.to_ullong())); + } + return arr; + } + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const uint8_t *pack_ptr) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + // const uint8_t *pack_ptr = packed_vec.data(); + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (int i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (int j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.tree_vec.push_back(true); + else + tree_and_data.tree_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (int i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (int j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (int j = 0; j < kBitsLeft; ++j) + { + + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const std::vector &packed_vec) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + const uint8_t *pack_ptr = packed_vec.data(); + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + // tree_and_data.tree_vec.push_back.reserve(kTreeSizeInBits); + // tree_and_data.data_vec.push_back.reserve(kDataSizeInBits); + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (int i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (int j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (int i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (int j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (int j = 0; j < kBitsLeft; ++j) + { + + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + +public: + std::vector encodeInputArray(const std::vector &input) + { + buildHuffmanTree(input); + buildHuffmanTable(root); + auto exported_tree = exportHuffmanTreeToString(root); + std::string str = ""; + for (auto &item : input) + { + str += huffmanCode[item]; + } + std::vector raw_arr = packEncodedDataToArray(exported_tree, str); + return raw_arr; + } + + void decode(Node *node, std::vector &vec, T *dst_ptr) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + *dst_ptr = node->data; + // dst_ptr++; + return; + } + + if (vec.size() == _decode_idx) + return; + if (vec[_decode_idx] == false) + { + // str = str.substr(0, str.size() - 1); + ++_decode_idx; + decode(node->p_left, vec, dst_ptr); + } + else + { + // str = str.substr(0, str.size() - 1); + ++_decode_idx; + decode(node->p_right, vec, dst_ptr); + } + } + +private: + size_t _decode_idx = 0; + EncodedTreeAndData _encoded_tree_and_data; + +public: + void init_decoder(const uint8_t *input) + { + size_t index = 0; + _encoded_tree_and_data = unpackArrayToEncodedTreeAndData(input); + root = importHuffmanTreeFromBoolVec(_encoded_tree_and_data.tree_vec, index); + } + + void reset_decode_idx(void) { _decode_idx = 0; } + + int decode_n(uint8_t *dst_ptr, size_t num) + { + // EncodedTreeAndData encoded_tree_and_data = unpackArrayToEncodedTreeAndData(input); + // auto root_imported = importHuffmanTreeFromString(encoded_tree_and_data.tree_str); + /*size_t index = 0;*/ + size_t bytes_decoded = 0; + for (int i = 0; i < num && _decode_idx < _encoded_tree_and_data.data_vec.size(); ++i) + { + decode(root, _encoded_tree_and_data.data_vec, dst_ptr + bytes_decoded); + bytes_decoded++; + } + return bytes_decoded; + } + + std::vector decodeEncodedArray(const uint8_t *input) + { + + size_t index = 0; + std::vector res{}; + // std::reverse(encoded_tree_and_data.data_str.begin(), encoded_tree_and_data.data_str.end()); + + while (index < _encoded_tree_and_data.data_str.size()) + decode(root, _encoded_tree_and_data.data_str, res, index); + return res; + } + + std::vector decodeEncodedArray(const std::vector &input) + { + EncodedTreeAndData encoded_tree_and_data = unpackArrayToEncodedTreeAndData(input); + auto root_imported = importHuffmanTreeFromString(encoded_tree_and_data.tree_str); + size_t index = 0; + std::vector res{}; + // std::reverse(encoded_tree_and_data.data_str.begin(), encoded_tree_and_data.data_str.end()); + + while (index < encoded_tree_and_data.data_str.size()) + decode(root_imported, encoded_tree_and_data.data_str, res, index); + return res; + } + + HuffmanTranscoder() = default; +}; +} // namespace core +} // namespace onert_micro +#endif // ONERT_MICRO_CORE_OM_HUFFMAN_TRANSCODER_H diff --git a/onert-micro/onert-micro/include/pal/mcu/PALConv2D.h b/onert-micro/onert-micro/include/pal/mcu/PALConv2D.h index 3bca5a902ec..236fb520bf2 100644 --- a/onert-micro/onert-micro/include/pal/mcu/PALConv2D.h +++ b/onert-micro/onert-micro/include/pal/mcu/PALConv2D.h @@ -17,11 +17,11 @@ #ifndef ONERT_MICRO_EXECUTE_PAL_CONV_2D_H #define ONERT_MICRO_EXECUTE_PAL_CONV_2D_H - #include "PALConv2DCommon.h" #include "core/OMKernelData.h" #include "core/OMRuntimeShape.h" #include "PALUtils.h" +#include "OMHuffmanTranscoder.h" namespace onert_micro { @@ -34,9 +34,11 @@ namespace pal OMStatus ConvPerChannel(const core::ConvQuant ¶ms, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, - const core::OMRuntimeShape &output_shape, int8_t *output_data) + const core::OMRuntimeShape &output_shape, int8_t *output_data, + bool is_compressed = false) { // Get parameters. + const int32_t input_offset = params.input_offset; // r = s(q - Z) const int stride_width = params.stride_width; const int stride_height = params.stride_height; @@ -76,40 +78,53 @@ OMStatus ConvPerChannel(const core::ConvQuant ¶ms, const core::OMRuntimeShap assert(filters_per_group != 0); const int output_height = output_shape.dims(1); const int output_width = output_shape.dims(2); - for (int batch = 0; batch < batches; ++batch) + + // Buffer for decompressed filter vals + // TODO: get by pointer from memory manager + std::vector filter_data_tmp(filter_height * filter_width * filter_input_depth, 0); + + core::HuffmanTranscoder transcoder; + transcoder.init_decoder(reinterpret_cast(filter_data)); + transcoder.reset_decode_idx(); + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - for (int out_y = 0; out_y < output_height; ++out_y) + auto group = out_channel / filters_per_group; + int32_t acc = 0; + + // extract compressed filter + transcoder.decode_n(reinterpret_cast(&filter_data_tmp[0]), filter_data_tmp.size()); + + for (int batch = 0; batch < batches; ++batch) { - const int in_y_origin = (out_y * stride_height) - pad_height; - for (int out_x = 0; out_x < output_width; ++out_x) + for (int out_y = 0; out_y < output_height; ++out_y) { - const int in_x_origin = (out_x * stride_width) - pad_width; - for (int out_channel = 0; out_channel < output_depth; ++out_channel) + const int in_y_origin = (out_y * stride_height) - pad_height; + for (int out_x = 0; out_x < output_width; ++out_x) { - auto group = out_channel / filters_per_group; - int32_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) + const int in_x_origin = (out_x * stride_width) - pad_width; + for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel) { - const int in_y = in_y_origin + dilation_height_factor * filter_y; - for (int filter_x = 0; filter_x < filter_width; ++filter_x) + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; - // Zero padding by omitting the areas outside the image. - const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); - if (!is_point_inside_image) - { - continue; - } + if (!is_point_inside_image) + { + continue; + } - for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel) - { int32_t input_val = input_data[offset(input_shape.dimsData(), batch, in_y, in_x, in_channel + group * filter_input_depth)]; - int32_t filter_val = filter_data[offset(filter_shape.dimsData(), out_channel, - filter_y, filter_x, in_channel)]; + int32_t filter_val = + filter_data_tmp[(filter_y * filter_height + filter_x) * filter_width + + in_channel]; // Accumulate with 32 bits accumulator. // In the nudging process during model quantization, we force // real value of 0.0 be represented by a quantized value. This @@ -145,6 +160,7 @@ OMStatus ConvPerChannel(const core::ConvQuant ¶ms, const core::OMRuntimeShap } } } + return Ok; } diff --git a/onert-micro/onert-micro/include/test_models/conv2d/QuantConv2DKernel.h b/onert-micro/onert-micro/include/test_models/conv2d/QuantConv2DKernel.h index f5cc7d4d46e..b6a3232fbbd 100644 --- a/onert-micro/onert-micro/include/test_models/conv2d/QuantConv2DKernel.h +++ b/onert-micro/onert-micro/include/test_models/conv2d/QuantConv2DKernel.h @@ -37,63 +37,64 @@ namespace s8_conv_2d * Output(1, 2, 2, 2) */ const unsigned char test_kernel_model_circle[] = { - 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x30, 0x03, 0x00, 0x00, 0x4c, 0x03, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xea, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0xb0, 0x03, 0x00, 0x00, 0x12, 0xfb, 0xff, 0xff, 0x00, 0x00, 0x06, 0x00, - 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x0d, 0x11, 0xf8, 0xf8, 0x01, 0x54, 0x21, 0x35, 0xe6, 0x1a, 0xf2, 0xff, 0x17, 0x3b, 0x1c, 0xf8, - 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, - 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, - 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0f, 0x00, - 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x9c, 0x01, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x82, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00, 0x74, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xb7, 0x7d, 0x49, 0x39, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x40, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x80, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0xf2, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x02, 0x44, 0x00, 0x00, 0x00, 0xe4, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x58, 0x03, 0x00, 0x00, + 0xf0, 0x02, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x20, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6e, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, + 0x65, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, + 0x4c, 0x01, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x12, 0x00, 0x10, 0x00, 0x06, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x07, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x4a, 0xfe, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x09, 0x54, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0xdc, 0xfd, 0xff, 0xff, + 0x3c, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xb7, 0x7d, 0x49, 0x39, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x40, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xba, 0xfe, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x02, 0x60, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, + 0x92, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xb0, 0x03, 0x00, 0x00, + 0x12, 0xfb, 0xff, 0xff, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x87, 0x7b, 0x24, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x0e, 0x00, 0x1a, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x09, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x18, 0x00, 0x04, 0x00, + 0x87, 0x7b, 0x24, 0x39, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2a, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x09, 0xb8, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x5c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6b, 0x65, 0x72, 0x00, 0x00, 0x00, 0x06, 0x00, + 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, + 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xdf, 0x1b, 0xc8, 0x23, 0x00, 0x83, 0x79, 0x51, 0xcd, 0x1a, 0x88, 0x71, 0xd8, 0x6a, 0xff, 0x87, + 0x02, 0xee, 0xa0, 0xd1, 0x75, 0x3a, 0x9d, 0xfc, 0xf0, 0x00, 0x12, 0x00, 0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0xa6, 0x11, 0xd0, 0x39, 0x96, 0x6c, 0xa1, 0x3a, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x40, - 0x00, 0x00, 0x60, 0x41, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc1, - 0x03, 0x00, 0x00, 0x00, 0x6b, 0x65, 0x72, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, - 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, - 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xa6, 0x11, 0xd0, 0x39, + 0x96, 0x6c, 0xa1, 0x3a, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0x60, 0x41, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc1, 0x04, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, 0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x64, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, + 0xa0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7c, 0x7b, 0xd7, 0x39, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x20, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0xc1, 0x03, 0x00, 0x00, 0x00, - 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, - 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + 0x00, 0x00, 0x20, 0x41, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0xc1, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00}; const std::vector input_data = {9, 9, -7, -6, -2, 2, -10, 7, -5, -5, 1, 1, 6, -8, -6, -3, -9, 0}; diff --git a/onert-micro/onert-micro/src/execute/kernels/Conv2D.cpp b/onert-micro/onert-micro/src/execute/kernels/Conv2D.cpp index 63a4f3f22f0..c28adc280c0 100644 --- a/onert-micro/onert-micro/src/execute/kernels/Conv2D.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/Conv2D.cpp @@ -156,12 +156,23 @@ OMStatus onert_micro::execute::execute_kernel_CircleConv2D(const OMExecuteArgs & assert(status == Ok); if (status != Ok) return status; + if(options->weight_compression_type() == circle::WeightCompressionType::WeightCompressionType_Huffman) + { + status = + pal::ConvPerChannel(params, input_shape, core::utils::castInputData(input_data), + weight_shape, core::utils::castInputData(weight_data), + core::utils::castInputData(bias_data), output_shape, + core::utils::castOutputData(output_data),true); + } + else + { + status = + pal::ConvPerChannel(params, input_shape, core::utils::castInputData(input_data), + weight_shape, core::utils::castInputData(weight_data), + core::utils::castInputData(bias_data), output_shape, + core::utils::castOutputData(output_data),false); + } - status = - pal::ConvPerChannel(params, input_shape, core::utils::castInputData(input_data), - weight_shape, core::utils::castInputData(weight_data), - core::utils::castInputData(bias_data), output_shape, - core::utils::castOutputData(output_data)); } break; #endif // DIS_QUANT diff --git a/res/CircleSchema/0.6/circle_schema.fbs b/res/CircleSchema/0.6/circle_schema.fbs index 1630bacd212..4cb076f3ad6 100644 --- a/res/CircleSchema/0.6/circle_schema.fbs +++ b/res/CircleSchema/0.6/circle_schema.fbs @@ -580,6 +580,12 @@ enum ActivationFunctionType : byte { TANH = 4, SIGN_BIT = 5, } +// LINT.IfChange +enum WeightCompressionType : byte { + NONE = 0, + Huffman = 1 +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) table Conv2DOptions { padding:Padding; @@ -588,6 +594,7 @@ table Conv2DOptions { fused_activation_function:ActivationFunctionType; dilation_w_factor:int = 1; dilation_h_factor:int = 1; + weight_compression_type:WeightCompressionType = NONE; } // Options for both Conv3D and Conv3DTranspose.