diff --git a/onert-micro/onert-micro/include/core/OMKernelData.h b/onert-micro/onert-micro/include/core/OMKernelData.h index 784f68e5053..d0ab251777e 100644 --- a/onert-micro/onert-micro/include/core/OMKernelData.h +++ b/onert-micro/onert-micro/include/core/OMKernelData.h @@ -228,6 +228,18 @@ struct SliceParams int32_t size[5]; }; +struct SVDFQuantParams +{ + int32_t input_zero_point; + int32_t output_zero_point; + int32_t activation_state_zero_point; + int32_t effective_scale_1_a; + int effective_scale_1_b; + int32_t effective_scale_2_a; + int effective_scale_2_b; + int rank; +}; + } // namespace core } // namespace onert_micro diff --git a/onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst b/onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst index f3fba6542af..bd3884f8b5c 100644 --- a/onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst +++ b/onert-micro/onert-micro/include/pal/cmsisnn/KernelsToBuild.lst @@ -75,7 +75,7 @@ REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D) REGISTER_KERNEL(SOFTMAX, Softmax) #/*REGISTER_KERNEL(SUM, Sum)*/ #/*REGISTER_KERNEL(SELECT_V2, SelectV2)*/ -#/*REGISTER_KERNEL(SVDF, SVDF)*/ +REGISTER_KERNEL(SVDF, SVDF) #/*REGISTER_KERNEL(WHILE, While)*/ #/*REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)*/ #/*REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)*/ diff --git a/onert-micro/onert-micro/include/pal/cmsisnn/PALSVDF.h b/onert-micro/onert-micro/include/pal/cmsisnn/PALSVDF.h new file mode 100644 index 00000000000..f6707689bc4 --- /dev/null +++ b/onert-micro/onert-micro/include/pal/cmsisnn/PALSVDF.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_EXECUTE_PAL_SVDF_H +#define ONERT_MICRO_EXECUTE_PAL_SVDF_H + +#include "PALSVDFCommon.h" +#include "core/OMRuntimeShape.h" +#include "core/OMKernelData.h" +#include "core/memory/OMMemoryManager.h" + +#include + +namespace onert_micro +{ +namespace execute +{ +namespace pal +{ + +OMStatus SVDF(const core::SVDFQuantParams ¶ms, const int8_t *input_data, + const int8_t *weights_feature_data, const int8_t *weights_time_data, + const int32_t *bias_data, int8_t *state_data, int8_t *output_data, + const core::OMRuntimeShape &input_shape, + const core::OMRuntimeShape &weights_feature_shape, + const core::OMRuntimeShape &weights_time_shape, + const core::OMRuntimeShape &bias_shape, const core::OMRuntimeShape &output_shape) +{ + cmsis_nn_dims input_dims; + input_dims.n = input_shape.dims(0); + input_dims.h = input_shape.dims(1); + + cmsis_nn_dims weights_feature_dims; + weights_feature_dims.n = weights_feature_shape.dims(0); + weights_feature_dims.h = weights_feature_shape.dims(1); + + cmsis_nn_dims weights_time_dims; + weights_time_dims.n = weights_time_shape.dims(0); + weights_time_dims.h = weights_time_shape.dims(1); + + cmsis_nn_dims bias_dims; + bias_dims.n = bias_shape.dims(0); + + cmsis_nn_dims state_dims; + state_dims.n = bias_shape.dims(0); + state_dims.h = bias_shape.dims(1); + + cmsis_nn_dims output_dims; + output_dims.n = output_shape.dims(0); + output_dims.h = output_shape.dims(1); + + cmsis_nn_svdf_params svdf_params; + svdf_params.rank = params.rank; + svdf_params.input_offset = params.input_zero_point; + svdf_params.output_offset = params.output_zero_point; + + svdf_params.input_activation.min = INT16_MIN; + svdf_params.input_activation.max = INT16_MAX; + + svdf_params.output_activation.min = INT8_MIN; + svdf_params.output_activation.max = INT8_MAX; + + cmsis_nn_per_tensor_quant_params in_quant_params; + in_quant_params.multiplier = params.effective_scale_1_a; + in_quant_params.shift = params.effective_scale_1_b; + + cmsis_nn_per_tensor_quant_params out_quant_params; + out_quant_params.multiplier = params.effective_scale_2_a; + out_quant_params.shift = params.effective_scale_2_b; + + const int batch_size = input_shape.dims(0); + const int input_size = input_shape.dims(1); + const int num_filters = weights_feature_shape.dims(0); + const int num_units = num_filters / params.rank; + + uint8_t *scratch_tensor_data; + OMStatus status = core::memory::OMMemoryManager::allocateMemory( + batch_size * num_filters * sizeof(int32_t), &scratch_tensor_data); + assert(status == Ok); + if (status != Ok) + return status; + + uint8_t *scratch_output_tensor_data; + status = core::memory::OMMemoryManager::allocateMemory(batch_size * num_units * sizeof(int32_t), + &scratch_output_tensor_data); + assert(status == Ok); + if (status != Ok) + return status; + + cmsis_nn_context scratch_ctx; + scratch_ctx.buf = reinterpret_cast(scratch_tensor_data); + + cmsis_nn_context scratch_output_ctx; + scratch_output_ctx.buf = reinterpret_cast(scratch_output_tensor_data); + + arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params, + &input_dims, input_data, &state_dims, state_data, &weights_feature_dims, + weights_feature_data, &weights_time_dims, weights_time_data, &bias_dims, bias_data, + &output_dims, output_data); + + core::memory::OMMemoryManager::deallocateMemory(scratch_tensor_data); + core::memory::OMMemoryManager::deallocateMemory(scratch_output_tensor_data); + + return Ok; +} + +} // namespace pal +} // namespace execute +} // namespace onert_micro + +#endif // ONERT_MICRO_EXECUTE_PAL_SVDF_H diff --git a/onert-micro/onert-micro/include/pal/mcu/PALSVDF.h b/onert-micro/onert-micro/include/pal/mcu/PALSVDF.h index e1c707607c4..3d479c59d65 100644 --- a/onert-micro/onert-micro/include/pal/mcu/PALSVDF.h +++ b/onert-micro/onert-micro/include/pal/mcu/PALSVDF.h @@ -20,4 +20,24 @@ #include "PALSVDFCommon.h" +namespace onert_micro +{ +namespace execute +{ +namespace pal +{ + +OMStatus SVDF(const core::SVDFQuantParams &, const int8_t *, const int8_t *, const int8_t *, + const int32_t *, int8_t *, int8_t *, const core::OMRuntimeShape &, + const core::OMRuntimeShape &, const core::OMRuntimeShape &, + const core::OMRuntimeShape &, const core::OMRuntimeShape &) +{ + // TODO: support it + return UnsupportedType; +} + +} // namespace pal +} // namespace execute +} // namespace onert_micro + #endif // ONERT_MICRO_EXECUTE_PAL_SVDF_H diff --git a/onert-micro/onert-micro/src/execute/kernels/SVDF.cpp b/onert-micro/onert-micro/src/execute/kernels/SVDF.cpp index 418a6b155d1..d1a7a2e6072 100644 --- a/onert-micro/onert-micro/src/execute/kernels/SVDF.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/SVDF.cpp @@ -22,6 +22,7 @@ #include "core/memory/OMMemoryManager.h" #include "execute/OMKernelExecutionBuilder.h" +#include "execute/OMUtils.h" #include "execute/OMRuntimeKernel.h" #include "PALSVDF.h" @@ -41,6 +42,38 @@ constexpr int inputActivationStateTensorIdx = 4; // This is a variable tensor, and will be modified by this op. constexpr int outputTensorIdx = 0; +void prepareQuantParams(core::SVDFQuantParams ¶ms, const circle::Tensor *input, + const circle::Tensor *weights_feature, const circle::Tensor *weights_time, + const circle::Tensor *activation_state, const circle::Tensor *output) +{ + assert(input->quantization() != nullptr); + assert(output->quantization() != nullptr); + assert(weights_feature->quantization() != nullptr); + assert(weights_time->quantization() != nullptr); + assert(activation_state->quantization() != nullptr); + + // Write zero points + params.input_zero_point = + static_cast(input->quantization()->zero_point()->operator[](0)); + params.output_zero_point = + static_cast(output->quantization()->zero_point()->operator[](0)); + params.activation_state_zero_point = + static_cast(activation_state->quantization()->zero_point()->operator[](0)); + + // Calculate effective scales + const float effective_scale_1 = (input->quantization()->scale()->operator[](0) * + weights_feature->quantization()->scale()->operator[](0)) / + (activation_state->quantization()->scale()->operator[](0)); + const float effective_scale_2 = (activation_state->quantization()->scale()->operator[](0) * + weights_time->quantization()->scale()->operator[](0)) / + (output->quantization()->scale()->operator[](0)); + + execute::quantizeMultiplier(effective_scale_1, ¶ms.effective_scale_1_a, + ¶ms.effective_scale_1_b); + execute::quantizeMultiplier(effective_scale_2, ¶ms.effective_scale_2_a, + ¶ms.effective_scale_2_b); +} + } // namespace OMStatus onert_micro::execute::execute_kernel_CircleSVDF(const OMExecuteArgs &execute_args) @@ -130,20 +163,20 @@ OMStatus onert_micro::execute::execute_kernel_CircleSVDF(const OMExecuteArgs &ex return status; std::memset(activation_state_data, 0, activation_state_size); - // Temporary buffer - uint8_t *scratch_buffer; - status = core::memory::OMMemoryManager::allocateMemory( - batch_size * num_filters * sizeof(core::OMDataType(output->type())), &scratch_buffer); - - assert(status == Ok); - if (status != Ok) - return status; switch (input->type()) { #ifndef DIS_FLOAT case circle::TensorType_FLOAT32: { + // Temporary buffer + uint8_t *scratch_buffer; + status = core::memory::OMMemoryManager::allocateMemory( + batch_size * num_filters * sizeof(core::OMDataType(output->type())), &scratch_buffer); + + assert(status == Ok); + if (status != Ok) + return status; status = pal::SVDF( utils::castInputData(input_data), utils::castInputData(weights_feature_data), utils::castInputData(weights_time_data), utils::castInputData(bias_data), @@ -151,9 +184,29 @@ OMStatus onert_micro::execute::execute_kernel_CircleSVDF(const OMExecuteArgs &ex utils::castOutputData(scratch_buffer), utils::castOutputData(output_data), rank, input_size, batch_size, num_filters, num_units, memory_size, options->fused_activation_function()); + + status = core::memory::OMMemoryManager::deallocateMemory(scratch_buffer); } break; #endif // DIS_FLOAT +#ifndef DIS_QUANT + case circle::TensorType_INT8: + { + core::SVDFQuantParams params{}; + prepareQuantParams(params, input, weights_feature, weights_time, activation_state, output); + + params.rank = rank; + + status = pal::SVDF( + params, utils::castInputData(input_data), + utils::castInputData(weights_feature_data), + utils::castInputData(weights_time_data), utils::castInputData(bias_data), + utils::castOutputData(activation_state_data), + utils::castOutputData(output_data), input_shape, weights_feature_shape, + weights_time_shape, core::OMRuntimeShape(bias), output_shape); + } + break; +#endif // DIS_QUANT default: { status = UnsupportedActivation; @@ -163,7 +216,6 @@ OMStatus onert_micro::execute::execute_kernel_CircleSVDF(const OMExecuteArgs &ex } status = core::memory::OMMemoryManager::deallocateMemory(activation_state_data); - status = core::memory::OMMemoryManager::deallocateMemory(scratch_buffer); return status; }