From 2caa2a883f42c6e034a9539498a8a938f7a67039 Mon Sep 17 00:00:00 2001 From: Artem Balyshev Date: Fri, 30 Jun 2023 13:43:36 +0300 Subject: [PATCH] [onert-micro] Support Div kernel This PR adds supporting of Div kernel. ONE-DCO-1.0-Signed-off-by: Artem Balyshev --- .../test_models/div/FloatDivKernel.h | 153 +++++++++++ .../test_models/div/NegDivKernel.h | 149 +++++++++++ .../test_models/div/TestDataDivBase.h | 68 +++++ .../pal/mcu/KernelsToBuild.lst | 1 + onert-micro/luci-interpreter/pal/mcu/PALDiv.h | 115 +++++++++ .../luci-interpreter/src/kernels/Div.cpp | 158 +++--------- .../luci-interpreter/src/kernels/Div.h | 49 ---- .../luci-interpreter/src/kernels/Div.test.cpp | 244 +++++------------- 8 files changed, 595 insertions(+), 342 deletions(-) create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h create mode 100644 onert-micro/luci-interpreter/pal/mcu/PALDiv.h delete mode 100644 onert-micro/luci-interpreter/src/kernels/Div.h diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h new file mode 100644 index 00000000000..ed9408a1647 --- /dev/null +++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H +#define LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H + +#include "TestDataDivBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ +namespace div_float_with_broadcasting +{ + +/* + * Div Kernel: + * + * Input_1(2, 5) Input_2(2, 1) + * \ / + * Div(with broadcast) + * | + * Output(2, 5) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +const std::vector input1_data = {8.432024, 5.4664106, 16.856224, -10.004156, -14.128681, + 12.695552, -7.5779333, -1.1460792, 15.574873, -12.670321}; +const std::vector input2_data = {-2.0361109, -9.528288}; +const std::vector reference_output_data = {-4.14124, -2.6847312, -8.278638, 4.913365, + 6.939053, -1.3324064, 0.795309, 0.120281756, + -1.634593, 1.3297584}; + +} // namespace div_float_with_broadcasting + +namespace div_float_no_broadcasting +{ +/* + * Div Kernel: + * + 
* Input_1(2, 5) Input_2(2, 5) + * \ / + * Div(no broadcast) + * | + * Output(2, 5) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +std::vector input1_data = {3.563036, 13.645134, 0.427146, 11.032923, 0.4189046, + 15.737275, 7.7726707, 0.75059056, -7.833488, 3.0679407}; +std::vector input2_data = {-0.62832826, 7.937863, -14.899745, 0.2819096, -5.8306913, + 8.6010685, -10.391579, -3.312385, -11.495937, 5.5657125}; +std::vector reference_output_data = {-5.67066, 1.7189934, -0.028668007, 39.136383, + -0.07184476, 1.8296884, -0.74797785, -0.22660124, + 0.6814136, 0.55122155}; + +} // namespace div_float_no_broadcasting + +class TestDataFloatDiv : public TestDataDivBase +{ +public: + explicit TestDataFloatDiv(bool is_with_broadcast) : TestDataDivBase(is_with_broadcast) + { + if (is_with_broadcast) + { + _input1_data = div_float_with_broadcasting::input1_data; + _input2_data = 
div_float_with_broadcasting::input2_data; + _reference_output_data = div_float_with_broadcasting::reference_output_data; + _test_kernel_model_circle = div_float_with_broadcasting::test_kernel_model_circle; + } + else + { + _input1_data = div_float_no_broadcasting::input1_data; + _input2_data = div_float_no_broadcasting::input2_data; + _reference_output_data = div_float_no_broadcasting::reference_output_data; + _test_kernel_model_circle = div_float_no_broadcasting::test_kernel_model_circle; + } + } + + ~TestDataFloatDiv() override = default; +}; + +} // namespace test_kernel +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h new file mode 100644 index 00000000000..df42f95fe49 --- /dev/null +++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H +#define LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H + +#include "TestDataDivBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ +namespace input_1_wrong_type +{ + +/* + * Div Kernel with input type mismatch: + * + * Input_1(2, 5) - Int32 Input_2(2, 1) - Float + * \ / + * Div(with broadcast) + * | + * Output(2, 5) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 
0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +} // namespace input_1_wrong_type + +namespace input_2_wrong_type +{ + +/* + * DIV Kernel with input type mismatch: + * + * Input_1(2, 5)- Float Input_2(2, 1) - Int32 + * \ / + * Div(with broadcast) + * | + * Output(2, 5) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 
0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +} // namespace input_2_wrong_type + +class NegTestDataInput1WrongTypeDiv : public NegTestDataBase +{ +public: + NegTestDataInput1WrongTypeDiv() + { + _test_kernel_model_circle = input_1_wrong_type::test_kernel_model_circle; + } + + ~NegTestDataInput1WrongTypeDiv() override = default; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + +protected: + const unsigned char *_test_kernel_model_circle; +}; + +class NegTestDataInput2WrongTypeDiv : public NegTestDataBase +{ +public: + NegTestDataInput2WrongTypeDiv() + { + _test_kernel_model_circle = input_2_wrong_type::test_kernel_model_circle; + } + + ~NegTestDataInput2WrongTypeDiv() override = default; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + +protected: + const unsigned char *_test_kernel_model_circle; +}; + +} // namespace test_kernel +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h new file mode 100644 index 00000000000..e4894e1aee3 --- /dev/null +++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H +#define LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H + +#include "luci_interpreter/test_models/TestDataBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ + +template class TestDataDivBase : public TestDataBase +{ +public: + explicit TestDataDivBase(bool) + { + // Do nothing + } + + TestDataDivBase() = delete; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + + const std::vector &get_input_data_by_index(int i) override final + { + switch (i) + { + case 0: + return _input1_data; + case 1: + return _input2_data; + default: + assert(false && "Wrong input index"); + } + } + + const std::vector &get_output_data_by_index(int i) override final + { + assert(i == 0); + return _reference_output_data; + } + +protected: + std::vector _input1_data; + std::vector _input2_data; + std::vector _reference_output_data; + const unsigned char *_test_kernel_model_circle; +}; + +} // namespace test_kernel +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H diff --git a/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst index bfa3457b113..37ae2e50442 100644 --- a/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst +++ b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -1,6 +1,7 @@ REGISTER_KERNEL(ABS, Abs) REGISTER_KERNEL(ADD, Add) REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D) +REGISTER_KERNEL(DIV, Div) REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected) REGISTER_KERNEL(CONV_2D, Conv2D) REGISTER_KERNEL(LOGISTIC, Logistic) diff --git a/onert-micro/luci-interpreter/pal/mcu/PALDiv.h b/onert-micro/luci-interpreter/pal/mcu/PALDiv.h new file mode 100644 index 00000000000..cf84a1007e0 --- /dev/null +++ b/onert-micro/luci-interpreter/pal/mcu/PALDiv.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DIV_H +#define LUCI_INTERPRETER_PAL_DIV_H + +#include "Params.h" +#include "PALUtils.h" +#include "ProcessBroadcastShapes.h" + +namespace luci_interpreter_pal +{ +template +inline void Div(const ArithmeticParams ¶ms, const int flat_size, const T *input1_data, + const T *input2_data, T *output_data) +{ + T activation_min, activation_max; + getActivationParams(params, &activation_min, &activation_max); + + for (int i = 0; i < flat_size; ++i) + output_data[i] = + std::min(std::max(input1_data[i] / input2_data[i], activation_min), activation_max); +} + +template +inline void DivScalar(const ArithmeticParams ¶ms, const int flat_size, const T *input_data, + const T scalar_value, T *output_data) +{ + T activation_min, activation_max; + getActivationParams(params, &activation_min, &activation_max); + + for (int i = 0; i < flat_size; ++i) + output_data[i] = + std::min(std::max(input_data[i] / scalar_value, activation_min), activation_max); +} + +template +inline void +BroadcastDiv4DSlow(const ArithmeticParams ¶ms, + const luci_interpreter::RuntimeShape &input1_shape, const T *input1_data, + const luci_interpreter::RuntimeShape &input2_shape, const T *input2_data, + const luci_interpreter::RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = input1_shape.flatSize(); + + if (params.broadcast_category == BroadcastableOpCategory::kScalarFirstBroadcast) + { + return DivScalar(params, flat_size, input2_data, input1_data[0], output_data); + } + else if (params.broadcast_category == BroadcastableOpCategory::kScalarSecondBroadcast) + { + return DivScalar(params, flat_size, input1_data, input2_data[0], output_data); + } + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); + const luci_interpreter::RuntimeShape extended_output_shape = + luci_interpreter::RuntimeShape::extendedShape(4, output_shape); + + T activation_min, activation_max; + getActivationParams(params, &activation_min, &activation_max); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ for (int b = 0; b < extended_output_shape.dims(0); ++b) + { + for (int y = 0; y < extended_output_shape.dims(1); ++y) + { + for (int x = 0; x < extended_output_shape.dims(2); ++x) + { + for (int c = 0; c < extended_output_shape.dims(3); ++c) + { + const int output_data_offset = + ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) * + extended_output_shape.dims(3) + + c; + + output_data[output_data_offset] = + std::min(std::max(input1_data[subscriptToIndex(desc1, b, y, x, c)] / + input2_data[subscriptToIndex(desc2, b, y, x, c)], + activation_min), + activation_max); + } + } + } + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DIV_H diff --git a/onert-micro/luci-interpreter/src/kernels/Div.cpp b/onert-micro/luci-interpreter/src/kernels/Div.cpp index efa90f8d374..f8a469061d8 100644 --- a/onert-micro/luci-interpreter/src/kernels/Div.cpp +++ b/onert-micro/luci-interpreter/src/kernels/Div.cpp @@ -14,140 +14,64 @@ * limitations under the License. */ -#include "kernels/Div.h" - +#include "Builders.h" #include "kernels/Utils.h" -#include -#include +#include "kernels/BinaryOpCommon.h" -namespace luci_interpreter -{ -namespace kernels -{ +#include "PALDiv.h" -Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms) - : KernelWithParams({input1, input2}, {output}, params) +namespace luci_interpreter { -} -void Div::configure() +// TODO: reduce code duplication with Mul +void configure_kernel_CircleDiv(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph) { - LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); - LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + kernels::TISOKernel kernel(cur_op, runtime_graph); - // TODO: enable it only if kernel with dynamic shapes - output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); + LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == + Tensor::element_type(kernel.input2())); + LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == + Tensor::element_type(kernel.input2())); } -void Div::execute() const +void execute_kernel_CircleDiv(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph) { - switch (input1()->element_type()) - { - case DataType::FLOAT32: - evalFloat(); - break; - case DataType::S64: - evalInteger(); - break; - case DataType::S32: - evalInteger(); - break; - case DataType::U8: - evalQuantized(); - break; - default: - assert(false && "Unsupported type."); - } -} - -void Div::evalFloat() const -{ - tflite::ArithmeticParams params{}; - fillArithmeticActivationRange(params, _params.activation); + kernels::TISOKernel kernel(cur_op, runtime_graph); - const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); - - if (need_broadcast) - { - tflite::reference_ops::BroadcastDivSlow( - params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), - getTensorData(input2()), getTensorShape(output()), getTensorData(output())); - } - else - { - tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), - getTensorShape(input2()), getTensorData(input2()), - getTensorShape(output()), getTensorData(output())); - } -} + const auto *options = cur_op->builtin_options_as_DivOptions(); -template void Div::evalInteger() const -{ - tflite::ArithmeticParams params{}; - fillArithmeticActivationRange(params, _params.activation); + 
luci_interpreter::RuntimeShape input_shape1 = + kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph); + luci_interpreter::RuntimeShape input_shape2 = + kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph); - const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); + bool is_inplace = runtime_graph->is_inplace_op(cur_op); - if (need_broadcast) + switch (Tensor::element_type(kernel.input1())) { - tflite::reference_ops::BroadcastDivSlow( - params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), - getTensorData(input2()), getTensorShape(output()), getTensorData(output())); - } - else - { - tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), - getTensorShape(input2()), getTensorData(input2()), - getTensorShape(output()), getTensorData(output())); - } -} - -void Div::evalQuantized() const -{ - const auto input1_scale = static_cast(input1()->scale()); - const auto input2_scale = static_cast(input2()->scale()); - const auto output_scale = static_cast(output()->scale()); - - const double real_output_multiplier = input1_scale / (input2_scale * output_scale); - - int32_t output_multiplier{}; - int output_shift{}; - - quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift); - - int32_t activation_min{}; - int32_t activation_max{}; - calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); - - tflite::ArithmeticParams params{}; - - params.input1_offset = -input1()->zero_point(); // Note the '-'. - params.input2_offset = -input2()->zero_point(); // Note the '-'. - params.output_offset = output()->zero_point(); - params.output_multiplier = output_multiplier; - params.output_shift = output_shift; - params.quantized_activation_min = activation_min; - params.quantized_activation_max = activation_max; - - const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); - - if (need_broadcast) - { - tflite::reference_ops::BroadcastDivSlow( - params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), - getTensorData(input2()), getTensorShape(output()), getTensorData(output())); - } - else - { - tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), - getTensorShape(input2()), getTensorData(input2()), - getTensorShape(output()), getTensorData(output())); +#ifndef DIS_FLOAT + case DataType::FLOAT32: + { + auto tiso_func = luci_interpreter_pal::Div; + auto broadcast_tiso_func = luci_interpreter_pal::BroadcastDiv4DSlow; + if (is_inplace) + { + kernels::evalTISOInplaceKernel(tiso_func, broadcast_tiso_func, &kernel, options, + std::move(input_shape1), std::move(input_shape2)); + } + else + { + kernels::TISOData kernel_data = kernel.readData(); + kernels::evalTISOKernel(tiso_func, broadcast_tiso_func, &kernel, &kernel_data, + options, std::move(input_shape1), std::move(input_shape2)); + } + } + break; +#endif // DIS_FLOAT + default: + assert(false && "Unsupported type."); } } -} // namespace kernels } // namespace luci_interpreter diff --git a/onert-micro/luci-interpreter/src/kernels/Div.h b/onert-micro/luci-interpreter/src/kernels/Div.h deleted file mode 100644 index c1bf3e10bd5..00000000000 --- a/onert-micro/luci-interpreter/src/kernels/Div.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LUCI_INTERPRETER_KERNELS_DIV_H -#define LUCI_INTERPRETER_KERNELS_DIV_H - -#include "core/Kernel.h" -#include "core/KernelParams.h" - -namespace luci_interpreter -{ -namespace kernels -{ - -class Div : public KernelWithParams -{ -public: - Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms); - - const Tensor *input1() const { return _inputs[0]; } - const Tensor *input2() const { return _inputs[1]; } - Tensor *output() const { return _outputs[0]; } - - void configure() override; - void execute() const override; - -private: - void evalFloat() const; - template void evalInteger() const; - void evalQuantized() const; -}; - -} // namespace kernels -} // namespace luci_interpreter - -#endif // LUCI_INTERPRETER_KERNELS_DIV_H diff --git a/onert-micro/luci-interpreter/src/kernels/Div.test.cpp b/onert-micro/luci-interpreter/src/kernels/Div.test.cpp index 85cd8b90aff..d6014397ca5 100644 --- a/onert-micro/luci-interpreter/src/kernels/Div.test.cpp +++ b/onert-micro/luci-interpreter/src/kernels/Div.test.cpp @@ -15,14 +15,14 @@ * limitations under the License. */ -#include "kernels/Div.h" #include "kernels/TestUtils.h" -#include "luci_interpreter/TestMemoryManager.h" +#include "luci_interpreter/test_models/div/FloatDivKernel.h" +#include "luci_interpreter/test_models/div/NegDivKernel.h" + +#include "loader/ModuleLoader.h" namespace luci_interpreter { -namespace kernels -{ namespace { @@ -30,201 +30,93 @@ using namespace testing; class DivTest : public ::testing::Test { -protected: - void SetUp() override { _memory_manager = std::make_unique(); } - - std::unique_ptr _memory_manager; + // Do nothing }; -float GetTolerance(float min, float max) -{ - const float kQuantizedStep = (max - min) / 255.0f; - const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep; - return kQuantizedTolerance; -} - -TEST_F(DivTest, Float) -{ - Shape base_shape = {2, 3, 1, 1}; - - std::vector output_shape = {2, 3, 1, 1}; - - std::vector input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; - std::vector input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f}; - std::vector test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f}; - - Tensor input1_tensor = - makeInputTensor(base_shape, input1_data, _memory_manager.get()); - Tensor input2_tensor = - makeInputTensor(base_shape, input2_data, _memory_manager.get()); - - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); -} - -TEST_F(DivTest, FloatBroadcast) -{ - Shape input1_shape = {1, 3}; - Shape input2_shape = {3, 1}; - - 
std::vector input1_data{-0.3f, 2.3f, 0.9f}; - std::vector input2_data{0.2f, 1.6f, 0.5f}; - std::vector test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f}; - - Tensor input1_tensor = - makeInputTensor(input1_shape, input1_data, _memory_manager.get()); - Tensor input2_tensor = - makeInputTensor(input2_shape, input2_data, _memory_manager.get()); - - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); -} - -TEST_F(DivTest, Uint8) +template std::vector checkDivKernel(test_kernel::TestDataBase *test_data_base) { - Shape base_shape = {1, 2, 2, 1}; + MemoryManager memory_manager{}; + RuntimeModule runtime_module{}; + bool dealloc_input = true; - std::vector output_shape = {1, 2, 2, 1}; + // Load model with single op + auto *model_data_raw = reinterpret_cast(test_data_base->get_model_ptr()); + ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input); - std::vector input1_data = {-0.8f, -0.2f, 0.3f, 0.7f}; - std::vector input2_data = {-0.8f, 0.4f, 0.8f, 1.0f}; - std::vector test_outputs{1.0f, 0.f, 0.375f, 0.7f}; + auto *main_runtime_graph = runtime_module.getMainGraph(); + assert(main_runtime_graph->getNumOfInputTensors() == 2); - const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - - std::pair quant_param = quantizationParams(-1.f, 1.f); - - Tensor input1_tensor = makeInputTensor( - base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor( - base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get()); + // set left input data + { + auto *input_tensor_data = reinterpret_cast(main_runtime_graph->configureGraphInput(0)); + std::copy(test_data_base->get_input_data_by_index(0).begin(), + test_data_base->get_input_data_by_index(0).end(), input_tensor_data); + } - Tensor output_tensor = - makeOutputTensor(getElementType(), quant_param.first, quant_param.second); + // set right input data + { + auto *input_tensor_data = reinterpret_cast(main_runtime_graph->configureGraphInput(1)); + std::copy(test_data_base->get_input_data_by_index(1).begin(), + test_data_base->get_input_data_by_index(1).end(), input_tensor_data); + } - DivParams params{}; - params.activation = Activation::RELU; + runtime_module.execute(); - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); + assert(main_runtime_graph->getNumOfOutputTensors() == 1); - EXPECT_THAT(dequantizeTensorData(output_tensor), - FloatArrayNear(test_outputs, kQuantizedTolerance)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + T *output_data = reinterpret_cast(main_runtime_graph->getOutputDataByIndex(0)); + const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T)); + std::vector output_data_vector(output_data, output_data + num_elements); + return output_data_vector; } -template void checkInteger(luci_interpreter::IMemoryManager *memory_manager) +TEST_F(DivTest, Float_P) { - using dtype = typename loco::DataTypeImpl::Type; - Shape base_shape = {2, 3, 1, 2}; - std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 
2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; - - std::vector> test_outputs = {{5, 6, 2, 0, 10, 3, // - 10, 0, 4, 5, 20, 0, // - 0, 0, 0, 2, 0, 0, // - 2, 0, 1, 10, 5, 0, // - 2, 3, 1, 0, 5, 1, // - 18, 20, 7, 0, 37, 10}, - {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10}, - {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0, - 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0, - 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10}, - {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}}; - std::vector input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100}; - std::vector input2_data{4, 5, 10, -3, 2, 10}; - for (size_t i = 0; i < test_shapes.size(); ++i) + // No broadcast { - Tensor input1_tensor = makeInputTensor(base_shape, input1_data, memory_manager); - Tensor input2_tensor = makeInputTensor(test_shapes[i], input2_data, memory_manager); - Tensor output_tensor = makeOutputTensor(DType); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - EXPECT_THAT(extractTensorData(output_tensor), test_outputs[i]) - << "With shape number " << i; + const bool is_with_broadcast = false; + test_kernel::TestDataFloatDiv test_data_kernel(is_with_broadcast); + std::vector output_data_vector = checkDivKernel(&test_data_kernel); + EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear( + test_data_kernel.get_output_data_by_index(0), 0.0001f)); + } + // With broadcast + { + const bool is_with_broadcast = true; + test_kernel::TestDataFloatDiv test_data_kernel(is_with_broadcast); + std::vector output_data_vector = checkDivKernel(&test_data_kernel); + EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear( + test_data_kernel.get_output_data_by_index(0), 0.0001f)); } } -TEST_F(DivTest, SInt64) -{ - checkInteger(_memory_manager.get()); - SUCCEED(); -} - -TEST_F(DivTest, SInt32) -{ - checkInteger(_memory_manager.get()); - SUCCEED(); -} - -TEST_F(DivTest, Input_Output_Type_NEG) +TEST_F(DivTest, Wrong_Input1_Type_NEG) { - Tensor input1_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - EXPECT_ANY_THROW(kernel.configure()); + test_kernel::NegTestDataInput1WrongTypeDiv test_data_kernel; + + MemoryManager memory_manager{}; + RuntimeModule runtime_module{}; + bool dealloc_input = true; + // Load model with single op + auto *model_data_raw = reinterpret_cast(test_data_kernel.get_model_ptr()); + EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input), + ""); } -TEST_F(DivTest, Invalid_Input_Type_NEG) +TEST_F(DivTest, Wrong_Input2_Type_NEG) { - Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::U64); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - _memory_manager->allocate_memory(output_tensor); - EXPECT_ANY_THROW(kernel.execute()); + test_kernel::NegTestDataInput2WrongTypeDiv test_data_kernel; + + MemoryManager memory_manager{}; + RuntimeModule runtime_module{}; + bool dealloc_input = true; + // Load model with single op + 
auto *model_data_raw = reinterpret_cast(test_data_kernel.get_model_ptr()); + EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input), + ""); } -TEST_F(DivTest, Invalid_Output_Type_NEG) -{ - Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::S64); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - EXPECT_ANY_THROW(kernel.configure()); -} +// TODO: add tests for inplace optimizations for all types } // namespace -} // namespace kernels } // namespace luci_interpreter
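
Note: the element-wise path added in PALDiv.h reduces to a clamp-after-divide loop driven by the fused activation range. Below is a minimal, self-contained sketch of that idea, not the PAL code itself: DivElementwise and its plain min/max arguments are hypothetical stand-ins for luci_interpreter_pal::Div and the ArithmeticParams / getActivationParams machinery, and the sample values only loosely follow the removed float test with RELU.

#include <algorithm>
#include <cassert>
#include <limits>
#include <vector>

// Clamp-after-divide, as in the PAL element-wise loop: the activation bounds come from
// the operator's fused activation (RELU here, i.e. [0, +inf)).
template <typename T>
void DivElementwise(int flat_size, const T *input1, const T *input2, T *output,
                    T activation_min, T activation_max)
{
  for (int i = 0; i < flat_size; ++i)
    output[i] = std::min(std::max(input1[i] / input2[i], activation_min), activation_max);
}

int main()
{
  const std::vector<float> input1 = {0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
  const std::vector<float> input2 = {0.2f, 1.6f, 0.5f, -0.4f, 1.6f, 0.4f};
  std::vector<float> output(input1.size());

  // RELU: negative quotients are clamped to zero.
  DivElementwise<float>(static_cast<int>(input1.size()), input1.data(), input2.data(),
                        output.data(), 0.0f, std::numeric_limits<float>::max());

  assert(output[3] == 0.0f);                      // 0.5f / -0.4f is negative, clamped by RELU
  assert(output[0] > 1.49f && output[0] < 1.51f); // 0.3f / 0.2f ~= 1.5f
  return 0;
}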
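
Note: the dimension-ordering comment in BroadcastDiv4DSlow (batch/row/col/channel, innermost loop over the smallest stride) is easier to follow with concrete strides. The following is a hedged, self-contained illustration of that indexing for the float broadcast test case, where input1 of shape (2, 5) and input2 of shape (2, 1) are both viewed as extended 4-D shapes; Desc4D and its hard-coded strides are simplified stand-ins for NdArrayDesc<4> and NdArrayDescsForElementwiseBroadcast, which are not reproduced here.

#include <cassert>
#include <vector>

// A broadcast dimension is modelled by a stride of 0, so every index along that
// dimension reads the same element; this is the effect the real helper computes.
struct Desc4D
{
  int strides[4]; // strides for (b, y, x, c); 0 means "broadcast along this dimension"
};

inline int subscriptToIndex(const Desc4D &d, int b, int y, int x, int c)
{
  return b * d.strides[0] + y * d.strides[1] + x * d.strides[2] + c * d.strides[3];
}

int main()
{
  // input1 (2, 5) -> extended shape (1, 2, 5, 1); input2 (2, 1) -> (1, 2, 1, 1).
  const std::vector<float> input1 = {8.f, 5.f, 16.f, -10.f, -14.f, 12.f, -7.f, -1.f, 15.f, -12.f};
  const std::vector<float> input2 = {-2.f, -9.f};
  std::vector<float> output(10);

  const Desc4D desc1 = {{10, 5, 1, 1}};
  const Desc4D desc2 = {{2, 1, 0, 1}}; // stride 0 over x: each row reuses one divisor
  const int B = 1, H = 2, W = 5, D = 1;

  // Same loop nesting as BroadcastDiv4DSlow: the innermost loop has stride 1 in the output.
  for (int b = 0; b < B; ++b)
    for (int y = 0; y < H; ++y)
      for (int x = 0; x < W; ++x)
        for (int c = 0; c < D; ++c)
        {
          const int out_offset = ((b * H + y) * W + x) * D + c;
          output[out_offset] = input1[subscriptToIndex(desc1, b, y, x, c)] /
                               input2[subscriptToIndex(desc2, b, y, x, c)];
        }

  assert(output[0] == -4.0f); // 8 / -2
  assert(output[2] == -8.0f); // 16 / -2, same divisor reused across the row
  return 0;
}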