From 2caa2a883f42c6e034a9539498a8a938f7a67039 Mon Sep 17 00:00:00 2001 From: Artem Balyshev Date: Fri, 30 Jun 2023 13:43:36 +0300 Subject: [PATCH] [onert-micro] Support Div kernel This PR adds supporting of Div kernel. ONE-DCO-1.0-Signed-off-by: Artem Balyshev --- .../test_models/div/FloatDivKernel.h | 153 +++++++++++ .../test_models/div/NegDivKernel.h | 149 +++++++++++ .../test_models/div/TestDataDivBase.h | 68 +++++ .../pal/mcu/KernelsToBuild.lst | 1 + onert-micro/luci-interpreter/pal/mcu/PALDiv.h | 115 +++++++++ .../luci-interpreter/src/kernels/Div.cpp | 158 +++--------- .../luci-interpreter/src/kernels/Div.h | 49 ---- .../luci-interpreter/src/kernels/Div.test.cpp | 244 +++++------------- 8 files changed, 595 insertions(+), 342 deletions(-) create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h create mode 100644 onert-micro/luci-interpreter/pal/mcu/PALDiv.h delete mode 100644 onert-micro/luci-interpreter/src/kernels/Div.h diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h new file mode 100644 index 00000000000..ed9408a1647 --- /dev/null +++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H +#define LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H + +#include "TestDataDivBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ +namespace div_float_with_broadcasting +{ + +/* + * Div Kernel: + * + * Input_1(2, 5) Input_2(2, 1) + * \ / + * Div(with broadcast) + * | + * Output(2, 5) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +const std::vector input1_data = {8.432024, 5.4664106, 16.856224, -10.004156, -14.128681, + 12.695552, -7.5779333, -1.1460792, 15.574873, -12.670321}; +const std::vector input2_data = {-2.0361109, -9.528288}; +const std::vector reference_output_data = {-4.14124, -2.6847312, -8.278638, 4.913365, + 6.939053, -1.3324064, 0.795309, 0.120281756, + -1.634593, 1.3297584}; + +} // namespace div_float_with_broadcasting + +namespace div_float_no_broadcasting +{ +/* + * Div Kernel: + * + 
* Input_1(2, 5) Input_2(2, 5) + * \ / + * Div(no broadcast) + * | + * Output(2, 5) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +std::vector input1_data = {3.563036, 13.645134, 0.427146, 11.032923, 0.4189046, + 15.737275, 7.7726707, 0.75059056, -7.833488, 3.0679407}; +std::vector input2_data = {-0.62832826, 7.937863, -14.899745, 0.2819096, -5.8306913, + 8.6010685, -10.391579, -3.312385, -11.495937, 5.5657125}; +std::vector reference_output_data = {-5.67066, 1.7189934, -0.028668007, 39.136383, + -0.07184476, 1.8296884, -0.74797785, -0.22660124, + 0.6814136, 0.55122155}; + +} // namespace div_float_no_broadcasting + +class TestDataFloatDiv : public TestDataDivBase +{ +public: + explicit TestDataFloatDiv(bool is_with_broadcast) : TestDataDivBase(is_with_broadcast) + { + if (is_with_broadcast) + { + _input1_data = div_float_with_broadcasting::input1_data; + _input2_data = 
div_float_with_broadcasting::input2_data; + _reference_output_data = div_float_with_broadcasting::reference_output_data; + _test_kernel_model_circle = div_float_with_broadcasting::test_kernel_model_circle; + } + else + { + _input1_data = div_float_no_broadcasting::input1_data; + _input2_data = div_float_no_broadcasting::input2_data; + _reference_output_data = div_float_no_broadcasting::reference_output_data; + _test_kernel_model_circle = div_float_no_broadcasting::test_kernel_model_circle; + } + } + + ~TestDataFloatDiv() override = default; +}; + +} // namespace test_kernel +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h new file mode 100644 index 00000000000..df42f95fe49 --- /dev/null +++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H +#define LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H + +#include "TestDataDivBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ +namespace input_1_wrong_type +{ + +/* + * Div Kernel with input type mismatch: + * + * Input_1(2, 5) - Int32 Input_2(2, 1) - Float + * \ / + * Div(with broadcast) + * | + * Output(2, 5) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 
0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +} // namespace input_1_wrong_type + +namespace input_2_wrong_type +{ + +/* + * DIV Kernel with input type mismatch: + * + * Input_1(2, 5)- Float Input_2(2, 1) - Int32 + * \ / + * Div(with broadcast) + * | + * Output(2, 5) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, + 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 
0x00, 0x00, + 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +} // namespace input_2_wrong_type + +class NegTestDataInput1WrongTypeDiv : public NegTestDataBase +{ +public: + NegTestDataInput1WrongTypeDiv() + { + _test_kernel_model_circle = input_1_wrong_type::test_kernel_model_circle; + } + + ~NegTestDataInput1WrongTypeDiv() override = default; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + +protected: + const unsigned char *_test_kernel_model_circle; +}; + +class NegTestDataInput2WrongTypeDiv : public NegTestDataBase +{ +public: + NegTestDataInput2WrongTypeDiv() + { + _test_kernel_model_circle = input_2_wrong_type::test_kernel_model_circle; + } + + ~NegTestDataInput2WrongTypeDiv() override = default; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + +protected: + const unsigned char *_test_kernel_model_circle; +}; + +} // namespace test_kernel +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h new file mode 100644 index 00000000000..e4894e1aee3 --- /dev/null +++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H +#define LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H + +#include "luci_interpreter/test_models/TestDataBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ + +template class TestDataDivBase : public TestDataBase +{ +public: + explicit TestDataDivBase(bool) + { + // Do nothing + } + + TestDataDivBase() = delete; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + + const std::vector &get_input_data_by_index(int i) override final + { + switch (i) + { + case 0: + return _input1_data; + case 1: + return _input2_data; + default: + assert(false && "Wrong input index"); + } + } + + const std::vector &get_output_data_by_index(int i) override final + { + assert(i == 0); + return _reference_output_data; + } + +protected: + std::vector _input1_data; + std::vector _input2_data; + std::vector _reference_output_data; + const unsigned char *_test_kernel_model_circle; +}; + +} // namespace test_kernel +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H diff --git a/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst index bfa3457b113..37ae2e50442 100644 --- a/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst +++ b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -1,6 +1,7 @@ REGISTER_KERNEL(ABS, Abs) REGISTER_KERNEL(ADD, Add) REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D) +REGISTER_KERNEL(DIV, Div) REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected) REGISTER_KERNEL(CONV_2D, Conv2D) REGISTER_KERNEL(LOGISTIC, Logistic) diff --git a/onert-micro/luci-interpreter/pal/mcu/PALDiv.h b/onert-micro/luci-interpreter/pal/mcu/PALDiv.h new file mode 100644 index 00000000000..cf84a1007e0 --- /dev/null +++ b/onert-micro/luci-interpreter/pal/mcu/PALDiv.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DIV_H +#define LUCI_INTERPRETER_PAL_DIV_H + +#include "Params.h" +#include "PALUtils.h" +#include "ProcessBroadcastShapes.h" + +namespace luci_interpreter_pal +{ +template +inline void Div(const ArithmeticParams ¶ms, const int flat_size, const T *input1_data, + const T *input2_data, T *output_data) +{ + T activation_min, activation_max; + getActivationParams(params, &activation_min, &activation_max); + + for (int i = 0; i < flat_size; ++i) + output_data[i] = + std::min(std::max(input1_data[i] / input2_data[i], activation_min), activation_max); +} + +template +inline void DivScalar(const ArithmeticParams ¶ms, const int flat_size, const T *input_data, + const T scalar_value, T *output_data) +{ + T activation_min, activation_max; + getActivationParams(params, &activation_min, &activation_max); + + for (int i = 0; i < flat_size; ++i) + output_data[i] = + std::min(std::max(input_data[i] / scalar_value, activation_min), activation_max); +} + +template +inline void +BroadcastDiv4DSlow(const ArithmeticParams ¶ms, + const luci_interpreter::RuntimeShape &input1_shape, const T *input1_data, + const luci_interpreter::RuntimeShape &input2_shape, const T *input2_data, + const luci_interpreter::RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = input1_shape.flatSize(); + + if (params.broadcast_category == BroadcastableOpCategory::kScalarFirstBroadcast) + { + return DivScalar(params, flat_size, input2_data, input1_data[0], output_data); + } + else if (params.broadcast_category == BroadcastableOpCategory::kScalarSecondBroadcast) + { + return DivScalar(params, flat_size, input1_data, input2_data[0], output_data); + } + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); + const luci_interpreter::RuntimeShape extended_output_shape = + luci_interpreter::RuntimeShape::extendedShape(4, output_shape); + + T activation_min, activation_max; + getActivationParams(params, &activation_min, &activation_max); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ for (int b = 0; b < extended_output_shape.dims(0); ++b) + { + for (int y = 0; y < extended_output_shape.dims(1); ++y) + { + for (int x = 0; x < extended_output_shape.dims(2); ++x) + { + for (int c = 0; c < extended_output_shape.dims(3); ++c) + { + const int output_data_offset = + ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) * + extended_output_shape.dims(3) + + c; + + output_data[output_data_offset] = + std::min(std::max(input1_data[subscriptToIndex(desc1, b, y, x, c)] / + input2_data[subscriptToIndex(desc2, b, y, x, c)], + activation_min), + activation_max); + } + } + } + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DIV_H diff --git a/onert-micro/luci-interpreter/src/kernels/Div.cpp b/onert-micro/luci-interpreter/src/kernels/Div.cpp index efa90f8d374..f8a469061d8 100644 --- a/onert-micro/luci-interpreter/src/kernels/Div.cpp +++ b/onert-micro/luci-interpreter/src/kernels/Div.cpp @@ -14,140 +14,64 @@ * limitations under the License. */ -#include "kernels/Div.h" - +#include "Builders.h" #include "kernels/Utils.h" -#include -#include +#include "kernels/BinaryOpCommon.h" -namespace luci_interpreter -{ -namespace kernels -{ +#include "PALDiv.h" -Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms) - : KernelWithParams({input1, input2}, {output}, params) +namespace luci_interpreter { -} -void Div::configure() +// TODO: reduce code duplication with Mul +void configure_kernel_CircleDiv(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph) { - LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); - LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + kernels::TISOKernel kernel(cur_op, runtime_graph); - // TODO: enable it only if kernel with dynamic shapes - output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); + LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == + Tensor::element_type(kernel.input2())); + LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == + Tensor::element_type(kernel.input2())); } -void Div::execute() const +void execute_kernel_CircleDiv(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph) { - switch (input1()->element_type()) - { - case DataType::FLOAT32: - evalFloat(); - break; - case DataType::S64: - evalInteger(); - break; - case DataType::S32: - evalInteger(); - break; - case DataType::U8: - evalQuantized(); - break; - default: - assert(false && "Unsupported type."); - } -} - -void Div::evalFloat() const -{ - tflite::ArithmeticParams params{}; - fillArithmeticActivationRange(params, _params.activation); + kernels::TISOKernel kernel(cur_op, runtime_graph); - const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); - - if (need_broadcast) - { - tflite::reference_ops::BroadcastDivSlow( - params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), - getTensorData(input2()), getTensorShape(output()), getTensorData(output())); - } - else - { - tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), - getTensorShape(input2()), getTensorData(input2()), - getTensorShape(output()), getTensorData(output())); - } -} + const auto *options = cur_op->builtin_options_as_DivOptions(); -template void Div::evalInteger() const -{ - tflite::ArithmeticParams params{}; - fillArithmeticActivationRange(params, _params.activation); + 
luci_interpreter::RuntimeShape input_shape1 = + kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph); + luci_interpreter::RuntimeShape input_shape2 = + kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph); - const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); + bool is_inplace = runtime_graph->is_inplace_op(cur_op); - if (need_broadcast) + switch (Tensor::element_type(kernel.input1())) { - tflite::reference_ops::BroadcastDivSlow( - params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), - getTensorData(input2()), getTensorShape(output()), getTensorData(output())); - } - else - { - tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), - getTensorShape(input2()), getTensorData(input2()), - getTensorShape(output()), getTensorData(output())); - } -} - -void Div::evalQuantized() const -{ - const auto input1_scale = static_cast(input1()->scale()); - const auto input2_scale = static_cast(input2()->scale()); - const auto output_scale = static_cast(output()->scale()); - - const double real_output_multiplier = input1_scale / (input2_scale * output_scale); - - int32_t output_multiplier{}; - int output_shift{}; - - quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift); - - int32_t activation_min{}; - int32_t activation_max{}; - calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); - - tflite::ArithmeticParams params{}; - - params.input1_offset = -input1()->zero_point(); // Note the '-'. - params.input2_offset = -input2()->zero_point(); // Note the '-'. - params.output_offset = output()->zero_point(); - params.output_multiplier = output_multiplier; - params.output_shift = output_shift; - params.quantized_activation_min = activation_min; - params.quantized_activation_max = activation_max; - - const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); - - if (need_broadcast) - { - tflite::reference_ops::BroadcastDivSlow( - params, getTensorShape(input1()), getTensorData(input1()), getTensorShape(input2()), - getTensorData(input2()), getTensorShape(output()), getTensorData(output())); - } - else - { - tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData(input1()), - getTensorShape(input2()), getTensorData(input2()), - getTensorShape(output()), getTensorData(output())); +#ifndef DIS_FLOAT + case DataType::FLOAT32: + { + auto tiso_func = luci_interpreter_pal::Div; + auto broadcast_tiso_func = luci_interpreter_pal::BroadcastDiv4DSlow; + if (is_inplace) + { + kernels::evalTISOInplaceKernel(tiso_func, broadcast_tiso_func, &kernel, options, + std::move(input_shape1), std::move(input_shape2)); + } + else + { + kernels::TISOData kernel_data = kernel.readData(); + kernels::evalTISOKernel(tiso_func, broadcast_tiso_func, &kernel, &kernel_data, + options, std::move(input_shape1), std::move(input_shape2)); + } + } + break; +#endif // DIS_FLOAT + default: + assert(false && "Unsupported type."); } } -} // namespace kernels } // namespace luci_interpreter diff --git a/onert-micro/luci-interpreter/src/kernels/Div.h b/onert-micro/luci-interpreter/src/kernels/Div.h deleted file mode 100644 index c1bf3e10bd5..00000000000 --- a/onert-micro/luci-interpreter/src/kernels/Div.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LUCI_INTERPRETER_KERNELS_DIV_H -#define LUCI_INTERPRETER_KERNELS_DIV_H - -#include "core/Kernel.h" -#include "core/KernelParams.h" - -namespace luci_interpreter -{ -namespace kernels -{ - -class Div : public KernelWithParams -{ -public: - Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms); - - const Tensor *input1() const { return _inputs[0]; } - const Tensor *input2() const { return _inputs[1]; } - Tensor *output() const { return _outputs[0]; } - - void configure() override; - void execute() const override; - -private: - void evalFloat() const; - template void evalInteger() const; - void evalQuantized() const; -}; - -} // namespace kernels -} // namespace luci_interpreter - -#endif // LUCI_INTERPRETER_KERNELS_DIV_H diff --git a/onert-micro/luci-interpreter/src/kernels/Div.test.cpp b/onert-micro/luci-interpreter/src/kernels/Div.test.cpp index 85cd8b90aff..d6014397ca5 100644 --- a/onert-micro/luci-interpreter/src/kernels/Div.test.cpp +++ b/onert-micro/luci-interpreter/src/kernels/Div.test.cpp @@ -15,14 +15,14 @@ * limitations under the License. */ -#include "kernels/Div.h" #include "kernels/TestUtils.h" -#include "luci_interpreter/TestMemoryManager.h" +#include "luci_interpreter/test_models/div/FloatDivKernel.h" +#include "luci_interpreter/test_models/div/NegDivKernel.h" + +#include "loader/ModuleLoader.h" namespace luci_interpreter { -namespace kernels -{ namespace { @@ -30,201 +30,93 @@ using namespace testing; class DivTest : public ::testing::Test { -protected: - void SetUp() override { _memory_manager = std::make_unique(); } - - std::unique_ptr _memory_manager; + // Do nothing }; -float GetTolerance(float min, float max) -{ - const float kQuantizedStep = (max - min) / 255.0f; - const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep; - return kQuantizedTolerance; -} - -TEST_F(DivTest, Float) -{ - Shape base_shape = {2, 3, 1, 1}; - - std::vector output_shape = {2, 3, 1, 1}; - - std::vector input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; - std::vector input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f}; - std::vector test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f}; - - Tensor input1_tensor = - makeInputTensor(base_shape, input1_data, _memory_manager.get()); - Tensor input2_tensor = - makeInputTensor(base_shape, input2_data, _memory_manager.get()); - - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); -} - -TEST_F(DivTest, FloatBroadcast) -{ - Shape input1_shape = {1, 3}; - Shape input2_shape = {3, 1}; - - 
std::vector input1_data{-0.3f, 2.3f, 0.9f}; - std::vector input2_data{0.2f, 1.6f, 0.5f}; - std::vector test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f}; - - Tensor input1_tensor = - makeInputTensor(input1_shape, input1_data, _memory_manager.get()); - Tensor input2_tensor = - makeInputTensor(input2_shape, input2_data, _memory_manager.get()); - - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); -} - -TEST_F(DivTest, Uint8) +template std::vector checkDivKernel(test_kernel::TestDataBase *test_data_base) { - Shape base_shape = {1, 2, 2, 1}; + MemoryManager memory_manager{}; + RuntimeModule runtime_module{}; + bool dealloc_input = true; - std::vector output_shape = {1, 2, 2, 1}; + // Load model with single op + auto *model_data_raw = reinterpret_cast(test_data_base->get_model_ptr()); + ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input); - std::vector input1_data = {-0.8f, -0.2f, 0.3f, 0.7f}; - std::vector input2_data = {-0.8f, 0.4f, 0.8f, 1.0f}; - std::vector test_outputs{1.0f, 0.f, 0.375f, 0.7f}; + auto *main_runtime_graph = runtime_module.getMainGraph(); + assert(main_runtime_graph->getNumOfInputTensors() == 2); - const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - - std::pair quant_param = quantizationParams(-1.f, 1.f); - - Tensor input1_tensor = makeInputTensor( - base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor( - base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get()); + // set left input data + { + auto *input_tensor_data = reinterpret_cast(main_runtime_graph->configureGraphInput(0)); + std::copy(test_data_base->get_input_data_by_index(0).begin(), + test_data_base->get_input_data_by_index(0).end(), input_tensor_data); + } - Tensor output_tensor = - makeOutputTensor(getElementType(), quant_param.first, quant_param.second); + // set right input data + { + auto *input_tensor_data = reinterpret_cast(main_runtime_graph->configureGraphInput(1)); + std::copy(test_data_base->get_input_data_by_index(1).begin(), + test_data_base->get_input_data_by_index(1).end(), input_tensor_data); + } - DivParams params{}; - params.activation = Activation::RELU; + runtime_module.execute(); - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); + assert(main_runtime_graph->getNumOfOutputTensors() == 1); - EXPECT_THAT(dequantizeTensorData(output_tensor), - FloatArrayNear(test_outputs, kQuantizedTolerance)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + T *output_data = reinterpret_cast(main_runtime_graph->getOutputDataByIndex(0)); + const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T)); + std::vector output_data_vector(output_data, output_data + num_elements); + return output_data_vector; } -template void checkInteger(luci_interpreter::IMemoryManager *memory_manager) +TEST_F(DivTest, Float_P) { - using dtype = typename loco::DataTypeImpl::Type; - Shape base_shape = {2, 3, 1, 2}; - std::vector test_shapes{{1, 1, 3, 2}, {1, 3, 1, 
2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; - - std::vector> test_outputs = {{5, 6, 2, 0, 10, 3, // - 10, 0, 4, 5, 20, 0, // - 0, 0, 0, 2, 0, 0, // - 2, 0, 1, 10, 5, 0, // - 2, 3, 1, 0, 5, 1, // - 18, 20, 7, 0, 37, 10}, - {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10}, - {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0, - 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0, - 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10}, - {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}}; - std::vector input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100}; - std::vector input2_data{4, 5, 10, -3, 2, 10}; - for (size_t i = 0; i < test_shapes.size(); ++i) + // No broadcast { - Tensor input1_tensor = makeInputTensor(base_shape, input1_data, memory_manager); - Tensor input2_tensor = makeInputTensor(test_shapes[i], input2_data, memory_manager); - Tensor output_tensor = makeOutputTensor(DType); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - EXPECT_THAT(extractTensorData(output_tensor), test_outputs[i]) - << "With shape number " << i; + const bool is_with_broadcast = false; + test_kernel::TestDataFloatDiv test_data_kernel(is_with_broadcast); + std::vector output_data_vector = checkDivKernel(&test_data_kernel); + EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear( + test_data_kernel.get_output_data_by_index(0), 0.0001f)); + } + // With broadcast + { + const bool is_with_broadcast = true; + test_kernel::TestDataFloatDiv test_data_kernel(is_with_broadcast); + std::vector output_data_vector = checkDivKernel(&test_data_kernel); + EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear( + test_data_kernel.get_output_data_by_index(0), 0.0001f)); } } -TEST_F(DivTest, SInt64) -{ - checkInteger(_memory_manager.get()); - SUCCEED(); -} - -TEST_F(DivTest, SInt32) -{ - checkInteger(_memory_manager.get()); - SUCCEED(); -} - -TEST_F(DivTest, Input_Output_Type_NEG) +TEST_F(DivTest, Wrong_Input1_Type_NEG) { - Tensor input1_tensor = makeInputTensor({1}, {1.f}, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - EXPECT_ANY_THROW(kernel.configure()); + test_kernel::NegTestDataInput1WrongTypeDiv test_data_kernel; + + MemoryManager memory_manager{}; + RuntimeModule runtime_module{}; + bool dealloc_input = true; + // Load model with single op + auto *model_data_raw = reinterpret_cast(test_data_kernel.get_model_ptr()); + EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input), + ""); } -TEST_F(DivTest, Invalid_Input_Type_NEG) +TEST_F(DivTest, Wrong_Input2_Type_NEG) { - Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::U64); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - kernel.configure(); - _memory_manager->allocate_memory(output_tensor); - EXPECT_ANY_THROW(kernel.execute()); + test_kernel::NegTestDataInput2WrongTypeDiv test_data_kernel; + + MemoryManager memory_manager{}; + RuntimeModule runtime_module{}; + bool dealloc_input = true; + // Load model with single op + 
auto *model_data_raw = reinterpret_cast(test_data_kernel.get_model_ptr()); + EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input), + ""); } -TEST_F(DivTest, Invalid_Output_Type_NEG) -{ - Tensor input1_tensor = makeInputTensor({1}, {1}, _memory_manager.get()); - Tensor input2_tensor = makeInputTensor({1}, {2}, _memory_manager.get()); - Tensor output_tensor = makeOutputTensor(DataType::S64); - - DivParams params{}; - params.activation = Activation::RELU; - - Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); - EXPECT_ANY_THROW(kernel.configure()); -} +// TODO: add tests for inplace optimizations for all types } // namespace -} // namespace kernels } // namespace luci_interpreter
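
Note: the element-wise path added in PALDiv.h reduces to a clamp-after-divide loop driven by the fused activation range. Below is a minimal, self-contained sketch of that idea, not the PAL code itself: DivElementwise and its plain min/max arguments are hypothetical stand-ins for luci_interpreter_pal::Div and the ArithmeticParams / getActivationParams machinery, and the sample values only loosely follow the removed float test with RELU.

#include <algorithm>
#include <cassert>
#include <limits>
#include <vector>

// Clamp-after-divide, as in the PAL element-wise loop: the activation bounds come from
// the operator's fused activation (RELU here, i.e. [0, +inf)).
template <typename T>
void DivElementwise(int flat_size, const T *input1, const T *input2, T *output,
                    T activation_min, T activation_max)
{
  for (int i = 0; i < flat_size; ++i)
    output[i] = std::min(std::max(input1[i] / input2[i], activation_min), activation_max);
}

int main()
{
  const std::vector<float> input1 = {0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
  const std::vector<float> input2 = {0.2f, 1.6f, 0.5f, -0.4f, 1.6f, 0.4f};
  std::vector<float> output(input1.size());

  // RELU: negative quotients are clamped to zero.
  DivElementwise<float>(static_cast<int>(input1.size()), input1.data(), input2.data(),
                        output.data(), 0.0f, std::numeric_limits<float>::max());

  assert(output[3] == 0.0f);                      // 0.5f / -0.4f is negative, clamped by RELU
  assert(output[0] > 1.49f && output[0] < 1.51f); // 0.3f / 0.2f ~= 1.5f
  return 0;
}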
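
Note: the dimension-ordering comment in BroadcastDiv4DSlow (batch/row/col/channel, innermost loop over the smallest stride) is easier to follow with concrete strides. The following is a hedged, self-contained illustration of that indexing for the float broadcast test case, where input1 of shape (2, 5) and input2 of shape (2, 1) are both viewed as extended 4-D shapes; Desc4D and its hard-coded strides are simplified stand-ins for NdArrayDesc<4> and NdArrayDescsForElementwiseBroadcast, which are not reproduced here.

#include <cassert>
#include <vector>

// A broadcast dimension is modelled by a stride of 0, so every index along that
// dimension reads the same element; this is the effect the real helper computes.
struct Desc4D
{
  int strides[4]; // strides for (b, y, x, c); 0 means "broadcast along this dimension"
};

inline int subscriptToIndex(const Desc4D &d, int b, int y, int x, int c)
{
  return b * d.strides[0] + y * d.strides[1] + x * d.strides[2] + c * d.strides[3];
}

int main()
{
  // input1 (2, 5) -> extended shape (1, 2, 5, 1); input2 (2, 1) -> (1, 2, 1, 1).
  const std::vector<float> input1 = {8.f, 5.f, 16.f, -10.f, -14.f, 12.f, -7.f, -1.f, 15.f, -12.f};
  const std::vector<float> input2 = {-2.f, -9.f};
  std::vector<float> output(10);

  const Desc4D desc1 = {{10, 5, 1, 1}};
  const Desc4D desc2 = {{2, 1, 0, 1}}; // stride 0 over x: each row reuses one divisor
  const int B = 1, H = 2, W = 5, D = 1;

  // Same loop nesting as BroadcastDiv4DSlow: the innermost loop has stride 1 in the output.
  for (int b = 0; b < B; ++b)
    for (int y = 0; y < H; ++y)
      for (int x = 0; x < W; ++x)
        for (int c = 0; c < D; ++c)
        {
          const int out_offset = ((b * H + y) * W + x) * D + c;
          output[out_offset] = input1[subscriptToIndex(desc1, b, y, x, c)] /
                               input2[subscriptToIndex(desc2, b, y, x, c)];
        }

  assert(output[0] == -4.0f); // 8 / -2
  assert(output[2] == -8.0f); // 16 / -2, same divisor reused across the row
  return 0;
}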