[onert] Implement RoPELayer in backend cpu
This commit implements the RoPE (Rotary Position Embedding) layer in the cpu backend.

ONE-DCO-1.0-Signed-off-by: youngsik kim <[email protected]>
ys44kim committed Oct 14, 2024
1 parent 0665747 commit c468776
Showing 6 changed files with 202 additions and 0 deletions.
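For context, RoPE rotates pairs of channels by position-dependent angles, which this kernel receives as precomputed SIN_TABLE and COS_TABLE inputs. Below is a minimal, self-contained sketch of that rotation for a single head at a single position, assuming the usual GPT-NeoX (half-split) and GPT-J (interleaved) channel pairings; it is illustrative only and is not the nnfw::cker implementation.

#include <cassert>
#include <cstdio>
#include <vector>

enum class Mode { GptNeox, GptJ };

// Rotate one head of size d in place; sin_t/cos_t hold the d/2 precomputed
// angle values for the current position (analogous to the kernel's table inputs).
void rope_ref(std::vector<float> &x, const std::vector<float> &sin_t,
              const std::vector<float> &cos_t, Mode mode)
{
  const size_t d = x.size();
  assert(d % 2 == 0 && sin_t.size() == d / 2 && cos_t.size() == d / 2);
  for (size_t i = 0; i < d / 2; ++i)
  {
    // GPT-NeoX pairs channel i with i + d/2; GPT-J pairs 2i with 2i + 1.
    const size_t a = (mode == Mode::GptNeox) ? i : 2 * i;
    const size_t b = (mode == Mode::GptNeox) ? i + d / 2 : 2 * i + 1;
    const float x0 = x[a], x1 = x[b];
    x[a] = x0 * cos_t[i] - x1 * sin_t[i]; // plain 2-D rotation per pair
    x[b] = x0 * sin_t[i] + x1 * cos_t[i];
  }
}

int main()
{
  std::vector<float> q{1.f, 2.f, 3.f, 4.f};
  rope_ref(q, /*sin_t=*/{0.f, 1.f}, /*cos_t=*/{1.f, 0.f}, Mode::GptNeox);
  for (float v : q)
    std::printf("%g ", v); // prints: 1 -4 3 2
  std::printf("\n");
  return 0;
}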
21 changes: 21 additions & 0 deletions runtime/onert/backend/cpu/KernelGenerator.cc
@@ -49,6 +49,7 @@
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/RoPELayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
#include "ops/SliceLayer.h"
@@ -1562,6 +1563,26 @@ void KernelGenerator::visit(const ir::operation::LSTM &node)
_return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::RoPE &node)
{
const auto input_index{node.getInputs().at(ir::operation::RoPE::Input::INPUT)};
const auto sin_table{node.getInputs().at(ir::operation::RoPE::Input::SIN_TABLE)};
const auto cos_table{node.getInputs().at(ir::operation::RoPE::Input::COS_TABLE)};
const auto output_index{node.getOutputs().at(ir::operation::RoPE::Output::OUTPUT)};

auto mode = ops::getRoPEMode(node.param().mode);

auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto sin_tensor = _tensor_reg->getPortableTensor(sin_table);
auto cos_tensor = _tensor_reg->getPortableTensor(cos_table);
auto output_tensor = _tensor_reg->getPortableTensor(output_index);

auto fn = std::make_unique<ops::RoPELayer>();

fn->configure(input_tensor, sin_tensor, cos_tensor, mode, output_tensor);
_return_fn = std::move(fn);
}

} // namespace cpu
} // namespace backend
} // namespace onert
1 change: 1 addition & 0 deletions runtime/onert/backend/cpu/KernelGenerator.h
@@ -82,6 +82,7 @@ class KernelGenerator : public basic::KernelGeneratorBase
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::RmsNorm &) override;
void visit(const ir::operation::RoPE &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Shape &) override;
void visit(const ir::operation::Slice &) override;
14 changes: 14 additions & 0 deletions runtime/onert/backend/cpu/ops/OperationUtils.cc
@@ -308,6 +308,20 @@ std::vector<int32_t> getReducerAxes(const IPortableTensor *axes)
return ret;
}

nnfw::cker::RoPEMode getRoPEMode(ir::operation::RoPE::RoPEMode rope_mode)
{
switch (rope_mode)
{
case ir::operation::RoPE::RoPEMode::GPT_NEOX:
return nnfw::cker::RoPEMode::kGptNeox;
case ir::operation::RoPE::RoPEMode::GPT_J:
return nnfw::cker::RoPEMode::kGptJ;
default:
throw std::runtime_error("RoPE: unsupported mode");
}
}

} // namespace ops
} // namespace cpu
} // namespace backend
3 changes: 3 additions & 0 deletions runtime/onert/backend/cpu/ops/OperationUtils.h
@@ -21,6 +21,7 @@
#include <ir/DataType.h>
#include <ir/Operand.h>
#include <ir/Padding.h>
#include <ir/operation/RoPE.h>
#include <util/CalculateActivationRange.h>

#include <cker/Shape.h>
@@ -169,6 +170,8 @@ nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type);

std::vector<int32_t> getReducerAxes(const IPortableTensor *axes);

nnfw::cker::RoPEMode getRoPEMode(ir::operation::RoPE::RoPEMode rope_mode);

template <typename T> const T *getBuffer(const IPortableTensor *tensor)
{
return reinterpret_cast<const T *>(tensor->buffer());
92 changes: 92 additions & 0 deletions runtime/onert/backend/cpu/ops/RoPELayer.cc
@@ -0,0 +1,92 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "RoPELayer.h"

#include <cker/operation/RoPE.h>

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

RoPELayer::RoPELayer()
: _input(nullptr), _sin(nullptr), _cos(nullptr), _mode(nnfw::cker::RoPEMode::kGptNeox),
_output(nullptr)
{
// DO NOTHING
}

RoPELayer::~RoPELayer() = default;

void RoPELayer::configure(const IPortableTensor *input, const IPortableTensor *sin,
const IPortableTensor *cos, nnfw::cker::RoPEMode mode,
IPortableTensor *output)
{
assert(input != nullptr);
assert(sin != nullptr);
assert(cos != nullptr);
assert(output != nullptr);

_input = input;
_sin = sin;
_cos = cos;
_mode = mode;
_output = output;
}

template <typename T> void RoPELayer::rope()
{
auto input_shape = _input->getShape();
assert(input_shape.rank() == 4);

nnfw::cker::RoPE(_mode, getShape(_input), getBuffer<T>(_input), getShape(_sin),
getBuffer<T>(_sin), getShape(_cos), getBuffer<T>(_cos), getShape(_output),
getBuffer<T>(_output));
}

void RoPELayer::run()
{
switch (_input->data_type())
{
case OperandType::FLOAT32:
rope<float>();
break;
case OperandType::INT32:
rope<int32_t>();
break;
case OperandType::INT64:
rope<int64_t>();
break;
case OperandType::QUANT_UINT8_ASYMM:
rope<uint8_t>();
break;
case OperandType::QUANT_INT8_ASYMM:
rope<int8_t>();
break;
default:
throw std::runtime_error("RoPE: unsupported data type");
}
}

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert
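Note that the layer consumes precomputed sin/cos tables rather than deriving angles itself. For reference, a typical way such tables are generated, assuming the standard RoPE frequencies theta_i = base^(-2i/d) with base 10000; in this kernel the actual tables arrive as model inputs (SIN_TABLE/COS_TABLE), so this is only a sketch of their expected contents.

#include <cmath>
#include <vector>

// Fill row-major [max_pos x d/2] sin/cos tables. base = 10000 is the common
// RoPE default; the real tables here are supplied by the model, not computed.
void build_rope_tables(int d, int max_pos, std::vector<float> &sin_t,
                       std::vector<float> &cos_t, float base = 10000.f)
{
  const int half = d / 2;
  sin_t.assign(static_cast<size_t>(max_pos) * half, 0.f);
  cos_t.assign(static_cast<size_t>(max_pos) * half, 0.f);
  for (int p = 0; p < max_pos; ++p)
  {
    for (int i = 0; i < half; ++i)
    {
      const float theta = std::pow(base, -2.0f * i / d); // per-pair frequency
      sin_t[static_cast<size_t>(p) * half + i] = std::sin(p * theta);
      cos_t[static_cast<size_t>(p) * half + i] = std::cos(p * theta);
    }
  }
}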
71 changes: 71 additions & 0 deletions runtime/onert/backend/cpu/ops/RoPELayer.h
@@ -0,0 +1,71 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_CPU_OPS_ROPE_LAYER_H__
#define __ONERT_BACKEND_CPU_OPS_ROPE_LAYER_H__

#include <backend/IPortableTensor.h>
#include "OperationUtils.h"

#include <exec/IFunction.h>

namespace nnfw
{
namespace cker
{
class RoPE;
}
} // namespace nnfw

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

class RoPELayer : public ::onert::exec::IFunction
{
public:
RoPELayer();
~RoPELayer();

public:
template <typename T> void rope();

void configure(const IPortableTensor *input, const IPortableTensor *sin,
const IPortableTensor *cos, nnfw::cker::RoPEMode mode,
IPortableTensor *output);

void run() override;

private:
const IPortableTensor *_input;
const IPortableTensor *_sin;
const IPortableTensor *_cos;

nnfw::cker::RoPEMode _mode;
IPortableTensor *_output;
};

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert

#endif // __ONERT_BACKEND_CPU_OPS_ROPE_LAYER_H__
