Commit

Merge pull request #2758 from 946166920/master
MNN_NPU IR operators have been modified to support more models
jxt1234 authored Feb 20, 2024
2 parents 8686994 + 241289e commit 5607201
Showing 61 changed files with 1,916 additions and 838 deletions.
71 changes: 36 additions & 35 deletions source/backend/hiai/backend/NPUBackend.cpp
@@ -231,8 +231,20 @@ namespace MNN {
if (isInput && mGrapMap.find(inputIndex) == mGrapMap.end()) {
auto opName = string("input") + to_string(inputIndex);
shared_ptr<hiai::op::Data> data(new hiai::op::Data(opName));
auto shape = tensorShapeFormat(inputTensor);
ge::TensorDesc desc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT);
vector<int64_t> dims;
for(int32_t i = 0; i < inputTensor->buffer().dimensions; i++) {
dims.push_back(inputTensor->buffer().dim[i].extent);
}
ge::TensorDesc desc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT);
if (TensorUtils::getDescribe(inputTensor)->dimensionFormat == MNN_DATA_FORMAT::MNN_DATA_FORMAT_NHWC) {
desc.SetFormat(ge::FORMAT_NHWC);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
desc.SetDataType(ge::DT_INT32);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
desc.SetDataType(ge::DT_INT64);
}
data->update_input_desc_x(desc);
// map
vector<pair<shared_ptr<ge::Operator>, string>> ops;
@@ -248,19 +260,25 @@ namespace MNN {
shared_ptr<hiai::op::Const> mConst(new hiai::op::Const(opName));
{
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
auto shape = tensorShapeFormat(inputTensor);
ge::TensorDesc fdesc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT);
filter->SetTensorDesc(fdesc);
if (TensorUtils::getDescribe(inputTensor)->dimensionFormat == MNN::MNN_DATA_FORMAT_NCHW) {
filter->SetData((uint8_t *)inputTensor->host<float>(), inputTensor->elementSize() * sizeof(float));
mConst->set_attr_value(filter);
} else {
vector<float> temp(inputTensor->elementSize(), 0);
NHWC2NCHW((float*)inputTensor->host<float>(), (float*)temp.data(), shape[0], shape[1], shape[2]*shape[3]);
filter->SetData((uint8_t *)temp.data(), temp.size() * sizeof(float));
mConst->set_attr_value(filter);
vector<int64_t> dims;
for(int32_t i = 0; i < inputTensor->buffer().dimensions; i++) {
dims.push_back(inputTensor->buffer().dim[i].extent);
}
ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT);
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
fdesc.SetDataType(ge::DT_INT32);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
fdesc.SetDataType(ge::DT_INT64);
}
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)inputTensor->host<float>(), inputTensor->elementSize() * sizeof(float));
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
filter->SetData((uint8_t *)inputTensor->host<int32_t>(), inputTensor->elementSize() * sizeof(int32_t));
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
filter->SetData((uint8_t *)inputTensor->host<int64_t>(), inputTensor->elementSize() * sizeof(int64_t));
}
mConst->set_attr_value(filter);
}
vector<pair<shared_ptr<ge::Operator>, string>> ops;
@@ -339,14 +357,7 @@ namespace MNN {
auto index = mInputMap.find((unsigned long)(const_cast<Tensor*>(dstTensor)));
MNN_ASSERT(index != mInputMap.end());
shared_ptr<hiai::INDTensorBuffer> input = inputTensors[index->second];
if(TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NCHW
||TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4 ) {
memcpy(input->GetData(), srcTensor->host<float>(), (size_t)input->GetSize());
} else {
shared_ptr<Tensor> tmpTensor(new Tensor(dstTensor, Tensor::DimensionType::CAFFE, true));
tensorConvert(srcTensor, tmpTensor.get());
memcpy(input->GetData(), tmpTensor->host<float>(), (size_t)tmpTensor->size());
}
memcpy(input->GetData(), srcTensor->host<void>(), (size_t)input->GetSize());
} else if(isOutputCopy){
int index;
bool flag = false;
@@ -361,18 +372,8 @@ namespace MNN {
return;
}
shared_ptr<hiai::INDTensorBuffer> output = outputTensors[index];
if(TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NCHW
||TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4 ) {
memcpy(dstTensor->buffer().host, output->GetData(), (size_t)output->GetSize());
} else {
auto tmpShape = tensorShapeFormat(srcTensor);
vector<int> srcShape = {(int)tmpShape[0],(int)tmpShape[1],(int)tmpShape[2],(int)tmpShape[3]};
shared_ptr<Tensor> tmpTensor(Tensor::create(srcShape,halide_type_of<float>(),
(void*)(output->GetData()),
Tensor::DimensionType::CAFFE));
auto shape = output->GetTensorDesc();
tensorConvert(tmpTensor.get(), dstTensor);
}
Tensor* tmpTensor = const_cast<Tensor*>(dstTensor);
memcpy(tmpTensor->buffer().host, output->GetData(), (size_t)output->GetSize());
}
#ifdef HIAI_DEBUG
ATrace_endSection();
@@ -420,7 +421,7 @@ namespace MNN {
model->SetGraph(graph);

hiai::ModelBuildOptions buildOptions;

buildOptions.formatMode = hiai::FormatMode::USE_ORIGIN;
std::ifstream file("quant_param", std::ios::binary | std::ios::ate);
if (!file.is_open()) {
MNN_PRINT("no quant_param config file, build non-quantized model.\n");
@@ -507,7 +508,7 @@ namespace MNN {

void NPUBackend::setOutputOps(const Op *op, vector<shared_ptr<ge::Operator>>&& HIAI_op,
const std::vector<Tensor *> &outputs){
if(op->type() == OpType_Slice){
if(op->type() == OpType_Slice || op->type() == OpType_TopKV2){
for (size_t i = 0; i < op->outputIndexes()->size(); i++){
auto index = op->outputIndexes()->data()[i];
mSclipMap[index] = i;
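The NPUBackend.cpp hunks above share one pattern: input and constant tensor descriptors now carry the tensor's real element type (DT_INT32 and DT_INT64 in addition to DT_FLOAT) and its original dimension order, and onCopyBuffer replaces the NHWC/NCHW conversion copies with a plain memcpy, which works because buildOptions.formatMode is set to hiai::FormatMode::USE_ORIGIN. As a reading aid, the repeated type and byte-size selection could be factored as the hypothetical helpers below; chooseGeDataType and rawByteSize are invented names, not part of this commit, and the sketch assumes the same MNN and HiAI headers that NPUBackend.cpp already includes.

    // Hypothetical helpers (not in the PR): mirror the per-type branches used when
    // filling a ge::TensorDesc and its data from an MNN tensor.
    static ge::DataType chooseGeDataType(const MNN::Tensor* t) {
        auto type = t->getType();
        if (type.code == halide_type_int && type.bits == 32) {
            return ge::DT_INT32;
        }
        if (type.code == halide_type_int && type.bits == 64) {
            return ge::DT_INT64;
        }
        return ge::DT_FLOAT; // default path, matching the diff
    }

    static size_t rawByteSize(const MNN::Tensor* t) {
        // elementSize() times bytes per element, matching the sizeof() factors above.
        return (size_t)t->elementSize() * (t->getType().bits / 8);
    }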
35 changes: 35 additions & 0 deletions source/backend/hiai/backend/NPUBackend.hpp
@@ -43,6 +43,41 @@ namespace MNN {
typedef void *(*fp_ATrace_endSection) (void);
#endif
void NHWC2NCHW(const float* source, float* dest, int b, int c, int area);

static ge::DataType mapDataType(DataType src) {
ge::DataType retVal = ge::DataType::DT_UNDEFINED;
switch (src) {
case DataType_DT_FLOAT:
retVal = ge::DataType::DT_FLOAT;
break;
case DataType_DT_DOUBLE:
retVal = ge::DataType::DT_DOUBLE;
break;
case DataType_DT_INT32:
retVal = ge::DataType::DT_INT32;
break;
case DataType_DT_UINT8:
retVal = ge::DataType::DT_UINT8;
break;
case DataType_DT_INT16:
retVal = ge::DataType::DT_INT16;
break;
case DataType_DT_INT8:
retVal = ge::DataType::DT_INT8;
break;
case DataType_DT_INT64:
retVal = ge::DataType::DT_INT64;
break;
case DataType_DT_VARIANT:
retVal = ge::DataType::DT_FLOAT;
break;
default:
MNN_ASSERT(false);
printf("cast Datatype : %d \n", src);
break;
}
return retVal;
}
inline std::vector<int64_t> tensorShapeFormat(const Tensor *input, const Tensor *broadCastInput=nullptr) {
auto dimSize = input->buffer().dimensions;
if(broadCastInput != nullptr) {
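The new mapDataType helper translates MNN's serialized DataType enum into the ge::DataType used by HiAI tensor descriptors, with DT_VARIANT falling back to DT_FLOAT and unknown values tripping the assert. A minimal usage sketch, assuming a caller that already holds a serialized DataType value; the shape numbers are arbitrary examples.

    // Sketch only: build a descriptor whose element type follows the MNN DataType.
    ge::TensorDesc desc(ge::Shape({1, 64, 32, 32}), ge::FORMAT_NCHW, ge::DT_FLOAT);
    desc.SetDataType(mapDataType(DataType_DT_INT32)); // yields ge::DT_INT32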
85 changes: 65 additions & 20 deletions source/backend/hiai/execution/NPUActivation.cpp
@@ -21,39 +21,84 @@ NPUActivation::NPUActivation(Backend *b, const Op *op, const std::vector<Tensor
ErrorCode NPUActivation::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
mNpuBackend->setNetworkInput(inputs, mOp);
auto opName = mOp->name()->str();



auto xOp = mNpuBackend->getInputOps(mOp);

if(mType == 5){
shared_ptr<hiai::op::PRelu> prelu(new hiai::op::PRelu(opName + "_prelu"));
auto slopePtr = mOp->main_as_PRelu()->slope()->data();
auto slopeSize = mOp->main_as_PRelu()->slope()->size();

mConst_w = hiai::op::Const(opName + "_w_const");
{
ge::TensorDesc fdesc(ge::Shape({1, slopeSize, 1, 1}), ge::FORMAT_NCHW,
ge::DT_FLOAT); // in o h w ?
auto inputIndex = mOp->inputIndexes()->data()[0];
auto iops = mNpuBackend->mGrapMap[inputIndex];
xOp = iops.back().first;
if (mType == OpType_PReLU && mOp->main_as_PRelu()->slope() != nullptr) {
if (mOp->main_as_PRelu()->slope()->size() == 1) {
const float* slopePtr = mOp->main_as_PRelu()->slope()->data();
shared_ptr<hiai::op::Activation> relu(new hiai::op::Activation(opName + "_relu"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*relu).set_input_x(*xOp.get());
} else {
(*relu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*relu)
.set_attr_coef(.000000)
.set_attr_negative_slope(*slopePtr)
.set_attr_mode(mType);
mNpuBackend->setOutputOps(mOp, {relu}, outputs);
} else {
shared_ptr<hiai::op::PRelu> prelu(new hiai::op::PRelu(opName + "_prelu"));
auto slopePtr = mOp->main_as_PRelu()->slope()->data();
auto slopeSize = mOp->main_as_PRelu()->slope()->size();
mConst_w = hiai::op::Const(opName + "_w_const");
ge::TensorDesc fdesc(ge::Shape({1, slopeSize, 1, 1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)slopePtr, slopeSize * sizeof(float));
mConst_w.set_attr_value(filter);
if (inputs[0]->buffer().dimensions < 4) {
std::vector<int32_t> shape;
for (int32_t i = 0; i < inputs[0]->buffer().dimensions; i++) {
shape.push_back(inputs[0]->buffer().dim[i].extent);
}
for (int32_t i = inputs[0]->buffer().dimensions; i < 4; i++) {
shape.push_back(1);
}
shapeConst = hiai::op::Const(opName +"_reshapeConst");
{
ge::TensorDesc fdesc(ge::Shape({static_cast<int64_t>(shape.size())}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)shape.data(), shape.size() * sizeof(int32_t));
shapeConst.set_attr_value(filter);
}
shared_ptr<hiai::op::Reshape> reshape(new hiai::op::Reshape(opName + "_reshape"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*reshape).set_input_x(*xOp.get());
} else {
(*reshape).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*reshape).set_input_shape(shapeConst);
(*prelu).set_input_x(*reshape.get()).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {reshape, prelu}, outputs);
} else {
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*prelu).set_input_x(*xOp.get());
} else {
(*prelu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*prelu).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {prelu}, outputs);
}
}

(*prelu)
.set_input_x(*xOp.get()).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {prelu}, outputs);
}else{
float slope = 0.0;
if (mOp->type() == OpType_ReLU) {
slope = mOp->main_as_Relu()->slope();
mType = 5;
if (slope != 0.0) {
mType = 5;
}
}

shared_ptr<hiai::op::Activation> relu(new hiai::op::Activation(opName + "_relu"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*relu).set_input_x(*xOp.get());
} else {
(*relu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*relu)
.set_input_x(*xOp.get())
.set_attr_coef(.000000)
.set_attr_negative_slope(slope)
.set_attr_mode(mType);
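The reworked NPUActivation path treats PReLU with a single shared slope as a leaky ReLU and lowers it to hiai::op::Activation via set_attr_negative_slope; per-channel slopes keep hiai::op::PRelu, inserting a Reshape to a padded 4-D shape when the input has fewer than four dimensions. For reference, the scalar semantics of the single-slope case, in plain C++ rather than NPU IR:

    // Single-slope PReLU is a leaky ReLU: identity for positive x, scaled otherwise.
    inline float preluScalar(float x, float slope) {
        return x > 0.0f ? x : slope * x;
    }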
1 change: 1 addition & 0 deletions source/backend/hiai/execution/NPUActivation.hpp
@@ -20,6 +20,7 @@ class NPUActivation : public NPUCommonExecution {
virtual ~NPUActivation() = default;
private:
hiai::op::Const mConst_w;
hiai::op::Const shapeConst;
int mType;
};

2 changes: 1 addition & 1 deletion source/backend/hiai/execution/NPUArgMax.cpp
@@ -28,7 +28,7 @@ ErrorCode NPUArgMax::onResize(const std::vector<Tensor *> &inputs, const std::ve
// om input weight const op
mConst_axis = hiai::op::Const(opName + "_w_const");
{
auto aixs = axisFormat(inputs[0], argMaxParam->axis());
auto aixs = argMaxParam->axis();
ge::TensorDesc fdesc(ge::Shape({1}),ge::DT_INT32);
ge::TensorPtr axis = std::make_shared<ge::Tensor>();
axis->SetTensorDesc(fdesc);
93 changes: 93 additions & 0 deletions source/backend/hiai/execution/NPUBatchMatMul.cpp
@@ -0,0 +1,93 @@
//
// NPUBatchMatMul.cpp
// MNN
//
// Created by MNN on 2020/10/15.
// Copyright © 2018, Alibaba Group Holding Limited
//

#include "NPUBatchMatMul.hpp"
#include "NPUBackend.hpp"

using namespace std;

namespace MNN {

NPUBatchMatMul::NPUBatchMatMul(MNN::Backend *b, const MNN::Op *op, const std::vector<Tensor *> &inputs, const std::vector<MNN::Tensor *> &outputs) : NPUCommonExecution(b, op) {
auto opName = mOp->name()->str();

bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT;
bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT;

Tensor* input = nullptr;
if (isConst0 && !isConst1){
input = inputs[0];
}
if (!isConst0 && isConst1){
input = inputs[1];
}
if (input != nullptr) {
mConst = ge::op::Const(opName + "_w_const");
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
vector<int64_t> dims;
for (int32_t i = 0; i < input->buffer().dimensions; i++) {
dims.push_back(input->buffer().dim[i].extent);
}
ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT);
if (input->getType().code == halide_type_int && input->getType().bits == 32) {
fdesc.SetDataType(ge::DT_INT32);
filter->SetData((uint8_t *)input->host<int32_t>(), input->elementSize() * sizeof(int32_t));
} else {
filter->SetData((uint8_t *)input->host<float>(), input->elementSize() * sizeof(float));
}
filter->SetTensorDesc(fdesc);
mConst.set_attr_value(filter);
}

}

ErrorCode NPUBatchMatMul::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
mNpuBackend->setNetworkInput(inputs, mOp);
auto opName = mOp->name()->str();
bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT;
bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT;
auto param = mOp->main_as_BatchMatMulParam();
shared_ptr<hiai::op::BatchMatMul> batchMatMul(new hiai::op::BatchMatMul(opName));
if (isConst0 && !isConst1) {
auto inputIndex1 = mOp->inputIndexes()->data()[1];
auto iops1 = mNpuBackend->mGrapMap[inputIndex1];
auto xOp1 = iops1.back().first;
(*batchMatMul)
.set_input_x1(mConst)
.set_input_x2(*xOp1.get())
.set_attr_adj_x1(param->adjX())
.set_attr_adj_x2(param->adjY());
} else if (!isConst0 && isConst1) {
auto inputIndex = mOp->inputIndexes()->data()[0];
auto iops = mNpuBackend->mGrapMap[inputIndex];
auto xOp = iops.back().first;
(*batchMatMul)
.set_input_x1(*xOp.get())
.set_input_x2(mConst)
.set_attr_adj_x1(param->adjX())
.set_attr_adj_x2(param->adjY());
} else {
auto inputIndex = mOp->inputIndexes()->data()[0];
auto iops = mNpuBackend->mGrapMap[inputIndex];
auto xOp = iops.back().first;
auto inputIndex1 = mOp->inputIndexes()->data()[1];
auto iops1 = mNpuBackend->mGrapMap[inputIndex1];
auto xOp1 = iops1.back().first;
(*batchMatMul)
.set_input_x1(*xOp.get())
.set_input_x2(*xOp1.get())
.set_attr_adj_x1(param->adjX())
.set_attr_adj_x2(param->adjY());
}
mNpuBackend->setOutputOps(mOp, {batchMatMul}, outputs);
return NO_ERROR;
}

NPUCreatorRegister<TypedCreator<NPUBatchMatMul>> __BatchMatMul_op(OpType_BatchMatMul);

} // namespace MNN
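The new NPUBatchMatMul execution registers OpType_BatchMatMul, folds a constant operand into a hiai::op::Const (as int32 or float data), and forwards the adjX/adjY flags to hiai::op::BatchMatMul. A hedged sketch of a graph that would reach this path, assuming MNN's Express API (_Input, _Const, _BatchMatMul from <MNN/expr/ExprCreator.hpp>); shapes and values here are made up:

    // Illustrative only: a batched matmul with one constant operand.
    using namespace MNN::Express;
    std::vector<float> weight(2 * 8 * 4, 0.01f);
    auto x = _Input({2, 16, 8}, NCHW, halide_type_of<float>());               // runtime input
    auto w = _Const(weight.data(), {2, 8, 4}, NCHW, halide_type_of<float>()); // constant operand
    auto y = _BatchMatMul(x, w); // adj flags default to false; result shape is [2, 16, 4]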