[MNN:Sync] Sync Internal 2.8.1
wangzhaode committed Dec 27, 2023
1 parent 1a5609b commit 3b978d9
Showing 282 changed files with 9,504 additions and 4,740 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -489,6 +489,7 @@ IF(MNN_COREML)

IF(MNN_SEP_BUILD)
list(APPEND MNN_DEPS MNNCoreML)
list(APPEND MNN_EXTRA_DEPENDS MNNCoreML)
ELSE()
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCoreML>)
ENDIF()
@@ -552,6 +553,7 @@ IF(MNN_OPENCL)
IF(MNN_SEP_BUILD)
list(APPEND MNN_DEPS MNN_CL)
ELSE()
add_definitions(-DMNN_OPENCL_ENABLED=1)
list(APPEND MNN_TARGETS MNN_CL)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_CL>)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_OCL_LIBS})
82 changes: 82 additions & 0 deletions MNN_Render.podspec
@@ -0,0 +1,82 @@
Pod::Spec.new do |s|
s.name = "MNN"
s.version = "2.2.0"
s.summary = "MNN"

s.description = <<-DESC
MNN is a lightweight deep neural network inference framework. It loads models and does inference on devices.
DESC

s.homepage = "https://github.com/alibaba/MNN"
s.license = {
:type => 'Apache License, Version 2.0',
:text => <<-LICENSE
Copyright © 2018, Alibaba Group Holding Limited
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
LICENSE
}

s.author = { "MNN" => "[email protected]" }
s.platform = :ios
s.ios.deployment_target = '8.0'
s.requires_arc = true

#s.source = { :git => "git@github.com:alibaba/MNN.git", :branch => 'master' }
s.source = {:git => "/Users/zhang/Development/AliNNPrivate/",:branch=> 'head'}
s.frameworks = 'Metal', 'Accelerate', 'CoreML'
s.library = 'c++'
s.source_files = \
'include/MNN/*.{h,hpp}',\
'include/MNN/expr/*.{h,hpp}',\
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/internal/logging/*.{hpp,cpp}',\
'source/internal/logging/ios/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/internal/logging/aliyun-log-c-sdk/src/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/common/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/utils/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/geometry/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/shape/render/*.{h,c,m,mm,cc,hpp,cpp}',\
#'source/backend/arm82/*.{h,c,m,mm,cc,S,hpp,cpp}',\
#'source/backend/arm82/asm/**/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/render/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/bf16/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/arm/**/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/compute/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/metal/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/backend/metal/render/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/backend/coreml/backend/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/backend/coreml/execution/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/backend/coreml/mlmodel/src/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'express/**/*.{hpp,cpp}',\
'tools/cv/include/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'tools/cv/source/imgproc/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'tools/cv/source/calib3d/*.{h,c,m,mm,cc,hpp,cpp,metal}'

s.header_mappings_dir = 'include'
s.subspec 'cv' do |sp|
sp.source_files = 'tools/cv/include/**/*.hpp'
sp.header_mappings_dir = 'tools/cv/include'
sp.xcconfig = { 'ALWAYS_SEARCH_USER_PATHS' => 'NO' }
end

s.compiler_flags = '-arch arm64 -march=armv8.2-a+simd+fp16'
s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half" "$(PODS_TARGET_SRCROOT)/source/backend/coreml/mlmodel/include" "$(PODS_TARGET_SRCROOT)/tools/cv/include"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1 MNN_METAL_ENABLED=1 MNN_METAL_FULL_PRECISION=1 MNN_SUPPORT_RENDER=1 MNN_SUPPORT_BF16=1 MNN_COREML_ENABLED=1 USE_LZ4_FLAG=1 MNN_INTERNAL_ENABLED=1 MNN_USE_SPARSE_COMPUTE=1'}
s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include"' }
end
6 changes: 1 addition & 5 deletions docs/compile/tools.md
@@ -55,14 +55,10 @@
- `checkInvalidValue.out` checks the data in an output directory
- `timeProfile.out` measures a model's execution time on a given backend and reports each layer's share of the total time
- `testTrain.out` tests the training functionality
- `aoa_nlu_encoder.out` tests the NLU encoder
- `aoa_nlu_decoder1.out` tests NLU decoder 1
- `aoa_nlu_decoder2.out` tests NLU decoder 2
- `checkDir.out` checks whether two directories are identical
- `checkFile.out` checks whether two files are identical
- `winogradExample.out` Winograd example
- `winogradGenerateGLSL.out` generates GLSL for Winograd
- `winogradGenerateCL.out` generates OpenCL for Winograd
- `fuseTest` tests custom GPU operators; currently only the Vulkan Buffer mode is supported
## Benchmark Tool
- Related build options
- `MNN_BUILD_BENCHMARK` whether to build the benchmark tool
19 changes: 19 additions & 0 deletions docs/pymnn/expr.md
@@ -2195,6 +2195,25 @@ array([[[[0., 1.]],
[[6., 7.]]]], dtype=float32)
```

---
### `reverse(x, axis)`
Reverses the input variable `x` along dimension `axis[0]`.

Parameters:
- `x : var_like` input variable
- `axis : var_like` the axis along which to reverse

Returns: the values of the reversed sequence

Return type: `Var`

Example:

```python
>>> expr.reverse(expr.range(-4., 4., 1.), [0])
array([ 3., 2., 1., 0., -1., -2., -3., -4.], dtype=float32)
```

---
### `reverse_sequence(x, y, batch_dim, seq_dim)`
Slices `x` along dimension `batch_dim` and, for each slice `i`, reverses the first `y[i]` elements along dimension `seq_dim`.
11 changes: 11 additions & 0 deletions docs/tools/test.md
@@ -457,3 +457,14 @@ Matrix:
0.0000000 0.0000000 1.0000000
```

## fuseTest
### Function
Tests custom GPU operators; currently only the Vulkan Buffer mode is supported.

### Parameters
`Usage: ./fuseTest user.spirv config.json`
- `user.spirv:str`: path to the SPIR-V file; it can be compiled with `glslangValidator -V user.comp -o user.spirv`
- `config.json:str`: path to the configuration file
### Example
```bash
$ ./fuseTest user.spirv user.json
```
2 changes: 1 addition & 1 deletion express/Executor.cpp
@@ -120,7 +120,7 @@ Executor::Requirement Executor::getRequirement(Expr* expr) const {
return req;
}
for (int i = 0; i < inputSize; ++i) {
req.contentNeedContent[i] = OpCommonUtils::opNeedContent(op->type(), i);
req.contentNeedContent[i] = OpCommonUtils::opNeedContent(op, i);
req.shapeNeedContent[i] = false;
}
auto needIndexId = SizeComputer::needInputContent(op, inputSize);
11 changes: 11 additions & 0 deletions express/Expr.cpp
@@ -192,6 +192,17 @@ EXPRP Expr::create(std::shared_ptr<BufferStorage> extra, std::vector<VARP>&& inp
EXPRP expr(new Expr(outputSize));
expr->mStorage = extra;
expr->mOp = flatbuffers::GetRoot<Op>(extra->buffer());
switch (expr->mOp->type()) {
case OpType_Const:
expr->mType = VARP::CONSTANT;
break;
case OpType_TrainableParam:
expr->mType = VARP::TRAINABLE;
break;
default:
expr->mType = VARP::INPUT;
break;
}
expr->mInputs = std::move(inputs);
auto exe = ExecutorScope::Current();
expr->mInside->mReq = exe->getRequirement(expr.get());
31 changes: 10 additions & 21 deletions express/NeuralNetWorkOp.cpp
@@ -626,6 +626,13 @@ VARP _ChannelShuffle(VARP x, int group) {
x = _Convert(x, NC4HW4);
return x;
}

VARP _Reverse(VARP x, VARP axis) {
std::unique_ptr<MNN::OpT> op(new MNN::OpT);
op->type = MNN::OpType_Reverse;
return (Variable::create(Expr::create(op.get(), {x, axis})));
}

VARP _ReverseSequence(VARP x, VARP y, int batchDim, int seqDim) {
std::unique_ptr<OpT> op(new OpT);
op->type = OpType_ReverseSequence;
@@ -1710,19 +1717,10 @@ VARP _GridSample(VARP input, VARP grid, InterpolationMethod mode, GridSamplePadd
}

VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue/*For future*/) {
auto xInfo = x->getInfo();
auto scaleInfo = scale->getInfo();
auto scalePtr = scale->readMap<float>();
if (nullptr == scalePtr || nullptr == xInfo || nullptr == scaleInfo) {
MNN_ERROR("Error for FloatToInt8 because var not ready\n");
return nullptr;
}
if (xInfo->order != NC4HW4 || xInfo->type.code != halide_type_float) {
MNN_ERROR("Not Support Input for FloatToInt8 because var not NC4HW4 or not float\n");
return nullptr;
}
if ((scaleInfo->size != xInfo->dim[1]) && (scaleInfo->size != 1)) {
MNN_ERROR("Scale's size not match input's channel: %d - %d\n", scaleInfo->size, xInfo->dim[1]);
if (nullptr == scalePtr || nullptr == scaleInfo) {
MNN_ERROR("Error for FloatToInt8 because scale not ready\n");
return nullptr;
}
std::unique_ptr<OpT> op(new OpT);
@@ -1735,21 +1733,12 @@ VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue
}

VARP _FloatToInt8(VARP x, VARP scale, int8_t minValue, int8_t maxValue, int8_t zeroPoint) {
auto xInfo = x->getInfo();
auto scaleInfo = scale->getInfo();
auto scalePtr = scale->readMap<float>();
if (nullptr == scalePtr || nullptr == xInfo || nullptr == scaleInfo) {
if (nullptr == scalePtr || nullptr == scaleInfo) {
MNN_ERROR("Error for FloatToInt8 because var not ready\n");
return nullptr;
}
if (xInfo->order != NC4HW4 || xInfo->type.code != halide_type_float) {
MNN_ERROR("Not Support Input for FloatToInt8 because var not NC4HW4 or not float\n");
return nullptr;
}
if ((scaleInfo->size != xInfo->dim[1]) && (scaleInfo->size != 1)) {
MNN_ERROR("Scale's size not match input's channel: %d - %d\n", scaleInfo->size, xInfo->dim[1]);
return nullptr;
}
std::unique_ptr<OpT> op(new OpT);
op->type = OpType_FloatToInt8;
op->main.type = OpParameter_QuantizedFloatParam;
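The new `_Reverse` wrapper above is the C++ counterpart of the `expr.reverse` Python API documented earlier in this commit. A minimal usage sketch, not part of the commit itself — the range setup mirrors the Python docs example, and the expected output is assumed from it:

```cpp
// Sketch only: exercises the new _Reverse API, mirroring the Python
// docs example expr.reverse(expr.range(-4., 4., 1.), [0]).
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/MNNDefine.h>

using namespace MNN::Express;

int main() {
    // Build the sequence -4, -3, ..., 3.
    auto x = _Input({8}, NCHW, halide_type_of<float>());
    auto xPtr = x->writeMap<float>();
    for (int i = 0; i < 8; ++i) {
        xPtr[i] = -4.0f + i;
    }
    // Reverse along dimension 0.
    int axisData = 0;
    auto axis = _Const(&axisData, {1}, NCHW, halide_type_of<int32_t>());
    auto y = _Reverse(x, axis);
    auto yPtr = y->readMap<float>();
    for (int i = 0; i < 8; ++i) {
        MNN_PRINT("%.1f ", yPtr[i]); // expect: 3.0 2.0 1.0 0.0 -1.0 -2.0 -3.0 -4.0
    }
    MNN_PRINT("\n");
    return 0;
}
```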
29 changes: 29 additions & 0 deletions express/module/PipelineModule.cpp
@@ -58,6 +58,10 @@ ExprModule::ExprModule(EXPRP expr) {
break;
}
}
// TODO: Optimize the logic
if (!mExpr->mCanDecompose) {
ExecutorScope::Current()->setLazyComputeMode(Executor::LAZY_CONTENT);
}
}

std::vector<VARP> ExprModule::onForward(const std::vector<VARP>& inputs) {
Expand All @@ -72,6 +76,14 @@ std::vector<VARP> ExprModule::onForward(const std::vector<VARP>& inputs) {
std::vector<VARP> outputVars;
auto newExpr = Expr::create(mExpr->extra(), std::move(tempInputs), mExpr->outputSize());
newExpr->setName(mExpr->name());
if (!mExpr->mCanDecompose) {
// Set tensor shape from net
newExpr->mCanDecompose = false;
for (int index = 0; index < mExpr->outputSize(); ++index) {
TensorUtils::copyShape(mExpr->inside()->mOutputTensors[index], newExpr->inside()->mOutputTensors[index], true, true);
Utils::copyTensorToInfo(newExpr->inside()->mOutputInfos.data() + index, newExpr->inside()->mOutputTensors[index]);
}
}
for (int i = 0; i < mExpr->outputSize(); ++i) {
outputVars.emplace_back(Variable::create(newExpr, i));
}
@@ -562,6 +574,23 @@ Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::
config = &defaultConfig;
}
auto subGraphs = net->subgraphs();
if (config->dynamic) {
// TODO: Support subgraph
if (nullptr == subGraphs) {
auto varMap = MNN::Express::Variable::loadMap(buffer, length);
std::vector<MNN::Express::VARP> inputsVar(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputsVar[i] = varMap[inputs[i]];
}
std::vector<MNN::Express::VARP> outputsVar(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputsVar[i] = varMap[outputs[i]];
}
return extract(inputsVar, outputsVar, false);
} else {
MNN_ERROR("Don't support subgraph for dynamic load, turn back to static load\n");
}
}
std::map<std::string, SubGraph> subGraphMap;
_createSubGraph(net, rtMgr, config, subGraphMap);
std::shared_ptr<BufferStorage> bufferStorage(new BufferStorage);
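The new `config->dynamic` branch above falls back to `Variable::loadMap` plus `extract` when the model has no subgraphs. A hedged sketch of how a caller might opt in — the model file and tensor names are placeholders, not from the commit:

```cpp
// Sketch only: opting into the new dynamic-load path via Module::Config.
// "model.mnn", "input" and "output" are illustrative placeholders.
#include <MNN/expr/Module.hpp>
#include <memory>

using namespace MNN::Express;

std::shared_ptr<Module> loadDynamicModule() {
    Module::Config config;
    config.dynamic = true; // routes through Variable::loadMap + extract
    std::shared_ptr<Module> module(Module::load(
        {"input"}, {"output"}, "model.mnn", &config));
    return module;
}
```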
2 changes: 1 addition & 1 deletion include/MNN/MNNDefine.h
@@ -69,6 +69,6 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \
#define STR(x) STR_IMP(x)
#define MNN_VERSION_MAJOR 2
#define MNN_VERSION_MINOR 8
#define MNN_VERSION_PATCH 0
#define MNN_VERSION_PATCH 1
#define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH)
#endif /* MNNDefine_h */
12 changes: 12 additions & 0 deletions include/MNN/MNNSharedContext.h
@@ -24,6 +24,15 @@ struct MNNVulkanContext {
uint32_t iQueueFamilyIndex;
};

struct MNNVulkanTensorContent {
VkBuffer buffer;
VkDeviceSize size;
VkDeviceSize offset;

halide_type_t realType;
int32_t mask; // For future usage
};

#endif

#ifdef MNN_METAL
Expand All @@ -36,6 +45,9 @@ struct MNNMetalTensorContent {
id<MTLBuffer> buffer;
int32_t offset;
id<MTLTexture> texture;

halide_type_t type;
int32_t mask;
int32_t forFuture[8];
};

6 changes: 6 additions & 0 deletions include/MNN/Tensor.hpp
@@ -275,6 +275,12 @@ class MNN_PUBLIC Tensor {
mBuffer.dim[index].extent = length;
}

/**
* @brief For GPU and other devices, get the device memory directly; see MNNSharedContext for details.
* @return Success or not. Returns false if forwardType doesn't match the tensor's backend type, or if the backend is CPU.
*/
bool getDeviceInfo(void* dst, int forwardType) const;

public:
/**
* @brief print tensor data. for DEBUG use only.
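Together with the `MNNVulkanTensorContent` struct added to MNNSharedContext.h, `getDeviceInfo` exposes the device memory behind a tensor. A sketch under the assumption of a Vulkan-backed tensor and a build with Vulkan enabled; the helper function is illustrative, not from the commit:

```cpp
// Sketch only: querying the VkBuffer behind a Vulkan-backed tensor via
// the new Tensor::getDeviceInfo. MNNVulkanTensorContent is only defined
// when MNN is built with Vulkan support (see MNNSharedContext.h).
#include <MNN/Tensor.hpp>
#include <MNN/MNNSharedContext.h>
#include <MNN/MNNForwardType.h>

bool describeVulkanTensor(const MNN::Tensor* deviceTensor,
                          MNNVulkanTensorContent* content) {
    // Returns false if the tensor's backend isn't Vulkan.
    if (!deviceTensor->getDeviceInfo(content, MNN_FORWARD_VULKAN)) {
        return false;
    }
    // content->buffer, content->offset and content->size now describe the
    // VkBuffer region backing the tensor; content->realType carries the
    // element type. Hand these to the caller's own Vulkan code.
    return true;
}
```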
1 change: 1 addition & 0 deletions include/MNN/expr/Expr.hpp
@@ -267,6 +267,7 @@ class MNN_PUBLIC Expr {
bool mVisited = false;
std::vector<WeakEXPRP> mTo;
bool mCanDecompose = true;
friend class ExprModule;

};
} // namespace Express
1 change: 1 addition & 0 deletions include/MNN/expr/NeuralNetWorkOp.hpp
@@ -77,6 +77,7 @@ MNN_PUBLIC VARP _ChangeInputFormat(VARP input, Dimensionformat format);
MNN_PUBLIC VARP _Conv2DBackPropFilter(VARP input, VARP inputGrad, INTS kernelSize, PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});
MNN_PUBLIC VARP _PoolGrad(VARP originInput, VARP originOutput, VARP inputGrad, INTS kernel, INTS stride, PoolingMode type, PaddingMode pad = VALID, INTS pads= {0, 0});
// FIXME: move the api to Array Ops
MNN_PUBLIC VARP _Reverse(VARP x, VARP axis);
MNN_PUBLIC VARP _ReverseSequence(VARP x, VARP y, int batchDim, int seqDim);
// FIXME: move the api to Image Ops
MNN_PUBLIC VARP _Crop(VARP images, VARP size, int axis, INTS offset);