beta 0.2.0.1
- support both armv7 and arm64 in the podspec (CocoaPods >= 1.5.0 required)
- refactor negative-axis support
- fix memory overlap in deconvolution
- fix the CONVOLUTION_TILED_NUMBER spelling error
- fix a few warnings
- add binary / interp / permute / relu / reshape / softmax support and optimize convolution for the OpenGL backend
- add a clean target to the nmake build script
liqing committed Jun 24, 2019
1 parent 6a4213f commit ad759eb
Showing 77 changed files with 2,786 additions and 587 deletions.
40 changes: 26 additions & 14 deletions MNN.podspec
@@ -34,20 +34,32 @@ Pod::Spec.new do |s|

s.source = { :git => "git@github.com:alibaba/MNN.git", :branch => 'master' }
s.frameworks = 'Metal', 'Accelerate'
s.source_files = \
'include/*.{h,hpp}',\
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/backend/cpu/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/arm/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/arm/arm64/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/compute/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/metal/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}'

s.library = 'c++'

s.subspec 'core' do |a|
a.source_files = \
'include/*.{h,hpp}',\
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/backend/cpu/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/arm/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/compute/*.{h,c,m,mm,cc,S,hpp,cpp}'
end
s.subspec 'armv7' do |a|
a.source_files = 'source/backend/cpu/arm/arm32/*.{h,c,m,mm,cc,S,hpp,cpp}'
end
s.subspec 'aarch64' do |a|
a.source_files = 'source/backend/cpu/arm/arm64/*.{h,c,m,mm,cc,S,hpp,cpp}'
end
s.subspec 'metal' do |a|
a.source_files = 'source/backend/metal/**/*.{h,c,m,mm,cc,hpp,cpp,metal}'
end

s.default_subspecs = 'core', 'armv7', 'aarch64', 'metal'
s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => ' "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" ', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1'}
s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a'}
end
13 changes: 12 additions & 1 deletion demo/exec/README.md
@@ -6,11 +6,22 @@ Use [Top CMakeLists.txt](../../CMakeLists.txt) to build the demo like this:

```bash
cd path/to/MNN
mkdir build
mkdir build && cd build
cmake -DMNN_BUILD_DEMO=ON ..
make -j8
```

# Build this Demo on Windows

Use [Top CMakeLists.txt](../../CMakeLists.txt) to build the demo like this:
```powershell
cd path/to/MNN
mkdir build
cd build
cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_DEMO=ON ..
nmake
```

# MultiPose

1. Download [pose model](https://github.com/czy2014hust/posenet-python/raw/master/models/model-mobilenet_v1_075.pb)
4 changes: 0 additions & 4 deletions project/ios/MNN.xcodeproj/project.pbxproj
@@ -436,7 +436,6 @@
920004A021EDBDF600BCE892 /* SqueezeTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046621EDBDF600BCE892 /* SqueezeTest.cpp */; };
920004A121EDBDF600BCE892 /* LSTMTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046721EDBDF600BCE892 /* LSTMTest.cpp */; };
920004A221EDBDF600BCE892 /* QuantizedReshapeTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046821EDBDF600BCE892 /* QuantizedReshapeTest.cpp */; };
920004A321EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046921EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp */; };
920004A421EDBDF600BCE892 /* MatMulTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046A21EDBDF600BCE892 /* MatMulTest.cpp */; };
920004A521EDBDF600BCE892 /* TileTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046B21EDBDF600BCE892 /* TileTest.cpp */; };
920004A621EDBDF600BCE892 /* LRNTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046C21EDBDF600BCE892 /* LRNTest.cpp */; };
@@ -1151,7 +1150,6 @@
9200046621EDBDF600BCE892 /* SqueezeTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SqueezeTest.cpp; sourceTree = "<group>"; };
9200046721EDBDF600BCE892 /* LSTMTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LSTMTest.cpp; sourceTree = "<group>"; };
9200046821EDBDF600BCE892 /* QuantizedReshapeTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = QuantizedReshapeTest.cpp; sourceTree = "<group>"; };
9200046921EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = QuantizedSoftmaxTest.cpp; sourceTree = "<group>"; };
9200046A21EDBDF600BCE892 /* MatMulTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MatMulTest.cpp; sourceTree = "<group>"; };
9200046B21EDBDF600BCE892 /* TileTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TileTest.cpp; sourceTree = "<group>"; };
9200046C21EDBDF600BCE892 /* LRNTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LRNTest.cpp; sourceTree = "<group>"; };
@@ -2204,7 +2202,6 @@
9200047D21EDBDF600BCE892 /* QuantizedAvgPoolTest.cpp */,
9200047321EDBDF600BCE892 /* QuantizedMaxPoolTest.cpp */,
9200046821EDBDF600BCE892 /* QuantizedReshapeTest.cpp */,
9200046921EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp */,
9200049221EDBDF600BCE892 /* RangeTest.cpp */,
9200046D21EDBDF600BCE892 /* RankTest.cpp */,
9200046F21EDBDF600BCE892 /* ReductionTest.cpp */,
@@ -3163,7 +3160,6 @@
buildActionMask = 2147483647;
files = (
92A4E0FC21F05A4F000B0919 /* MemoryUtilsTest.cpp in Sources */,
920004A321EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp in Sources */,
920004B521EDBDF600BCE892 /* BinaryOPTest.cpp in Sources */,
92D765BD222819EF00178BE5 /* DirectedAcyclicGraphTest.cpp in Sources */,
920004D221EDBE1100BCE892 /* MNNTestSuite.cpp in Sources */,
12 changes: 4 additions & 8 deletions source/backend/cpu/CPUConcat.cpp
@@ -173,11 +173,7 @@ static int _concatTf(const Tensor* outputTensor, const vector<Tensor*>& inputTen
ErrorCode CPUConcat::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
MNN_ASSERT(outputs.size() == 1);
MNN_ASSERT(inputs.size() >= 2);
auto output = outputs[0];
mAxis = mOriginAxis;
if (-1 == mAxis) {
mAxis = output->dimensions() - 1;
}
auto output = outputs[0];
mUseSlowMethod = false;
mTempOutput.reset();
if (output->buffer().dimensions > 1 && output->buffer().dim[1].flags == Tensor::REORDER_4) {
@@ -228,9 +224,6 @@ ErrorCode CPUConcat::onExecute(const vector<Tensor*>& inputs, const std::vector<
}
} else {
int axis = mAxis;
if (mAxis == -1) {
axis = outputs[0]->buffer().dimensions - 1;
}
// tf concat
_concatTf(outputs[0], inputs, axis);
}
@@ -244,6 +237,9 @@ class CPUConcatCreator : public CPUBackend::Creator {
const MNN::Op* op, Backend* backend) const {
auto axis = op->main_as_Axis();
if (nullptr != axis) {
if (axis->axis() < 0) {
return new CPUConcat(backend, outputs[0]->dimensions() + axis->axis());
}
return new CPUConcat(backend, axis->axis());
}
return new CPUConcat(backend, 0);
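The creator change above normalizes a negative axis at construction time, mapping axis < 0 to outputs[0]->dimensions() + axis, so the -1 special cases in onResize and onExecute can be deleted (CPUSlice.cpp below receives the same treatment). A minimal standalone sketch of the convention; the helper name is illustrative, not MNN API:

```cpp
#include <cassert>
#include <cstdio>

// Normalize a possibly-negative axis (Python-style: -1 means the last
// dimension) into the range [0, rank). Illustrative helper, not MNN API.
static int normalizeAxis(int axis, int rank) {
    assert(rank > 0);
    if (axis < 0) {
        axis += rank; // e.g. axis = -1, rank = 4 -> 3
    }
    assert(axis >= 0 && axis < rank);
    return axis;
}

int main() {
    // For a 4-D NCHW tensor: -1 refers to W, -3 refers to C.
    printf("%d\n", normalizeAxis(-1, 4)); // 3
    printf("%d\n", normalizeAxis(-3, 4)); // 1
    printf("%d\n", normalizeAxis(2, 4));  // 2
    return 0;
}
```

Doing the normalization once in the creator keeps the hot paths free of rank-dependent branching.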
7 changes: 3 additions & 4 deletions source/backend/cpu/CPUConcat.hpp
@@ -14,16 +14,15 @@
namespace MNN {
class CPUConcat : public Execution {
public:
CPUConcat(Backend *b, int axis) : Execution(b), mOriginAxis(axis) {
mAxis = mOriginAxis;
CPUConcat(Backend *b, int axis) : Execution(b), mAxis(axis) {
// Do nothing
}
virtual ~CPUConcat() = default;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;

private:
int mOriginAxis = 1;
int mAxis = 1;
int mAxis = 1;
std::shared_ptr<Tensor> mTempOutput;
bool mUseSlowMethod = false;
};
2 changes: 1 addition & 1 deletion source/backend/cpu/CPUDeconvolution.cpp
@@ -142,8 +142,8 @@ ErrorCode CPUDeconvolutionMultiInput::onResize(const std::vector<Tensor*>& input
mTempInputs = {inputs[0], mWeight.get(), mBias.get()};
backend()->onAcquireBuffer(mWeight.get(), Backend::DYNAMIC);
backend()->onAcquireBuffer(mCacheWeight.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mCacheWeight.get(), Backend::DYNAMIC);
backend()->onAcquireBuffer(mBias.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mCacheWeight.get(), Backend::DYNAMIC);
auto error = mOrigin->onResize(mTempInputs, outputs);
backend()->onReleaseBuffer(mWeight.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mBias.get(), Backend::DYNAMIC);
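The reordered release above is the "fix memory overlap in deconvolution" item from the commit message: a dynamic buffer allocator will typically hand a just-released region to the next acquire, so releasing mCacheWeight before acquiring mBias allowed the two tensors to alias while mCacheWeight was still needed. A toy reproduction of the hazard, assuming a last-freed-block-reuse allocator; ToyAllocator is hypothetical, not MNN's implementation:

```cpp
#include <cstddef>
#include <cstdio>

// Toy allocator: reuses the most recently released block when it is large
// enough, otherwise bumps a fresh offset. Hypothetical, illustration only.
struct ToyAllocator {
    size_t next = 0;
    size_t freeOffset = 0;
    size_t freeSize = 0; // last released block
    size_t acquire(size_t size) {
        if (freeSize >= size) { // recycle the released region
            freeSize = 0;
            return freeOffset;
        }
        size_t offset = next;
        next += size;
        return offset;
    }
    void release(size_t offset, size_t size) {
        freeOffset = offset;
        freeSize   = size;
    }
};

int main() {
    // Buggy order: release the cache, then acquire the bias.
    ToyAllocator buggy;
    buggy.acquire(256);                // weight
    size_t cache = buggy.acquire(128); // cacheWeight
    buggy.release(cache, 128);
    size_t bias = buggy.acquire(64);
    printf("buggy: bias %s cacheWeight\n",
           bias == cache ? "overlaps" : "is separate from");

    // Fixed order: acquire the bias while the cache is still live.
    ToyAllocator fixed;
    fixed.acquire(256);                // weight
    cache = fixed.acquire(128);        // cacheWeight
    bias  = fixed.acquire(64);         // bias
    fixed.release(cache, 128);         // now safe to recycle
    printf("fixed: bias %s cacheWeight\n",
           bias == cache ? "overlaps" : "is separate from");
    return 0;
}
```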
37 changes: 24 additions & 13 deletions source/backend/cpu/CPUPool.cpp
@@ -73,7 +73,7 @@ static void pooling_max_pad(const float *channelInput, float *offsetOutput, int

static void poolingMax(const float *channelInput, int inputWidth, int inputHeight, float *channelOutput,
int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
int strideHeight, int padWidth, int padHeight) {
int strideHeight, int padWidth, int padHeight, MNN::PoolPadType padType) {
int padTop = padHeight <= 0 ? 0 : (padHeight + strideHeight - 1) / strideHeight;
int padBottom = (padHeight + inputHeight - kernelHeight) / strideHeight + 1;
int padLeft = padWidth <= 0 ? 0 : (padWidth + strideWidth - 1) / strideWidth;
@@ -166,7 +166,8 @@
}

static void poolingAvgPad(const float *offsetInput, float *offsetOutput, int inputWidth, int inputHeight,
int kernelWidth, int kernelHeight, int inputStep4, int iw, int ih) {
int kernelWidth, int kernelHeight, int inputStep4, int iw, int ih, int padWidth,
int padHeight, MNN::PoolPadType padType) {
#ifdef MNN_USE_NEON
float32x4_t sum = vdupq_n_f32(0);
#else
@@ -175,15 +176,23 @@ static void poolingAvgPad(const float *offsetInput, float *offsetOutput, int inp
float sum2 = 0;
float sum3 = 0;
#endif

const int khs = 0 < -ih ? -ih : 0; // max
const int khe = kernelHeight < inputHeight - ih ? kernelHeight : inputHeight - ih; // min
const int kws = 0 < -iw ? -iw : 0; // max
const int kwe = kernelWidth < inputWidth - iw ? kernelWidth : inputWidth - iw; // min

// sum
int count = 0;
if (padType == MNN::PoolPadType_CAFFE) {
count = (ALIMIN(ih + kernelHeight, inputHeight + padHeight) - ih) *
(ALIMIN(iw + kernelWidth, inputWidth + padWidth) - iw);
} else {
count = (khe - khs) * (kwe - kws);
}

const int khs = 0 < -ih ? -ih : 0; // max
const int khe = kernelHeight < inputHeight - ih ? kernelHeight : inputHeight - ih; // min
const float *kernelInput = offsetInput + khs * inputStep4;
for (int kh = khs; kh < khe; kh++, kernelInput += inputStep4) {
const int kws = 0 < -iw ? -iw : 0; // max
const int kwe = kernelWidth < inputWidth - iw ? kernelWidth : inputWidth - iw; // min
const float *cursorInput = kernelInput + kws * 4;
for (int kw = kws; kw < kwe; kw++, cursorInput += 4) {
#ifdef MNN_USE_NEON
@@ -194,7 +203,6 @@ static void poolingAvgPad(const float *offsetInput, float *offsetOutput, int inp
sum2 += cursorInput[2];
sum3 += cursorInput[3];
#endif
count++;
}
}

@@ -222,7 +230,7 @@

static void poolingAvg(const float *channelInput, int inputWidth, int inputHeight, float *channelOutput,
int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
int strideHeight, int padWidth, int padHeight) {
int strideHeight, int padWidth, int padHeight, MNN::PoolPadType padType) {
int padTop = padHeight <= 0 ? 0 : (padHeight + strideHeight - 1) / strideHeight;
int padBottom = (padHeight + inputHeight - kernelHeight) / strideHeight + 1;
int padLeft = padWidth <= 0 ? 0 : (padWidth + strideWidth - 1) / strideWidth;
@@ -243,7 +251,7 @@ static void poolingAvg(const float *channelInput, int inputWidth, int inputHeigh
for (int ow = 0, iw = -padWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += 4, offsetInput += strideWidth4) {
poolingAvgPad(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
iw, ih);
iw, ih, padWidth, padHeight, padType);
}
}
for (int oh = padTop, ih = -padHeight + oh * strideHeight; oh < padBottom;
@@ -253,14 +261,14 @@
for (int ow = 0, iw = -padWidth; ow < padLeft;
ow++, iw += strideWidth, offsetOutput += 4, offsetInput += strideWidth4) {
poolingAvgPad(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
iw, ih);
iw, ih, padWidth, padHeight, padType);
}
offsetInput = lineInput + padRight * strideWidth * 4;
offsetOutput = lineOutput + padRight * 4;
for (int ow = padRight, iw = -padWidth + ow * strideWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += 4, offsetInput += strideWidth4) {
poolingAvgPad(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
iw, ih);
iw, ih, padWidth, padHeight, padType);
}
}
for (int oh = padBottom, ih = -padHeight + oh * strideHeight; oh < outputHeight;
@@ -270,7 +278,7 @@ static void poolingAvg(const float *channelInput, int inputWidth, int inputHeigh
for (int ow = 0, iw = -padWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += 4, offsetInput += strideWidth4) {
poolingAvgPad(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
iw, ih);
iw, ih, padWidth, padHeight, padType);
}
}
}
@@ -368,6 +376,8 @@ ErrorCode CPUPool::onResize(const std::vector<Tensor *> &inputs, const std::vect
int padNeededHeight = (output->height() - 1) * strideHeight + kernelHeight - input->height();
padWidth = padNeededWidth > 0 ? padNeededWidth / 2 : 0;
padHeight = padNeededHeight > 0 ? padNeededHeight / 2 : 0;
} else if (layer->padType() == PoolPadType_VALID) {
padWidth = padHeight = 0;
}
auto poolType = layer->type();
auto planeFunction = poolingMax;
@@ -380,13 +390,14 @@ ErrorCode CPUPool::onResize(const std::vector<Tensor *> &inputs, const std::vect
auto inputPlaneStride = 4 * input->width() * input->height();
auto outputPlaneStride = 4 * output->width() * output->height();
int threadNumber = ((CPUBackend *)backend())->threadNumber();
auto padType = layer->padType();
mFunction = [=]() {
MNN_CONCURRENCY_BEGIN(tId, threadNumber) {
for (int channel = (int)tId; channel < totalDepth; channel += threadNumber) {
// run
planeFunction(inputData + channel * inputPlaneStride, input->width(), input->height(),
outputData + outputPlaneStride * channel, output->width(), output->height(), kernelWidth,
kernelHeight, strideWidth, strideHeight, padWidth, padHeight);
kernelHeight, strideWidth, strideHeight, padWidth, padHeight, padType);
}
}
MNN_CONCURRENCY_END();
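The poolingAvgPad change above replaces the per-element count++ with a closed-form divisor that depends on the pad type: for PoolPadType_CAFFE the divisor is the window size clipped to the padded input bound, while otherwise only real in-bounds elements are counted (and PoolPadType_VALID now forces zero padding in onResize). A minimal scalar sketch of the two divisor conventions; avgPoolWindow is a hypothetical helper, not the MNN kernel:

```cpp
#include <algorithm>
#include <cstdio>

enum class PadMode { Caffe, TensorFlow };

// Average over one pooling window anchored at (ih, iw), which may start
// outside the input because of padding. Scalar sketch, illustration only.
float avgPoolWindow(const float* input, int inputH, int inputW,
                    int ih, int iw, int kernelH, int kernelW,
                    int padH, int padW, PadMode mode) {
    const int khs = std::max(0, -ih);
    const int khe = std::min(kernelH, inputH - ih);
    const int kws = std::max(0, -iw);
    const int kwe = std::min(kernelW, inputW - iw);

    float sum = 0.f;
    for (int kh = khs; kh < khe; ++kh)
        for (int kw = kws; kw < kwe; ++kw)
            sum += input[(ih + kh) * inputW + (iw + kw)];

    int count;
    if (mode == PadMode::Caffe) {
        // Caffe divides by the window size clipped to the padded input.
        count = (std::min(ih + kernelH, inputH + padH) - ih) *
                (std::min(iw + kernelW, inputW + padW) - iw);
    } else {
        // TensorFlow-style: divide by the number of real input elements.
        count = (khe - khs) * (kwe - kws);
    }
    return sum / count;
}

int main() {
    float img[4] = {1, 2, 3, 4}; // 2x2 input
    // 2x2 kernel anchored at (-1,-1) with pad 1: only img[0] is in bounds.
    printf("caffe: %.3f\n",
           avgPoolWindow(img, 2, 2, -1, -1, 2, 2, 1, 1, PadMode::Caffe));
    printf("tf:    %.3f\n",
           avgPoolWindow(img, 2, 2, -1, -1, 2, 2, 1, 1, PadMode::TensorFlow));
    return 0;
}
```

For that corner window the Caffe convention divides the single valid element by 4 (0.250), while the in-bounds convention divides by 1 (1.000), which is exactly why the kernel now needs the pad type.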
22 changes: 11 additions & 11 deletions source/backend/cpu/CPUSlice.cpp
@@ -120,12 +120,8 @@ static int _sliceChannel(const Tensor* inputTensor, const vector<Tensor*>& outpu
return 0;
}

CPUSlice::CPUSlice(Backend* b, const MNN::Op* op) : MNN::Execution(b) {
auto slice = op->main_as_Slice();
mAxis = slice->axis();
for (int i = 0; i < slice->slicePoints()->size(); ++i) {
mSlicePoints.push_back(slice->slicePoints()->data()[i]);
}
CPUSlice::CPUSlice(Backend* b, int axis) : MNN::Execution(b) {
mAxis = axis;
}

ErrorCode CPUSlice::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
@@ -139,7 +135,6 @@ ErrorCode CPUSlice::onResize(const std::vector<Tensor*>& inputs, const std::vect
MNN_ASSERT(inputs[0]->buffer().dim[1].flags == MNN::Tensor::REORDER_4);
if (mAxis == 1) {
bool useSlowMethod = false;

// Last one need not be 4 aligned
for (size_t b = 0; b < outputs.size() - 1; b++) {
auto& outputTensor = outputs[b]->buffer();
@@ -163,9 +158,6 @@ ErrorCode CPUSlice::onResize(const std::vector<Tensor*>& inputs, const std::vect

ErrorCode CPUSlice::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input = inputs[0];
if (-1 == mAxis) {
mAxis = input->dimensions() - 1;
}
const auto tensorFormat = input->getDimensionType();
if (Tensor::CAFFE == tensorFormat) {
MNN_ASSERT(inputs[0]->buffer().dim[1].flags == MNN::Tensor::REORDER_4);
@@ -185,7 +177,15 @@ class CPUSliceCreator : public CPUBackend::Creator {
public:
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op, Backend* backend) const {
return new CPUSlice(backend, op);
auto slice = op->main_as_Slice();
if (nullptr == slice || inputs.empty()) {
return nullptr;
}
auto axis = slice->axis();
if (axis < 0) {
axis = axis + inputs[0]->dimensions();
}
return new CPUSlice(backend, axis);
}
};

3 changes: 1 addition & 2 deletions source/backend/cpu/CPUSlice.hpp
@@ -14,14 +14,13 @@
namespace MNN {
class CPUSlice : public Execution {
public:
CPUSlice(Backend *b, const MNN::Op *op);
CPUSlice(Backend *b, int axis);
virtual ~CPUSlice() = default;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;

private:
int mAxis;
std::vector<int> mSlicePoints;
std::shared_ptr<Tensor> mTempInput;
};

2 changes: 1 addition & 1 deletion source/backend/cpu/compute/CommonOptFunction.cpp
@@ -541,7 +541,7 @@ void MNNRelu6(float* dst, const float* src, size_t size) {
}

void MNNExp(float* dst, const float* src, size_t dataSize) {
int countC8 = dataSize / 8;
int countC8 = (int)dataSize / 8;
if (countC8 > 0) {
// Align to eight so asm is easier to write
static float parameters[] = {
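MNNExp walks the buffer in chunks of eight floats so the assembly kernel only has to handle aligned full blocks; the added cast presumably just silences a size_t-to-int truncation warning (one of the "fix a few warnings" items). The block/remainder split looks roughly like this, with a scalar stand-in for the asm body, illustration only:

```cpp
#include <cmath>
#include <cstddef>
#include <cstdio>

// Scalar stand-in for the 8-wide asm/NEON exp kernel.
static void expBlock8(float* dst, const float* src) {
    for (int i = 0; i < 8; ++i) dst[i] = std::exp(src[i]);
}

// Process full blocks of 8 with the fast kernel, then finish the tail
// with plain scalar code.
void expApprox(float* dst, const float* src, size_t dataSize) {
    int countC8 = (int)dataSize / 8; // full 8-wide blocks, as in the diff
    for (int i = 0; i < countC8; ++i) {
        expBlock8(dst + 8 * i, src + 8 * i);
    }
    for (size_t i = (size_t)countC8 * 8; i < dataSize; ++i) {
        dst[i] = std::exp(src[i]); // remainder
    }
}

int main() {
    float in[11], out[11];
    for (int i = 0; i < 11; ++i) in[i] = 0.1f * i;
    expApprox(out, in, 11);
    printf("%f %f\n", out[0], out[10]); // 1.000000 2.718282
    return 0;
}
```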
2 changes: 1 addition & 1 deletion source/backend/cpu/compute/ConvOpt.cpp
@@ -242,7 +242,7 @@ void MNNConvRunForLineint8_t(float* dst, const int8_t* src, const int8_t* weight

void MNNGemmFloatUnit_4(float* dstOrigin, const float* src, const float* weight, size_t src_depth_quad, size_t dst_step,
size_t dst_depth_quad, size_t weight_depth_offset) {
MNNGemmFloatCommon_4(dstOrigin, src, weight, src_depth_quad, dst_step, dst_depth_quad, CONVOLUTION_TILED_NUMBWR,
MNNGemmFloatCommon_4(dstOrigin, src, weight, src_depth_quad, dst_step, dst_depth_quad, CONVOLUTION_TILED_NUMBER,
weight_depth_offset);
}
void MNNGemmFloatOne_4(float* dstOrigin, const float* src, const float* weight, size_t src_depth_quad, size_t dst_step,
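The last hunk fixes the misspelled CONVOLUTION_TILED_NUMBWR macro: MNNGemmFloatUnit_4 is the flexible-width MNNGemmFloatCommon_4 pinned to the full tile width. A sketch of that delegation pattern under assumed names and an assumed tile value (the real kernels multiply matrices; the stand-in below only demonstrates the wrapper structure):

```cpp
#include <cstdio>

// Illustrative tile width; the real CONVOLUTION_TILED_NUMBER lives in MNN's
// convolution headers and its value may differ.
#define CONVOLUTION_TILED_NUMBER 8

// Generic micro-kernel with a runtime width, standing in for
// MNNGemmFloatCommon_4's flexible-width column loop.
static void kernelCommon(float* dst, const float* src, int width) {
    for (int w = 0; w < width; ++w) dst[w] = src[w] + 1.f;
}

// Full-tile specialization, mirroring how MNNGemmFloatUnit_4 simply
// forwards to the common kernel with the tile-width constant.
static void kernelUnit(float* dst, const float* src) {
    kernelCommon(dst, src, CONVOLUTION_TILED_NUMBER);
}

int main() {
    float src[CONVOLUTION_TILED_NUMBER] = {0};
    float dst[CONVOLUTION_TILED_NUMBER];
    kernelUnit(dst, src);
    printf("%f\n", dst[0]); // 1.000000
    return 0;
}
```

With the typo, the wrapper silently passed a different width constant than intended, which is why the fix earned its own line in the commit message.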