beta 0.2.0.1
- support both armv7 and arm64 in the podspec (CocoaPods >= 1.5.0 required)
- refactor negative-axis support
- fix memory overlap in deconvolution
- fix the CONVOLUTION_TILED_NUMBER spelling error
- fix a few warnings
- add binary / interp / permute / relu / reshape / softmax support and optimize convolution for the OpenGL backend
- add a clean target to the nmake build script
liqing committed Jun 24, 2019
1 parent 6a4213f commit ad759eb
Showing 77 changed files with 2,786 additions and 587 deletions.
40 changes: 26 additions & 14 deletions MNN.podspec
@@ -34,20 +34,32 @@ Pod::Spec.new do |s|

s.source = { :git => "git@github.com:alibaba/MNN.git", :branch => 'master' }
s.frameworks = 'Metal', 'Accelerate'
s.source_files = \
'include/*.{h,hpp}',\
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/backend/cpu/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/arm/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/arm/arm64/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/compute/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/metal/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}'

s.library = 'c++'

s.subspec 'core' do |a|
a.source_files = \
'include/*.{h,hpp}',\
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/backend/cpu/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/arm/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/compute/*.{h,c,m,mm,cc,S,hpp,cpp}'
end
s.subspec 'armv7' do |a|
a.source_files = 'source/backend/cpu/arm/arm32/*.{h,c,m,mm,cc,S,hpp,cpp}'
end
s.subspec 'aarch64' do |a|
a.source_files = 'source/backend/cpu/arm/arm64/*.{h,c,m,mm,cc,S,hpp,cpp}'
end
s.subspec 'metal' do |a|
a.source_files = 'source/backend/metal/**/*.{h,c,m,mm,cc,hpp,cpp,metal}'
end

s.default_subspecs = 'core', 'armv7', 'aarch64', 'metal'
s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => ' "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" ', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1'}
s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a'}
end
13 changes: 12 additions & 1 deletion demo/exec/README.md
@@ -6,11 +6,22 @@ Use [Top CMakeLists.txt](../../CMakeLists.txt) to build the demo like this:

```bash
cd path/to/MNN
mkdir build
mkdir build && cd build
cmake -DMNN_BUILD_DEMO=ON ..
make -j8
```

# Build this Demo on Windows

Use [Top CMakeLists.txt](../../CMakeLists.txt) to build the demo like this:
```powershell
cd path/to/MNN
mkdir build
cd build
cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_DEMO=ON ..
nmake
```

# MultiPose

1. Download [pose model](https://github.com/czy2014hust/posenet-python/raw/master/models/model-mobilenet_v1_075.pb)
4 changes: 0 additions & 4 deletions project/ios/MNN.xcodeproj/project.pbxproj
@@ -436,7 +436,6 @@
920004A021EDBDF600BCE892 /* SqueezeTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046621EDBDF600BCE892 /* SqueezeTest.cpp */; };
920004A121EDBDF600BCE892 /* LSTMTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046721EDBDF600BCE892 /* LSTMTest.cpp */; };
920004A221EDBDF600BCE892 /* QuantizedReshapeTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046821EDBDF600BCE892 /* QuantizedReshapeTest.cpp */; };
920004A321EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046921EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp */; };
920004A421EDBDF600BCE892 /* MatMulTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046A21EDBDF600BCE892 /* MatMulTest.cpp */; };
920004A521EDBDF600BCE892 /* TileTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046B21EDBDF600BCE892 /* TileTest.cpp */; };
920004A621EDBDF600BCE892 /* LRNTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9200046C21EDBDF600BCE892 /* LRNTest.cpp */; };
@@ -1151,7 +1150,6 @@
9200046621EDBDF600BCE892 /* SqueezeTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SqueezeTest.cpp; sourceTree = "<group>"; };
9200046721EDBDF600BCE892 /* LSTMTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LSTMTest.cpp; sourceTree = "<group>"; };
9200046821EDBDF600BCE892 /* QuantizedReshapeTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = QuantizedReshapeTest.cpp; sourceTree = "<group>"; };
9200046921EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = QuantizedSoftmaxTest.cpp; sourceTree = "<group>"; };
9200046A21EDBDF600BCE892 /* MatMulTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MatMulTest.cpp; sourceTree = "<group>"; };
9200046B21EDBDF600BCE892 /* TileTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TileTest.cpp; sourceTree = "<group>"; };
9200046C21EDBDF600BCE892 /* LRNTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LRNTest.cpp; sourceTree = "<group>"; };
@@ -2204,7 +2202,6 @@
9200047D21EDBDF600BCE892 /* QuantizedAvgPoolTest.cpp */,
9200047321EDBDF600BCE892 /* QuantizedMaxPoolTest.cpp */,
9200046821EDBDF600BCE892 /* QuantizedReshapeTest.cpp */,
9200046921EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp */,
9200049221EDBDF600BCE892 /* RangeTest.cpp */,
9200046D21EDBDF600BCE892 /* RankTest.cpp */,
9200046F21EDBDF600BCE892 /* ReductionTest.cpp */,
@@ -3163,7 +3160,6 @@
buildActionMask = 2147483647;
files = (
92A4E0FC21F05A4F000B0919 /* MemoryUtilsTest.cpp in Sources */,
920004A321EDBDF600BCE892 /* QuantizedSoftmaxTest.cpp in Sources */,
920004B521EDBDF600BCE892 /* BinaryOPTest.cpp in Sources */,
92D765BD222819EF00178BE5 /* DirectedAcyclicGraphTest.cpp in Sources */,
920004D221EDBE1100BCE892 /* MNNTestSuite.cpp in Sources */,
12 changes: 4 additions & 8 deletions source/backend/cpu/CPUConcat.cpp
@@ -173,11 +173,7 @@ static int _concatTf(const Tensor* outputTensor, const vector<Tensor*>& inputTen
ErrorCode CPUConcat::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
MNN_ASSERT(outputs.size() == 1);
MNN_ASSERT(inputs.size() >= 2);
auto output = outputs[0];
mAxis = mOriginAxis;
if (-1 == mAxis) {
mAxis = output->dimensions() - 1;
}
auto output = outputs[0];
mUseSlowMethod = false;
mTempOutput.reset();
if (output->buffer().dimensions > 1 && output->buffer().dim[1].flags == Tensor::REORDER_4) {
@@ -228,9 +224,6 @@ ErrorCode CPUConcat::onExecute(const vector<Tensor*>& inputs, const std::vector<
}
} else {
int axis = mAxis;
if (mAxis == -1) {
axis = outputs[0]->buffer().dimensions - 1;
}
// tf concat
_concatTf(outputs[0], inputs, axis);
}
@@ -244,6 +237,9 @@ class CPUConcatCreator : public CPUBackend::Creator {
const MNN::Op* op, Backend* backend) const {
auto axis = op->main_as_Axis();
if (nullptr != axis) {
if (axis->axis() < 0) {
return new CPUConcat(backend, outputs[0]->dimensions() + axis->axis());
}
return new CPUConcat(backend, axis->axis());
}
return new CPUConcat(backend, 0);
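The creator change above normalizes a negative axis at construction time, mapping axis < 0 to outputs[0]->dimensions() + axis, so the -1 special cases in onResize and onExecute can be deleted (CPUSlice.cpp below receives the same treatment). A minimal standalone sketch of the convention; the helper name is illustrative, not MNN API:

```cpp
#include <cassert>
#include <cstdio>

// Normalize a possibly-negative axis (Python-style: -1 means the last
// dimension) into the range [0, rank). Illustrative helper, not MNN API.
static int normalizeAxis(int axis, int rank) {
    assert(rank > 0);
    if (axis < 0) {
        axis += rank; // e.g. axis = -1, rank = 4 -> 3
    }
    assert(axis >= 0 && axis < rank);
    return axis;
}

int main() {
    // For a 4-D NCHW tensor: -1 refers to W, -3 refers to C.
    printf("%d\n", normalizeAxis(-1, 4)); // 3
    printf("%d\n", normalizeAxis(-3, 4)); // 1
    printf("%d\n", normalizeAxis(2, 4));  // 2
    return 0;
}
```

Doing the normalization once in the creator keeps the hot paths free of rank-dependent branching.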
7 changes: 3 additions & 4 deletions source/backend/cpu/CPUConcat.hpp
@@ -14,16 +14,15 @@
namespace MNN {
class CPUConcat : public Execution {
public:
CPUConcat(Backend *b, int axis) : Execution(b), mOriginAxis(axis) {
mAxis = mOriginAxis;
CPUConcat(Backend *b, int axis) : Execution(b), mAxis(axis) {
// Do nothing
}
virtual ~CPUConcat() = default;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;

private:
int mOriginAxis = 1;
int mAxis = 1;
int mAxis = 1;
std::shared_ptr<Tensor> mTempOutput;
bool mUseSlowMethod = false;
};
2 changes: 1 addition & 1 deletion source/backend/cpu/CPUDeconvolution.cpp
@@ -142,8 +142,8 @@ ErrorCode CPUDeconvolutionMultiInput::onResize(const std::vector<Tensor*>& input
mTempInputs = {inputs[0], mWeight.get(), mBias.get()};
backend()->onAcquireBuffer(mWeight.get(), Backend::DYNAMIC);
backend()->onAcquireBuffer(mCacheWeight.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mCacheWeight.get(), Backend::DYNAMIC);
backend()->onAcquireBuffer(mBias.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mCacheWeight.get(), Backend::DYNAMIC);
auto error = mOrigin->onResize(mTempInputs, outputs);
backend()->onReleaseBuffer(mWeight.get(), Backend::DYNAMIC);
backend()->onReleaseBuffer(mBias.get(), Backend::DYNAMIC);
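The reordered release above is the "fix memory overlap in deconvolution" item from the commit message: a dynamic buffer allocator will typically hand a just-released region to the next acquire, so releasing mCacheWeight before acquiring mBias allowed the two tensors to alias while mCacheWeight was still needed. A toy reproduction of the hazard, assuming a last-freed-block-reuse allocator; ToyAllocator is hypothetical, not MNN's implementation:

```cpp
#include <cstddef>
#include <cstdio>

// Toy allocator: reuses the most recently released block when it is large
// enough, otherwise bumps a fresh offset. Hypothetical, illustration only.
struct ToyAllocator {
    size_t next = 0;
    size_t freeOffset = 0;
    size_t freeSize = 0; // last released block
    size_t acquire(size_t size) {
        if (freeSize >= size) { // recycle the released region
            freeSize = 0;
            return freeOffset;
        }
        size_t offset = next;
        next += size;
        return offset;
    }
    void release(size_t offset, size_t size) {
        freeOffset = offset;
        freeSize   = size;
    }
};

int main() {
    // Buggy order: release the cache, then acquire the bias.
    ToyAllocator buggy;
    buggy.acquire(256);                // weight
    size_t cache = buggy.acquire(128); // cacheWeight
    buggy.release(cache, 128);
    size_t bias = buggy.acquire(64);
    printf("buggy: bias %s cacheWeight\n",
           bias == cache ? "overlaps" : "is separate from");

    // Fixed order: acquire the bias while the cache is still live.
    ToyAllocator fixed;
    fixed.acquire(256);                // weight
    cache = fixed.acquire(128);        // cacheWeight
    bias  = fixed.acquire(64);         // bias
    fixed.release(cache, 128);         // now safe to recycle
    printf("fixed: bias %s cacheWeight\n",
           bias == cache ? "overlaps" : "is separate from");
    return 0;
}
```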
37 changes: 24 additions & 13 deletions source/backend/cpu/CPUPool.cpp
@@ -73,7 +73,7 @@ static void pooling_max_pad(const float *channelInput, float *offsetOutput, int

static void poolingMax(const float *channelInput, int inputWidth, int inputHeight, float *channelOutput,
int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
int strideHeight, int padWidth, int padHeight) {
int strideHeight, int padWidth, int padHeight, MNN::PoolPadType padType) {
int padTop = padHeight <= 0 ? 0 : (padHeight + strideHeight - 1) / strideHeight;
int padBottom = (padHeight + inputHeight - kernelHeight) / strideHeight + 1;
int padLeft = padWidth <= 0 ? 0 : (padWidth + strideWidth - 1) / strideWidth;
@@ -166,7 +166,8 @@
}

static void poolingAvgPad(const float *offsetInput, float *offsetOutput, int inputWidth, int inputHeight,
int kernelWidth, int kernelHeight, int inputStep4, int iw, int ih) {
int kernelWidth, int kernelHeight, int inputStep4, int iw, int ih, int padWidth,
int padHeight, MNN::PoolPadType padType) {
#ifdef MNN_USE_NEON
float32x4_t sum = vdupq_n_f32(0);
#else
@@ -175,15 +176,23 @@ static void poolingAvgPad(const float *offsetInput, float *offsetOutput, int inp
float sum2 = 0;
float sum3 = 0;
#endif

const int khs = 0 < -ih ? -ih : 0; // max
const int khe = kernelHeight < inputHeight - ih ? kernelHeight : inputHeight - ih; // min
const int kws = 0 < -iw ? -iw : 0; // max
const int kwe = kernelWidth < inputWidth - iw ? kernelWidth : inputWidth - iw; // min

// sum
int count = 0;
if (padType == MNN::PoolPadType_CAFFE) {
count = (ALIMIN(ih + kernelHeight, inputHeight + padHeight) - ih) *
(ALIMIN(iw + kernelWidth, inputWidth + padWidth) - iw);
} else {
count = (khe - khs) * (kwe - kws);
}

const int khs = 0 < -ih ? -ih : 0; // max
const int khe = kernelHeight < inputHeight - ih ? kernelHeight : inputHeight - ih; // min
const float *kernelInput = offsetInput + khs * inputStep4;
for (int kh = khs; kh < khe; kh++, kernelInput += inputStep4) {
const int kws = 0 < -iw ? -iw : 0; // max
const int kwe = kernelWidth < inputWidth - iw ? kernelWidth : inputWidth - iw; // min
const float *cursorInput = kernelInput + kws * 4;
for (int kw = kws; kw < kwe; kw++, cursorInput += 4) {
#ifdef MNN_USE_NEON
@@ -194,7 +203,6 @@ static void poolingAvgPad(const float *offsetInput, float *offsetOutput, int inp
sum2 += cursorInput[2];
sum3 += cursorInput[3];
#endif
count++;
}
}

@@ -222,7 +230,7 @@

static void poolingAvg(const float *channelInput, int inputWidth, int inputHeight, float *channelOutput,
int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
int strideHeight, int padWidth, int padHeight) {
int strideHeight, int padWidth, int padHeight, MNN::PoolPadType padType) {
int padTop = padHeight <= 0 ? 0 : (padHeight + strideHeight - 1) / strideHeight;
int padBottom = (padHeight + inputHeight - kernelHeight) / strideHeight + 1;
int padLeft = padWidth <= 0 ? 0 : (padWidth + strideWidth - 1) / strideWidth;
@@ -243,7 +251,7 @@ static void poolingAvg(const float *channelInput, int inputWidth, int inputHeigh
for (int ow = 0, iw = -padWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += 4, offsetInput += strideWidth4) {
poolingAvgPad(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
iw, ih);
iw, ih, padWidth, padHeight, padType);
}
}
for (int oh = padTop, ih = -padHeight + oh * strideHeight; oh < padBottom;
@@ -253,14 +261,14 @@
for (int ow = 0, iw = -padWidth; ow < padLeft;
ow++, iw += strideWidth, offsetOutput += 4, offsetInput += strideWidth4) {
poolingAvgPad(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
iw, ih);
iw, ih, padWidth, padHeight, padType);
}
offsetInput = lineInput + padRight * strideWidth * 4;
offsetOutput = lineOutput + padRight * 4;
for (int ow = padRight, iw = -padWidth + ow * strideWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += 4, offsetInput += strideWidth4) {
poolingAvgPad(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
iw, ih);
iw, ih, padWidth, padHeight, padType);
}
}
for (int oh = padBottom, ih = -padHeight + oh * strideHeight; oh < outputHeight;
@@ -270,7 +278,7 @@ static void poolingAvg(const float *channelInput, int inputWidth, int inputHeigh
for (int ow = 0, iw = -padWidth; ow < outputWidth;
ow++, iw += strideWidth, offsetOutput += 4, offsetInput += strideWidth4) {
poolingAvgPad(offsetInput, offsetOutput, inputWidth, inputHeight, kernelWidth, kernelHeight, inputStep4,
iw, ih);
iw, ih, padWidth, padHeight, padType);
}
}
}
@@ -368,6 +376,8 @@ ErrorCode CPUPool::onResize(const std::vector<Tensor *> &inputs, const std::vect
int padNeededHeight = (output->height() - 1) * strideHeight + kernelHeight - input->height();
padWidth = padNeededWidth > 0 ? padNeededWidth / 2 : 0;
padHeight = padNeededHeight > 0 ? padNeededHeight / 2 : 0;
} else if (layer->padType() == PoolPadType_VALID) {
padWidth = padHeight = 0;
}
auto poolType = layer->type();
auto planeFunction = poolingMax;
@@ -380,13 +390,14 @@ ErrorCode CPUPool::onResize(const std::vector<Tensor *> &inputs, const std::vect
auto inputPlaneStride = 4 * input->width() * input->height();
auto outputPlaneStride = 4 * output->width() * output->height();
int threadNumber = ((CPUBackend *)backend())->threadNumber();
auto padType = layer->padType();
mFunction = [=]() {
MNN_CONCURRENCY_BEGIN(tId, threadNumber) {
for (int channel = (int)tId; channel < totalDepth; channel += threadNumber) {
// run
planeFunction(inputData + channel * inputPlaneStride, input->width(), input->height(),
outputData + outputPlaneStride * channel, output->width(), output->height(), kernelWidth,
kernelHeight, strideWidth, strideHeight, padWidth, padHeight);
kernelHeight, strideWidth, strideHeight, padWidth, padHeight, padType);
}
}
MNN_CONCURRENCY_END();
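The poolingAvgPad change above replaces the per-element count++ with a closed-form divisor that depends on the pad type: for PoolPadType_CAFFE the divisor is the window size clipped to the padded input bound, while otherwise only real in-bounds elements are counted (and PoolPadType_VALID now forces zero padding in onResize). A minimal scalar sketch of the two divisor conventions; avgPoolWindow is a hypothetical helper, not the MNN kernel:

```cpp
#include <algorithm>
#include <cstdio>

enum class PadMode { Caffe, TensorFlow };

// Average over one pooling window anchored at (ih, iw), which may start
// outside the input because of padding. Scalar sketch, illustration only.
float avgPoolWindow(const float* input, int inputH, int inputW,
                    int ih, int iw, int kernelH, int kernelW,
                    int padH, int padW, PadMode mode) {
    const int khs = std::max(0, -ih);
    const int khe = std::min(kernelH, inputH - ih);
    const int kws = std::max(0, -iw);
    const int kwe = std::min(kernelW, inputW - iw);

    float sum = 0.f;
    for (int kh = khs; kh < khe; ++kh)
        for (int kw = kws; kw < kwe; ++kw)
            sum += input[(ih + kh) * inputW + (iw + kw)];

    int count;
    if (mode == PadMode::Caffe) {
        // Caffe divides by the window size clipped to the padded input.
        count = (std::min(ih + kernelH, inputH + padH) - ih) *
                (std::min(iw + kernelW, inputW + padW) - iw);
    } else {
        // TensorFlow-style: divide by the number of real input elements.
        count = (khe - khs) * (kwe - kws);
    }
    return sum / count;
}

int main() {
    float img[4] = {1, 2, 3, 4}; // 2x2 input
    // 2x2 kernel anchored at (-1,-1) with pad 1: only img[0] is in bounds.
    printf("caffe: %.3f\n",
           avgPoolWindow(img, 2, 2, -1, -1, 2, 2, 1, 1, PadMode::Caffe));
    printf("tf:    %.3f\n",
           avgPoolWindow(img, 2, 2, -1, -1, 2, 2, 1, 1, PadMode::TensorFlow));
    return 0;
}
```

For that corner window the Caffe convention divides the single valid element by 4 (0.250), while the in-bounds convention divides by 1 (1.000), which is exactly why the kernel now needs the pad type.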
22 changes: 11 additions & 11 deletions source/backend/cpu/CPUSlice.cpp
@@ -120,12 +120,8 @@ static int _sliceChannel(const Tensor* inputTensor, const vector<Tensor*>& outpu
return 0;
}

CPUSlice::CPUSlice(Backend* b, const MNN::Op* op) : MNN::Execution(b) {
auto slice = op->main_as_Slice();
mAxis = slice->axis();
for (int i = 0; i < slice->slicePoints()->size(); ++i) {
mSlicePoints.push_back(slice->slicePoints()->data()[i]);
}
CPUSlice::CPUSlice(Backend* b, int axis) : MNN::Execution(b) {
mAxis = axis;
}

ErrorCode CPUSlice::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
@@ -139,7 +135,6 @@ ErrorCode CPUSlice::onResize(const std::vector<Tensor*>& inputs, const std::vect
MNN_ASSERT(inputs[0]->buffer().dim[1].flags == MNN::Tensor::REORDER_4);
if (mAxis == 1) {
bool useSlowMethod = false;

// Last one need not be 4 aligned
for (size_t b = 0; b < outputs.size() - 1; b++) {
auto& outputTensor = outputs[b]->buffer();
@@ -163,9 +158,6 @@ ErrorCode CPUSlice::onResize(const std::vector<Tensor*>& inputs, const std::vect

ErrorCode CPUSlice::onExecute(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
auto input = inputs[0];
if (-1 == mAxis) {
mAxis = input->dimensions() - 1;
}
const auto tensorFormat = input->getDimensionType();
if (Tensor::CAFFE == tensorFormat) {
MNN_ASSERT(inputs[0]->buffer().dim[1].flags == MNN::Tensor::REORDER_4);
@@ -185,7 +177,15 @@ class CPUSliceCreator : public CPUBackend::Creator {
public:
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op, Backend* backend) const {
return new CPUSlice(backend, op);
auto slice = op->main_as_Slice();
if (nullptr == slice || inputs.empty()) {
return nullptr;
}
auto axis = slice->axis();
if (axis < 0) {
axis = axis + inputs[0]->dimensions();
}
return new CPUSlice(backend, axis);
}
};

3 changes: 1 addition & 2 deletions source/backend/cpu/CPUSlice.hpp
@@ -14,14 +14,13 @@
namespace MNN {
class CPUSlice : public Execution {
public:
CPUSlice(Backend *b, const MNN::Op *op);
CPUSlice(Backend *b, int axis);
virtual ~CPUSlice() = default;
virtual ErrorCode onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;

private:
int mAxis;
std::vector<int> mSlicePoints;
std::shared_ptr<Tensor> mTempInput;
};

2 changes: 1 addition & 1 deletion source/backend/cpu/compute/CommonOptFunction.cpp
@@ -541,7 +541,7 @@ void MNNRelu6(float* dst, const float* src, size_t size) {
}

void MNNExp(float* dst, const float* src, size_t dataSize) {
int countC8 = dataSize / 8;
int countC8 = (int)dataSize / 8;
if (countC8 > 0) {
// Align to eight so asm is easier to write
static float parameters[] = {
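MNNExp walks the buffer in chunks of eight floats so the assembly kernel only has to handle aligned full blocks; the added cast presumably just silences a size_t-to-int truncation warning (one of the "fix a few warnings" items). The block/remainder split looks roughly like this, with a scalar stand-in for the asm body, illustration only:

```cpp
#include <cmath>
#include <cstddef>
#include <cstdio>

// Scalar stand-in for the 8-wide asm/NEON exp kernel.
static void expBlock8(float* dst, const float* src) {
    for (int i = 0; i < 8; ++i) dst[i] = std::exp(src[i]);
}

// Process full blocks of 8 with the fast kernel, then finish the tail
// with plain scalar code.
void expApprox(float* dst, const float* src, size_t dataSize) {
    int countC8 = (int)dataSize / 8; // full 8-wide blocks, as in the diff
    for (int i = 0; i < countC8; ++i) {
        expBlock8(dst + 8 * i, src + 8 * i);
    }
    for (size_t i = (size_t)countC8 * 8; i < dataSize; ++i) {
        dst[i] = std::exp(src[i]); // remainder
    }
}

int main() {
    float in[11], out[11];
    for (int i = 0; i < 11; ++i) in[i] = 0.1f * i;
    expApprox(out, in, 11);
    printf("%f %f\n", out[0], out[10]); // 1.000000 2.718282
    return 0;
}
```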
2 changes: 1 addition & 1 deletion source/backend/cpu/compute/ConvOpt.cpp
@@ -242,7 +242,7 @@ void MNNConvRunForLineint8_t(float* dst, const int8_t* src, const int8_t* weight

void MNNGemmFloatUnit_4(float* dstOrigin, const float* src, const float* weight, size_t src_depth_quad, size_t dst_step,
size_t dst_depth_quad, size_t weight_depth_offset) {
MNNGemmFloatCommon_4(dstOrigin, src, weight, src_depth_quad, dst_step, dst_depth_quad, CONVOLUTION_TILED_NUMBWR,
MNNGemmFloatCommon_4(dstOrigin, src, weight, src_depth_quad, dst_step, dst_depth_quad, CONVOLUTION_TILED_NUMBER,
weight_depth_offset);
}
void MNNGemmFloatOne_4(float* dstOrigin, const float* src, const float* weight, size_t src_depth_quad, size_t dst_step,
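The last hunk fixes the misspelled CONVOLUTION_TILED_NUMBWR macro: MNNGemmFloatUnit_4 is the flexible-width MNNGemmFloatCommon_4 pinned to the full tile width. A sketch of that delegation pattern under assumed names and an assumed tile value (the real kernels multiply matrices; the stand-in below only demonstrates the wrapper structure):

```cpp
#include <cstdio>

// Illustrative tile width; the real CONVOLUTION_TILED_NUMBER lives in MNN's
// convolution headers and its value may differ.
#define CONVOLUTION_TILED_NUMBER 8

// Generic micro-kernel with a runtime width, standing in for
// MNNGemmFloatCommon_4's flexible-width column loop.
static void kernelCommon(float* dst, const float* src, int width) {
    for (int w = 0; w < width; ++w) dst[w] = src[w] + 1.f;
}

// Full-tile specialization, mirroring how MNNGemmFloatUnit_4 simply
// forwards to the common kernel with the tile-width constant.
static void kernelUnit(float* dst, const float* src) {
    kernelCommon(dst, src, CONVOLUTION_TILED_NUMBER);
}

int main() {
    float src[CONVOLUTION_TILED_NUMBER] = {0};
    float dst[CONVOLUTION_TILED_NUMBER];
    kernelUnit(dst, src);
    printf("%f\n", dst[0]); // 1.000000
    return 0;
}
```

With the typo, the wrapper silently passed a different width constant than intended, which is why the fix earned its own line in the commit message.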