
Commit db155b4

Author: liqing

beta 0.2.0.2

- CPU
  - add padding support
  - fix bug in permute when channel % 4 != 0
  - fix bug in exp with extreme value
- OpenCL
  - add protecting logics
- OpenGL
  - add protecting logics
  - support NCHW format in Squeeze and Reshape
- Converter
  - add ShuffleChannel support for Caffe
  - add Clip/Transpose/Unary/Pad supports for ONNX

1 parent: ad759eb

87 files changed: +2005, -765 lines


demo/exec/pictureRecognition.cpp

Lines changed: 5 additions & 5 deletions
@@ -74,15 +74,15 @@ int main(int argc, const char* argv[]) {
     }
     MNN_PRINT("origin size: %d, %d\n", width, height);
     Matrix trans;
-    // Dst -> [0, 1]
-    trans.postScale(1.0 / size_w, 1.0 / size_h);
-    //[0, 1] -> Src
-    trans.postScale(width, height);
+    // Set scale, from dst scale to src
+    trans.setScale((float)(width-1) / (size_w-1), (float)(height-1) / (size_h-1));
     ImageProcess::Config config;
     config.filterType = BILINEAR;
     float mean[3] = {103.94f, 116.78f, 123.68f};
-    ::memcpy(config.mean, mean, sizeof(mean));
     float normals[3] = {0.017f, 0.017f, 0.017f};
+    // float mean[3] = {127.5f, 127.5f, 127.5f};
+    // float normals[3] = {0.00785f, 0.00785f, 0.00785f};
+    ::memcpy(config.mean, mean, sizeof(mean));
     ::memcpy(config.normal, normals, sizeof(normals));
     config.sourceFormat = RGBA;
     config.destFormat = BGR;
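The hunk above replaces the two chained postScale calls (dst -> [0, 1] -> src) with a single setScale that aligns the first and last pixels of the destination and source. A standalone sketch of the difference, with image and input sizes assumed for illustration (they are not taken from the demo):

// Standalone sketch (not the MNN ImageProcess API): compares the two dst -> src
// mappings in the hunk above. The sizes are assumed for illustration only.
#include <cstdio>

int main() {
    const int size_w = 224; // assumed network input width
    const int width  = 640; // assumed decoded image width

    // Old mapping: dst -> [0, 1] -> src, i.e. scale by width / size_w.
    // The last destination column (size_w - 1) then samples source x ~= 637.1,
    // short of the last source column at width - 1 = 639.
    float oldLastX = (float)width / size_w * (size_w - 1);

    // New mapping: align the first and last pixels of dst and src directly.
    float newLastX = (float)(width - 1) / (size_w - 1) * (size_w - 1); // == width - 1

    printf("old last sample x: %.2f, new last sample x: %.2f\n", oldLastX, newLastX);
    return 0;
}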

project/android/build_32.sh

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ cmake ../../../ \
 -DANDROID_ABI="armeabi-v7a" \
 -DANDROID_STL=c++_static \
 -DCMAKE_BUILD_TYPE=Release \
--DANDROID_NATIVE_API_LEVEL=android-21 \
+-DANDROID_NATIVE_API_LEVEL=android-19 \
 -DANDROID_TOOLCHAIN=gcc \
 -DMNN_BUILD_FOR_ANDROID_COMMAND=true \
 -DMNN_DEBUG=false \

project/ios/MNN.xcodeproj/project.pbxproj

Lines changed: 12 additions & 0 deletions
@@ -44,6 +44,9 @@
 4843AA5922A7E9AB00889A63 /* CPUConv2DBackPropFilter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4843AA5122A7E9AB00889A63 /* CPUConv2DBackPropFilter.cpp */; };
 4843AA5A22A7E9AB00889A63 /* CPUSoftmaxGrad.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4843AA5222A7E9AB00889A63 /* CPUSoftmaxGrad.cpp */; };
 4843AA5B22A7E9AB00889A63 /* CPUSoftmaxGrad.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 4843AA5322A7E9AB00889A63 /* CPUSoftmaxGrad.hpp */; };
+4847D41D22C0739A0049F3CA /* ShapePadding.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4847D41C22C0739A0049F3CA /* ShapePadding.cpp */; };
+4847D42022C07E850049F3CA /* CPUPadding.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4847D41E22C07E850049F3CA /* CPUPadding.cpp */; };
+4847D42122C07E850049F3CA /* CPUPadding.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 4847D41F22C07E850049F3CA /* CPUPadding.hpp */; };
 4851BE102122C1BC009BB0AC /* Tensor.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 4851BE0F2122C1BC009BB0AC /* Tensor.hpp */; settings = {ATTRIBUTES = (Public, ); }; };
 485DD411217F495500129159 /* CPUQuantizedAdd.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 485DD40B217F495400129159 /* CPUQuantizedAdd.hpp */; };
 485DD412217F495500129159 /* CPUQuantizedSoftmax.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 485DD40C217F495500129159 /* CPUQuantizedSoftmax.cpp */; };
@@ -752,6 +755,9 @@
 4843AA5122A7E9AB00889A63 /* CPUConv2DBackPropFilter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPUConv2DBackPropFilter.cpp; sourceTree = "<group>"; };
 4843AA5222A7E9AB00889A63 /* CPUSoftmaxGrad.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPUSoftmaxGrad.cpp; sourceTree = "<group>"; };
 4843AA5322A7E9AB00889A63 /* CPUSoftmaxGrad.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CPUSoftmaxGrad.hpp; sourceTree = "<group>"; };
+4847D41C22C0739A0049F3CA /* ShapePadding.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = ShapePadding.cpp; sourceTree = "<group>"; };
+4847D41E22C07E850049F3CA /* CPUPadding.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = CPUPadding.cpp; sourceTree = "<group>"; };
+4847D41F22C07E850049F3CA /* CPUPadding.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = CPUPadding.hpp; sourceTree = "<group>"; };
 4851BE0F2122C1BC009BB0AC /* Tensor.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Tensor.hpp; sourceTree = "<group>"; };
 485DD40B217F495400129159 /* CPUQuantizedAdd.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CPUQuantizedAdd.hpp; sourceTree = "<group>"; };
 485DD40C217F495500129159 /* CPUQuantizedSoftmax.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPUQuantizedSoftmax.cpp; sourceTree = "<group>"; };
@@ -1908,6 +1914,8 @@
 48B904A522953E0F003116BB /* CPUZeroLike.hpp */,
 4829D54E22AF5C340093E3BE /* CPUSetDiff1D.cpp */,
 4829D54F22AF5C340093E3BE /* CPUSetDiff1D.hpp */,
+4847D41E22C07E850049F3CA /* CPUPadding.cpp */,
+4847D41F22C07E850049F3CA /* CPUPadding.hpp */,
 );
 name = cpu;
 path = backend/cpu;
@@ -2341,6 +2349,7 @@
 EBB38EF421E748B9005F76D7 /* ShapeUnpack.cpp */,
 EBB38EDC21E748B9005F76D7 /* ShapeWhere.cpp */,
 48B904A8229550CF003116BB /* ShapeSelect.cpp */,
+4847D41C22C0739A0049F3CA /* ShapePadding.cpp */,
 );
 path = shape;
 sourceTree = "<group>";
@@ -2475,6 +2484,7 @@
 488875D5215B639F0079B12E /* MetalInterp.hpp in Headers */,
 488875B4215B639F0079B12E /* MetalReLU.hpp in Headers */,
 48887644215B639F0079B12E /* ConvolutionTiledExecutor.hpp in Headers */,
+4847D42122C07E850049F3CA /* CPUPadding.hpp in Headers */,
 488875B7215B639F0079B12E /* MetalSlice.hpp in Headers */,
 92EEFEB2217F0CBB00F89377 /* CPUCrop.hpp in Headers */,
 921722F021DDF63A004583BF /* GpuLibrary_generated.h in Headers */,
@@ -2827,6 +2837,7 @@
 92C674F922549A1600011D33 /* MetalReLU6.mm in Sources */,
 488875D3215B639F0079B12E /* MetalSpatialProduct.metal in Sources */,
 48887630215B639F0079B12E /* CPUTopKV2.cpp in Sources */,
+4847D42022C07E850049F3CA /* CPUPadding.cpp in Sources */,
 48BF218621A4257500AFF78E /* MNNSamplerC1BilinearOpt.S in Sources */,
 CE96FE8121707D58004AB400 /* MetalMatMul.metal in Sources */,
 48887689215B639F0079B12E /* MNNCubicLineC4.S in Sources */,
@@ -2844,6 +2855,7 @@
 488875FF215B639F0079B12E /* CPUSize.cpp in Sources */,
 EB4925C3224A147E00C512BB /* CPUMoments.cpp in Sources */,
 92256950219D6E0200F251E2 /* MetalRange.mm in Sources */,
+4847D41D22C0739A0049F3CA /* ShapePadding.cpp in Sources */,
 924F132521ABD47F006D46A4 /* MetalQuantizedSoftmax.metal in Sources */,
 EBB38F1521E748B9005F76D7 /* ShapeWhere.cpp in Sources */,
 488876D9215B639F0079B12E /* CPUTanh.cpp in Sources */,

schema/default/MNN.fbs

Lines changed: 1 addition & 2 deletions
@@ -142,8 +142,7 @@ enum OpType : int {
 
 table Plugin {
     type: string;
-    sizeCompute: Net;
-    buffer: [byte];
+    buffer: [Blob];
 }
 
 union OpParameter {

source/backend/cpu/CPUDequantize.cpp

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ void dequantizeMinFirst(uint8_t* input, float* output, float* rangeScale, float*
 namespace MNN {
 
 template <typename T>
-CPUDequantize<T>::CPUDequantize(Backend* backend, QuantizeMode mode, const Op* op) : mMode(mode), Execution(backend) {
+CPUDequantize<T>::CPUDequantize(Backend* backend, QuantizeMode mode, const Op* op) : Execution(backend), mMode(mode) {
     mHalfRange = !std::is_signed<T>::value ? 0.0f
                      : (static_cast<double>(std::numeric_limits<T>::max()) -
                         static_cast<double>(std::numeric_limits<T>::min()) + 1) /
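The only change here is the order of the mem-initializers. C++ initializes bases and members in declaration order, so listing Execution(backend) before mMode(mode) matches the actual initialization order and avoids -Wreorder warnings. A minimal sketch of the rule with hypothetical names:

// Hypothetical illustration of mem-initializer order (not MNN code): members and
// bases are initialized in declaration order, not in the order they are listed.
#include <cstdio>

struct Base {
    explicit Base(int v) : value(v) {}
    int value;
};

struct Derived : Base {
    // The Base subobject is constructed first, then mode_, regardless of how the
    // initializer list is written; listing them in that order avoids -Wreorder.
    Derived(int v, int mode) : Base(v), mode_(mode) {}
    int mode_;
};

int main() {
    Derived d(1, 2);
    printf("%d %d\n", d.value, d.mode_);
    return 0;
}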

source/backend/cpu/CPUGatherV2.cpp

Lines changed: 3 additions & 3 deletions
@@ -63,10 +63,10 @@ class CPUGatherV2Creator : public CPUBackend::Creator {
 public:
     virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
                                 const MNN::Op *op, Backend *backend) const override {
-        switch (op->main_as_GatherV2()->Tparams()) {
-            case DataType_DT_INT32:
+        switch (inputs[0]->getType().code) {
+            case halide_type_int:
                 return new CPUGatherV2<int32_t>(backend, op);
-            case DataType_DT_FLOAT:
+            case halide_type_float:
                 return new CPUGatherV2<float>(backend, op);
             default:
                 MNN_ASSERT(false); // unsupported type
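The creator now switches on the element type of the incoming tensor rather than on the Tparams attribute serialized in the op, so the instantiated kernel always matches the data that actually arrives. A standalone sketch of the same dispatch pattern, using a hypothetical type-code enum rather than the real halide_type_t:

// Hypothetical sketch of dispatch on a runtime element-type code (names are
// illustrative; this is not the MNN/Halide API itself).
#include <cstdint>
#include <cstdio>

enum class TypeCode { Int32, Float32 };

template <typename T>
void gatherRows(const T* table, const int* indices, int count, int rowSize, T* out) {
    for (int i = 0; i < count; ++i) {
        const T* src = table + indices[i] * rowSize;
        for (int j = 0; j < rowSize; ++j) {
            out[i * rowSize + j] = src[j];
        }
    }
}

// Pick the template instantiation from the element type of the incoming data,
// mirroring the switch on inputs[0]->getType().code in the hunk above.
void gather(TypeCode code, const void* table, const int* indices, int count, int rowSize, void* out) {
    switch (code) {
        case TypeCode::Int32:
            gatherRows(static_cast<const int32_t*>(table), indices, count, rowSize,
                       static_cast<int32_t*>(out));
            break;
        case TypeCode::Float32:
            gatherRows(static_cast<const float*>(table), indices, count, rowSize,
                       static_cast<float*>(out));
            break;
    }
}

int main() {
    float table[6]   = {0, 1, 2, 3, 4, 5}; // 3 rows of 2
    int   indices[2] = {2, 0};
    float out[4];
    gather(TypeCode::Float32, table, indices, 2, 2, out);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 4 5 0 1
    return 0;
}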

source/backend/cpu/CPUOPRegister.cpp

Lines changed: 2 additions & 0 deletions
@@ -33,6 +33,7 @@ extern void ___CPUMomentsCreator__OpType_Moments__();
 extern void ___CPUNonMaxSuppressionV2Creator__OpType_NonMaxSuppressionV2__();
 extern void ___CPUNormalizeCreator__OpType_Normalize__();
 extern void ___CPUPackCreator__OpType_Pack__();
+extern void ___CPUPaddingCreator__OpType_Padding__();
 extern void ___CPUPermuteCreator__OpType_Permute__();
 extern void ___CPUPoolCreator__OpType_Pooling__();
 extern void ___CPUPoolGradCreator__OpType_PoolGrad__();
@@ -119,6 +120,7 @@ ___CPUMomentsCreator__OpType_Moments__();
 ___CPUNonMaxSuppressionV2Creator__OpType_NonMaxSuppressionV2__();
 ___CPUNormalizeCreator__OpType_Normalize__();
 ___CPUPackCreator__OpType_Pack__();
+___CPUPaddingCreator__OpType_Padding__();
 ___CPUPermuteCreator__OpType_Permute__();
 ___CPUPoolCreator__OpType_Pooling__();
 ___CPUPoolGradCreator__OpType_PoolGrad__();

source/backend/cpu/CPUPadding.cpp

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+//
+// CPUPadding.cpp
+// MNN
+//
+// Created by MNN on 2019/6/24.
+// Copyright © 2018 Alibaba. All rights reserved.
+//
+
+#include "CPUPadding.hpp"
+#include "Macro.h"
+#include "TensorUtils.hpp"
+namespace MNN {
+ErrorCode CPUPadding::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
+    auto input   = inputs[0];
+    auto output  = outputs[0];
+    auto padding = inputs[1]->host<int32_t>();
+    ::memset(output->host<char>(), 0, output->size());
+    auto bytes = input->getType().bytes();
+    auto unit  = input->length(3) * bytes;
+    for (int b = 0; b < input->length(0); ++b) {
+        auto outputB = output->host<char>() + output->stride(0) * (b + padding[2 * 0]) * bytes;
+        auto inputB  = input->host<char>() + input->stride(0) * b * bytes;
+        for (int h = 0; h < input->length(1); ++h) {
+            auto outputH = outputB + output->stride(1) * (h + padding[2 * 1]) * bytes;
+            auto inputH  = inputB + input->stride(1) * h * bytes;
+            for (int w = 0; w < input->length(2); ++w) {
+                auto outputW = outputH + output->stride(2) * (w + padding[2 * 2]) * bytes;
+                auto inputW  = inputH + input->stride(2) * w * bytes;
+                ::memcpy(outputW + padding[3 * 2] * bytes, inputW, unit);
+            }
+        }
+    }
+    return NO_ERROR;
+}
+
+ErrorCode CPUPaddingPacked::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
+    auto input  = inputs[0];
+    auto output = outputs[0];
+    auto iw     = input->width();
+    auto ih     = input->height();
+    auto ic     = input->channel();
+    auto ib     = input->batch();
+
+    auto ow      = output->width();
+    auto oh      = output->height();
+    auto icC4    = UP_DIV(ic, 4);
+    auto padding = inputs[1]->host<int32_t>();
+    ::memset(output->host<float>(), 0, output->size());
+    for (int n = 0; n < ib; ++n) {
+        auto inputN  = input->host<float>() + input->stride(0) * n;
+        auto outputN = output->host<float>() + output->stride(0) * (padding[2 * 0] + n);
+        for (int c = 0; c < icC4; ++c) {
+            auto inputC  = inputN + c * iw * ih * 4;
+            auto outputC = outputN + c * ow * oh * 4;
+
+            for (int h = 0; h < ih; ++h) {
+                auto inputH  = inputC + h * iw * 4;
+                auto outputH = outputC + (h + padding[2 * 2]) * ow * 4;
+
+                ::memcpy(outputH + padding[2 * 3] * 4, inputH, iw * 4 * sizeof(float));
+            }
+        }
+    }
+
+    return NO_ERROR;
+}
+class CPUPaddingCreator : public CPUBackend::Creator {
+public:
+    virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
+                                const MNN::Op *op, Backend *backend) const {
+        if (inputs[0]->dimensions() != 4) {
+            MNN_ERROR("Currently padding only support NHWC or NC4HW4\n");
+            return nullptr;
+        }
+        auto padding    = inputs[1];
+        auto paddingPtr = padding->host<int32_t>();
+        if (TensorUtils::getDescribe(inputs[0])->dimensionFormat == MNN_DATA_FORMAT_NHWC) {
+            return new CPUPadding(backend);
+        }
+        if (paddingPtr[2] != 0 || paddingPtr[3] != 0) {
+            MNN_ERROR("Currently padding NC4HW4 don't support channel padding\n");
+            return nullptr;
+        }
+        if (inputs[0]->buffer().type.code != halide_type_float) {
+            MNN_ERROR("Currently padding NC4HW4 only support float padding\n");
+            return nullptr;
+        }
+        return new CPUPaddingPacked(backend);
+    }
+};
+
+REGISTER_CPU_OP_CREATOR(CPUPaddingCreator, OpType_Padding);
+}; // namespace MNN
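The new kernel reads its padding amounts from inputs[1], laid out as [before, after] pairs per dimension, i.e. padding[2 * d] and padding[2 * d + 1]. The corresponding shape computation lives in the commit's ShapePadding.cpp, which is not shown in this excerpt; the sketch below only illustrates the assumed layout:

// Standalone sketch of the padding layout the kernel above assumes: inputs[1]
// holds [before, after] pairs per dimension, flattened as padding[2 * d] and
// padding[2 * d + 1]. Illustration only; ShapePadding.cpp itself is not shown here.
#include <cstdio>
#include <vector>

std::vector<int> paddedShape(const std::vector<int>& inShape, const int* padding) {
    std::vector<int> outShape(inShape.size());
    for (size_t d = 0; d < inShape.size(); ++d) {
        outShape[d] = padding[2 * d] + inShape[d] + padding[2 * d + 1];
    }
    return outShape;
}

int main() {
    std::vector<int> nhwc = {1, 32, 32, 3};
    // One row/column of zeros on each side of H and W, nothing on N and C.
    int padding[8] = {0, 0, 1, 1, 1, 1, 0, 0};
    auto out = paddedShape(nhwc, padding);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); // 1 34 34 3
    return 0;
}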

source/backend/cpu/CPUPadding.hpp

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+//
+// CPUPadding.hpp
+// MNN
+//
+// Created by MNN on 2019/6/24.
+// Copyright © 2018 Alibaba. All rights reserved.
+//
+
+#ifndef CPUPadding_hpp
+#define CPUPadding_hpp
+
+#include <stdio.h>
+#include "CPUBackend.hpp"
+namespace MNN {
+class CPUPaddingPacked : public Execution {
+public:
+    CPUPaddingPacked(Backend *bn) : Execution(bn) {
+        // Do nothing
+    }
+    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
+};
+class CPUPadding : public Execution {
+public:
+    CPUPadding(Backend *bn) : Execution(bn) {
+        // Do nothing
+    }
+    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) override;
+};
+}; // namespace MNN
+
+#endif /* CPUPadding_hpp */

source/backend/cpu/CPUPermute.cpp

Lines changed: 19 additions & 3 deletions
@@ -29,7 +29,7 @@ ErrorCode CPUPermute::onResize(const std::vector<Tensor *> &inputs, const std::v
 ErrorCode CPUPermute::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
     MNN_ASSERT(1 == inputs.size());
     MNN_ASSERT(1 == outputs.size());
-    
+
     auto &input = inputs[0]->buffer();
     auto &output = outputs[0]->buffer();
 
@@ -73,7 +73,7 @@ ErrorCode CPUPermute::onExecute(const std::vector<Tensor *> &inputs, const std::
     if (output.dimensions > 3) {
         outputWidth = output.dim[3].extent;
     }
-    const int outputChannelAlign4 = ALIGN_UP4(output.dim[1].extent);
+    const int outputChannel = output.dim[1].extent;
 
     int strides[4][4]; // map from change of output index to change of input index on N, C4, H and W
 
@@ -99,7 +99,7 @@ ErrorCode CPUPermute::onExecute(const std::vector<Tensor *> &inputs, const std::
 
     for (int ob = 0, outputIndex = 0, inputIndex = 0; ob < output.dim[0].extent; ++ob) {
         const int inputIndex1 = inputIndex;
-        for (int oz = 0; oz < outputChannelAlign4; oz += 4) {
+        for (int oz = 0; oz <= outputChannel - 4; oz += 4) {
             const int inputIndex2 = inputIndex;
             for (int oy = 0; oy < outputHeight; ++oy) {
                 const int inputIndex3 = inputIndex;
@@ -114,6 +114,22 @@ ErrorCode CPUPermute::onExecute(const std::vector<Tensor *> &inputs, const std::
             }
             inputIndex = inputIndex2 + ocTotalStride;
         }
+        if (outputChannel % 4 != 0) {
+            for (int oy = 0; oy < outputHeight; ++oy) {
+                const int inputIndex3 = inputIndex;
+                for (int ox = 0; ox < outputWidth; ++ox) {
+                    originOutput[outputIndex++] = originInput[inputIndex];
+                    for (int oz = 0; oz < outputChannel % 4 - 1; ++oz) {
+                        originOutput[outputIndex++] = originInput[inputIndex + strides[1][oz]];
+                    }
+                    for (int oz = outputChannel % 4; oz < 4; ++oz) {
+                        originOutput[outputIndex++] = 0.0f;
+                    }
+                    inputIndex += strides[3][ox % 4];
+                }
+                inputIndex = inputIndex3 + strides[2][oy % 4];
+            }
+        }
         inputIndex = inputIndex1 + strides[0][ob % 4];
     }
 
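The added block handles the tail when the channel count is not a multiple of 4: the remaining outputChannel % 4 values are copied and the rest of the last 4-wide group is zero-filled, matching the NC4HW4 packing. A standalone sketch of that tail handling (illustrative only; the real kernel also permutes H and W through per-axis strides):

// Standalone sketch of the tail handling the fix above adds: channels are processed
// in groups of 4 (NC4HW4 packing), and when channel % 4 != 0 the last group copies
// the remaining channels and zero-fills the rest instead of reading past the data.
#include <cstdio>
#include <vector>

void packC4(const float* src, int channel, int area, std::vector<float>& dst) {
    const int groups = (channel + 3) / 4; // UP_DIV(channel, 4)
    dst.assign(groups * 4 * area, 0.0f);
    for (int g = 0; g < groups; ++g) {
        const int remain = (g == groups - 1 && channel % 4 != 0) ? channel % 4 : 4;
        for (int i = 0; i < area; ++i) {
            for (int c = 0; c < remain; ++c) {
                dst[(g * area + i) * 4 + c] = src[(g * 4 + c) * area + i];
            }
            // channels remain..3 of the last group stay zero, mirroring the 0.0f fill
        }
    }
}

int main() {
    const int channel = 6, area = 2; // 6 % 4 != 0 exercises the tail path
    float src[channel * area];
    for (int i = 0; i < channel * area; ++i) src[i] = (float)i;
    std::vector<float> dst;
    packC4(src, channel, area, dst);
    for (float v : dst) printf("%g ", v);
    printf("\n");
    return 0;
}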
