Merge pull request #2470 from alibaba/feature/sync
Sync for version 2.6
jxt1234 committed Jul 5, 2023
2 parents 24a2e4e + e1ae448 commit c293f9e
Showing 61 changed files with 2,803 additions and 2,446 deletions.
1 change: 1 addition & 0 deletions docs/compile/cmake.md
@@ -44,6 +44,7 @@ MNN uses CMake to build the project; the macro definitions available in CMake are listed below:
| MNN_CUDA | Whether to build the `Cuda` backend, default `OFF` |
| MNN_CUDA_PROFILE | Whether to enable the CUDA profiling tool, default `OFF` |
| MNN_CUDA_QUANT | Whether to compile the CUDA quantization files, default `OFF` |
| MNN_CUDA_BF16 | Whether to compile the CUDA BF16 files, default `OFF` |
| MNN_TENSORRT | Whether to build the `TensorRT` backend, default `OFF` |
| MNN_COREML | Whether to build the `CoreML` backend, default `OFF` |
| MNN_NNAPI | Whether to build the `NNAPI` backend, default `OFF` |
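For context (not part of this diff), the new switch is passed at configure time like the existing CUDA options; a hypothetical configure invocation for a CUDA build with the BF16 files enabled might look like:

```bash
# Illustrative only; flag names as documented in the table above.
cmake .. -DMNN_CUDA=ON -DMNN_CUDA_BF16=ON
make -j8
```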
1 change: 1 addition & 0 deletions docs/index.rst
@@ -47,6 +47,7 @@
:maxdepth: 1
:caption: 表达式
:name: expr

inference/expr

.. toctree::
4 changes: 2 additions & 2 deletions include/MNN/MNNDefine.h
@@ -68,7 +68,7 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \
#define STR_IMP(x) #x
#define STR(x) STR_IMP(x)
#define MNN_VERSION_MAJOR 2
#define MNN_VERSION_MINOR 5
#define MNN_VERSION_PATCH 3
#define MNN_VERSION_MINOR 6
#define MNN_VERSION_PATCH 0
#define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH)
#endif /* MNNDefine_h */
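As an aside, the STR/STR_IMP pair above is the standard two-level stringification idiom: the outer macro expands its argument before the inner one applies `#`. A self-contained sketch of what the bumped macros now produce:

```cpp
// Minimal reproduction of the MNNDefine.h version-string machinery.
#include <cstdio>

#define STR_IMP(x) #x      // stringizes the (already expanded) token
#define STR(x) STR_IMP(x)  // forces expansion of x before stringizing
#define MNN_VERSION_MAJOR 2
#define MNN_VERSION_MINOR 6
#define MNN_VERSION_PATCH 0
#define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH)

int main() {
    printf("%s\n", MNN_VERSION); // prints 2.6.0
    return 0;
}
```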
16 changes: 8 additions & 8 deletions project/ios/MNN.xcodeproj/project.pbxproj
@@ -763,6 +763,8 @@
C4F906B327688C3A0026B847 /* NMSModule.hpp in Headers */ = {isa = PBXBuildFile; fileRef = C4F906B127688C3A0026B847 /* NMSModule.hpp */; };
C4F906B427688C3A0026B847 /* NMSModule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C4F906B227688C3A0026B847 /* NMSModule.cpp */; };
C4FB6CB22769DF0800963B07 /* GeometryCumSum.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C4FB6CB12769DF0800963B07 /* GeometryCumSum.cpp */; };
CE125CC82A52BF6B003698C9 /* MNNBilinearSampleC8.S in Sources */ = {isa = PBXBuildFile; fileRef = CE125CC62A52BF6B003698C9 /* MNNBilinearSampleC8.S */; };
CE125CC92A52BF6B003698C9 /* MNNBilinearLineC8.S in Sources */ = {isa = PBXBuildFile; fileRef = CE125CC72A52BF6B003698C9 /* MNNBilinearLineC8.S */; };
CE7DC00028E2DE6B00797689 /* ShapeConvTranspose3D.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CE7DBFFF28E2DE6B00797689 /* ShapeConvTranspose3D.cpp */; };
CE9AFED628E54E3300566949 /* CPUInterp3D.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CE9AFED428E54E3300566949 /* CPUInterp3D.cpp */; };
CE9AFED728E54E3300566949 /* CPUInterp3D.hpp in Headers */ = {isa = PBXBuildFile; fileRef = CE9AFED528E54E3300566949 /* CPUInterp3D.hpp */; };
@@ -785,9 +787,7 @@
CEDB211C2846D59C00AE9DC4 /* mobilenet_v2.caffe.mnn in Resources */ = {isa = PBXBuildFile; fileRef = CEDB211B2846D59C00AE9DC4 /* mobilenet_v2.caffe.mnn */; };
CEDB211D284706F900AE9DC4 /* MNN.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0F1465B71FA18D1000F9860A /* MNN.framework */; };
CEDB211E2847070600AE9DC4 /* MNN.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0F1465B71FA18D1000F9860A /* MNN.framework */; };
CEE9B9522A3AA4C4006438F2 /* MNNBilinearSampleC16.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B94E2A3AA4C4006438F2 /* MNNBilinearSampleC16.S */; };
CEE9B9532A3AA4C4006438F2 /* MNNCubicLineC16.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B94F2A3AA4C4006438F2 /* MNNCubicLineC16.S */; };
CEE9B9542A3AA4C4006438F2 /* MNNBilinearLineC16.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B9502A3AA4C4006438F2 /* MNNBilinearLineC16.S */; };
CEE9B9552A3AA4C4006438F2 /* MNNCubicSampleC16.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B9512A3AA4C4006438F2 /* MNNCubicSampleC16.S */; };
CEE9B95A2A3AA4D4006438F2 /* MNNCubicLineC16.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B9562A3AA4D4006438F2 /* MNNCubicLineC16.S */; };
CEE9B95B2A3AA4D4006438F2 /* MNNBilinearLineC8.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B9572A3AA4D4006438F2 /* MNNBilinearLineC8.S */; };
@@ -1590,6 +1590,8 @@
C4F906B127688C3A0026B847 /* NMSModule.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = NMSModule.hpp; sourceTree = "<group>"; };
C4F906B227688C3A0026B847 /* NMSModule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = NMSModule.cpp; sourceTree = "<group>"; };
C4FB6CB12769DF0800963B07 /* GeometryCumSum.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = GeometryCumSum.cpp; sourceTree = "<group>"; };
CE125CC62A52BF6B003698C9 /* MNNBilinearSampleC8.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNBilinearSampleC8.S; sourceTree = "<group>"; };
CE125CC72A52BF6B003698C9 /* MNNBilinearLineC8.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNBilinearLineC8.S; sourceTree = "<group>"; };
CE7DBFFF28E2DE6B00797689 /* ShapeConvTranspose3D.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ShapeConvTranspose3D.cpp; sourceTree = "<group>"; };
CE9AFED428E54E3300566949 /* CPUInterp3D.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPUInterp3D.cpp; sourceTree = "<group>"; };
CE9AFED528E54E3300566949 /* CPUInterp3D.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CPUInterp3D.hpp; sourceTree = "<group>"; };
@@ -1614,9 +1616,7 @@
CEDB21172846D58200AE9DC4 /* testcat.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; name = testcat.jpg; path = ../../../demo/model/MobileNet/testcat.jpg; sourceTree = "<group>"; };
CEDB21182846D58200AE9DC4 /* synset_words.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = synset_words.txt; path = ../../../demo/model/MobileNet/synset_words.txt; sourceTree = "<group>"; };
CEDB211B2846D59C00AE9DC4 /* mobilenet_v2.caffe.mnn */ = {isa = PBXFileReference; lastKnownFileType = file; name = mobilenet_v2.caffe.mnn; path = ../../../resource/model/MobileNet/v2/mobilenet_v2.caffe.mnn; sourceTree = "<group>"; };
CEE9B94E2A3AA4C4006438F2 /* MNNBilinearSampleC16.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNBilinearSampleC16.S; sourceTree = "<group>"; };
CEE9B94F2A3AA4C4006438F2 /* MNNCubicLineC16.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNCubicLineC16.S; sourceTree = "<group>"; };
CEE9B9502A3AA4C4006438F2 /* MNNBilinearLineC16.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNBilinearLineC16.S; sourceTree = "<group>"; };
CEE9B9512A3AA4C4006438F2 /* MNNCubicSampleC16.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNCubicSampleC16.S; sourceTree = "<group>"; };
CEE9B9562A3AA4D4006438F2 /* MNNCubicLineC16.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNCubicLineC16.S; sourceTree = "<group>"; };
CEE9B9572A3AA4D4006438F2 /* MNNBilinearLineC8.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNBilinearLineC8.S; sourceTree = "<group>"; };
@@ -2501,8 +2501,8 @@
92FF013A23AA0B4E00AC97F6 /* arm32 */ = {
isa = PBXGroup;
children = (
CEE9B9502A3AA4C4006438F2 /* MNNBilinearLineC16.S */,
CEE9B94E2A3AA4C4006438F2 /* MNNBilinearSampleC16.S */,
CE125CC72A52BF6B003698C9 /* MNNBilinearLineC8.S */,
CE125CC62A52BF6B003698C9 /* MNNBilinearSampleC8.S */,
CEE9B94F2A3AA4C4006438F2 /* MNNCubicLineC16.S */,
CEE9B9512A3AA4C4006438F2 /* MNNCubicSampleC16.S */,
950B28DF29F627E00002F454 /* MNNBinaryAddInt8.S */,
@@ -3356,6 +3356,7 @@
950B28ED29F627F70002F454 /* MNNBinaryMulInt8.S in Sources */,
481FA853259C27E00047F01F /* ShapeTensorArray.cpp in Sources */,
6A131E3F25823349002EC3D6 /* PluginShapeInference.cpp in Sources */,
CE125CC82A52BF6B003698C9 /* MNNBilinearSampleC8.S in Sources */,
92FF025723AA0B5A00AC97F6 /* CPUQuanConvolutionDepthwise.cpp in Sources */,
48034563254157CE004738E3 /* MNNNV21ToBGRAUnit.S in Sources */,
48FA474823AA127B00172C3B /* Expr.cpp in Sources */,
@@ -3375,7 +3376,6 @@
48747D61245D9E33000B9709 /* ConvertUtils.cpp in Sources */,
92FF043B23AA0B7100AC97F6 /* ShapeDetectionPostProcess.cpp in Sources */,
48417FF124D13BF50056D9A7 /* GeometryELU.cpp in Sources */,
CEE9B9522A3AA4C4006438F2 /* MNNBilinearSampleC16.S in Sources */,
48C84B9A250F720C00EE7666 /* CPULayerNorm.cpp in Sources */,
4DF87C4A2887D3560003E2D4 /* calib3d.cpp in Sources */,
48F34734273A7C8400C45394 /* ImageProcessFunction.cpp in Sources */,
@@ -3515,6 +3515,7 @@
CECF8C7D299CAD9400D3875B /* md5.c in Sources */,
92FF041923AA0B7100AC97F6 /* ShapeQuantizedMaxPool.cpp in Sources */,
92FF038A23AA0B5A00AC97F6 /* CPURange.cpp in Sources */,
CE125CC92A52BF6B003698C9 /* MNNBilinearLineC8.S in Sources */,
92FF03A123AA0B5A00AC97F6 /* Int8FunctionsOpt.cpp in Sources */,
92FF026523AA0B5A00AC97F6 /* CPUQuantizedAvgPool.cpp in Sources */,
92FF029423AA0B5A00AC97F6 /* CPUMatMul.cpp in Sources */,
@@ -3555,7 +3556,6 @@
482BFBD028351BA1009210E4 /* AllShader.cpp in Sources */,
92FF04BA23AA0BFB00AC97F6 /* WrapExecution.cpp in Sources */,
11A01A06258785EA00745FA7 /* MNNVectorTop1Int32.S in Sources */,
CEE9B9542A3AA4C4006438F2 /* MNNBilinearLineC16.S in Sources */,
48FB9DC124A8445A008E1A2D /* MNNAxByClampBroadcastC4.S in Sources */,
EBD4842F2485FF660083CE95 /* Arm82Interp.cpp in Sources */,
4819FB3B24C69E680050BD09 /* GeometrySpatialProduct.cpp in Sources */,
14 changes: 12 additions & 2 deletions pymnn/src/util.h
@@ -107,13 +107,23 @@ inline int64_t unpackLong(PyObject* obj) {
}
return (int64_t)value;
}
inline double unpackDoubleOrLong(PyObject* obj) {
if (PyLong_Check(obj)
#if PY_MAJOR_VERSION < 3
|| PyInt_Check(obj)
#endif
) {
return static_cast<float>(unpackLong(obj));
}
return unpackDouble(obj);
}
inline void store_scalar(void* data, int dtype, PyObject* obj) {
switch (dtype) {
case 4: *(uint8_t*)data = (uint8_t)unpackLong(obj); break;
case 3: *(int32_t*)data = (int32_t)unpackLong(obj); break;
case 9: *(int64_t*)data = unpackLong(obj); break;
case 1: *(float*)data = (float)unpackDouble(obj); break;
case 2: *(double*)data = (double)unpackDouble(obj); break;
case 1: *(float*)data = (float)unpackDoubleOrLong(obj); break;
case 2: *(double*)data = (double)unpackDoubleOrLong(obj); break;
case 6: *(int8_t*)data = (int8_t)unpackLong(obj); break;
default: PyMNN_ERROR_LOG("store_scalar: invalid type");
}
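A usage sketch (not part of the diff): with `unpackDoubleOrLong` in place, `store_scalar` can accept a Python int where a float or double dtype is requested, on the assumption that plain `unpackDouble` did not handle integer objects. The caller below is hypothetical:

```cpp
// Hypothetical caller of the pymnn helper shown above.
#include <Python.h>

void store_scalar(void* data, int dtype, PyObject* obj); // as defined in pymnn/src/util.h

void demoStoreFloatFromInt() {
    PyObject* v = PyLong_FromLong(42); // a Python int, not a Python float
    float out = 0.0f;
    store_scalar(&out, 1, v);          // dtype 1 == float; now routed via unpackDoubleOrLong
    // out == 42.0f
    Py_DECREF(v);
}
```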
16 changes: 8 additions & 8 deletions source/backend/cpu/BinaryUtils.hpp
@@ -330,7 +330,7 @@ void execute(void* outputRaw, const void* inputRaw0, const void* inputRaw1, int
}

template<typename Tin, typename Tout, typename Func>
void executeInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, const float* inputScale0, const float* inputScale1, const float* outputScale, int elementSize, int needBroadcast) {
void executeInt8(int8_t* outputRaw, const int8_t* inputRaw0, const int8_t* inputRaw1, ssize_t* inputScalesInt32, float* inputScalesFp32, const int8_t* inputOffset0, const int8_t* inputOffset1, const int8_t* outputOffset, size_t elementSize, size_t needBroadcast) {
Func f;
int size = elementSize;
#ifdef MNN_USE_NEON
@@ -355,19 +355,19 @@ void executeInt8(int8_t* outputRaw, const int8_t* input
#endif
for (int i = 0; i < size; ++i) {
if (needBroadcast == 0) {
inp0 = (inputData0[0]- zeroPoint) * inputScale0[0];
inp1 = (inputData1[i]- zeroPoint) * inputScale1[0];
inp0 = (inputData0[0]- zeroPoint - inputOffset0[0]) * inputScalesFp32[0];
inp1 = (inputData1[i]- zeroPoint - inputOffset1[0]) * inputScalesFp32[1];
output = f(inp0, inp1);
} else if (needBroadcast == 1) {
inp0 = (inputData0[i] - zeroPoint) * inputScale0[0];
inp1 = (inputData1[0] - zeroPoint) * inputScale1[0];
inp0 = (inputData0[i] - zeroPoint - inputOffset0[0]) * inputScalesFp32[0];
inp1 = (inputData1[0] - zeroPoint - inputOffset1[0]) * inputScalesFp32[1];
output = f(inp0, inp1);
} else {
inp0 = (inputData0[i] - zeroPoint) * inputScale0[0];
inp1 = (inputData1[i] - zeroPoint) * inputScale1[0];
inp0 = (inputData0[i] - zeroPoint - inputOffset0[0]) * inputScalesFp32[0];
inp1 = (inputData1[i] - zeroPoint - inputOffset1[0]) * inputScalesFp32[1];
output = f(inp0, inp1);
}
int value = (int)roundf(output * outputScale[0]) + zeroPoint;
int value = (int)roundf(output * inputScalesFp32[2]) + zeroPoint + outputOffset[0];
if (value > maxValue) {
value = maxValue;
}
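Net effect of the widened signature: each input is shifted by its own offset before scaling, and the output gets its own offset after requantization. A standalone numeric sketch (all scales and offsets below are made-up illustrative values; the zero point and clamping mirror the surrounding kernel):

```cpp
// Illustrative int8 Add requantization, mirroring the scalar loop above.
#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t addInt8(int8_t x0, int8_t x1) {
    const int zeroPoint = 0;                      // as in the kernel
    const int8_t off0 = 1, off1 = -2, offOut = 0; // hypothetical per-tensor offsets
    const float s0 = 0.5f, s1 = 0.25f;            // inputScalesFp32[0], [1]
    const float sOutInv = 4.0f;                   // inputScalesFp32[2] == 1 / outputScale
    float inp0 = (x0 - zeroPoint - off0) * s0;    // dequantize input 0
    float inp1 = (x1 - zeroPoint - off1) * s1;    // dequantize input 1
    int value = (int)roundf((inp0 + inp1) * sOutInv) + zeroPoint + offOut;
    return (int8_t)std::min(127, std::max(-128, value)); // clamp to int8 range
}
```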
43 changes: 20 additions & 23 deletions source/backend/cpu/CPUBinaryInt8.cpp
@@ -16,8 +16,6 @@
#include "BinaryUtils.hpp"
#include "math/Vec.hpp"

using Vec16 = MNN::Math::Vec<int8_t, 16>;

namespace MNN {

ErrorCode CPUBinaryInt8::onResize(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
@@ -37,22 +35,24 @@ ErrorCode CPUBinaryInt8::onResize(const std::vector<Tensor*>& inputs, const std:

auto core = static_cast<CPUBackend*>(backend())->functions();

mInputQuant0.resize(core->pack); // prepare for arm neon. float32x4
mInputQuant1.resize(core->pack);
mOutputQuant.resize(core->pack);
std::fill(mInputQuant0.begin(), mInputQuant0.end(), TensorUtils::getDescribe(inputs[0])->quantAttr->scale);
std::fill(mInputQuant1.begin(), mInputQuant1.end(), TensorUtils::getDescribe(inputs[1])->quantAttr->scale);
mInputOffset0.resize(1);
mInputOffset1.resize(1);
mOutputOffset.resize(1);
mQuantScalesInt32.resize(2); // When computing with int32 scales, the output scale is not needed.
mQuantScalesFp32.resize(3);
mQuantScalesInt32[0] = TensorUtils::getDescribe(inputs[0])->quantAttr->scale * (1 << 16);
mQuantScalesInt32[1] = TensorUtils::getDescribe(inputs[1])->quantAttr->scale * (1 << 16);
mQuantScalesFp32[0] = TensorUtils::getDescribe(inputs[0])->quantAttr->scale;
mQuantScalesFp32[1] = TensorUtils::getDescribe(inputs[1])->quantAttr->scale;
if (TensorUtils::getDescribe(outputs[0])->quantAttr->scale != 0) {
std::fill(mOutputQuant.begin(), mOutputQuant.end(), 1 / TensorUtils::getDescribe(outputs[0])->quantAttr->scale);
mQuantScalesFp32[2] = 1 / TensorUtils::getDescribe(outputs[0])->quantAttr->scale;
} else {
std::fill(mOutputQuant.begin(), mOutputQuant.end(), 0);
mQuantScalesFp32[2] = 0;
}

mInputOffset0[0] = (int8_t)TensorUtils::getDescribe(inputs[0])->quantAttr->zero;
mInputOffset1[0] = (int8_t)TensorUtils::getDescribe(inputs[1])->quantAttr->zero;
mOutputOffset[0] = (int8_t)TensorUtils::getDescribe(outputs[0])->quantAttr->zero;

if(mActivationType == 1 && outputs[0]->getType().code == halide_type_float) {
mActivationExe.reset(new CPURelu(backend(), 0.0));
mActivationExe->onResize(outputs, outputs);
}
return NO_ERROR;
}
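`mQuantScalesInt32` holds the same scales in Q16.16 fixed point (`scale * (1 << 16)`); the consumers are presumably the NEON int8 kernels, which this diff does not show. A minimal sketch of what that representation means, with an assumed example scale:

```cpp
// Q16.16 fixed-point illustration; the 16-bit shift is inferred from (1 << 16) above.
#include <cstdint>
#include <cstdio>

int main() {
    float scale = 0.0125f;                             // hypothetical tensor scale
    int32_t fixedScale = (int32_t)(scale * (1 << 16)); // 819, i.e. 819.2 truncated
    int64_t x = 100;                                   // an integer intermediate
    int32_t scaled = (int32_t)((x * fixedScale) >> 16); // approximates x * scale
    printf("fixed: %d, float: %f\n", scaled, x * scale); // fixed: 1, float: 1.250000
    return 0;
}
```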

@@ -79,27 +79,24 @@ ErrorCode CPUBinaryInt8::onExecute(const std::vector<Tensor*>& inputs, const std
if (realSize > 0) {
auto inp0 = input0Ptr + start * inpBytes;
auto inp1 = input1Ptr + start * inpBytes;
auto scale0 = mInputQuant0.data() + start;
auto scale1 = mInputQuant1.data() + start;
auto scaleDst = mOutputQuant.data() + start;
auto offset0 = mInputOffset0.data();
auto offset1 = mInputOffset1.data();
auto offsetDst = mOutputOffset.data();
if (mNeedBroadcastIndex == 0) {
inp0 = input0Ptr;
} else if (mNeedBroadcastIndex == 1) {
inp1 = input1Ptr;
}
auto out = outputPtr + start * outBytes;
#ifdef MNN_USE_NEON
mProc(out, inp0, inp1, scale0, scale1, scaleDst, realSize / 4, mNeedBroadcastIndex);
mProc(out, inp0, inp1, mQuantScalesInt32.data(), mQuantScalesFp32.data(), offset0, offset1, offsetDst, realSize / 4, mNeedBroadcastIndex);
#else
mProc(out, inp0, inp1, scale0, scale1, scaleDst, realSize, mNeedBroadcastIndex);
mProc(out, inp0, inp1, mQuantScalesInt32.data(), mQuantScalesFp32.data(), offset0, offset1, offsetDst, realSize, mNeedBroadcastIndex);
#endif
}
}
MNN_CONCURRENCY_END();

if(mActivationType == 1 && output->getType().code == halide_type_float) {
mActivationExe->onExecute(outputs, outputs);
}

return NO_ERROR;
}

8 changes: 5 additions & 3 deletions source/backend/cpu/CPUBinaryInt8.hpp
@@ -31,9 +31,11 @@ class CPUBinaryInt8 : public Execution {
int mTotalSize;
int mActivationType = 0;
std::shared_ptr<Execution> mActivationExe;
std::vector<float> mInputQuant0;
std::vector<float> mInputQuant1;
std::vector<float> mOutputQuant;
std::vector<ssize_t> mQuantScalesInt32; // input0 and input1
std::vector<float> mQuantScalesFp32; // input0, input1 and output
std::vector<int8_t> mInputOffset0;
std::vector<int8_t> mInputOffset1;
std::vector<int8_t> mOutputOffset;
};
} // namespace MNN
#endif /* CPUBinary_hpp */
