Skip to content

Commit

Permalink
Merge pull request #2550 from alibaba/feature/sync
Browse files Browse the repository at this point in the history
[MNN:Sync] Sync Internal 2.6.3
  • Loading branch information
jxt1234 committed Aug 22, 2023
2 parents c603c52 + 6bb23da commit c442ff3
Show file tree
Hide file tree
Showing 89 changed files with 3,754 additions and 654 deletions.
Expand Up @@ -271,7 +271,7 @@ struct GemmBatchedIdentityThreadblockSwizzle {
return GemmCoord(
(problem_size.m() + tile_size.m() - 1) / tile_size.m(),
(problem_size.n() + tile_size.n() - 1) / tile_size.n(),
batch_count % (1 << 16));
batch_count >= 65536 ? 65535 : batch_count);
}

/// Computes CUDA grid dimensions given a size in units of logical tiles
Expand Down
45 changes: 24 additions & 21 deletions codegen/opencl/OpenCLTarget.cpp
Expand Up @@ -22,6 +22,9 @@ std::string OpenCLTarget::type() {
}
std::string OpenCLTarget::macro() {
return
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
"#define OFFSET_CHECK\\\n"
"\tconst int c = get_global_id(0), w = get_global_id(1), hb = get_global_id(2);\\\n"
"\tif (c >= global_size_dim0 || w >= global_size_dim1 || hb >= global_size_dim2) { return; }\\\n"
Expand Down Expand Up @@ -113,61 +116,61 @@ std::string OpenCLTarget::codegen(std::vector<std::string>& inputs, const Comman
ss << inpName << "=" << operand << " * " << operand;
break;
case UnaryOpOperation_ERF:
ss << inpName << "=erf(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(erf(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_ERFC:
ss << inpName << "=erfc(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(erfc(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_SQRT:
ss << inpName << "=sqrt(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(sqrt(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_RSQRT:
ss << inpName << "=rsqrt(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(rsqrt(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_ABS:
ss << inpName << "=fabs(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(fabs(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_SIN:
ss << inpName << "=sin(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(sin(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_COS:
ss << inpName << "=cos(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(cos(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_SIGN:
ss << inpName << "=sign(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(sign(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_EXP:
ss << inpName << "=exp(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(exp(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_NEG:
ss << inpName << "=-(" << operand << ")";
break;
case UnaryOpOperation_TAN:
ss << inpName << "=tan(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(tan(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_CEIL:
ss << inpName << "=ceil(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(ceil(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_LOG1P:
ss << inpName << "=log1p(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(log1p(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_FLOOR:
ss << inpName << "=floor(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(floor(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_ROUND:
ss << inpName << "=round(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(round(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_SIGMOID:
ss << inpName << "=native_recip((float4)1+native_exp(convert_float4(-" << operand << ")))";
ss << inpName << "=CONVERT_FLOAT4(native_recip((float4)1+native_exp(convert_float4(-" << operand << "))))";
break;
case UnaryOpOperation_TANH:
ss << inpName << "=tanh(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(tanh(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_RECIPROCAL:
ss << inpName << "=native_recip(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(native_recip(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_LOG:
ss << inpName << "=native_log(convert_float4(" << operand << "+(float4)((float)0.0000001)))";
ss << inpName << "=CONVERT_FLOAT4(native_log(convert_float4(" << operand << ")+(float4)((float)0.0000001)))";
break;
default:
MNN_ASSERT(false);
Expand Down Expand Up @@ -198,13 +201,13 @@ std::string OpenCLTarget::codegen(std::vector<std::string>& inputs, const Comman
return ss.str();
}
std::string OpenCLTarget::load(const std::string& base, const std::string& offset, const Command* cmd, std::string& inpName) {
    // Emit an OpenCL image read into a FLOAT4 local named `inpName`.
    // RI_F is MNN's precision-aware read macro (presumably read_imagef vs.
    // read_imageh under MNN_SUPPORT_FP16 — see the cl_khr_fp16 pragma emitted
    // by macro()); using it instead of a hard-coded read_imagef keeps the
    // generated kernel valid for both fp32 and fp16 builds.
    // `cmd` is currently unused here but kept for interface parity with the
    // other Target backends.
    return "FLOAT4 " + inpName + "=RI_F(" + base + ", SAMPLER, " + offset + ")";
}
std::string OpenCLTarget::loadscalar(const std::string& base, std::string& inpName) {
    // Emit a scalar broadcast load: read the texel at (0,0) with the
    // precision-aware RI_F macro and take its .x lane; assigning that scalar
    // to a FLOAT4 splats it across all four lanes in OpenCL C.
    return "FLOAT4 " + inpName + "=(RI_F(" + base + ", SAMPLER, (int2)(0, 0)).x)";
}
std::string OpenCLTarget::store(const std::string base, const std::string& offset, const std::string& data) {
    // Emit an OpenCL image write statement. WI_F is the precision-aware
    // write macro matching RI_F in load()/loadscalar(), so stores agree with
    // loads on the element type in both fp32 and fp16 builds.
    return "WI_F(" + base + ", " + offset + ", " + data + ");\n";
}

std::string OpenCLTarget::proto(const std::string& name, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool hasSingleConvertRaster) {
Expand Down
2 changes: 1 addition & 1 deletion codegen/opencl/OpenCLTarget.hpp
Expand Up @@ -22,7 +22,7 @@ class OpenCLTarget : public Target {
std::string store(const std::string base, const std::string& offset, const std::string& data) override;
std::string proto(const std::string& name, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool hasSingleConvertRaster = false) override;
template <typename T>
// Render a numeric constant as a broadcast vector literal in generated
// OpenCL code. Uses the FLOAT4 macro (not a hard-coded float4) so the
// literal's element type tracks the kernel's precision mode — presumably
// half4 when MNN_SUPPORT_FP16 is enabled; confirm against the FLOAT4
// definition in the OpenCL backend headers.
std::string numval(T t) { return "((FLOAT4)" + std::to_string(t) + ")"; }
};

}
154 changes: 53 additions & 101 deletions docs/inference/python.md
Expand Up @@ -29,7 +29,59 @@ MNN在C++的基础上,增加了Python扩展。扩展单元包括两个部分
### MNNTools
MNNTools目前主要提供2个工具,用法可以参考[mnnconvert](../tools/python.html#mnnconvert)和[mnnquant](../tools/python.html#mnnquant)

## 使用Python Session API
## 使用Python Module API
### 数据类型
Python中的`Module API`与C++中的函数名略有区别,用法相似。主要数据类型如下:
- [_Module](../pymnn/_Module.md) 模型实例
- [Var](../pymnn/Var.md) 模型的输入输出
### 推理流程
基本推理流程如下:
- [创建Module](../pymnn/nn.html#load-module-from-file-file-name-input-names-output-names-dynamic-shape-mutable-rearrange-backend-memory-mode-power-mode-precision-mode)
- 创建输入: 使用`expr`或`numpy`函数创建`Var`即可作为输入
- [执行推理](../pymnn/_Module.html#forward-input)
- 获取输出: 输出为`Var`类型,可以通过`expr`或`numpy`函数执行后处理
### 示例
```python
import MNN.nn as nn
import MNN.cv as cv
import MNN.numpy as np
import MNN.expr as expr

# 配置执行后端,线程数,精度等信息;key-value请查看API介绍
config = {}
config['precision'] = 'low' # 当硬件支持(armv8.2)时使用fp16推理
config['backend'] = 0 # CPU
config['numThread'] = 4 # 线程数

rt = nn.create_runtime_manager((config,))
# 加载模型创建_Module
net = nn.load_module_from_file('mobilenet_v1.mnn', ['data'], ['prob'], runtime_manager=rt)

# 读取图片
image = cv.imread('cat.jpg')
# 转换为float32, 形状为[224,224,3]
image = cv.resize(image, (224, 224), mean=[103.94, 116.78, 123.68], norm=[0.017, 0.017, 0.017])
# 增加batch HWC to NHWC
input_var = np.expand_dims(image, 0)
# NHWC to NC4HW4
input_var = expr.convert(input_var, expr.NC4HW4)

# 执行推理
output_var = net.forward(input_var)

# NC4HW4 to NHWC
output_var = expr.convert(output_var, expr.NHWC)
# 打印出分类结果, 282为猫
print("output belong to class: {}".format(np.argmax(output_var)))
# output belong to class: 282
```
其他示例可以参考[示例](../pymnn/RuntimeManager.html#example);也可以参考[示例工程](../start/demo.html#id5)


## 使用Python Session API *[deprecated]*

不建议使用该API执行推理,建议使用Module API

### 数据类型
Python中`Session API`的函数名与用法与C++基本一样。使用的主要数据类型如下:
- [Interpreter](../pymnn/Interpreter.md) 解释器,持有模型资源
Expand Down Expand Up @@ -118,107 +170,7 @@ print("output belong to class: {}".format(np.argmax(output_var, 1)))
# output belong to class: array([282, 385], dtype=int32)
```
其他示例可以参考[示例](../pymnn/Interpreter.html#example);也可以参考[示例工程](../start/demo.html#session)
## 使用Python Module API
### 数据类型
Python中的`Module API`与C++中的函数名略有区别,用法相似。主要数据类型如下:
- [_Module](../pymnn/_Module.md) 模型实例
- [Var](../pymnn/Var.md) 模型的输入输出
### 推理流程
基本推理流程如下:
- [创建Module](../pymnn/nn.html#load-module-from-file-file-name-input-names-output-names-dynamic-shape-mutable-rearrange-backend-memory-mode-power-mode-precision-mode)
- 创建输入: 使用`expr`或`numpy`函数创建`Var`即可作为输入
- [执行推理](../pymnn/_Module.html#forward-input)
- 获取输出: 输出为`Var`类型,可以通过`expr`或`numpy`函数执行后处理
### 示例
```python
import MNN.nn as nn
import MNN.cv as cv
import MNN.numpy as np
import MNN.expr as expr

# 配置执行后端,线程数,精度等信息;key-value请查看API介绍
config = {}
config['precision'] = 'low' # 当硬件支持(armv8.2)时使用fp16推理
config['backend'] = 0 # CPU
config['numThread'] = 4 # 线程数

rt = nn.create_runtime_manager((config,))
# 加载模型创建_Module
net = nn.load_module_from_file('mobilenet_v1.mnn', ['data'], ['prob'], runtime_manager=rt)

# 读取图片
image = cv.imread('cat.jpg')
# 转换为float32, 形状为[224,224,3]
image = cv.resize(image, (224, 224), mean=[103.94, 116.78, 123.68], norm=[0.017, 0.017, 0.017])
# 增加batch HWC to NHWC
input_var = np.expand_dims(image, 0)
# NHWC to NC4HW4
input_var = expr.convert(input_var, expr.NC4HW4)

# 执行推理
output_var = net.forward(input_var)

# NC4HW4 to NHWC
output_var = expr.convert(output_var, expr.NHWC)
# 打印出分类结果, 282为猫
print("output belong to class: {}".format(np.argmax(output_var)))
# output belong to class: 282
```
其他示例可以参考[示例](../pymnn/RuntimeManager.html#example);也可以参考[示例工程](../start/demo.html#id5)

## 使用Python Expr API
### 数据类型
Python的`Expr API`相比C++在命名和使用方式上略有区别,但是功能一致。主要数据类型如下:
- [Var](../pymnn/Var.md) 表达式计算中的变量
### 主要用法
因为`Expr`不仅有模型推理的能力,还具备数值计算的能力。在实际使用中`Expr`被用作构图或者计算的情况更多,实际用来执行模型推理的情况并不多,当`Expr`用作模型推理时的主要流程如下:
- [加载计算图](../pymnn/expr.html#load-as-dict-filename)
- 获取输入输出:直接使用Python中的`dict`的方式获取,如:`net['input']`
- [写入输入数据](../pymnn/Var.html#write-data)
- [读取输出数据](../pymnn/Var.html#read):读取数据不限于`read`,尝试打印和使用都可能触发读取操作
### 示例
`Expr`用作模型推理:
```python
import MNN.cv as cv
import MNN.numpy as np
import MNN.expr as expr

net = expr.load_as_dict('mobilenet_v1.mnn')
input_var = net['data']
output_var = net['prob']

# 读取图片
image = cv.imread('cat.jpg')
# 转换为float32, 形状为[224,224,3]
image = cv.resize(image, (224, 224), mean=[103.94, 116.78, 123.68], norm=[0.017, 0.017, 0.017])
# 增加batch HWC to NHWC
input_data = np.expand_dims(image, 0)
# NHWC to NC4HW4
input_data = expr.convert(input_data, expr.NC4HW4)

input_var.write(input_data.read_as_tuple())

# 打印出分类结果, 282为猫
print("output belong to class: {}".format(np.argmax(output_var)))
```
`Expr`用于数值计算与数据存取:
```python
import MNN.numpy as np
import MNN.expr as expr

x = expr.range(0., 10., 1.)
y = expr.fill([10], 3.1415)
z = expr.sin(x * y + x / y)
expr.save([z], 'z.mnn')
a = expr.load_as_list('z.mnn')[0]
print(a)
'''
array([ 0. , -0.31288275, 0.59434694, -0.8161286 , 0.955958 ,
-0.9997932 , 0.943233 , -0.79195637, 0.561154 , -0.27400237],
dtype=float32)
'''
```
其他示例可以参考[示例](../pymnn/Var.html#example);也可以参考[示例工程](../start/demo.html#id5)
## 使用cv/numpy API
### 数据类型
Python的`cv`和`numpy`接口,其中`cv`是对C++中`tools/cv`实现的封装;`numpy`则是对`expr`接口的封装;这两个接口主要为了提高MNN的易用性,与`opencv`和`numpy`做到了在接口上的部分兼容,在用法和思路上基本一致。主要数据类型如下:
Expand Down

0 comments on commit c442ff3

Please sign in to comment.