参考官方文档：
[离线量化](https://paddleslim.readthedocs.io/zh_CN/latest/quick_start/static/quant_post_static_tutorial.html)

# 量化
模型量化包含三种方法：动态离线量化、静态离线量化和量化训练。本文演示其中的静态离线量化。

下图展示了如何选择模型量化方法。
![模型量化方法选择流程图](https://user-images.githubusercontent.com/52520497/83991261-cbe55800-a97e-11ea-880c-d83fb7924454.png)

In [None]:
# 1. Import dependencies

import paddle
import paddleslim as slim
import numpy as np
# Switch Paddle to static-graph mode: the cells below build
# paddle.static Programs and run them through a paddle.static.Executor.
paddle.enable_static()

# 2. Build the network
# Build a classifier for MNIST: MobileNetV1 with the input size set to
# [1, 28, 28] and 10 output classes. For convenience, paddleslim.models
# provides predefined builders for classification models; the code below
# uses one of them.
USE_GPU = False
model = slim.models.MobileNet()
train_program = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(train_program, startup):
    image = paddle.static.data(
        name='image', shape=[None, 1, 28, 28], dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    gt = paddle.reshape(label, [-1, 1])
    out = model.net(input=image, class_dim=10)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=gt)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=gt, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=gt, k=5)
    # Clone the evaluation program BEFORE the optimizer appends backward and
    # update ops. Paddle's Program.clone documentation recommends this order
    # rather than relying on clone() to prune those ops afterwards.
    val_program = train_program.clone(for_test=True)
    opt = paddle.optimizer.Momentum(0.01, 0.9)
    opt.minimize(avg_cost)

# Run the startup program once on the chosen device to initialize parameters.
place = paddle.CUDAPlace(0) if USE_GPU else paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup)

# 3. Train the model
import paddle.vision.transforms as T

# Preprocessing shared by both splits: Transpose with its default axis order,
# then Normalize with mean 127.5 and std 127.5.
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])

train_dataset = paddle.vision.datasets.MNIST(
    mode="train", backend="cv2", transform=transform)
test_dataset = paddle.vision.datasets.MNIST(
    mode="test", backend="cv2", transform=transform)


def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader that feeds ``image`` and ``label``.

    Both splits use batches of 64, drop the last incomplete batch, and use
    ``return_list=False``; only the shuffling differs.
    """
    return paddle.io.DataLoader(
        dataset,
        places=place,
        feed_list=[image, label],
        drop_last=True,
        batch_size=64,
        return_list=False,
        shuffle=shuffle)


train_loader = _make_loader(train_dataset, shuffle=True)
test_loader = _make_loader(test_dataset, shuffle=False)

# Names of the variables fetched on every step, in order:
# top-1 accuracy, top-5 accuracy, mean loss.
outputs = [acc_top1.name, acc_top5.name, avg_cost.name]


def train(prog):
    """Run one pass over ``train_loader`` with ``prog`` and log progress.

    Every batch is executed through the module-level executor ``exe``,
    fetching the variables listed in ``outputs``. Metrics are printed every
    100 iterations, starting with iteration 0.

    Args:
        prog: The program handed to ``exe.run`` for each batch.
    """
    # enumerate() replaces a hand-maintained counter that was named `iter`
    # and therefore shadowed the builtin.
    for step, data in enumerate(train_loader()):
        acc1, acc5, loss = exe.run(prog, feed=data, fetch_list=outputs)
        if step % 100 == 0:
            print('train iter={}, top1={}, top5={}, loss={}'.format(
                step, acc1.mean(), acc5.mean(), loss.mean()))

def test(prog, outputs=outputs):
    """Evaluate ``prog`` on ``test_loader`` and print the mean accuracies.

    Every batch is executed through the module-level executor ``exe``.
    Per-batch metrics are printed every 100 iterations. After the loop, the
    final line reports the unweighted mean of the per-batch top-1 and top-5
    accuracies.

    Args:
        prog: The program handed to ``exe.run`` for each batch.
        outputs: Fetch list passed to ``exe.run``. It must produce three
            results per run, unpacked as (top-1 accuracy, top-5 accuracy,
            loss). Defaults to the module-level ``outputs`` list.
    """
    top1_per_batch = []
    top5_per_batch = []
    # enumerate() replaces a hand-maintained counter that was named `iter`
    # and therefore shadowed the builtin.
    for step, data in enumerate(test_loader()):
        acc1, acc5, loss = exe.run(prog, feed=data, fetch_list=outputs)
        if step % 100 == 0:
            print('test iter={}, top1={}, top5={}, loss={}'.format(
                step, acc1.mean(), acc5.mean(), loss.mean()))
        top1_per_batch.append(acc1.mean())
        top5_per_batch.append(acc5.mean())
    print('final test result',
          np.array(top1_per_batch).mean(),
          np.array(top5_per_batch).mean())

In [None]:
# Make one pass over the training data, then evaluate the FP32 model on the
# test loader. This runs before any quantization is applied.
train(train_program)
test(val_program)


In [None]:
# 4. Save the model
# Export val_program as an inference model under the path prefix
# ./inference_model/fp32. The feed and fetch variables listed here define
# the saved model's inputs and outputs.
paddle.static.save_inference_model(
    path_prefix='./inference_model/fp32',
    program=val_program,
    executor=exe,
    feed_vars=[image, label],
    fetch_vars=[acc_top1, acc_top5, avg_cost],
)

In [None]:
# 5. Post-training static quantization
# Load the FP32 inference model saved above (fp32.pdmodel / fp32.pdiparams
# in ./inference_model), quantize it using samples from the MNIST test
# reader, and write the result to ./quant_post_static_model.
# NOTE(review): batch_nums presumably caps the number of calibration batches
# drawn from sample_generator -- confirm against the PaddleSlim docs.
slim.quant.quant_post_static(
    executor=exe,
    model_dir='./inference_model',
    model_filename='fp32.pdmodel',
    params_filename='fp32.pdiparams',
    quantize_model_path='./quant_post_static_model',
    sample_generator=paddle.dataset.mnist.test(),
    batch_nums=10,
)
