In [3]:
import torch
from torch import nn
from torch.ao.quantization import QConfigMapping, default_dynamic_qconfig
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

# 定义一个最简单的模型
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.linear = nn.Linear(10, 5)

    def forward(self, x):
        return self.linear(x)

# 创建模型实例
float_model = SimpleModel()

# 设置模型为评估模式
float_model.eval()

# 生成随机输入数据
example_inputs = (torch.randn(1, 10),)

# 设置量化配置
qconfig = default_dynamic_qconfig  # 使用动态量化配置
qconfig_mapping = QConfigMapping().set_global(qconfig)

# 准备模型进行量化（插入观察者）
prepared_model = prepare_fx(float_model, qconfig_mapping, example_inputs)

# 动态量化不需要校准步骤，直接转换模型
quantized_model = convert_fx(prepared_model)

# 验证量化模型
input_data = torch.randn(1, 10)
output = quantized_model(input_data)
print("Quantized model output:", output)

# 对比原始模型的输出
float_output = float_model(input_data)
print("Original model output:", float_output)

Quantized model output: tensor([[ 1.0689,  0.2674,  0.6647,  0.5039, -0.1480]])
Original model output: tensor([[ 1.0724,  0.2702,  0.6685,  0.5064, -0.1490]],
       grad_fn=<AddmmBackward0>)


In [8]:
quantized_model.linear

DynamicQuantizedLinear(in_features=10, out_features=5, dtype=torch.qint8, qscheme=torch.per_tensor_affine)