In [17]:
from transformers import LlamaForCausalLM, AutoTokenizer
import torch

### 加载预训练模型

In [18]:
# Directory that both the tokenizer and the model are loaded from.
model_dir = "models/story/"

# Restore the tokenizer and the causal-LM from that same directory.
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = LlamaForCausalLM.from_pretrained(model_dir)

### 创建输入数据
使用分词器将文本转化为模型的输入形式。

In [19]:
# Sample sentence used to probe the model.
input_text = "这是一个测试句子."

# Tokenize into a PyTorch tensor of token ids, then show its shape followed by
# its contents (one per line).
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
print(input_ids.shape, input_ids, sep="\n")

torch.Size([1, 4])
tensor([[ 1, 80,  0, 10]])


### 启用调试模式并逐层检查
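通过 `torch.no_grad()` 关闭梯度计算，从而减少推理时的内存占用和计算量。注意：`torch.no_grad()` 本身并不会把模型切换到推理（eval）模式——通过 `from_pretrained()` 加载的模型默认已处于 eval 模式，否则需要显式调用 `model.eval()`。你可以通过注册 forward hook 来提取模型各层的输出。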

In [20]:
# Intermediate outputs captured during the forward pass, keyed by the module
# object that produced them. Rebinding the name here discards results from any
# previous run of this cell.
outputs = {}

def hook_fn_forward(module, input, output):
    """Forward hook that records a module's output.

    Args:
        module: The module whose forward pass just finished; used as the key.
        input: The positional inputs the module received (unused).
        output: Whatever the module returned; stored as-is.

    A module that is invoked more than once keeps only its most recent output,
    because each call overwrites the same dictionary entry.
    """
    outputs[module] = output

# Register the hook on every module under model.model and keep the returned
# handles: without them the hooks can never be detached, so re-running this
# cell would stack duplicate hooks and every later forward pass would silently
# overwrite `outputs`.
hook_handles = [
    module.register_forward_hook(hook_fn_forward)
    for module in model.model.modules()
]

# Run the model once with gradient tracking disabled, then always detach the
# hooks, even if the forward pass raises. Re-run this cell to capture another pass.
try:
    with torch.no_grad():
        model(input_ids)
finally:
    for handle in hook_handles:
        handle.remove()


In [21]:
# Dump every captured output, each preceded by the printed form of the module
# that produced it.
for layer, output in outputs.items():
    print(f"Layer: {layer}", output, sep="\n")

Layer: Embedding(2048, 128)
tensor([[[-3.1250e-01, -3.1641e-01,  6.8359e-02,  4.5410e-02,  3.2617e-01,
          -4.5898e-01,  2.5146e-02,  4.0625e-01, -2.8516e-01,  9.1016e-01,
          -1.9727e-01,  3.2715e-02, -2.4658e-02,  2.0020e-01,  2.1973e-01,
           4.9609e-01, -1.0010e-01, -1.7188e-01,  1.5430e-01, -4.1797e-01,
           1.9531e-01,  5.2734e-01, -5.9375e-01,  1.7676e-01, -3.1250e-01,
          -4.1748e-02,  5.1758e-02, -5.0000e-01,  3.0884e-02, -2.6953e-01,
          -4.9072e-02,  6.4941e-02, -4.4922e-01,  1.8848e-01,  2.5586e-01,
           1.0078e+00,  5.2734e-02, -3.4570e-01, -1.5625e-01, -3.9062e-01,
          -3.2617e-01, -1.8359e-01, -1.3867e-01,  1.9043e-01,  1.5039e-01,
          -3.5938e-01,  2.7734e-01, -1.4844e-01,  1.3672e-01, -8.1055e-02,
           8.9062e-01, -3.0469e-01,  6.7871e-02, -7.9102e-02, -1.9336e-01,
          -6.0547e-02, -3.6865e-02,  1.7480e-01, -3.4180e-01, -3.0469e-01,
          -4.9219e-01, -2.1289e-01,  1.6309e-01,  4.0527e-02,  2.7148e-0