In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import matplotlib.pyplot as plt
import os
import numpy as np

model_name = "/home/xwj/Model/llama-3-8b"
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

output_dir = "parameter_distribution/llama3-8b"
os.makedirs(output_dir, exist_ok=True)

# 1️⃣ 先遍历获取整体最大最小
all_vals = []

with torch.no_grad():
    for name, param in model.named_parameters():
        # if ("self_attn" in name or "mlp" in name) and "weight" in name:
        if ("self_attn" in name) and "weight" in name:
            data = param.detach().cpu().numpy().flatten()
            all_vals.append(data)

all_vals = np.concatenate(all_vals)
min_val = all_vals.min()
max_val = all_vals.max()

print(f"整体最小:{min_val},整体最大:{max_val}")

# 2️⃣ 再渲染时统一坐标轴刻度
with torch.no_grad():
    for name, param in model.named_parameters():
        if ("self_attn" in name or "mlp" in name) and "weight" in name:
            data = param.detach().cpu().numpy().flatten()
            mean = data.mean()
            std = data.std()
            print(f'{name}, mean: {mean}, std: {std}')

            plt.figure()
            plt.hist(data, bins=100, density=True)
            plt.title(f'{name}\nmean: {mean:.4f}, std: {std:.4f}')
            plt.xlabel("Weight values")
            plt.ylabel("Frequency")
            plt.xlim(min_val, max_val)  # 统一坐标轴刻度

            fname = os.path.join(output_dir, f'{name}.png')
            plt.savefig(fname)
            plt.close()


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

整体最小:-0.7734375,整体最大:0.83203125
model.layers.0.self_attn.q_proj.weight, mean: 2.4811811272229534e-08, std: 0.017749862745404243
model.layers.0.self_attn.k_proj.weight, mean: -7.251437637023628e-06, std: 0.025779811665415764
model.layers.0.self_attn.v_proj.weight, mean: -2.1604150788334664e-06, std: 0.006510408595204353
model.layers.0.self_attn.o_proj.weight, mean: 4.487260696350859e-07, std: 0.007428198587149382
model.layers.0.mlp.gate_proj.weight, mean: 7.041986918920884e-06, std: 0.011575045995414257
model.layers.0.mlp.up_proj.weight, mean: 5.368186783982765e-09, std: 0.010499231517314911
model.layers.0.mlp.down_proj.weight, mean: -4.739023609090509e-07, std: 0.010473846457898617
model.layers.1.self_attn.q_proj.weight, mean: -3.3340138543280773e-06, std: 0.018395552411675453
model.layers.1.self_attn.k_proj.weight, mean: 3.0159699235809967e-06, std: 0.026296289637684822
model.layers.1.self_attn.v_proj.weight, mean: 1.4056530517336796e-06, std: 0.007758066523820162
model.layers.1.self_