In [1]:
# Reference: https://medium.com/@edandwe/a-guide-to-craft-your-own-custom-hugging-face-model-ba9cd555a646
# Reference: https://huggingface.co/docs/transformers/v4.34.0/custom_models
import torch
import torch.nn as nn
import torchvision

# Report the installed library versions so the recorded outputs below can be
# tied to a concrete environment.
print("Torch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)
# Value of torch.version.cuda, printed without a label.
print(torch.version.cuda)

import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')

from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer, LlamaForCausalLM
from datasets import Dataset

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, AutoModel

# Warnings
# Installs a blanket "ignore" filter with no category or module restriction,
# so warnings emitted by the cells below are not shown.
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

from typing import Type, Optional, Tuple
from custom_llama.modeling_custom_llama import CustomLlamaModel, CustomLlamaForCausalLM, print_model_layers
from custom_llama.configuration_custom_llama import MyLlamaConfig

# Notebook-wide settings used by the cells below.
save_directory = "model/custom_llama"   # local directory the model, tokenizer and config are saved into
repo_id = "yuntaozh/custom_llama"       # Hugging Face Hub repository id
base_model = "meta-llama/Llama-3.2-1B"  # checkpoint the weights and tokenizer are loaded from
max_seq_length = 5020                   # not referenced by any cell shown in this notebook

Torch version: 2.4.0+cu121
Torchvision version: 0.19.0+cu121
12.1


2024-11-29 20:32:02.796687: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-29 20:32:02.796718: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-29 20:32:02.798270: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-29 20:32:02.809036: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Register the custom config and model classes with the Auto* classes.
MyLlamaConfig.register_for_auto_class()
for auto_class_name, custom_model_cls in (
    ("AutoModel", CustomLlamaModel),
    ("AutoModelForCausalLM", CustomLlamaForCausalLM),
):
    custom_model_cls.register_for_auto_class(auto_class_name)

In [3]:
# Load the reference checkpoint and its configuration.
llama = AutoModel.from_pretrained(base_model)
llama_config = AutoConfig.from_pretrained(base_model)

# Build the custom config from every attribute stored on the base config
# instance (vars() exposes the instance __dict__).
base_config_attrs = vars(llama_config)
myconfig = MyLlamaConfig(**base_config_attrs)

# Map each Auto* entry point to the "<module>.<class>" that implements it;
# the module names match the .py files copied next to the checkpoint later on.
myconfig.auto_map = dict(
    AutoModel="modeling_custom_llama.CustomLlamaModel",
    AutoModelForCausalLM="modeling_custom_llama.CustomLlamaForCausalLM",
    AutoConfig="configuration_custom_llama.MyLlamaConfig",
)
print(myconfig)

MyLlamaConfig {
  "_name_or_path": "meta-llama/Llama-3.2-1B",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_custom_llama.MyLlamaConfig",
    "AutoModel": "modeling_custom_llama.CustomLlamaModel",
    "AutoModelForCausalLM": "modeling_custom_llama.CustomLlamaForCausalLM"
  },
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "custom_llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddin

In [4]:
# Build the custom model from the config alone; no pretrained weights are
# passed here (they are copied over from `llama` in the next cell).
# The recorded output shows "Replacing:  Linear <class ...MyLinear>" lines
# for this cell.
custom_llama = CustomLlamaModel(myconfig)
# Print the model's layers using the helper imported from the project module.
print_model_layers(custom_llama)

Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'custom_llama.modeling_cu

In [5]:
# state_dict() returns a dictionary (a snapshot) keyed by parameter name.
# Replacing entries in that dictionary does not change the model by itself,
# which is why load_state_dict() is called at the end of the cell.
custom_state_dict = custom_llama.state_dict()

# In the custom model q_proj and k_proj are MyLinear wrappers whose original
# projection lives in a `.linear` sub-module, so their tensors are stored
# under "<...>.q_proj.linear.<leaf>" instead of "<...>.q_proj.<leaf>".
wrapped_projections = ("q_proj", "k_proj")

for name, param in llama.named_parameters():
    module_path, _, leaf = name.rpartition(".")
    if module_path.endswith(wrapped_projections):
        # Insert ".linear" before the leaf name. Unlike a textual replace of
        # "weight", this also remaps a bias tensor when the projection has one.
        target_name = f"{module_path}.linear.{leaf}"
    else:
        target_name = name

    # Fail early with a readable message instead of silently adding a key the
    # custom model does not own.
    if target_name not in custom_state_dict:
        raise KeyError(
            f"Parameter '{name}' maps to '{target_name}', "
            "which does not exist in the custom model's state_dict"
        )

    # Detached copy so the two models never share storage.
    custom_state_dict[target_name] = param.detach().clone()

# The edited dictionary must be loaded back for the copied weights to take effect.
# Kept as the last expression so the notebook displays the load report.
custom_llama.load_state_dict(custom_state_dict)

# # 检查是否成功加载参数
# for name, param in custom_llama.named_parameters():
#     print(name,param)

# for name, param in llama.named_parameters():
#     print(name,param)

<All keys matched successfully>

In [6]:
# First create the Hugging Face repo, then associate it with the local directory.
# NOTE(review): HfFolder is imported but not used in any cell shown here.
# NOTE(review): `Repository` appears to be deprecated in recent huggingface_hub
# releases in favour of the HTTP-based HfApi methods — confirm before upgrading.
from huggingface_hub import HfApi, HfFolder, Repository
api = HfApi()
# exist_ok=True is passed so that the call is presumably a no-op when the repo
# already exists — confirm against the huggingface_hub documentation.
api.create_repo(repo_id=repo_id, exist_ok=True)

# Clone the Hub repo into save_directory; the recorded output reports cloning
# "into local empty directory".
repo = Repository(local_dir=save_directory, clone_from=repo_id)

Cloning https://huggingface.co/yuntaozh/custom_llama into local empty directory.


In [7]:
# Track large files with git lfs: every *.json and *.safetensors file.
repo.lfs_track(["*.json", "*.safetensors"])

# Commit and push the .gitattributes change on its own, before any model files.
repo.git_add(".gitattributes")
repo.git_commit("Modify git lfs")
# Last expression of the cell: the recorded output shows the commit URL it returned.
repo.git_push()

To https://huggingface.co/yuntaozh/custom_llama
   3453744..11b63b5  main -> main



'https://huggingface.co/yuntaozh/custom_llama/commit/11b63b5d3e3277ecbc57f014efc2fd398e239920'

In [8]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
# # Without the statements below, trainer.train() may raise an error.
# # NOTE(review): `model` is not defined until a later cell, so the second
# # check cannot be enabled here as written.
# if tokenizer.pad_token_id is None:
#     tokenizer.pad_token_id = tokenizer.eos_token_id
# if model.config.pad_token_id is None:
#     model.config.pad_token_id = model.config.eos_token_id

In [9]:
# Write the model, the tokenizer and the config into the local repository
# clone, in that order.
for artifact in (custom_llama, tokenizer, myconfig):
    artifact.save_pretrained(save_directory)

In [10]:
import shutil

# Place the custom modeling/configuration sources next to the saved checkpoint
# so they are committed together with it.
for source_path in (
    "custom_llama/modeling_custom_llama.py",
    "custom_llama/configuration_custom_llama.py",
):
    shutil.copy(source_path, save_directory)

# Kept as a bare final expression so the cell still displays the copied file's path.
shutil.copy("custom_llama/__init__.py", save_directory)

'model/custom_llama/__init__.py'

In [11]:
# Add all files, commit them, and push to the Hub repository.
repo.git_add()
repo.git_commit("Upload custom_llama")
# Last expression of the cell: the recorded output shows the upload progress
# and the commit URL it returned.
repo.git_push()

Upload file model.safetensors:   0%|          | 1.00/4.60G [00:00<?, ?B/s]

Upload file tokenizer.json:   0%|          | 1.00/16.4M [00:00<?, ?B/s]

Upload file special_tokens_map.json:   0%|          | 1.00/301 [00:00<?, ?B/s]

Upload file tokenizer_config.json:   0%|          | 1.00/49.3k [00:00<?, ?B/s]

Upload file config.json:   0%|          | 1.00/1.08k [00:00<?, ?B/s]

To https://huggingface.co/yuntaozh/custom_llama
   11b63b5..ab192f6  main -> main



'https://huggingface.co/yuntaozh/custom_llama/commit/ab192f6037e0cb576942f0e6ff9ed9071dddb3ef'

In [12]:
# from huggingface_hub import HfApi, HfFolder, Repository

# save_directory="model/ours_pretrained"
# api = HfApi()

# repo = Repository(local_dir=save_directory, clone_from=repo_id)

# # # Add all files and push
# repo.git_add()
# repo.git_commit("Update config")
# repo.git_push()

In [13]:
# from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, AutoModel
# config = AutoConfig.from_pretrained(
#     "model/ours_pretrained",
#     trust_remote_code=True
# )
# # model = AutoModel.from_config(config, trust_remote_code=True)

# model = AutoModelForCausalLM.from_pretrained(
#     "model/ours_pretrained",
#     trust_remote_code=True
# )

In [14]:
# Reload the configuration from the Hub to check that the pushed repository
# resolves to the custom config class. `repo_id` is used instead of repeating
# the literal so this check always targets the repository that was pushed above.
# trust_remote_code=True lets transformers run configuration_custom_llama.py
# downloaded from that repository; the recorded output suggests pinning a
# revision to avoid picking up new versions of the code file.
config = AutoConfig.from_pretrained(
    repo_id,
    trust_remote_code=True
)
print(config)

config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/yuntaozh/custom_llama:
- configuration_custom_llama.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


MyLlamaConfig {
  "_name_or_path": "yuntaozh/custom_llama",
  "architectures": [
    "CustomLlamaModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "yuntaozh/custom_llama--configuration_custom_llama.MyLlamaConfig",
    "AutoModel": "yuntaozh/custom_llama--modeling_custom_llama.CustomLlamaModel",
    "AutoModelForCausalLM": "yuntaozh/custom_llama--modeling_custom_llama.CustomLlamaForCausalLM"
  },
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "custom_llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_

In [18]:
# Reload the pushed checkpoint through AutoModelForCausalLM to check that the
# auto_map entry resolves to the custom class. `repo_id` is used instead of
# repeating the literal so this always targets the repository pushed above.
# trust_remote_code=True runs the modeling code downloaded from that repository.
# NOTE(review): the reloaded config lists "CustomLlamaModel" under
# architectures, i.e. the checkpoint was saved from the base model rather than
# the causal-LM class — confirm the LM head weights end up tied/initialised as intended.
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True
)

Replacing:  Linear <class 'transformers_modules.yuntaozh.custom_llama.ab192f6037e0cb576942f0e6ff9ed9071dddb3ef.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'transformers_modules.yuntaozh.custom_llama.ab192f6037e0cb576942f0e6ff9ed9071dddb3ef.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'transformers_modules.yuntaozh.custom_llama.ab192f6037e0cb576942f0e6ff9ed9071dddb3ef.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'transformers_modules.yuntaozh.custom_llama.ab192f6037e0cb576942f0e6ff9ed9071dddb3ef.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'transformers_modules.yuntaozh.custom_llama.ab192f6037e0cb576942f0e6ff9ed9071dddb3ef.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'transformers_modules.yuntaozh.custom_llama.ab192f6037e0cb576942f0e6ff9ed9071dddb3ef.modeling_custom_llama.MyLinear'>
Replacing:  Linear <class 'transformers_modules.yuntaozh.custom_llama.ab192f6037e0cb576942f0e6ff9ed9071dddb3ef.modeling_custom_llama.MyL

In [19]:
# List the reloaded model's sub-modules; the recorded output shows one
# "<module path>: <class name>" line per module, with q_proj/k_proj as MyLinear.
print_model_layers(model)

model: CustomLlamaModel
model.embed_tokens: Embedding
model.layers: ModuleList
model.layers.0: LlamaDecoderLayer
model.layers.0.self_attn: LlamaSdpaAttention
model.layers.0.self_attn.q_proj: MyLinear
model.layers.0.self_attn.q_proj.linear: Linear
model.layers.0.self_attn.q_proj.rms_norm: RMSNorm
model.layers.0.self_attn.k_proj: MyLinear
model.layers.0.self_attn.k_proj.linear: Linear
model.layers.0.self_attn.k_proj.rms_norm: RMSNorm
model.layers.0.self_attn.v_proj: Linear
model.layers.0.self_attn.o_proj: Linear
model.layers.0.self_attn.rotary_emb: LlamaRotaryEmbedding
model.layers.0.mlp: LlamaMLP
model.layers.0.mlp.gate_proj: Linear
model.layers.0.mlp.up_proj: Linear
model.layers.0.mlp.down_proj: Linear
model.layers.0.mlp.act_fn: SiLU
model.layers.0.input_layernorm: LlamaRMSNorm
model.layers.0.post_attention_layernorm: LlamaRMSNorm
model.layers.1: LlamaDecoderLayer
model.layers.1.self_attn: LlamaSdpaAttention
model.layers.1.self_attn.q_proj: MyLinear
model.layers.1.self_attn.q_proj.line

In [20]:
# Show which class the Auto* loader actually instantiated.
print(type(model).__name__)

CustomLlamaForCausalLM
