# Step1:导入依赖

In [1]:
from datasets import load_from_disk
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForSeq2Seq,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import wandb 
wandb.login(key="1b8adc705fb9b3e125c05f15107ad7c22c830811")
wandb.init(project="DeepSeek-R1-Distill-Llama-8B-Qlora")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\21205\_netrc
[34m[1mwandb[0m: Currently logged in as: [33m3407941284[0m ([33m3407941284-hdu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# step2:模型加载

In [3]:
# 定义模型路径
model_path = "D:/study/LLM/DeepSeek-R1-Distill-Llama-8B-Qlora/models/DeepSeek-R1-Distill-Llama-8B"

# 加载分词器
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    max_seq_length=2048,
    padding_side="right",
    use_fast=True
)

# 确保设置 pad_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# # 加载模型
# model = AutoModelForCausalLM.from_pretrained(
# model_path,
#      device_map="auto",
#     use_cache=False  # 梯度检查点需要
# )
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, 
                                             torch_dtype=torch.bfloat16, device_map="auto", load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16,
                                             bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.70s/it]


In [4]:
print(tokenizer)

LlamaTokenizerFast(name_or_path='D:/study/LLM/DeepSeek-R1-Distill-Llama-8B-Qlora/models/DeepSeek-R1-Distill-Llama-8B', vocab_size=128000, model_max_length=16384, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<｜begin▁of▁sentence｜>', 'eos_token': '<｜end▁of▁sentence｜>', 'pad_token': '<｜end▁of▁sentence｜>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	128000: AddedToken("<｜begin▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	128001: AddedToken("<｜end▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	128002: AddedToken("<|reserved_special_token_0|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	128003: AddedToken("<|reserved_special_token_1|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	128004: AddedToken("<|finetune_right_pad_id|>", rstrip=False, lstrip=False, si

In [5]:
for name, param in model.named_parameters():
    print(name, param.shape, param.dtype)

model.embed_tokens.weight torch.Size([128256, 4096]) torch.bfloat16
model.layers.0.self_attn.q_proj.weight torch.Size([8388608, 1]) torch.uint8
model.layers.0.self_attn.k_proj.weight torch.Size([2097152, 1]) torch.uint8
model.layers.0.self_attn.v_proj.weight torch.Size([2097152, 1]) torch.uint8
model.layers.0.self_attn.o_proj.weight torch.Size([8388608, 1]) torch.uint8
model.layers.0.mlp.gate_proj.weight torch.Size([29360128, 1]) torch.uint8
model.layers.0.mlp.up_proj.weight torch.Size([29360128, 1]) torch.uint8
model.layers.0.mlp.down_proj.weight torch.Size([29360128, 1]) torch.uint8
model.layers.0.input_layernorm.weight torch.Size([4096]) torch.bfloat16
model.layers.0.post_attention_layernorm.weight torch.Size([4096]) torch.bfloat16
model.layers.1.self_attn.q_proj.weight torch.Size([8388608, 1]) torch.uint8
model.layers.1.self_attn.k_proj.weight torch.Size([2097152, 1]) torch.uint8
model.layers.1.self_attn.v_proj.weight torch.Size([2097152, 1]) torch.uint8
model.layers.1.self_attn.o_

In [6]:
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}
"""

In [7]:
question = "一个患有急性阑尾炎的病人已经发病5天，腹痛稍有减轻但仍然发热，在体检时发现右下腹有压痛的包块，此时应如何处理？"
inputs = tokenizer([prompt_style.format(question, "","")], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



<think>

</think>

### 思考过程：

1. **病情分析**：
   - 患者已发病5天，腹痛稍有减轻，但仍有发热。
   - 体检发现右下腹有压痛的包块。

2. **初步判断**：
   - 包块的存在提示可能是胰腺炎或其他胰腺相关疾病。
   - 压痛性包块可能表示胰腺组织的炎症。

3. **进一步检查**：
   - **影像学检查**：如超声或CT扫描，确认包块的性质和位置。
   - **血液检查**：评估炎症标志物（如CRP、白细胞计数）和肝功能。

4. **治疗方案**：
   - **抗生素治疗**：根据敏感结果选择药物，通常为第三代 cephalosporin 或 iminogamabin。
   - **胰腺支持治疗**：包括营养支持、疼痛管理和休息。
   - **外科引导下穿刺**：排除感染性腹膜炎或其他严重感染。

5. **监测与随访**：
   - 定期复查肝功能和炎症标志物。
   - 观察病情进展，必要时调整治疗方案。

### 最终答案：

在发现右下腹压痛包块的情况下，应立即进行影像学检查（如超声或CT）以确认包块的性质和位置。同时，进行血液检查以评估炎症标志物和肝功能。根据抗生素敏感结果选择合适的抗生素治疗，并考虑胰腺支持治疗，如营养支持和疼痛管理。若有必要，可进行外科引导下穿刺以排除感染性腹膜炎或其他严重感染。定期复查肝功能和炎症标志物，监测病情进展并调整治疗方案。<｜end▁of▁sentence｜>


# step3:数据集加载

In [8]:
#数据集加载
from datasets import load_from_disk
dataset = load_from_disk("D:/study/LLM/DeepSeek-R1-Distill-Llama-8B-Qlora/data/medical-o1-reasoning-SFT-zh")
print(dataset.column_names)

['Question', 'Complex_CoT', 'Response']


# step4:数据集预处理

In [9]:
#数据预处理
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}
"""

In [10]:
EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }

In [11]:
def formatting_prompts_func(examples):
    """
    1. 读取 `Question`, `Complex_CoT`, `Response`
    2. 按 `train_prompt_style` 拼接成完整文本
    3. 进行 Tokenization
    4. 返回 `input_ids`, `attention_mask`, `labels`
    """
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]

    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)

    # Tokenize 处理
    model_inputs = tokenizer(
        texts,
        padding="max_length",  # 设定填充方式
        max_length=2048,  # 设定最大长度
        truncation=True,  # 超长截断
        return_tensors="pt"  # 返回 PyTorch 格式
    )

    # `labels` = `input_ids`（训练时 Shift Right）
    model_inputs["labels"] = model_inputs["input_ids"].clone()  # 修改为 clone()

    return model_inputs




In [12]:
# 处理数据集
dataset = dataset.map(formatting_prompts_func, batched=True, remove_columns=["Question", "Complex_CoT", "Response"])

# 查看处理后的数据列
print("处理后数据列:", dataset.column_names)

# 检查第一条样本
print("样本数据:", dataset[0])

处理后数据列: ['input_ids', 'attention_mask', 'labels']
样本数据: {'input_ids': [128000, 39314, 374, 459, 7754, 430, 16964, 264, 3465, 11, 35526, 449, 459, 1988, 430, 5825, 4726, 2317, 13, 9842, 264, 2077, 430, 36001, 45695, 279, 1715, 627, 10438, 36864, 11, 1781, 15884, 922, 279, 3488, 323, 1893, 264, 3094, 14656, 30308, 8957, 315, 11555, 311, 6106, 264, 20406, 323, 13687, 2077, 382, 14711, 30151, 512, 2675, 527, 264, 6593, 6335, 449, 11084, 6677, 304, 14830, 33811, 11, 50518, 11, 323, 6514, 9293, 13, 5321, 4320, 279, 2768, 6593, 3488, 382, 14711, 16225, 512, 110747, 54581, 3922, 48044, 16, 93115, 9554, 109780, 19000, 105140, 105343, 65455, 105871, 111935, 43240, 45390, 31809, 37985, 56602, 3922, 46961, 23538, 16937, 31374, 230, 40862, 3922, 103786, 105456, 101734, 106, 27384, 30624, 108686, 3922, 36117, 225, 105150, 89753, 101171, 241, 3922, 40526, 16937, 51109, 8067, 249, 3922, 65455, 105871, 17297, 19361, 35894, 113312, 3922, 111694, 45390, 105871, 57942, 97, 50285, 112950, 1811, 106880, 103

In [13]:
dataset

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 24772
})

# step5:lora配置

In [14]:
from peft import LoraConfig, TaskType, get_peft_model

config = LoraConfig(task_type=TaskType.CAUSAL_LM,)
config

LoraConfig(task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, inference_mode=False, r=8, target_modules=None, exclude_modules=None, lora_alpha=8, lora_dropout=0.0, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', trainable_token_indices=None, loftq_config={}, eva_config=None, corda_config=None, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False)

In [15]:
model = get_peft_model(model, config)

In [16]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear4bit(in_features=40

In [17]:
for name, param in model.named_parameters():
    print(name, param.shape, param.dtype)

base_model.model.model.embed_tokens.weight torch.Size([128256, 4096]) torch.bfloat16
base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight torch.Size([8388608, 1]) torch.uint8
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight torch.Size([8, 4096]) torch.float32
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight torch.Size([4096, 8]) torch.float32
base_model.model.model.layers.0.self_attn.k_proj.weight torch.Size([2097152, 1]) torch.uint8
base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight torch.Size([2097152, 1]) torch.uint8
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight torch.Size([8, 4096]) torch.float32
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight torch.Size([1024, 8]) torch.float32
base_model.model.model.layers.0.self_attn.o_proj.weight torch.Size([8388608, 1]) torch.uint8
base_model.model.model.layers.0.mlp.gate_proj.weight torch.Size([29360128, 1]) torch.uint8
ba

In [18]:
model.enable_input_require_grads() # 开启梯度检查点时，要执行该方法

In [19]:
model.print_trainable_parameters()

trainable params: 3,407,872 || all params: 8,033,669,120 || trainable%: 0.0424


# step6:配置训练参数

In [20]:
args = TrainingArguments(
    output_dir="../output",
    per_device_train_batch_size=3,
    gradient_accumulation_steps=1,
    logging_steps=10,
    num_train_epochs=3,
    learning_rate=2e-4,
    gradient_checkpointing=True,
    fp16=True,                              # 混合精度训练
    optim="paged_adamw_32bit",              # 优化内存使用
    report_to="wandb",
)

# step7:创建训练器

In [21]:
trainer = Trainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    train_dataset=dataset,
   data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True, max_length=2048)
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [22]:
# print(model.forward)

# step8:模型训练

In [None]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,16.6632
20,3.1433
30,0.7515
40,0.7296
50,0.6991
60,0.6266


# step9:微调模型验证推理

In [None]:
# question = "一个患有急性阑尾炎的病人已经发病5天，腹痛稍有减轻但仍然发热，在体检时发现右下腹有压痛的包块，此时应如何处理？"

# model.eval()
# inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

# outputs = model.generate(
#     input_ids=inputs.input_ids,
#     attention_mask=inputs.attention_mask,
#     max_new_tokens=1200,
#     use_cache=True,
# )

# response = tokenizer.batch_decode(outputs)
# print(response[0].split("### Response:")[1])

# step10:模型保存

In [None]:
# model.save_pretrained("../output/medical_lora_adapter")