# Step1:导入依赖

In [5]:
from datasets import load_from_disk
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForSeq2Seq,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
import torch

In [6]:
# import wandb 
# wandb.login(key="1b8adc705fb9b3e125c05f15107ad7c22c830811")
# wandb.init(project="unslothDeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit-cMedQA2-Qlora")

# step2:模型加载

In [None]:
# 定义模型路径
model_path = "D:/Study/LLM/DeepSeek-R1-Distill-Qwen-1.5B-qlora/model/unsloth-DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit"

# 加载分词器
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    max_seq_length=2048,
    padding_side="right",
    use_fast=True
)

# 确保设置 pad_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 加载模型
model = AutoModelForCausalLM.from_pretrained(
    "D:/Study/LLM/DeepSeek-R1-Distill-Qwen-1.5B-qlora/model/unsloth-DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit",
     device_map="auto",
    use_cache=False  # 梯度检查点需要
)
# model = AutoModelForCausalLM.from_pretrained("D:/Pretrained_models/modelscope/Llama-2-13b-ms", low_cpu_mem_usage=True, 
#                                              torch_dtype=torch.bfloat16, device_map="auto", load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16,
#                                              bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True)

ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
for name, param in model.named_parameters():
    print(name, param.shape, param.dtype)

In [None]:
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}
"""

In [None]:
question = "一个患有急性阑尾炎的病人已经发病5天，腹痛稍有减轻但仍然发热，在体检时发现右下腹有压痛的包块，此时应如何处理？"

model.eval()
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])

# step3:数据集加载

In [None]:
#数据集加载
from datasets import load_from_disk
dataset = load_from_disk("D:/Study/LLM/DeepSeek-R1-Distill-Qwen-1.5B-qlora/data/medical-o1-reasoning-SFT-zh")
print(dataset.column_names)

['Question', 'Complex_CoT', 'Response']


# step4:数据集预处理

In [None]:
#数据预处理
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}
"""

In [None]:
EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }

NameError: name 'tokenizer' is not defined

In [None]:
dataset = dataset.map(formatting_prompts_func, batched=True)
dataset["text"][0]

# step5:lora配置

In [None]:
from peft import LoraConfig, TaskType, get_peft_model

config = LoraConfig(task_type=TaskType.CAUSAL_LM,)
config

In [None]:
model = get_peft_model(model, config)

In [None]:
model.enable_input_require_grads() # 开启梯度检查点时，要执行该方法

In [None]:
model.print_trainable_parameters()

# step6:配置训练参数

In [None]:
args = TrainingArguments(
    output_dir="../output",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    logging_steps=10,
    num_train_epochs=3,
    learning_rate=2e-4,
    gradient_checkpointing=True,
    fp16=True,                              # 混合精度训练
    optim="paged_adamw_32bit",              # 优化内存使用
    report_to="wandb",
)

# step7:创建训练器

In [None]:
trainer = Trainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

# step8:模型训练

In [None]:
trainer.train()

# step9:微调模型验证推理

In [None]:
question = "一个患有急性阑尾炎的病人已经发病5天，腹痛稍有减轻但仍然发热，在体检时发现右下腹有压痛的包块，此时应如何处理？"

model.eval()
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])

# step10:模型保存

In [None]:
model.save_pretrained("../output/medical_lora_adapter")