In [None]:
import os

from huggingface_hub import login
import wandb

# Credentials are read from the environment so that real keys never get typed
# into (and committed with) the notebook. Export HF_TOKEN and WANDB_API_KEY
# before starting the kernel. If a variable is unset, None is passed through
# and each library falls back to its own credential lookup / prompt.
login(token = os.environ.get("HF_TOKEN"))
wandb.login(key = os.environ.get("WANDB_API_KEY"))

# `run` is the Weights & Biases run that the trainer reports to.
run = wandb.init(
    project='Fine-tune gemma-3n-e4b',
    job_type="training",
    anonymous="allow"
)

In [None]:
# Notebook-wide configuration.
# CSV file with the training rows; read by load_dataset.
csv_file_name = "Datasets-consult-summary.csv"
# Checkpoint passed to FastModel.from_pretrained.
base_model_name = "unsloth/gemma-3n-E4B-it"
# Name of the fine-tuned model; used as the trainer output directory and when saving.
new_model_name = "gemma-3n-privnurse-consult-summary-v1"

In [None]:
from unsloth import FastModel
import torch

# Load the base checkpoint together with its tokenizer.
model, tokenizer = FastModel.from_pretrained(
    model_name = base_model_name,
    dtype = None, # None for auto detection
    max_seq_length = 8192, # Same value is passed as max_seq_length in the SFTConfig cell; keep them in sync
    load_in_4bit = True,  # 4 bit quantization to reduce memory
    full_finetuning = False, # Full finetuning is off; get_peft_model is applied to the model in a later cell
)

In [None]:
###########################
# Let's finetune Gemma 3N #
###########################

In [None]:
# Wrap the loaded model for parameter-efficient fine-tuning; the result replaces `model`.
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers     = False, # Turn off for just text!
    finetune_language_layers   = True,  # Should leave on!
    finetune_attention_modules = True,  # Attention modules are fine-tuned as well
    finetune_mlp_modules       = True,  # Should leave on always!

    r = 32,           # Larger = higher accuracy, but might overfit
    lora_alpha = 64,  # Set to 2 * r here
    lora_dropout = 0,
    bias = "none",
    random_state = 3407, # Same seed value as in the SFTConfig cell
)

In [None]:
from unsloth.chat_templates import get_chat_template
# Rebind `tokenizer` to the version returned by get_chat_template for the
# "gemma-3" template; apply_chat_template calls later in the notebook use it.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)

In [None]:
from datasets import load_dataset
# Read the CSV (split="all") and shuffle it with a fixed seed so the row order
# is reproducible across runs.
dataset = load_dataset('csv', data_files=csv_file_name, split="all")
dataset = dataset.shuffle(seed=56)
# Spot-check one row. NOTE(review): index 100 assumes the CSV has at least 101 rows.
dataset[100]

In [None]:
# System message for the assistant.
# The same text is used as the "system" turn when building the training
# conversations (formatting_prompts_func) and at inference time
# (process_consultation), so training and inference see an identical prompt.
system_prompt = """You are a specialized AI assistant that helps hospital staff create precise consultation summaries by converting physician consultation forms into concise summaries.

## Core Task
Convert physician consultation requests into accurate, concise summaries in Traditional Chinese while following specific medical terminology rules.

## Language Rules
**Primary Language**: Traditional Chinese (繁體中文) output
**Medical Term Translation**: Translate common medical terms to Chinese equivalents (e.g., Hypertension → 高血壓)

**5 Critical Exceptions - Keep in Original English**:
1. **Drug Names**: Never translate medication names (e.g., Amoxicillin, Augmentin, Clindamycin)
2. **Dosage Units & Frequency**: Preserve exactly (e.g., vials, tab, Q12H, Q8H, BID)
3. **Microbiology Names**: Keep scientific names intact (e.g., Escherichia coli, Enterococcus faecalis)
4. **Drug Resistance**: Keep drug names in resistance statements (e.g., 對Clindamycin具有抗藥性)
5. **Uncertain Terms**: When unsure of translation, keep English or add both (e.g., 深部靜脈血栓（DVT）)

## Processing Workflow
1. **Identify**: Consultation department
2. **Extract**: Primary consultation reason from "申請會診內容" section only
3. **Focus**: Only include reasons directly related to the consulting department's expertise
4. **Exclude**: Past medical history and admission reasons unless directly relevant
5. **Respect**: Use only information provided in the consultation form, no speculation

## Output Format
Use this exact structure:

<thinking>
[Your analysis process following the rules above]
</thinking>

<answer>
病患因[會診原因]，申請[某某科]會診。[某某科][醫師]回覆[回覆內容]。

Special cases:
- If no response from physician yet: end with "等待回覆"
- Apply language rules to all medical terms
- Preserve all drug names, dosages, and frequencies in original English
</answer>

## Key Principles
- Extract consultation reasons ONLY from "申請會診內容" section
- Do NOT pre-consider information from "回覆會診內容" when determining consultation reason
- Never speculate about diagnoses - only use information explicitly provided
- Maintain medical accuracy while ensuring linguistic consistency
"""

In [None]:
def formatting_prompts_func(examples):
    """Render every row of a batch as one chat-formatted training string.

    Args:
        examples: Batch mapping with parallel "original" (user message) and
            "CoT" (assistant message) columns.

    Returns:
        dict with a single "text" key holding one rendered conversation per row.
    """
    # One system / user / assistant conversation per row of the batch.
    conversations = [
        [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": source_text},
            {"role": "assistant", "content": examples["CoT"][row]},
        ]
        for row, source_text in enumerate(examples["original"])
    ]

    # Apply the chat template, then strip the leading <bos> prefix.
    rendered = []
    for conversation in conversations:
        chat_text = tokenizer.apply_chat_template(
            conversation, tokenize=False, add_generation_prompt=False
        )
        rendered.append(chat_text.removeprefix('<bos>'))

    return {"text": rendered}

# Use batched processing: formatting_prompts_func receives a batch of rows per call
# and its "text" output is added to the dataset.
dataset = dataset.map(formatting_prompts_func, batched=True)

In [None]:
# Inspect the rendered training text of one row.
dataset[100]["text"]

In [None]:
###################
# Train the model #
###################

In [None]:
from unsloth import is_bfloat16_supported
from trl import SFTTrainer, SFTConfig

# Training configuration for the supervised fine-tuning run.
# The "reference" values in the comments are the ones noted by the original author.
training_arguments = SFTConfig(
    dataset_text_field="text",         # column produced by formatting_prompts_func
    output_dir=new_model_name,
    per_device_train_batch_size=6,     # reference: 2
    gradient_accumulation_steps=8,     # reference: 8
    optim="adamw_torch_fused",         # Options: adamw_hf, adamw_torch, adamw_torch_fused, adamw_8bit
    num_train_epochs=6,
    ###############
    # Evaluation is switched off; to enable it, set an eval_dataset on the
    # trainer and use e.g.:
    # per_device_eval_batch_size=2,    # reference: 2
    # eval_strategy="steps",
    # eval_steps=10,
    eval_strategy='no',
    ###############
    lr_scheduler_type = "linear",
    max_grad_norm=0.3,                 # gradient clipping threshold
    warmup_ratio=0.1,                  # reference: 0.03
    # warmup_steps=30,                 # reference: 30
    # NOTE(review): 5x the author's 2e-4 reference value; confirm this is intentional.
    learning_rate=1e-3,
    # weight_decay=0.01,
    # adam_beta1=0.9,
    # adam_beta2=0.95,
    logging_strategy="steps",
    logging_steps=10,                  # When batch size is big, logging steps should be reduced to 2-5 not 10
    save_strategy="epoch",
    # The model is loaded with dtype=None (auto detection), so choose the
    # mixed-precision flag the same way instead of hard-coding bf16: GPUs
    # without bfloat16 support fall back to fp16 rather than failing.
    fp16=not is_bfloat16_supported(),
    bf16=is_bfloat16_supported(),
    # group_by_length=True,
    report_to="wandb",
    max_seq_length=8192,               # same value as in FastModel.from_pretrained
    seed = 3407,
    # packing= False,
)

In [None]:
# Build the trainer from the model, the chat-templated tokenizer and the dataset
# whose "text" column was produced by formatting_prompts_func.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None, # No evaluation set; eval_strategy is 'no' in training_arguments
    args = training_arguments,
)

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Rewrap the trainer with train_on_responses_only. The two markers are the
# turn headers that start a user turn and a model turn in the rendered text.
# The label check two cells below shows which tokens end up labelled -100.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<start_of_turn>user\n",
    response_part = "<start_of_turn>model\n",
)

In [None]:
# Sanity check: decode the token ids of one training row back to text.
tokenizer.decode(trainer.train_dataset[100]["input_ids"])

In [None]:
# Sanity check: decode the labels of the same row. Labels equal to -100 are
# swapped for the pad token id so they can be decoded, and the pad token text is
# then replaced by spaces, leaving only the tokens that carry a real label
# (presumably the model response after train_on_responses_only - verify in the output).
tokenizer.decode([tokenizer.pad_token_id if x == -100 else x for x in trainer.train_dataset[100]["labels"]]).replace(tokenizer.pad_token, " ")

In [None]:
# Workaround for the following error:
#   InductorError: RuntimeError: Failed to run autotuning code block:
#   PY_SSIZE_T_CLEAN macro must be defined for '#' formats
# (Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace, or TORCH_LOGS="+dynamo"
# for more developer context, when reporting it to PyTorch.)
# Fix used here: switch TorchDynamo off. This cell runs before trainer.train(),
# so the setting is already in effect for training, not only for inference.
import torch
torch._dynamo.config.disable = True
# Alternative: keep compilation enabled and only suppress its errors:
# torch._dynamo.config.suppress_errors = True
# Author's note: the error is related to PyTorch's aggressive optimization trying
# to compile the vision components of Gemma 3, which can be disabled without
# affecting functionality.

In [None]:
# @title Show current memory stats
# Baseline before training. Both figures are bytes divided by 1024**3 and rounded
# to 3 decimals. `start_gpu_memory` and `max_memory` are read again by the
# stats cell that runs after training.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
# Run training; `trainer_stats.metrics` is read by the stats cell that follows.
trainer_stats = trainer.train()

In [None]:
# @title Show final memory and time stats
# Peak reserved memory after training, compared with the baseline
# (`start_gpu_memory`) and the device total (`max_memory`) recorded before training.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
# Difference to the pre-training baseline, i.e. what the training run added.
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

In [None]:
# Close the Weights & Biases run opened with wandb.init at the top of the notebook.
wandb.finish()

In [None]:
#############
# Inference #
#############

In [None]:
# Same workaround as the cell before training, repeated so the inference cells
# also have it when they are run on their own. It addresses:
#   InductorError: RuntimeError: Failed to run autotuning code block:
#   PY_SSIZE_T_CLEAN macro must be defined for '#' formats
# (Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace, or TORCH_LOGS="+dynamo"
# for more developer context, when reporting it to PyTorch.)
# Fix used here: switch TorchDynamo off before any inference code runs.
import torch
torch._dynamo.config.disable = True
# Alternative: keep compilation enabled and only suppress its errors:
# torch._dynamo.config.suppress_errors = True
# Author's note: the error is related to PyTorch's aggressive optimization trying
# to compile the vision components of Gemma 3, which can be disabled without
# affecting functionality.

In [None]:
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer

def process_consultation(model, tokenizer, consultation_content, max_tokens=2048):
    """Stream the model's summary of one consultation form to stdout.

    Args:
        model: Model used for generation; inputs are moved to ``model.device``.
        tokenizer: Tokenizer; it is wrapped with the "gemma-3" chat template
            on every call, so an un-templated tokenizer may be passed too.
        consultation_content: Consultation form text, sent as the user turn
            after the shared ``system_prompt``.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        None. The generated text is printed incrementally by a ``TextStreamer``
        (the prompt itself is skipped).
    """
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "gemma-3",
    )
    messages = [{"role": "system",  "content": [{"type": "text", "text": system_prompt}]},
                {"role": "user", "content": [{"type": "text", "text": consultation_content}]}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
        tokenize = True,
        return_dict = True,
    ).to(model.device)  # follow the model instead of assuming "cuda"

    _ = model.generate(
        **inputs,
        max_new_tokens = max_tokens,
        # Sampling is requested explicitly: temperature / top_p / top_k only take
        # effect when do_sample is on, and relying on the checkpoint's default
        # generation config for that would silently fall back to greedy decoding
        # on a checkpoint that does not enable it.
        do_sample = True,
        ##############################################################################
        # temperature = 1.0, top_p = 0.95, top_k = 64, # Recommended Gemma-3 settings!
        ##############################################################################
        temperature = 0.1,  # low temperature for near-deterministic summaries
        top_p = 0.95,
        top_k = 64,
        ##############################################################################
        streamer = TextStreamer(tokenizer, skip_prompt = True),
    )

In [None]:
### Sample 0.1 ###
# Consultation to 感染科 asking for agreement to use Vancomycin; a reply is present.
consultation_content = """
申請會診內容：

Dear doctor:
    This 71-year-old woman is a case of
- Chronic respiratory failure s/p tracheostomy under the ventilator dependent and difficult weaning 
- Chronic kidney disease, stage 4 post Hickman catheter implantation on 113.03.29 
  => Regular hemodialysis via Hickman's catheter on QW1.3.5   
- Multiple cabuncle on the whole body 
- Type 2 DM.
- Pressure injury on the Sacrum 
- Adrenal insufficiency
- HBV carrier 


   She had left proximal radius osteolytic lesion. Bone tumor or osteomyelitis are more likely than old fracture, CPR: 22.39 mg/dL today 
   We want to use Vancomycin injection for infection control and need your agree, thank you very much !! 

被會診諮詢的科別：感染科

回覆醫師：楊XX

感染科回覆會診內容：
Agree with Vancomycin 1 gm iv q5d. (8523)
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
### Sample 0.2 ###
# Consultation to 新陳代謝科 about abnormal thyroid function; the reply contains
# an impression and a numbered list of suggestions.
consultation_content = """申請會診內容：
DEar Dr.
     This 61 year-old women, admission was due to:

. shortness of breath bilateral leg edema +++, 
. Lab : elevated NT-proBNP level
   => Suspected Heart failure
    - Myocardial perfusion scan:
    1. Severe myocardial ischemia in the lateral and inferior walls
      of left ventricle.
    2. Moderate myocardial ischemia in the anterior and apical walls
       of left ventricle.
. Type 2 diabetes mellitus with hyperglycemia
. Hypertension
. Hyperlipidemia


生化免疫 Serum
日     期               TSH          T3 
= = = = =  ================ =========== 
113/10/15   116.493 uIU/mL  0.17 ng/mL 


Due to impaired thyroid level, so we need your consult for further evaluation and suggestion. Thanks a lot.

被會診諮詢的科別：新陳代謝科

回覆醫師：陳XX

新陳代謝科回覆會診內容：
Dear Dr.:
   we are consulted for abnormal thyroid function test results in 61 y/o woman with heart failure.

BP 87/55mmHg, HR 73, BT 36.4''C,
Lab data: TSH 116 (>10), T3 0.17 (<0.7)

medication: no use of amiodarone, steroid or anti-thyroid agents

Impression:
hypothyroidism

Suggestion:
1. check ACTH/cortisol level to rule out glucocorticoid adrenal insufficiency
2. add eltroxin 0.05mg 2 PO QDAC if cortisol not < 10mg/dL, and recheck TSH / free T4 after one week
3. check anti-TPO, anti-Tg, thyroid echo
4. arrange follow up

Thanks for consultation
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
### Sample 1.1 ###
# Consultation to 新陳代謝科 whose reply section is "無" (none); presumably
# exercises the "等待回覆" (no response yet) case of the system prompt.
consultation_content = """申請會診內容：
Dear Dr:
      The 69 y/o male patient sufferred deom:
1. BLUNT CHEST TRAUMA WITH Flail chest, ribs fracture over left 3-7th 
      ribs, with hemopneumothorax 
    =>POST CHEST TUBE INSERTION 10/23 
    =>POST Ribs ORIF over left 3-7th ribs and VATS exploration, left 
            side
2.a big (4.8 cm) right thyroid nodule with suspicious malignancy
3. DM POOR CONTROL ! HBA1C:7.0 
4. HCVD
5. tachycardia (af with RVR:131-156=>under amiodarone iv pump keep )
    =>現有CORDARONE ★150MG/3ML 6AMP in D5W 500ml pump run 17ml/hr(for 
      18hr)使用
6. fever =>tapimycin use 
7.**.CXR為左下肺肋膜積液存:chest tube =>pull out 6cm
      =>add emerson suction:-20cmH2O

Rt thyroid nodule,fever,tachycardia ,and HBALc:7.0 were also noted .Please evaluate the patient for the further evaluation and management.Thanks!

被會診諮詢的科別：新陳代謝科

回覆醫師：蘇XX

新陳代謝科回覆會診內容：
無
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
### Sample 1.2 ###
# Consultation to 感染科 asking to add tapimycin; short reply with dose and frequency.
consultation_content = """申請會診內容：
Dear DR 
This 53 years old man is a case 
Acute febrile disease, suspected pneumonia and suspected wound infection 
Bilateral legs weakness, cause to be determined, need to exclude neurology problem or deep vein thrombosis 
   -- arrange upper/lower limbs NCV and LVD  
Suspected heart failure

Due to  Blood Culture    G(-) bacilli, we want add tapimycin for infection control, thanks.

被會診諮詢的科別：感染科

回覆醫師：楊XX

感染科回覆會診內容：
Agree with Tapimycin 2.25 gm iv Q8h (CODE: 8523).
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
### Sample 1.3 ###
# Consultation to 腎臟科 about hemodialysis; the request lists a long past
# history before the actual consultation reason.
consultation_content = """申請會診內容：
This is a 62 years old man with past history of:
1. End stage renal disease on peritoneal dialysis with severe fungal peritonitis and shifting to hemodialysis via Hickmen catheter  
2. Upper gastrointestinal bleeding with anemia 
3. Coronary artery disease, three-vessel disease, status post drug-eluting coronary stenting 
4. Congestive heart failue with reduced ejection fraction (35%) 
5. Hypertensive cardiovascular disease
6. Past history of intra-abdominal abscess formation with sepsis and intestinal adhesion with obstruction and perforation status post enterolysis and free of adhesion and repair of perforated bowel with side to side bypass operation
7. Gouty arthritis
8. Past history of rectal adenoma,1.0cm, status post hemoclipping and cold snare polypectomy 

This time, he was admitted for parathy roidectomy. Under this situation, we sincerely need your expertise on HD. Thank you very much!

被會診諮詢的科別：腎臟科

回覆醫師：王XX

腎臟科回覆會診內容：
Dear Dr:
I would arrange regular HD for this patient. Thanks a lot.
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
# Unnumbered sample: consultation to 心臟內科 with two successive requests in the
# application section and a multi-part reply.
consultation_content = """申請會診內容：
Dear Dr:
    The 33 y/o female patient sufferred from:
1. Spontaneous diffuse subarachnoid hemorrhage with aneurysm ruptured 
   (R't ophthalmic segment ICA wide base mushroom shape aneurysm) s/p 
   TAE with 9 coils, obstruction hydrocephalus status post right EVD and 
   ICP monitor
   -->under sedation, GCS E1VTM4
   -->EVD drainage 321ml yeaterday, no obvious IICP noted
   -->more stable BP 
2. Neurogenic pulmonary edema
   -->desaturation mild improved 
   -->CXR: still bilateral lungs infiltration 
3. Hypokalemia, 2.8 mmol/L
   -->post KCL replacement, 3.1mmol/L today 

**.Acute heart failure suspect breaking heart syndrome with acrdiac echo: LVEF:33%、AR:-、MR:+、TR:+、PR:+、E/A:1.8，RVSP:24mmHg.Please evaluate the patient for the further e valuation and treatment(家屬要求心衰治療).Thanks!

8/16 persist desaturation with high FiO2 support, D-Dimer Test >7650 ng/mL,
     Fibrinogen >700 mg/dL, unstable hemodynamic on double vasorpessor pump 
     infusion, pulmonary embolism or more worse of heart function can not be rule 
     out, please evaluation and arrange free charge cardiac echo(recheck RVSP and
     RV/LV ratio) for further suggest. Thanks a lot.

被會診諮詢的科別：心臟內科

回覆醫師：劉XX

心臟內科回覆會診內容：
Arrange CTA to exclude pulmonary embolism or evaluate the severity. Mechanical ventilation dependent on FuO2. V-V ECMO may be needed if hypoxemia is wordening. 

Digoxin 0.5mg 1/2 ample in N/S 100ml iv drip for 1 hours Q12h and then digoxin 1/2 QD. Check digoxin level 3 days later.

Please also consult CVS for possible ECMO.
Please consult NEP for electrolytes imbalance to correct it aggressively.
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
# Unnumbered sample: consultation to 感染科 written with markdown "##" section
# headings (unlike the other samples); the reply includes a CSF culture table.
# The replying physician's name is anonymised as "楊XX", like every other sample
# in this notebook, so that no real staff name is stored in the file.
consultation_content = """## 申請會診內容：
Dear Dr:  csf culture:Staphylococcus epidermidis .clidamycin resistant .so we need change other antibiotic use .
This 54 year old woman, A case of 1. Left A4 segment of anterior cerebral artery(ACA) aneurysm(1.39 x 1.37mm) rupture with spontaneous diffuse subarachnoid hemorrhage status post right external ventricular drain and intracranial pressure monitoring on 113/12/10, status post transcatheter arterial embolization on 113/12/19 2. Meningitis status post revision external ventricular drain in left ventricle on 113/12/21 external ventricular drain tip obstruction post irrigation on 114/01/07
3. Communicating hydrocephalus status post ventriculoperitoneal shunt implantation, R`t Kocher's point(Strata progammable valve set to 1.5) on 114/02/12. 4. Seizure attact with epilepsy.
Due to high fever and productive cough too much,
一般生化 Blood
日     期           CRP 
= = = = =  ============ 
114/03/11   12 mg/dl 
, we need your specialty evaluation for antibiotic therapy.
 Thanks!

## 被會診諮詢的科別：感染科

## 回覆醫師：楊XX

## 感染科回覆會診內容：
一般培養 CSF
日     期       Common Aerobic Culture 
= = = = =  =========================== 
1140321                   G(+) cocci 
1140321   Staphylococcus epidermidis 
1140322                   G(+) cocci 
1140322   Staphylococcus epidermidis 
1140324                   G(+) cocci 
1140324   Staphylococcus epidermidis 

Switch Clincin iv to Sevatrim 960 mg (2 vials) iv Q12h; Continue Tecopin 400 mg iv Qd. (CODE: 8523)
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
# Unnumbered sample: consultation to 感染科 whose request embeds several blood
# test tables; the reply is a one-line antibiotic switch.
consultation_content = """申請會診內容：
Dear Dr.
This 65 years old male patient is a case of Enlarged prostate with lower urinary tract symptoms S/P Laser transurethral resection of the Prostate on 08/01
Due to high fever and leukocytosis were noted, so we need your expert to this patient for evaluation 
血液檢查 Blood
日     期         WBC             RBC         Hb     HCT      MCV 
= = = = =  ========== =============== ========== ======= ======== 
113/08/02   23730 uL  487 x10000/uL  13.7 g/dL  39.4 %  80.9 fl 

血液檢查 Blood
日     期       MCH       MCHC       Platelet  RDW-CV     MPV 
= = = = =  ======== ========== ============== ======= ======= 
113/08/02   28.1 pg  34.8 g/dL  269 x1000/uL  13.3 %  8.9 fL 

血液檢查 Blood
日     期   Neutrophil-Segmented  Lymphocyte  Monocyte 
= = = = =  ===================== =========== ========= 
113/08/02                 87.9 %       7.6 %     4.5 % 

血液檢查 Blood
日     期   Absolute neutrophil count(ANC) 
= = = = =  =============================== 
113/08/02                        20859 /uL 

血液檢查 Blood
Thank a lot!! 

被會診諮詢的科別：感染科

回覆醫師：楊XX

感染科回覆會診內容：
Switch Stazolin iv to Seforce 400 mg iv Q12h (CODE: 8523).
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
# Unnumbered sample: consultation to 感染科 without a greeting, with sputum test
# tables and several follow-up requests appended to the original request.
consultation_content = """申請會診內容：
1. bilateral lower lobe infiltration  
     => bilateral pneumonia
2. r/o acute delirium3
3. elevated D-Dimer test level, suspected venous thromboembolism

快篩檢驗及特殊項目 Sputum
日     期   微晶肺炎病原體多標的核酸檢測 
= = = = =  ============================= 
112/12/22                              : 

快篩檢驗及特殊項目 Sputum
日     期   Haemophilus influenzae   
= = = = =  ======================= 
112/12/22          10^4  copies/mL 

chest x-ray: bilateral lower lobe infiltration, lab data was showed: elevated CRP level ,so we need your consult for further evaluation and suggestion. Thanks a lot.
-----NEED CONTINUE ANTIBIOTIC USE, THANKS A LOTS~

----CXR not improving, need continue antibiotic use, thanks ~

****** 1/16  due to CXRnot improving, ceftriaxone 2G qd ivd (day 22), we need your evlaution and management for another antibiotic suggestion, thanks ~
 

被會診諮詢的科別：感染科

回覆醫師：楊XX

感染科回覆會診內容：
Switch Ceftriaxone iv to Subacillin 1.5 gm iv Q8h + Klaricid 500 mg iv Qd.
"""
process_consultation(model, tokenizer, consultation_content)

In [None]:
##################
# Save the model #
##################

In [None]:
# [NOTE] This ONLY saves the LoRA adapters, and not the full model. To save to GGUF, scroll down!
# The adapters get their own "-lora" directory. The merged and GGUF exports in
# the following cells write to `new_model_name`; putting the adapter files in
# that same folder would mix adapter_config.json with a full model's
# config.json and make the folder ambiguous to load.
lora_dir = f"{new_model_name}-lora"
model.save_pretrained(lora_dir)  # Local saving
tokenizer.save_pretrained(lora_dir)

In [None]:
# Save the merged model together with the tokenizer into the `new_model_name` directory.
model.save_pretrained_merged(new_model_name, tokenizer)

In [None]:
# Export a GGUF file with Q8_0 quantization into the `new_model_name` directory.
model.save_pretrained_gguf(
    new_model_name,
    quantization_type = "Q8_0", # For now only Q8_0, BF16, F16 supported
)