In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# ============================================================
# 0. Setup & Imports
# ============================================================
!pip install deepspeed transformers bitsandbytes accelerate mpi4py --quiet

import torch
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,
    TrainingArguments, Trainer, EarlyStoppingCallback, default_data_collator
)
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from google.colab import files
from pathlib import Path

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m75.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m72.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.0/54.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m180.7/180.7 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for deepspeed (setup.py) ... [?25l[?25hdone


In [4]:
project_path = input("➡️ Enter the full path to the project folder (ex: /content/drive/MyDrive/MyProject) : ").strip()
relative_path = Path(project_path)

➡️ Enter the full path to the project folder (ex: /content/drive/MyDrive/MyProject) : /content/drive/MyDrive/Deep Learning Project


In [None]:
# ============================================================
# 1. Model Loading and Quantization
# ============================================================
# Load Qwen model with 4-bit quantization using BitsAndBytes.

model_id = "Qwen/Qwen3-0.6B"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [None]:
# ============================================================
# 2. Apply LoRA Fine-Tuning Configuration
# ============================================================
# Configure LoRA adapters for efficient fine-tuning.

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

In [None]:
# ============================================================
# 3. Load Dataset
# ============================================================
# Load preprocessed medical Q&A dataset from CSV.

dataset = load_dataset("csv", data_files=str(relative_path / "medDataset_processed.csv"), split="train")



Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# ============================================================
# 4. Split Dataset into Train/Validation/Test
# ============================================================

# First split into 90% train_val and 10% test
train_val, test = dataset.train_test_split(test_size=0.1, seed=42).values()

# Then split train_val into 90% train, 10% val (relative to full dataset)
train, val = train_val.train_test_split(test_size=0.1111, seed=42).values()

In [None]:
# ============================================================
# 5. Data Preprocessing and Tokenization
# ============================================================
# Convert question/answer pairs into model-ready tokenized sequences.

def format_example(batch):
    """Format a single Q/A pair into Qwen chat format"""
    eos = tokenizer.eos_token
    input_ids_list = []
    attention_mask_list = []
    labels_list = []

    for user, assistant in zip(batch["Question"], batch["Answer"]):
        text = (
            "<|im_start|>user\n"
            f"{user}\n"
            "<|im_end|>\n"
            "<|im_start|>assistant\n"
            f"{assistant}{eos}\n"
            "<|im_end|>"
        )

         # Tokenize the full conversation
        tokenized = tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=128,
        )

        labels = tokenized["input_ids"].copy()

        # Mask the user prompt
        user_part = "<|im_start|>user\n" + user + "\n<|im_end|>\n<|im_start|>assistant\n"
        user_len = len(tokenizer(user_part)["input_ids"])
        labels[:user_len] = [-100] * user_len

        input_ids_list.append(tokenized["input_ids"])
        attention_mask_list.append(tokenized["attention_mask"])
        labels_list.append(labels)

    return {
        "input_ids": input_ids_list,
        "attention_mask": attention_mask_list,
        "labels": labels_list,
    }

tokenized_train = train.map(format_example, remove_columns=train.column_names, batched=True, batch_size=16)
tokenized_val = val.map(format_example, remove_columns=val.column_names, batched=True, batch_size=16)
tokenized_test = test.map(format_example, remove_columns=test.column_names, batched=True, batch_size=16)

tokenized_train.set_format("torch")
tokenized_val.set_format("torch")
tokenized_test.set_format("torch")

tokenized_test.save_to_disk(str(relative_path / "tokenized_test"))
print("Saved tokenized test set.")


Map:   0%|          | 0/13125 [00:00<?, ? examples/s]

Map:   0%|          | 0/1641 [00:00<?, ? examples/s]

Map:   0%|          | 0/1641 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1641 [00:00<?, ? examples/s]

Saved tokenized test set.


In [None]:
# ============================================================
# 6. Define Training Arguments
# ============================================================
# Configure training hyperparameters and hardware setup.

training_args = TrainingArguments(
    output_dir="./qwen-medquad-finetuned",
    eval_strategy="steps",
    save_strategy="steps",
    eval_steps=500,
    save_steps=500,
    save_total_limit=2,

    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,

    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,

    report_to="none",
    seed=42,
    deepspeed=str(relative_path / "ds_config.json"),
    fp16=True,
)



In [None]:
# ============================================================
# 7. Define Data Collator
# ============================================================
# The default collator handles padding and batching.

data_collator = default_data_collator

In [None]:
# ============================================================
# 8. Initialize Trainer and Start Training
# ============================================================
# Set up Trainer with early stopping and begin fine-tuning.

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

  trainer = Trainer(


In [None]:
model.train()
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151645}.
W1116 14:32:57.037000 362 torch/utils/cpp_extension.py:2425] TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. 
W1116 14:32:57.037000 362 torch/utils/cpp_extension.py:2425] If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'] to specific architectures.


Installed CUDA version 12.5 does not match the version torch was compiled with 12.6 but since the APIs are compatible, accepting this combination


Step,Training Loss,Validation Loss
500,1.4598,1.395123
1000,1.2926,1.33193
1500,1.271,1.298607
2000,1.1925,1.281199


TrainOutput(global_step=2463, training_loss=1.3822962427545842, metrics={'train_runtime': 6646.1792, 'train_samples_per_second': 5.924, 'train_steps_per_second': 0.371, 'total_flos': 1.338909917184e+16, 'train_loss': 1.3822962427545842, 'epoch': 3.0})

In [None]:
# ============================================================
# 9. Save and Export Model
# ============================================================
# Save fine-tuned model and tokenizer, then zip for download.

trainer.save_model("./my-qwen-model")
tokenizer.save_pretrained("./my-qwen-model")

('./my-qwen-model/tokenizer_config.json',
 './my-qwen-model/special_tokens_map.json',
 './my-qwen-model/chat_template.jinja',
 './my-qwen-model/vocab.json',
 './my-qwen-model/merges.txt',
 './my-qwen-model/added_tokens.json',
 './my-qwen-model/tokenizer.json')

In [None]:
!zip -r model.zip my-qwen-model/
from google.colab import files
files.download('model.zip')

  adding: my-qwen-model/ (stored 0%)
  adding: my-qwen-model/README.md (deflated 65%)
  adding: my-qwen-model/merges.txt (deflated 57%)
  adding: my-qwen-model/tokenizer.json (deflated 81%)
  adding: my-qwen-model/adapter_model.safetensors (deflated 7%)
  adding: my-qwen-model/added_tokens.json (deflated 68%)
  adding: my-qwen-model/tokenizer_config.json (deflated 90%)
  adding: my-qwen-model/special_tokens_map.json (deflated 63%)
  adding: my-qwen-model/vocab.json (deflated 61%)
  adding: my-qwen-model/adapter_config.json (deflated 56%)
  adding: my-qwen-model/chat_template.jinja (deflated 76%)
  adding: my-qwen-model/training_args.bin (deflated 53%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>