In [None]:
%%capture
# Environment setup (the %%capture cell magic on the first line suppresses the pip output).
# Step 1: install the released Unsloth package together with a pinned xformers build.
!pip install unsloth "xformers==0.0.28.post2"
# Also get the latest nightly Unsloth!
# Step 2: remove the released package and reinstall Unsloth from the GitHub repository.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
import torch
# Show which PyTorch build the runtime currently provides.
print(torch.__version__)

2.5.0+cu124


In [None]:
pip install torch --upgrade

Collecting torch
  Downloading torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)


In [None]:
import pandas as pd
from datasets import Dataset
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template, standardize_sharegpt
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [None]:
# Shared settings: these three values are forwarded to FastLanguageModel.from_pretrained,
# and max_seq_length is reused by the tokenization step and the trainer.
max_seq_length = 2048
dtype = None  # Adjust as needed (e.g., torch.float16 for T4 or V100, torch.bfloat16 for Ampere+)
load_in_4bit = True

In [None]:
# Load the Llama 3.2 3B Instruct checkpoint and its tokenizer through Unsloth,
# using the sequence length, dtype and 4-bit settings chosen in the config cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit
)

==((====))==  Unsloth 2024.10.7: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers, TRL and unsloth via:
`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`


In [None]:
# Wrap the loaded model with LoRA adapters (rank 16, alpha 16, no dropout, no bias terms)
# on the attention projections (q/k/v/o) and the MLP projections (gate/up/down).
# `model` is rebound to the adapter-wrapped model, so re-running this cell without
# reloading the base model would wrap an already-wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407
)

Unsloth 2024.10.7 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [None]:
# Configure the tokenizer with the "llama-3.1" chat template; apply_chat_template is
# used later to render both the training text and the inference prompt.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

In [None]:
# Read the raw CSV from the Colab filesystem. It is expected to provide `question`
# and `answer` columns, which convert_to_conversations reads.
dataset_df = pd.read_csv("/content/Surgical_Dataset.csv")

# Convert DataFrame to Hugging Face dataset format
dataset = Dataset.from_pandas(dataset_df)

In [None]:
def convert_to_conversations(row):
    """Turn one question/answer row into a two-turn chat record.

    Args:
        row: Mapping with "question" and "answer" entries.

    Returns:
        A dict with a single "conversations" key holding the user turn
        (the question) followed by the assistant turn (the answer).
    """
    user_turn = {"role": "user", "content": row["question"]}
    assistant_turn = {"role": "assistant", "content": row["answer"]}
    return {"conversations": [user_turn, assistant_turn]}


In [None]:
# Add a "conversations" column to every row (row-wise map, one example at a time).
dataset = dataset.map(convert_to_conversations)

Map:   0%|          | 0/4397 [00:00<?, ? examples/s]

In [None]:
# Spot-check a single converted example.
dataset[5]["conversations"]

[{'content': 'What empirical treatment is commonly used for urinary tract infections?',
  'role': 'user'},
 {'content': 'Empirical treatment commonly used for urinary tract infections includes trimethoprim 400mg twice daily until sensitivity results are known.',
  'role': 'assistant'}]

In [None]:
# Run Unsloth's standardize_sharegpt helper over the dataset before templating.
# NOTE(review): the conversations already use role/content keys — confirm this step is still required.
dataset = standardize_sharegpt(dataset)

Standardizing format:   0%|          | 0/4397 [00:00<?, ? examples/s]

In [None]:
def formatting_prompts_func(examples):
    """Render every conversation in a batch to a chat-template string.

    Args:
        examples: Batch mapping whose "conversations" entry is a list of
            conversations (each a list of role/content messages, or empty/None).

    Returns:
        {"text": [...]} with exactly one string per input conversation. Missing
        conversations, template failures and None results all yield "".
    """

    def _render(conversation):
        # Nothing to format for a missing or empty conversation.
        if not conversation:
            return ""

        # Replace None message bodies with empty strings before templating.
        cleaned = [
            {"content": "" if turn["content"] is None else turn["content"], "role": turn["role"]}
            for turn in conversation
        ]

        try:
            rendered = tokenizer.apply_chat_template(cleaned, tokenize=False, add_generation_prompt=False)
        except Exception as e:
            # Best effort: report the problem and keep the row aligned with an empty text.
            print(f"Error formatting conversation: {cleaned}. Error: {e}")
            return ""

        return "" if rendered is None else rendered

    return {"text": [_render(conversation) for conversation in examples["conversations"]]}


In [None]:
# Add the rendered "text" column; batched=True hands formatting_prompts_func a dict of column lists.
dataset = dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/4397 [00:00<?, ? examples/s]

In [None]:
# Define the tokenize_and_add_labels function as provided
def tokenize_and_add_labels(examples):
    """Tokenize a batch of rendered chat texts and attach causal-LM labels.

    Args:
        examples: Batch mapping whose "text" entry is a list of strings.

    Returns:
        The tokenizer output with an added "labels" entry: one list per
        sequence that mirrors "input_ids", with -100 at every position whose
        attention mask is 0 so padding never contributes to the loss.
    """
    # Truncate only. Padding every row to max_seq_length made each batch
    # max_seq_length tokens wide regardless of content; padding is left to the
    # data collator, which pads each batch to its longest member.
    model_inputs = tokenizer(
        examples["text"],
        max_length=max_seq_length,
        truncation=True,
    )

    input_ids = model_inputs["input_ids"]
    attention_masks = model_inputs.get("attention_mask")

    if attention_masks is None:
        # No mask available: labels are an independent per-sequence copy of the ids.
        labels = [list(ids) for ids in input_ids]
    else:
        # -100 is the ignore index of the cross-entropy loss; masked (padded)
        # positions are excluded even if the tokenizer is configured to pad.
        labels = [
            [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
            for ids, mask in zip(input_ids, attention_masks)
        ]

    model_inputs["labels"] = labels
    return model_inputs

# Apply tokenization function to dataset
tokenized_dataset = dataset.map(tokenize_and_add_labels, batched=True)

# Use DataCollatorForSeq2Seq to handle padding and masking for both input_ids and labels
# NOTE(review): this `data_collator` is never passed to the trainer, which constructs its
# own DataCollatorForSeq2Seq(tokenizer = tokenizer) — confirm whether this instance is needed.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)


Map:   0%|          | 0/4397 [00:00<?, ? examples/s]

In [None]:
from transformers import Trainer

class CustomSFTTrainer(SFTTrainer):
    """SFTTrainer variant that fails fast when a batch arrives without labels."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Validate the batch, then compute the loss with the parent implementation.

        Args:
            model: The model being trained.
            inputs: Batch mapping produced by the data collator; must contain "labels".
            return_outputs: When True, return ``(loss, outputs)`` instead of just the loss.
            num_items_in_batch: Optional value supplied by newer ``transformers``
                Trainer versions. Accepting it keeps this override callable there;
                it is forwarded to the parent only when provided, so older parents
                that do not know the argument are unaffected.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is True.

        Raises:
            ValueError: If ``inputs`` has no "labels" entry.
        """
        # Check if labels are present in the inputs
        if "labels" not in inputs:
            raise ValueError("Labels are missing in the inputs. Please check that labels are properly added to the dataset.")

        # Delegate the forward pass and loss extraction to the parent class so any
        # loss normalisation it performs with num_items_in_batch stays consistent
        # with the rest of the training loop.
        extra_kwargs = {}
        if num_items_in_batch is not None:
            extra_kwargs["num_items_in_batch"] = num_items_in_batch

        return super().compute_loss(model, inputs, return_outputs=return_outputs, **extra_kwargs)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Build the trainer on the already-tokenized dataset (input_ids / attention_mask / labels
# produced by tokenize_and_add_labels). Batches are assembled by a fresh
# DataCollatorForSeq2Seq; mixed precision follows is_bfloat16_supported().
trainer =  CustomSFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = tokenized_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        # Effective batch size is 2 x 4 = 8 sequences per optimizer step on one device.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        # Short run: training stops after 60 optimizer steps.
        max_steps = 60,
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run fine-tuning with the trainer configured above (stops after max_steps = 60).
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 4,397 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 24,313,856


**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!
`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`


Step,Training Loss
1,14.3947
2,14.3576
3,13.8436
4,12.0133
5,9.4485
6,8.8744
7,8.5451
8,8.0437
9,7.7013
10,7.4143


TrainOutput(global_step=60, training_loss=6.626964402198792, metrics={'train_runtime': 1244.6473, 'train_samples_per_second': 0.386, 'train_steps_per_second': 0.048, 'total_flos': 1.90929784799232e+16, 'train_loss': 6.626964402198792, 'epoch': 0.10914051841746249})

In [None]:
from unsloth.chat_templates import get_chat_template

# Re-apply the "llama-3.1" chat template to the tokenizer (same call as before training).
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Single-turn prompt used as a smoke test of the fine-tuned model.
messages = [
    {"role": "user", "content": "What is the mortality rate for patients requiring surgical intervention who were unstable preoperatively?"},
]
# Render and tokenize the prompt, then move the resulting tensor to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

# Generate up to 200 new tokens.
# NOTE(review): temperature / min_p only matter when sampling is enabled — confirm the
# model's generation config turns sampling on, otherwise these values are ignored.
outputs = model.generate(input_ids = inputs, max_new_tokens = 200, use_cache = True,
                         temperature = 1.5, min_p = 0.1)
# Decode the full sequences (prompt + completion, special tokens included) for display.
tokenizer.batch_decode(outputs)

['<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the mortality rate for patients requiring surgical intervention who were unstable preoperatively?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nA study published in the British Journal of Surgery found that the mortality rate for patients requiring surgical intervention who were unstable preoperatively ranged from 20-50%.<|eot_id|>']

In [None]:
import locale
# Monkeypatch locale.getpreferredencoding so it always reports UTF-8 for the rest of the session.
# NOTE(review): presumably a workaround for shell (`!`) commands failing under a
# non-UTF-8 locale in Colab — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

In [None]:
!huggingface-cli login

model.push_to_hub("vishal042002/Llama3.2-3b-Instruct-ClinicalSurgery", check_pr=True)

tokenizer.push_to_hub("vishal042002/Llama3.2-3b-Instruct-ClinicalSurgery",check_pr=True)


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your ter

README.md:   0%|          | 0.00/601 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

Saved model to https://huggingface.co/vishal042002/Llama3.2-3b-Instruct-ClinicalSurgery


In [None]:
# Convert the dataset to a pandas DataFrame
# (`dataset` is the templated dataset with the "text" column, not `tokenized_dataset`).
formatted_dataset_df = dataset.to_pandas()

# Save the DataFrame to a CSV file
formatted_dataset_df.to_csv("/content/Formatted_Surgical_Dataset.csv", index=False)

print("Formatted dataset saved as CSV.")


Formatted dataset saved as CSV.
