In [2]:
import os
import pandas as pd
import numpy as np
import torch
import evaluate
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer
)
from google.colab import drive

## BERT Model

In [None]:
MODEL_NAME = "distilbert-base-uncased"

## Configurations

In [4]:
FINAL_MODEL_PATH = "/content/drive/MyDrive/drug_bank_bert_classifier_final"


DATA_FILE_PATH = "/content/BERT train data.csv"

LABEL_MAPPING = {
    "A": 0,  # RELEVANT_FACT
    "B": 1,  # UNSAFE_MEDICAL
    "C": 2,  # IRRELEVANT_GENERAL
}

NUM_LABELS = len(LABEL_MAPPING)
ID2LABEL = {i: label for label, i in LABEL_MAPPING.items()}
LABEL2ID = LABEL_MAPPING

#Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data Loading

In [6]:
try:
    # Load the data
    df = pd.read_csv(DATA_FILE_PATH)
except FileNotFoundError:
    print(f"Error: Data file not found at {DATA_FILE_PATH}. Please check the path and upload/mount.")
    exit()


df.rename(columns={'question': 'text', 'type': 'label_text'}, inplace=True)
df['label'] = df['label_text'].map(LABEL2ID)

# 80% train and 20% test
train_df, eval_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label']
)

train_dataset = Dataset.from_pandas(train_df).remove_columns(["__index_level_0__", "label_text"])
eval_dataset = Dataset.from_pandas(eval_df).remove_columns(["__index_level_0__", "label_text"])

print(f"\nTotal Samples: {len(df)}")


Total Samples: 460


## Tokenization

In [7]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_eval = eval_dataset.map(tokenize_function, batched=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/368 [00:00<?, ? examples/s]

Map:   0%|          | 0/92 [00:00<?, ? examples/s]

In [8]:
# MODEL LOADING
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS,
    id2label=ID2LABEL,
    label2id=LABEL2ID,
)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Model Training

In [10]:
# --- 6. METRICS & TRAINING ---
metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels, average="macro")

training_args = TrainingArguments(
    output_dir='bert_classifier_output', # Temporary folder
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

print("\nStarting BERT Classifier Fine-tuning...")
trainer.train()



  trainer = Trainer(



Starting BERT Classifier Fine-tuning...


Epoch,Training Loss,Validation Loss,F1
1,No log,0.454713,0.642495
2,No log,0.275201,1.0
3,No log,0.217228,0.978645


TrainOutput(global_step=69, training_loss=0.4446488532467165, metrics={'train_runtime': 82.571, 'train_samples_per_second': 13.37, 'train_steps_per_second': 0.836, 'total_flos': 146246616170496.0, 'train_loss': 0.4446488532467165, 'epoch': 3.0})

In [11]:
# SAVING MODEL
trainer.save_model(FINAL_MODEL_PATH)
tokenizer.save_pretrained(FINAL_MODEL_PATH)
print(f"\n BERT Classifier saved successfully to: {FINAL_MODEL_PATH}")

print("\nFinal Evaluation Report on Test Set:")
evaluation_results = trainer.evaluate(tokenized_eval)
print(evaluation_results)


 BERT Classifier saved successfully to: /content/drive/MyDrive/drug_bank_bert_classifier_final

Final Evaluation Report on Test Set:


{'eval_loss': 0.21722793579101562, 'eval_f1': 0.9786453119786452, 'eval_runtime': 1.2895, 'eval_samples_per_second': 71.343, 'eval_steps_per_second': 4.653, 'epoch': 3.0}


In [22]:
# INFERENCE

def classify_query(query, model_path=FINAL_MODEL_PATH):
    """Loads the trained model and classifies a single user query."""
    # Ensure model is loaded once
    try:
        classifier_model = AutoModelForSequenceClassification.from_pretrained(model_path)
        classifier_tokenizer = AutoTokenizer.from_pretrained(model_path)
    except Exception as e:
        print(f"Error loading saved classifier model from Drive: {e}")
        return "MODEL_LOAD_ERROR"

    # Tokenize the input query
    inputs = classifier_tokenizer(query, return_tensors="pt", truncation=True, padding=True)

    # Run the model
    with torch.no_grad():
        outputs = classifier_model(**inputs)

    # Get the predicted label (index of the highest logit)
    predicted_id = torch.argmax(outputs.logits, dim=1).item()

    # Map the ID back to the human-readable tag (A, B, or C)
    predicted_label = classifier_model.config.id2label[predicted_id]

    return predicted_label

# --- EXAMPLE GUARDRAIL EXECUTION ---

# Example 1: Relevant Query (Type A)
query1 = "Are you eating Aspirin?"
classification1 = classify_query(query1)

# Example 2: General Query (Expected Type C)
query2 = "Are you eating Chicken?"
classification2 = classify_query(query2)

# Example 3: Medical Advice Query (Type B - ADDED)
query3 = "if i have constipation shd i aspirin?"
classification3 = classify_query(query3)


print("\n--- Guardrail Test Results ---")
print(f"Query 1: {query1} -> Classification: {classification1}")
print(f"Query 2: {query2} -> Classification: {classification2}")
print(f"Query 3: {query3} -> Classification: {classification3}") # ADDED

if classification3 == 'B':
    print("\nAction for Query 3: BLOCK - Return safety warning.")
elif classification3 == 'A':
    print("\nAction for Query 3: PROCEED - Execute RAG pipeline.")
elif classification3 == 'C':
    print("\nAction for Query 3: BLOCK - Return 'Data not available' warning.")
else:
    print("\nAction for Query 3: UNKNOWN CLASSIFICATION.")


--- Guardrail Test Results ---
Query 1: Are you eating Aspirin? -> Classification: B
Query 2: Are you eating Chicken? -> Classification: C
Query 3: if i have constipation shd i aspirin? -> Classification: B

