# Fine-Tuning HuggingFace Models in Isolated Sandboxes

This notebook demonstrates a complete ML workflow: loading a model from HuggingFace and fine-tuning it on custom data.

**Why isolation matters:**
- This runs in your own sandbox - your packages and training don't affect other team members
- You can safely experiment with any HuggingFace model without risking shared infrastructure
- Other data scientists work independently in their own sandboxes

## Step 1: Load Model from HuggingFace

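We'll use `distilbert-base-uncased-finetuned-sst-2-english`, a DistilBERT checkpoint that has already been fine-tuned for binary sentiment classification (POSITIVE / NEGATIVE).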

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch
import numpy as np

# Fetch the pre-trained checkpoint and its matching tokenizer from HuggingFace
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
print(f"Loading: {model_name}")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Report the parameter count as a quick confirmation that the load worked
param_count = model.num_parameters()
print(f"Loaded successfully ({param_count:,} parameters)")

## Step 2: Test Baseline Performance

In [None]:
# Sample reviews used to compare predictions before and after fine-tuning
reviews = [
    "This product is amazing! Best purchase ever.",
    "Terrible quality. Complete waste of money.",
    "It's okay, nothing special."
]

# Inference only: make sure dropout is disabled, even if this cell is re-run
# after the model has been put into training mode.
model.eval()

print("Baseline predictions:\n")
for review in reviews:
    # Keep the input tensors on the same device as the model weights
    inputs = tokenizer(
        review, return_tensors="pt", truncation=True, padding=True
    ).to(model.device)

    # No gradients are needed for prediction; skipping autograd saves memory
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.softmax(outputs.logits, dim=1)

    # Index 1 is treated as the positive class, index 0 as the negative class
    sentiment = "POSITIVE" if probs[0][1] > 0.5 else "NEGATIVE"
    conf = probs[0][1].item() if sentiment == "POSITIVE" else probs[0][0].item()

    print(f"{sentiment} ({conf:.0%}): {review}")

## Step 3: Prepare Training Data

In [None]:
# Labelled examples written as (text, label) pairs so each label sits next to
# its sentence; 1 marks a positive review and 0 a negative one.
train_examples = [
    ("Outstanding quality and service", 1),
    ("Very poor experience, not recommended", 0),
    ("Exceptional value for money", 1),
    ("Disappointed, requesting refund", 0),
    ("Highly recommend to everyone", 1),
    ("Defective product, broke immediately", 0),
    ("Perfect for my needs", 1),
    ("Terrible customer support", 0),
    ("Exceeded expectations", 1),
    ("Complete waste of time and money", 0),
    ("Will definitely buy again", 1),
    ("Avoid this product", 0),
    ("Exactly what I needed", 1),
    ("Very disappointed with purchase", 0),
    ("Best in its category", 1),
    ("Poor quality, not as described", 0),
]

val_examples = [
    ("Great product, worth it", 1),
    ("Awful, do not buy", 0),
    ("Excellent quality", 1),
    ("Total disappointment", 0),
]

# Column-oriented dicts in the layout expected by Dataset.from_dict
train_data = {
    'text': [text for text, _ in train_examples],
    'label': [label for _, label in train_examples],
}

val_data = {
    'text': [text for text, _ in val_examples],
    'label': [label for _, label in val_examples],
}

# Tokenize
def tokenize(examples):
    """Tokenize the 'text' column of a batch.

    Every entry is padded to, and truncated at, 128 tokens so all rows
    come out with the same length.
    """
    texts = examples['text']
    return tokenizer(texts, padding='max_length', truncation=True, max_length=128)

# Create datasets and tokenize each split in a single batched pass
train_dataset = Dataset.from_dict(train_data).map(tokenize, batched=True)
val_dataset = Dataset.from_dict(val_data).map(tokenize, batched=True)

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

## Step 4: Fine-Tune the Model

In [None]:
# Training configuration
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # 16 training examples / batch size 4 * 3 epochs is roughly 12 optimizer
    # steps on a single device. A 10-step warmup would spend almost the whole
    # run ramping the learning rate up, so keep the warmup short.
    warmup_steps=2,
    weight_decay=0.01,
    logging_steps=5,
    # Evaluate and checkpoint on the same schedule; load_best_model_at_end
    # needs a checkpoint for every evaluation it may want to restore.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

def compute_metrics(eval_pred):
    """Return classification accuracy for one evaluation pass.

    Args:
        eval_pred: A pair of (per-class scores, true labels). The scores
            are reduced to a predicted class with argmax over axis 1.

    Returns:
        A dict with a single 'accuracy' entry: the fraction of rows whose
        predicted class equals the label.
    """
    scores, labels = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    is_correct = predicted_classes == labels
    return {'accuracy': is_correct.mean()}

# Wire the model, training arguments, both dataset splits and the metric
# function into a single Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

print("Starting fine-tuning...")
result = trainer.train()

# Pull the loss off the training result for the summary line
final_loss = result.training_loss
print("Training complete")
print(f"Final loss: {final_loss:.4f}")

## Step 5: Evaluate Fine-Tuned Model

In [None]:
# Evaluate on the validation split
eval_results = trainer.evaluate()
print(f"Validation Accuracy: {eval_results['eval_accuracy']:.1%}")

# The Trainer may have moved the model onto an accelerator (e.g. a GPU), so
# inputs must follow it to model.device; otherwise the forward pass fails
# with a device-mismatch error. Also make sure dropout is off for inference.
model.eval()

# Test on original reviews
print("\nFine-tuned predictions:\n")
for review in reviews:
    inputs = tokenizer(
        review, return_tensors="pt", truncation=True, padding=True
    ).to(model.device)

    # No gradients are needed for prediction; skipping autograd saves memory
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.softmax(outputs.logits, dim=1)

    # Index 1 is treated as the positive class, index 0 as the negative class
    sentiment = "POSITIVE" if probs[0][1] > 0.5 else "NEGATIVE"
    conf = probs[0][1].item() if sentiment == "POSITIVE" else probs[0][0].item()

    print(f"{sentiment} ({conf:.0%}): {review}")

## Step 6: Save the Model

In [None]:
# Save to persistent workspace
save_dir = "./fine_tuned_model"

# Model weights first, then the tokenizer files, both into the same directory
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)

print(f"Model saved to {save_dir}")
print("This is in your persistent workspace and survives pod restarts")