In [None]:
# GoEmotions DeBERTa-v3-large Multi-Label Classification
# Using local caching for fast, offline training

!nvidia-smi

# Install system dependencies for SentencePiece
print("🔧 Installing system dependencies for SentencePiece...")
!apt-get update -qq
!apt-get install -y cmake build-essential pkg-config libgoogle-perftools-dev

# Install packages with conflict resolution
!pip install --upgrade pip --root-user-action=ignore
# Install PyTorch 2.6+ to fix CVE-2025-32434 vulnerability
!pip install torch>=2.6.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 --root-user-action=ignore

# Install SentencePiece properly (C++ library + Python wrapper)
print("📦 Installing SentencePiece with C++ support...")
!pip install sentencepiece --root-user-action=ignore

# Install other packages
!pip install transformers accelerate datasets evaluate scikit-learn tensorboard pyarrow tiktoken --root-user-action=ignore

# Change to the project root directory
import os
os.chdir('/home/user/goemotions-deberta')
print(f"📁 Current directory: {os.getcwd()}")

# Setup local caching (run this first time only)
print("🚀 Setting up local cache...")
!python3 scripts/setup_local_cache.py


Wed Sep  3 11:46:22 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.127.08             Driver Version: 550.127.08     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  |   00000000:C1:00.0 Off |                  N/A |
| 30%   26C    P8             38W /  350W |       2MiB /  24576MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA GeForce RTX 3090        On  |   00

In [17]:
# Training with DeBERTa-v3-large using local cache
#
# Launches scripts/train_deberta_local.py via `accelerate launch` with two
# processes and fp16 mixed precision; --output_dir is set to ./outputs/deberta.
# fp16 is requested twice: once on the launcher (--mixed_precision=fp16) and
# once as a script argument (--fp16), alongside --tf32.
# NOTE(review): assuming the script gives batch-size / accumulation flags their
# usual meaning, the effective batch is 8 per device x 4 accumulation steps x
# 2 processes = 64 samples per optimizer step -- confirm against the script.
!accelerate launch --num_processes=2 --mixed_precision=fp16 \
scripts/train_deberta_local.py \
--output_dir "./outputs/deberta" \
--model_type "deberta-v3-large" \
--per_device_train_batch_size 8 --per_device_eval_batch_size 16 \
--gradient_accumulation_steps 4 \
--num_train_epochs 3 \
--learning_rate 1e-5 --lr_scheduler_type cosine --warmup_ratio 0.1 \
--weight_decay 0.01 --fp16 --tf32 --gradient_checkpointing


🚀 GoEmotions DeBERTa Training (LOCAL CACHE VERSION)
📁 Output directory: ./outputs/deberta
🤖 Model: deberta-v3-large (from local cache)
📊 Dataset: GoEmotions (from local cache)
🤖 Loading deberta-v3-large...
📁 Found local cache at models/deberta-v3-large
🚀 GoEmotions DeBERTa Training (LOCAL CACHE VERSION)
📁 Output directory: ./outputs/deberta
🤖 Model: deberta-v3-large (from local cache)
📊 Dataset: GoEmotions (from local cache)
🤖 Loading deberta-v3-large...
📁 Found local cache at models/deberta-v3-large
✅ deberta-v3-large tokenizer loaded from local cache
✅ deberta-v3-large tokenizer loaded from local cache
⚠️  Failed to load from local cache: Error no file named pytorch_model.bin, model.safetensors, tf_model.h5, model.ckpt.index or flax_model.msgpack found in directory models/deberta-v3-large.
🔄 Will download fresh copy...
🔄 Downloading deberta-v3-large with offline mode strategy...
⚠️  Failed to load from local cache: Error no file named pytorch_model.bin, model.safetensors, tf_model.h5

In [18]:
# Check results
#
# Looks for the evaluation report under ./outputs/deberta and, if it exists,
# prints the model name and the F1 metrics it contains. Any metric missing
# from the report is shown as "N/A" instead of aborting the cell.
import json
import os

# Ensure we're in the right directory (the report path below is relative)
os.chdir('/home/user/goemotions-deberta')
print(f"📁 Current directory: {os.getcwd()}")

# Check DeBERTa results
deberta_report = "./outputs/deberta/eval_report.json"
if os.path.exists(deberta_report):
    print("🎉 DeBERTa-v3-large training completed!")
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(deberta_report, "r", encoding="utf-8") as f:
        rep = json.load(f)
    # Read every field with .get(..., "N/A") so a report lacking one key
    # still prints the remaining metrics rather than raising KeyError.
    print("Model:", rep.get("model", "N/A"))
    print("F1_micro:", rep.get("f1_micro", "N/A"), " F1_macro:", rep.get("f1_macro", "N/A"))
    print("F1_micro (t=0.3):", rep.get("f1_micro_t3", "N/A"), " F1_macro (t=0.3):", rep.get("f1_macro_t3", "N/A"))
    print("F1_micro (t=0.5):", rep.get("f1_micro_t5", "N/A"), " F1_macro (t=0.5):", rep.get("f1_macro_t5", "N/A"))
else:
    print("❌ DeBERTa training not completed yet")


📁 Current directory: /home/user/goemotions-deberta
❌ DeBERTa training not completed yet
