# 🚀 COMPREHENSIVE ULTIMATE BULLETPROOF EMOTION DETECTION TRAINING
## All Advanced Features + Technical Fixes

**FEATURES INCLUDED:**
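✅ Configuration preservation (the label mapping is saved and re-verified after reload, preventing the 8.3% vs 75% score discrepancy; 8.3% is roughly chance level for 12 classes)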
✅ Focal loss (handles class imbalance)
✅ Class weighting (WeightedLossTrainer)
✅ Data augmentation (synonym substitution, intensity modifiers, and context templates)
✅ Advanced validation (held-out validation split, predictions on unseen examples, and a prediction-distribution bias check)
✅ WandB integration with secrets
✅ Model architecture fixes
✅ Comprehensive dataset

**Target**: Reliable 75-85% F1 score with consistent performance

In [None]:
# Install required packages
# (IPython shell escape: runs pip from the notebook; package versions are not pinned.)
!pip install transformers torch scikit-learn numpy pandas huggingface_hub wandb

In [None]:
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score, precision_score, recall_score
from sklearn.utils.class_weight import compute_class_weight
import json
import warnings
warnings.filterwarnings('ignore')

# Report the PyTorch build and whether a CUDA device is visible before any model is loaded.
print('✅ All packages imported successfully')
print(f'PyTorch version: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')

## 🔑 WANDB API KEY SETUP

In [None]:
# Setup Weights & Biases API key from Google Colab secrets
#
# Outcome of this cell:
#   * On a successful login, os.environ['WANDB_API_KEY'] is set, which later
#     cells use as the signal that WandB reporting may be enabled.
#   * On any failure (not on Colab, no secret, login rejected) the variable is
#     NOT set by this cell, so "continuing without WandB" is actually true.
import os
import wandb

print('🔑 SETTING UP WANDB API KEY')
print('=' * 40)

# google.colab only exists inside a Colab runtime; anywhere else fall back to
# printing manual setup instructions.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

if userdata is None:
    print('⚠️ Google Colab secrets not available')
    print('\n📋 TO SET UP WANDB:')
    print('1. Get your API key from: https://wandb.ai/authorize')
    print('2. Run: wandb login')
    print('3. Enter your API key when prompted')
    print('\n⚠️ Continuing without WandB logging...')
else:
    # Secret names probed in order; the first one holding a non-empty value wins.
    possible_secret_names = [
        'WANDB_API_KEY',
        'wandb_api_key',
        'WANDB_KEY',
        'wandb_key',
        'WANDB_TOKEN',
        'wandb_token'
    ]

    api_key = None
    used_secret_name = None

    for secret_name in possible_secret_names:
        try:
            candidate = userdata.get(secret_name)
        except Exception:
            # userdata.get is expected to raise for a missing or inaccessible
            # secret; `Exception` (rather than a bare except) keeps
            # KeyboardInterrupt/SystemExit working.
            continue
        if candidate:
            api_key = candidate
            used_secret_name = secret_name
            print(f'✅ Found API key in secret: {secret_name}')
            break

    if api_key:
        # Test wandb login before exporting the key, so a rejected key does not
        # leave WandB reporting switched on for the training run.
        try:
            wandb.login(key=api_key)
        except Exception as e:
            print(f'⚠️ WandB login failed: {str(e)}')
            print('Continuing without WandB...')
        else:
            os.environ['WANDB_API_KEY'] = api_key
            print(f'✅ API key set from secret: {used_secret_name}')
            print('✅ WandB login successful!')
    else:
        print('❌ No WandB API key found in secrets')
        print('\n📋 TO SET UP WANDB SECRET:')
        print('1. Go to Colab → Settings → Secrets')
        print('2. Add a new secret with name: WANDB_API_KEY')
        print('3. Value: Your WandB API key from https://wandb.ai/authorize')
        print('4. Restart runtime and run this cell again')
        print('\n⚠️ Continuing without WandB logging...')

print('\n✅ WandB setup completed')

## 🔍 VERIFYING SPECIALIZED MODEL ACCESS

In [None]:
print('🔍 VERIFYING SPECIALIZED MODEL ACCESS')
print('=' * 50)

# Checkpoint we would like to fine-tune; replaced by 'roberta-base' below if it
# cannot be loaded.
specialized_model_name = 'j-hartmann/emotion-english-distilroberta-base'

try:
    print(f'Testing access to: {specialized_model_name}')
    test_tokenizer = AutoTokenizer.from_pretrained(specialized_model_name)
    test_model = AutoModelForSequenceClassification.from_pretrained(specialized_model_name)

    # Summarise the loaded checkpoint's configuration.
    _probe_config = test_model.config
    print('✅ SUCCESS: Specialized model loaded!')
    print(f'Model type: {_probe_config.model_type}')
    print(f'Architecture: {_probe_config.architectures[0]}')
    print(f'Hidden layers: {_probe_config.num_hidden_layers}')
    print(f'Hidden size: {_probe_config.hidden_size}')
    print(f'Number of labels: {_probe_config.num_labels}')
    print(f'Original labels: {_probe_config.id2label}')

    # Verify it's actually DistilRoBERTa (checked via the 6-layer depth only)
    if _probe_config.num_hidden_layers != 6:
        print('⚠️  WARNING: This may not be the expected DistilRoBERTa model')
    else:
        print('✅ CONFIRMED: This is DistilRoBERTa architecture')

except Exception as load_error:
    # Any failure while loading or inspecting the checkpoint lands here and
    # switches the notebook to the generic RoBERTa base model.
    print(f'❌ ERROR: Cannot access specialized model: {str(load_error)}')
    print('\n🔧 FALLBACK: Using roberta-base instead')
    specialized_model_name = 'roberta-base'
    test_tokenizer = AutoTokenizer.from_pretrained(specialized_model_name)
    test_model = AutoModelForSequenceClassification.from_pretrained(
        specialized_model_name,
        num_labels=12,
    )
    print(f'✅ Fallback model loaded: {specialized_model_name}')

## 🎯 DEFINING EMOTION CLASSES

In [None]:
# Define our emotion classes.
# The position of a name in this list is used as its integer label id.
emotions = [
    'anxious', 'calm', 'content', 'excited',
    'frustrated', 'grateful', 'happy', 'hopeful',
    'overwhelmed', 'proud', 'sad', 'tired',
]
print(f'🎯 Our emotion classes: {emotions}')
print(f'📊 Number of emotions: {len(emotions)}')

## 📊 CREATING COMPREHENSIVE ENHANCED DATASET

In [None]:
print('📊 CREATING COMPREHENSIVE ENHANCED DATASET')
print('=' * 50)

# Comprehensive balanced dataset with multiple samples per emotion.
# Each entry is {'text': <sentence>, 'label': <int>}. There are 20 hand-written
# sentences for each of the 12 label ids (0-11), grouped below under a comment
# naming the emotion that the label id stands for.
base_data = [
    # anxious (20 samples)
    {'text': 'I feel anxious about the presentation.', 'label': 0},
    {'text': 'I am anxious about the future.', 'label': 0},
    {'text': 'This makes me feel anxious.', 'label': 0},
    {'text': 'I am feeling anxious today.', 'label': 0},
    {'text': 'The uncertainty makes me anxious.', 'label': 0},
    {'text': 'I feel anxious about the results.', 'label': 0},
    {'text': 'This situation is making me anxious.', 'label': 0},
    {'text': 'I am anxious about the meeting.', 'label': 0},
    {'text': 'The pressure is making me anxious.', 'label': 0},
    {'text': 'I feel anxious about the decision.', 'label': 0},
    {'text': 'This is causing me anxiety.', 'label': 0},
    {'text': 'I am anxious about the changes.', 'label': 0},
    {'text': 'I feel worried about the outcome.', 'label': 0},
    {'text': 'I am nervous about the interview.', 'label': 0},
    {'text': 'This makes me feel uneasy.', 'label': 0},
    {'text': 'I am concerned about the situation.', 'label': 0},
    {'text': 'I feel tense about the deadline.', 'label': 0},
    {'text': 'I am stressed about the project.', 'label': 0},
    {'text': 'This gives me anxiety.', 'label': 0},
    {'text': 'I feel restless about the future.', 'label': 0},
    
    # calm (20 samples)
    {'text': 'I feel calm and peaceful.', 'label': 1},
    {'text': 'I am feeling calm today.', 'label': 1},
    {'text': 'This makes me feel calm.', 'label': 1},
    {'text': 'I am calm about the situation.', 'label': 1},
    {'text': 'I feel calm and relaxed.', 'label': 1},
    {'text': 'This gives me a sense of calm.', 'label': 1},
    {'text': 'I am feeling calm and centered.', 'label': 1},
    {'text': 'This brings me calm.', 'label': 1},
    {'text': 'I feel calm and at peace.', 'label': 1},
    {'text': 'I am calm about the outcome.', 'label': 1},
    {'text': 'This creates a feeling of calm.', 'label': 1},
    {'text': 'I feel calm and collected.', 'label': 1},
    {'text': 'I am feeling serene today.', 'label': 1},
    {'text': 'This makes me feel tranquil.', 'label': 1},
    {'text': 'I feel peaceful and relaxed.', 'label': 1},
    {'text': 'This gives me inner peace.', 'label': 1},
    {'text': 'I am feeling centered and calm.', 'label': 1},
    {'text': 'This brings me tranquility.', 'label': 1},
    {'text': 'I feel at ease with everything.', 'label': 1},
    {'text': 'I am in a peaceful state of mind.', 'label': 1},
    
    # content (20 samples)
    {'text': 'I feel content with my life.', 'label': 2},
    {'text': 'I am content with the results.', 'label': 2},
    {'text': 'This makes me feel content.', 'label': 2},
    {'text': 'I am feeling content today.', 'label': 2},
    {'text': 'I feel content and satisfied.', 'label': 2},
    {'text': 'This gives me contentment.', 'label': 2},
    {'text': 'I am content with my choices.', 'label': 2},
    {'text': 'I feel content and fulfilled.', 'label': 2},
    {'text': 'This brings me contentment.', 'label': 2},
    {'text': 'I am content with the situation.', 'label': 2},
    {'text': 'I feel content and at ease.', 'label': 2},
    {'text': 'This creates contentment in me.', 'label': 2},
    {'text': 'I am satisfied with my progress.', 'label': 2},
    {'text': 'This makes me feel fulfilled.', 'label': 2},
    {'text': 'I feel pleased with the outcome.', 'label': 2},
    {'text': 'This gives me satisfaction.', 'label': 2},
    {'text': 'I am happy with my current state.', 'label': 2},
    {'text': 'I feel gratified with the results.', 'label': 2},
    {'text': 'This brings me fulfillment.', 'label': 2},
    {'text': 'I am at peace with my situation.', 'label': 2},
    
    # excited (20 samples)
    {'text': 'I am excited about the new opportunity.', 'label': 3},
    {'text': 'I feel excited about the future.', 'label': 3},
    {'text': 'This makes me feel excited.', 'label': 3},
    {'text': 'I am feeling excited today.', 'label': 3},
    {'text': 'I feel excited and enthusiastic.', 'label': 3},
    {'text': 'This gives me excitement.', 'label': 3},
    {'text': 'I am excited about the project.', 'label': 3},
    {'text': 'I feel excited and motivated.', 'label': 3},
    {'text': 'This brings me excitement.', 'label': 3},
    {'text': 'I am excited about the possibilities.', 'label': 3},
    {'text': 'I feel excited and energized.', 'label': 3},
    {'text': 'This creates excitement in me.', 'label': 3},
    {'text': 'I am thrilled about the news.', 'label': 3},
    {'text': 'This makes me feel enthusiastic.', 'label': 3},
    {'text': 'I feel eager about the opportunity.', 'label': 3},
    {'text': 'This gives me energy and motivation.', 'label': 3},
    {'text': 'I am pumped about the challenge.', 'label': 3},
    {'text': 'I feel energized by the possibilities.', 'label': 3},
    {'text': 'This brings me enthusiasm.', 'label': 3},
    {'text': 'I am looking forward to this.', 'label': 3},
    
    # frustrated (20 samples)
    {'text': 'I am so frustrated with this project.', 'label': 4},
    {'text': 'I feel frustrated about the situation.', 'label': 4},
    {'text': 'This makes me feel frustrated.', 'label': 4},
    {'text': 'I am feeling frustrated today.', 'label': 4},
    {'text': 'I feel frustrated and annoyed.', 'label': 4},
    {'text': 'This gives me frustration.', 'label': 4},
    {'text': 'I am frustrated with the results.', 'label': 4},
    {'text': 'I feel frustrated and irritated.', 'label': 4},
    {'text': 'This brings me frustration.', 'label': 4},
    {'text': 'I am frustrated with the process.', 'label': 4},
    {'text': 'I feel frustrated and upset.', 'label': 4},
    {'text': 'This creates frustration in me.', 'label': 4},
    {'text': 'I am annoyed with the problems.', 'label': 4},
    {'text': 'This makes me feel irritated.', 'label': 4},
    {'text': 'I feel aggravated by the situation.', 'label': 4},
    {'text': 'This gives me annoyance.', 'label': 4},
    {'text': 'I am bothered by the issues.', 'label': 4},
    {'text': 'I feel irritated with the process.', 'label': 4},
    {'text': 'This brings me annoyance.', 'label': 4},
    {'text': 'I am upset with the situation.', 'label': 4},
    
    # grateful (20 samples)
    {'text': 'I am grateful for all the support.', 'label': 5},
    {'text': 'I feel grateful for the opportunity.', 'label': 5},
    {'text': 'This makes me feel grateful.', 'label': 5},
    {'text': 'I am feeling grateful today.', 'label': 5},
    {'text': 'I feel grateful and thankful.', 'label': 5},
    {'text': 'This gives me gratitude.', 'label': 5},
    {'text': 'I am grateful for the help.', 'label': 5},
    {'text': 'I feel grateful and appreciative.', 'label': 5},
    {'text': 'This brings me gratitude.', 'label': 5},
    {'text': 'I am grateful for the kindness.', 'label': 5},
    {'text': 'I feel grateful and blessed.', 'label': 5},
    {'text': 'This creates gratitude in me.', 'label': 5},
    {'text': 'I am thankful for the support.', 'label': 5},
    {'text': 'This makes me feel appreciative.', 'label': 5},
    {'text': 'I feel blessed by the opportunity.', 'label': 5},
    {'text': 'This gives me appreciation.', 'label': 5},
    {'text': 'I am indebted to the help.', 'label': 5},
    {'text': 'I feel thankful for the kindness.', 'label': 5},
    {'text': 'This brings me appreciation.', 'label': 5},
    {'text': 'I am blessed with good fortune.', 'label': 5},
    
    # happy (20 samples)
    {'text': 'I am feeling really happy today!', 'label': 6},
    {'text': 'I feel happy about the news.', 'label': 6},
    {'text': 'This makes me feel happy.', 'label': 6},
    {'text': 'I am feeling happy today.', 'label': 6},
    {'text': 'I feel happy and joyful.', 'label': 6},
    {'text': 'This gives me happiness.', 'label': 6},
    {'text': 'I am happy with the results.', 'label': 6},
    {'text': 'I feel happy and delighted.', 'label': 6},
    {'text': 'This brings me happiness.', 'label': 6},
    {'text': 'I am happy about the success.', 'label': 6},
    {'text': 'I feel happy and cheerful.', 'label': 6},
    {'text': 'This creates happiness in me.', 'label': 6},
    {'text': 'I am joyful about the completion.', 'label': 6},
    {'text': 'This makes me feel delighted.', 'label': 6},
    {'text': 'I feel cheerful about the outcome.', 'label': 6},
    {'text': 'This gives me joy.', 'label': 6},
    {'text': 'I am pleased with the results.', 'label': 6},
    {'text': 'I feel delighted by the news.', 'label': 6},
    {'text': 'This brings me joy.', 'label': 6},
    {'text': 'I am cheerful about the future.', 'label': 6},
    
    # hopeful (20 samples)
    {'text': 'I am hopeful for the future.', 'label': 7},
    {'text': 'I feel hopeful about the outcome.', 'label': 7},
    {'text': 'This makes me feel hopeful.', 'label': 7},
    {'text': 'I am feeling hopeful today.', 'label': 7},
    {'text': 'I feel hopeful and optimistic.', 'label': 7},
    {'text': 'This gives me hope.', 'label': 7},
    {'text': 'I am hopeful about the changes.', 'label': 7},
    {'text': 'I feel hopeful and positive.', 'label': 7},
    {'text': 'This brings me hope.', 'label': 7},
    {'text': 'I am hopeful about the possibilities.', 'label': 7},
    {'text': 'I feel hopeful and confident.', 'label': 7},
    {'text': 'This creates hope in me.', 'label': 7},
    {'text': 'I am optimistic about tomorrow.', 'label': 7},
    {'text': 'This makes me feel positive.', 'label': 7},
    {'text': 'I feel confident about the future.', 'label': 7},
    {'text': 'This gives me optimism.', 'label': 7},
    {'text': 'I am assured about the outcome.', 'label': 7},
    {'text': 'I feel positive about the changes.', 'label': 7},
    {'text': 'This brings me optimism.', 'label': 7},
    {'text': 'I am confident about the possibilities.', 'label': 7},
    
    # overwhelmed (20 samples)
    {'text': 'I am feeling overwhelmed with tasks.', 'label': 8},
    {'text': 'I feel overwhelmed by the workload.', 'label': 8},
    {'text': 'This makes me feel overwhelmed.', 'label': 8},
    {'text': 'I am feeling overwhelmed today.', 'label': 8},
    {'text': 'I feel overwhelmed and stressed.', 'label': 8},
    {'text': 'This gives me overwhelm.', 'label': 8},
    {'text': 'I am overwhelmed with responsibilities.', 'label': 8},
    {'text': 'I feel overwhelmed and exhausted.', 'label': 8},
    {'text': 'This brings me overwhelm.', 'label': 8},
    {'text': 'I am overwhelmed with the pressure.', 'label': 8},
    {'text': 'I feel overwhelmed and drained.', 'label': 8},
    {'text': 'This creates overwhelm in me.', 'label': 8},
    {'text': 'I am stressed with the workload.', 'label': 8},
    {'text': 'This makes me feel burdened.', 'label': 8},
    {'text': 'I feel swamped with tasks.', 'label': 8},
    {'text': 'This gives me stress.', 'label': 8},
    {'text': 'I am flooded with responsibilities.', 'label': 8},
    {'text': 'I feel burdened by the pressure.', 'label': 8},
    {'text': 'This brings me stress.', 'label': 8},
    {'text': 'I am exhausted from the workload.', 'label': 8},
    
    # proud (20 samples)
    {'text': 'I am proud of my accomplishments.', 'label': 9},
    {'text': 'I feel proud of the results.', 'label': 9},
    {'text': 'This makes me feel proud.', 'label': 9},
    {'text': 'I am feeling proud today.', 'label': 9},
    {'text': 'I feel proud and accomplished.', 'label': 9},
    {'text': 'This gives me pride.', 'label': 9},
    {'text': 'I am proud of my achievements.', 'label': 9},
    {'text': 'I feel proud and satisfied.', 'label': 9},
    {'text': 'This brings me pride.', 'label': 9},
    {'text': 'I am proud of my progress.', 'label': 9},
    {'text': 'I feel proud and confident.', 'label': 9},
    {'text': 'This creates pride in me.', 'label': 9},
    {'text': 'I am accomplished in my work.', 'label': 9},
    {'text': 'This makes me feel satisfied.', 'label': 9},
    {'text': 'I feel confident about my abilities.', 'label': 9},
    {'text': 'This gives me confidence.', 'label': 9},
    {'text': 'I am pleased with my performance.', 'label': 9},
    {'text': 'I feel satisfied with my work.', 'label': 9},
    {'text': 'This brings me satisfaction.', 'label': 9},
    {'text': 'I am confident in my skills.', 'label': 9},
    
    # sad (20 samples)
    {'text': 'I feel sad about the loss.', 'label': 10},
    {'text': 'I am sad about the situation.', 'label': 10},
    {'text': 'This makes me feel sad.', 'label': 10},
    {'text': 'I am feeling sad today.', 'label': 10},
    {'text': 'I feel sad and down.', 'label': 10},
    {'text': 'This gives me sadness.', 'label': 10},
    {'text': 'I am sad about the outcome.', 'label': 10},
    {'text': 'I feel sad and depressed.', 'label': 10},
    {'text': 'This brings me sadness.', 'label': 10},
    {'text': 'I am sad about the news.', 'label': 10},
    {'text': 'I feel sad and heartbroken.', 'label': 10},
    {'text': 'This creates sadness in me.', 'label': 10},
    {'text': 'I am down about the situation.', 'label': 10},
    {'text': 'This makes me feel depressed.', 'label': 10},
    {'text': 'I feel melancholy about the loss.', 'label': 10},
    {'text': 'This gives me sorrow.', 'label': 10},
    {'text': 'I am blue about the outcome.', 'label': 10},
    {'text': 'I feel heartbroken by the news.', 'label': 10},
    {'text': 'This brings me sorrow.', 'label': 10},
    {'text': 'I am depressed about the situation.', 'label': 10},
    
    # tired (20 samples)
    {'text': 'I am tired from working all day.', 'label': 11},
    {'text': 'I feel tired of the routine.', 'label': 11},
    {'text': 'This makes me feel tired.', 'label': 11},
    {'text': 'I am feeling tired today.', 'label': 11},
    {'text': 'I feel tired and exhausted.', 'label': 11},
    {'text': 'This gives me fatigue.', 'label': 11},
    {'text': 'I am tired of the stress.', 'label': 11},
    {'text': 'I feel tired and worn out.', 'label': 11},
    {'text': 'This brings me fatigue.', 'label': 11},
    {'text': 'I am tired of the pressure.', 'label': 11},
    {'text': 'I feel tired and drained.', 'label': 11},
    {'text': 'This creates fatigue in me.', 'label': 11},
    {'text': 'I am exhausted from the work.', 'label': 11},
    {'text': 'This makes me feel fatigued.', 'label': 11},
    {'text': 'I feel weary from the routine.', 'label': 11},
    {'text': 'This gives me exhaustion.', 'label': 11},
    {'text': 'I am drained from the stress.', 'label': 11},
    {'text': 'I feel worn out from the pressure.', 'label': 11},
    {'text': 'This brings me exhaustion.', 'label': 11},
    {'text': 'I am fatigued from the workload.', 'label': 11}
]

# Size of the hand-written set before any augmentation is applied.
print(f'📊 Base dataset size: {len(base_data)} samples')

# Advanced data augmentation function
def augment_text(text, emotion, emotion_names=None):
    """Create augmented training samples for one labelled sentence.

    Three kinds of variants are produced, in this order:

    1. Synonym replacement: each of the first three synonyms for ``emotion``
       replaces whole-word occurrences of ``emotion`` in ``text``. Matching is
       case-sensitive and restricted to word boundaries, so derived words such
       as "contentment" or "sadness" are left untouched instead of being turned
       into non-words.
    2. Intensity templates: ``'I am <intensity> <emotion>.'`` for the first
       three intensity words that do not already occur in ``text``.
    3. Context templates: the first two fixed context sentences.

    Variants 2 and 3 depend only on ``emotion`` (plus the intensity check), so
    calling this for several sentences of the same emotion returns identical
    entries; callers should deduplicate before splitting into train/validation.

    Args:
        text: The original sentence.
        emotion: Name of the emotion the sentence is labelled with.
        emotion_names: Ordered list of emotion names used to turn ``emotion``
            into an integer label. Defaults to the module-level ``emotions``.

    Returns:
        A list of ``{'text': str, 'label': int}`` dicts.

    Raises:
        ValueError: If ``emotion`` is not in ``emotion_names``.
    """
    import re  # local import keeps this cell self-contained

    if emotion_names is None:
        emotion_names = emotions
    label = emotion_names.index(emotion)

    augmented = []

    # Synonym replacement with emotion-specific synonyms
    synonyms = {
        'anxious': ['worried', 'nervous', 'concerned', 'uneasy', 'tense', 'stressed'],
        'calm': ['peaceful', 'serene', 'tranquil', 'relaxed', 'composed', 'centered'],
        'content': ['satisfied', 'fulfilled', 'pleased', 'happy', 'gratified', 'at ease'],
        'excited': ['thrilled', 'enthusiastic', 'eager', 'pumped', 'energized', 'motivated'],
        'frustrated': ['annoyed', 'irritated', 'aggravated', 'bothered', 'upset', 'angry'],
        'grateful': ['thankful', 'appreciative', 'blessed', 'indebted', 'obliged', 'pleased'],
        'happy': ['joyful', 'cheerful', 'delighted', 'pleased', 'glad', 'elated'],
        'hopeful': ['optimistic', 'positive', 'confident', 'assured', 'encouraged', 'upbeat'],
        'overwhelmed': ['stressed', 'burdened', 'swamped', 'flooded', 'exhausted', 'drained'],
        'proud': ['accomplished', 'satisfied', 'confident', 'pleased', 'fulfilled', 'achieved'],
        'sad': ['down', 'depressed', 'melancholy', 'blue', 'heartbroken', 'sorrowful'],
        'tired': ['exhausted', 'fatigued', 'weary', 'drained', 'worn out', 'spent']
    }

    # Whole-word match only: a plain str.replace would also rewrite substrings
    # ("contentment" -> "satisfiedment").
    whole_word = re.compile(rf'\b{re.escape(emotion)}\b')
    for synonym in synonyms.get(emotion, [emotion])[:3]:  # Use first 3 synonyms
        # A callable replacement inserts the synonym literally (no backslash
        # escapes are interpreted).
        new_text = whole_word.sub(lambda _match: synonym, text)
        if new_text != text:
            augmented.append({'text': new_text, 'label': label})

    # Add intensity variations (only the first three words are used)
    intensity_words = ['really', 'very', 'extremely', 'quite', 'somewhat', 'incredibly', 'absolutely']
    lowered = text.lower()
    for intensity in intensity_words[:3]:
        if intensity not in lowered:
            augmented.append({'text': f'I am {intensity} {emotion}.', 'label': label})

    # Add context variations (only the first two templates are used)
    contexts = [
        f'Right now, I feel {emotion}.',
        f'At this moment, I am {emotion}.',
        f'Currently, I feel {emotion}.',
        f'In this situation, I am {emotion}.'
    ]
    for context in contexts[:2]:
        augmented.append({'text': context, 'label': label})

    return augmented

# Apply comprehensive augmentation
#
# augment_text() emits the same template sentences for every base sentence of
# an emotion, and some synonym rewrites reproduce a hand-written sentence
# (occasionally one that carries a different label). Exact duplicates would end
# up on both sides of the random train/validation split and inflate the
# validation scores, so only texts not seen before are kept. Base sentences are
# registered first, which means a hand-written label always wins over a
# conflicting augmented one.
seen_texts = {item['text'] for item in base_data}
augmented_data = []
for item in base_data:
    emotion = emotions[item['label']]
    for candidate in augment_text(item['text'], emotion):
        if candidate['text'] in seen_texts:
            continue
        seen_texts.add(candidate['text'])
        augmented_data.append(candidate)

# Combine base and (unique) augmented data
enhanced_data = base_data + augmented_data
print(f'📊 Enhanced dataset size: {len(enhanced_data)} samples')
print(f'📊 Augmentation added: {len(augmented_data)} samples')

# Convert to parallel lists for splitting and tokenization
texts = [item['text'] for item in enhanced_data]
labels = [item['label'] for item in enhanced_data]

print(f'✅ Comprehensive dataset prepared with {len(texts)} samples')

## 🔧 MODEL SETUP WITH ARCHITECTURE FIXES

In [None]:
# Load model and tokenizer
# Use the checkpoint selected by the access check (it may have fallen back to a
# different model); default to the specialized checkpoint if that cell was not run.
model_name = globals().get(
    'specialized_model_name',
    'j-hartmann/emotion-english-distilroberta-base',
)
print(f'🔧 Loading model: {model_name}')

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Only the configuration is needed to report the checkpoint's original label
# set, so fetch it once instead of instantiating the full model twice.
from transformers import AutoConfig

original_config = AutoConfig.from_pretrained(model_name)
print(f'Original model labels: {original_config.num_labels}')
print(f'Original id2label: {original_config.id2label}')

# CRITICAL: Create a NEW model with correct configuration from scratch
print('\n🔧 CREATING NEW MODEL WITH CORRECT ARCHITECTURE')
print('=' * 60)

# Create a new model with the correct number of labels
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(emotions),  # Set to 12 emotions
    ignore_mismatched_sizes=True  # Classification head size differs from the checkpoint's
)

# Configure the model properly so the label mapping is stored with the model
model.config.num_labels = len(emotions)
model.config.id2label = {i: emotion for i, emotion in enumerate(emotions)}
model.config.label2id = {emotion: i for i, emotion in enumerate(emotions)}
model.config.problem_type = 'single_label_classification'

# Verify the configuration
print(f'✅ Model created with {model.config.num_labels} labels')
print(f'✅ New id2label: {model.config.id2label}')
print(f'✅ Classifier output size: {model.classifier.out_proj.out_features}')
print(f'✅ Problem type: {model.config.problem_type}')

# Test the model with a sample input (done on CPU, before the model is moved)
test_input = tokenizer('I feel happy today', return_tensors='pt', truncation=True, padding=True)
with torch.no_grad():
    test_output = model(**test_input)
print(f'✅ Test output shape: {test_output.logits.shape}')
print(f'✅ Expected shape: [1, {len(emotions)}]')
# Explicit raise rather than `assert`, which is stripped under `python -O`.
if test_output.logits.shape[1] != len(emotions):
    raise RuntimeError(f'Output shape mismatch: {test_output.logits.shape[1]} != {len(emotions)}')
print('✅ Model architecture verified!')

# Move model to GPU
if torch.cuda.is_available():
    model = model.to('cuda')
    print('✅ Model moved to GPU')
else:
    print('⚠️ CUDA not available, model will run on CPU')

## 📊 DATA PREPROCESSING AND SPLITTING

In [None]:
print('📊 PREPROCESSING AND SPLITTING DATA')
print('=' * 50)

# Hold out 20% of the samples for validation; the split is seeded and
# stratified on the labels.
_split = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)
train_texts, val_texts, train_labels, val_labels = _split

print(f'📊 Training samples: {len(train_texts)}')
print(f'📊 Validation samples: {len(val_texts)}')

# Column-style containers consumed by the preprocessing step
train_dataset = dict(text=train_texts, label=train_labels)
val_dataset = dict(text=val_texts, label=val_labels)

print('✅ Data split and prepared')

## ⚖️ FOCAL LOSS AND CLASS WEIGHTING

In [None]:
print('⚖️ SETTING UP FOCAL LOSS AND CLASS WEIGHTING')
print('=' * 60)

# One 'balanced' weight per label id present in the training labels
class_weights = compute_class_weight(
    'balanced',
    y=train_labels,
    classes=np.unique(train_labels),
)

# Tensor copy of the weights, placed on the GPU when one is available
class_weights_tensor = torch.FloatTensor(class_weights)
class_weights_tensor = (
    class_weights_tensor.cuda() if torch.cuda.is_available() else class_weights_tensor
)

print(f'✅ Class weights calculated: {class_weights}')
print(f'✅ Class weights tensor shape: {class_weights_tensor.shape}')

# Focal Loss implementation
class FocalLoss(torch.nn.Module):
    """Focal loss built on per-sample cross-entropy.

    Each sample's cross-entropy is scaled by ``alpha * (1 - p_t) ** gamma``,
    where ``p_t = exp(-cross_entropy)``; the scaled losses are averaged.
    """

    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss for logits ``inputs`` and class-index ``targets``."""
        per_sample_ce = torch.nn.functional.cross_entropy(inputs, targets, reduction='none')
        prob_true = torch.exp(-per_sample_ce)
        modulator = (1 - prob_true) ** self.gamma
        return torch.mean(self.alpha * modulator * per_sample_ce)

print('✅ Focal Loss class defined')

## 🎯 WEIGHTED LOSS TRAINER

In [None]:
print('🎯 CREATING WEIGHTED LOSS TRAINER')
print('=' * 50)

# Custom trainer with focal loss and class weighting
class WeightedLossTrainer(Trainer):
    """``Trainer`` whose loss is a (optionally class-weighted) focal loss.

    Args:
        focal_alpha: Constant multiplier applied to every sample's focal loss.
        focal_gamma: Exponent of the ``(1 - p_t)`` focusing term.
        class_weights: Optional 1-D tensor indexed by label id; each sample's
            loss is multiplied by the weight of its true class.
        *args, **kwargs: Forwarded unchanged to ``Trainer.__init__``.

    NOTE: because the three loss options come first in the signature, the
    regular ``Trainer`` arguments should be passed by keyword.
    """

    def __init__(self, focal_alpha=1, focal_gamma=2, class_weights=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.focal_alpha = focal_alpha
        self.focal_gamma = focal_gamma
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the focal loss for one batch.

        ``num_items_in_batch`` is accepted (and ignored) because newer
        ``transformers`` releases pass it to ``compute_loss``; without it the
        override fails with a ``TypeError`` on those versions.

        Returns:
            The scalar loss, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.pop('labels')
        outputs = model(**inputs)
        logits = outputs.logits

        # Focal loss: down-weight samples the model already classifies confidently
        ce_loss = torch.nn.functional.cross_entropy(logits, labels, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.focal_alpha * (1 - pt) ** self.focal_gamma * ce_loss

        # Apply class weights if provided
        if self.class_weights is not None:
            # Keep the weights on the same device as the logits so the indexing
            # and multiplication work wherever the batch was placed.
            weights = self.class_weights.to(logits.device)
            focal_loss = focal_loss * weights[labels]

        loss = focal_loss.mean()
        return (loss, outputs) if return_outputs else loss

print('✅ WeightedLossTrainer created with focal loss and class weighting')

## 🔧 DATA PREPROCESSING FUNCTION

In [None]:
print('🔧 SETTING UP DATA PREPROCESSING')
print('=' * 50)

# Preprocessing function
def preprocess_function(examples):
    """Tokenize ``examples['text']`` and carry the labels along.

    Texts are truncated and padded to a fixed length of 128 tokens. When
    ``examples`` has a ``'label'`` entry it is copied to the ``'labels'`` key
    of the returned encoding.
    """
    encoded = tokenizer(
        examples['text'],
        max_length=128,
        padding='max_length',
        truncation=True,
        return_tensors=None
    )
    if 'label' not in examples:
        return encoded
    encoded['labels'] = examples['label']
    return encoded

# Apply preprocessing
class EmotionDataset(torch.utils.data.Dataset):
    """Map-style dataset over a column-oriented tokenizer output.

    ``preprocess_function`` returns one mapping whose values are per-sample
    lists. ``Trainer`` needs a dataset where ``len()`` is the number of samples
    and ``dataset[i]`` is the feature dict of sample ``i``; passing the
    tokenizer output directly does not satisfy that (its ``len()`` is the
    number of keys).
    """

    def __init__(self, encodings):
        self.encodings = encodings

    def __len__(self):
        return len(self.encodings['input_ids'])

    def __getitem__(self, index):
        # Plain Python values; the data collator converts each batch to tensors.
        return {key: values[index] for key, values in self.encodings.items()}


train_dataset_processed = EmotionDataset(preprocess_function(train_dataset))
val_dataset_processed = EmotionDataset(preprocess_function(val_dataset))

# Create data collator
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    padding=True,
    return_tensors='pt'
)

print('✅ Data preprocessing completed')
print('✅ Data collator created')

## ⚙️ TRAINING ARGUMENTS

In [None]:
print('⚙️ CONFIGURING TRAINING ARGUMENTS')
print('=' * 50)

import inspect

# The evaluation-schedule argument is called `eval_strategy` in newer
# transformers releases and `evaluation_strategy` in older ones; use whichever
# the installed version accepts.
_eval_strategy_arg = (
    'eval_strategy'
    if 'eval_strategy' in inspect.signature(TrainingArguments.__init__).parameters
    else 'evaluation_strategy'
)

# Training arguments
training_args = TrainingArguments(
    output_dir='./comprehensive_emotion_model',
    num_train_epochs=5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    # load_best_model_at_end needs evaluation to run on the same schedule as
    # checkpoint saving (save_steps is a multiple of eval_steps); without an
    # evaluation strategy TrainingArguments rejects this configuration.
    **{_eval_strategy_arg: 'steps'},
    eval_steps=50,
    save_strategy='steps',
    save_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    greater_is_better=True,
    # Disable reporting explicitly when no API key is set: the string 'none'
    # turns all integrations off, whereas None falls back to the library default.
    report_to=['wandb'] if os.environ.get('WANDB_API_KEY') else 'none'
)

print('✅ Training arguments configured')

## 📊 COMPUTE METRICS FUNCTION

In [None]:
print('📊 SETTING UP COMPUTE METRICS')
print('=' * 50)

# Compute metrics function
def compute_metrics(eval_pred):
    """Turn ``(logits, labels)`` into weighted F1/precision/recall plus accuracy.

    The predicted class of each row is the argmax over axis 1. The returned
    keys are ``'f1'``, ``'accuracy'``, ``'precision'`` and ``'recall'``.
    """
    scores, labels = eval_pred
    predicted = np.argmax(scores, axis=1)

    weighted = {'average': 'weighted'}
    results = {}
    results['f1'] = f1_score(labels, predicted, **weighted)
    results['accuracy'] = accuracy_score(labels, predicted)
    results['precision'] = precision_score(labels, predicted, **weighted)
    results['recall'] = recall_score(labels, predicted, **weighted)
    return results

print('✅ Compute metrics function defined')

## 🚀 TRAINING EXECUTION

In [None]:
# Initialize trainer: loss options first, then the standard Trainer arguments
# (everything is passed by keyword).
trainer = WeightedLossTrainer(
    focal_alpha=1,
    focal_gamma=2,
    class_weights=class_weights_tensor,
    model=model,
    args=training_args,
    train_dataset=train_dataset_processed,
    eval_dataset=val_dataset_processed,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

print('✅ Trainer initialized')

# Summarise the run configuration before starting
print('🚀 STARTING COMPREHENSIVE TRAINING')
print('=' * 60)
print(f'🎯 Target: 75-85% F1 score')
print(f'📊 Training samples: {len(train_texts)}')
print(f'🧪 Validation samples: {len(val_texts)}')
print(f'⚖️ Using focal loss + class weighting')
print(f'🔧 Model: {model_name}')
print(f'📈 Data augmentation: {len(augmented_data)} samples added')

# Run the fine-tuning loop
trainer.train()

print('✅ Training completed successfully!')

## 📊 EVALUATION AND VALIDATION

In [None]:
print('📊 EVALUATING MODEL PERFORMANCE')
print('=' * 50)

# Aggregate metrics on the evaluation dataset configured on the trainer
eval_results = trainer.evaluate()
print('\n📊 EVALUATION RESULTS:')
print('=' * 30)
for metric_name in eval_results:
    print(f'{metric_name}: {eval_results[metric_name]:.4f}')

# Per-class breakdown on the validation split
print('\n📋 DETAILED CLASSIFICATION REPORT:')
print('=' * 40)
predictions = trainer.predict(val_dataset_processed)
pred_labels = np.argmax(predictions.predictions, axis=1)
true_labels = val_labels

report = classification_report(true_labels, pred_labels, target_names=emotions)
print(report)

## 🔍 ADVANCED VALIDATION

In [None]:
print('🔍 ADVANCED VALIDATION AND BIAS ANALYSIS')
print('=' * 60)

# Test on completely unseen examples
unseen_examples = [
    'I am feeling absolutely ecstatic about the promotion!',
    'This situation is making me extremely anxious and worried.',
    'I feel completely overwhelmed by all the responsibilities.',
    'I am so grateful for all the support I received.',
    'This makes me feel incredibly proud of my achievements.',
    'I am feeling quite content with my current situation.',
    'This gives me a lot of hope for the future.',
    'I feel really tired after working all day.',
    'I am sad about the recent loss.',
    'This excites me about the possibilities ahead.'
]

print('\n🧪 TESTING ON UNSEEN EXAMPLES:')
print('=' * 40)

# Make sure dropout is disabled, regardless of which cell ran last, and send
# inputs to wherever the model actually lives instead of guessing from
# CUDA availability.
model.eval()
model_device = next(model.parameters()).device

for i, example in enumerate(unseen_examples, 1):
    inputs = tokenizer(example, return_tensors='pt', truncation=True, padding=True)
    inputs = {k: v.to(model_device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=1)
        predicted_label = torch.argmax(outputs.logits, dim=1).item()
        confidence = probabilities[0][predicted_label].item()

    print(f'{i:2d}. "{example}"')
    print(f'    → Predicted: {emotions[predicted_label]} (confidence: {confidence:.3f})')
    print()

# Bias analysis
print('\n📊 BIAS ANALYSIS:')
print('=' * 30)
print('Checking for prediction bias across emotions...')

# Count validation-set predictions per emotion (uses `pred_labels` from the
# evaluation cell); every emotion is listed even if it was never predicted.
prediction_counts = {emotion: 0 for emotion in emotions}
for pred in pred_labels:
    prediction_counts[emotions[pred]] += 1

total_predictions = len(pred_labels)
print('\nPrediction distribution:')
for emotion, count in prediction_counts.items():
    # Guard against an empty prediction array
    percentage = (count / total_predictions) * 100 if total_predictions else 0.0
    print(f'{emotion:12s}: {count:3d} ({percentage:5.1f}%)')

print('\n✅ Advanced validation completed')

## 💾 MODEL SAVING WITH VERIFICATION

In [None]:
print('💾 SAVING MODEL WITH CONFIGURATION VERIFICATION')
print('=' * 60)

# Save the model and tokenizer side by side so the directory is self-contained
model_save_path = './comprehensive_emotion_model_final'
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

print(f'✅ Model saved to: {model_save_path}')

# CRITICAL: Verify the saved configuration
print('\n🔍 VERIFYING SAVED MODEL CONFIGURATION:')
print('=' * 50)

# Reload from disk and inspect what was actually persisted
saved_model = AutoModelForSequenceClassification.from_pretrained(model_save_path)
saved_tokenizer = AutoTokenizer.from_pretrained(model_save_path)

print(f'✅ Saved model labels: {saved_model.config.num_labels}')
print(f'✅ Saved id2label: {saved_model.config.id2label}')
print(f'✅ Saved label2id: {saved_model.config.label2id}')
print(f'✅ Saved problem_type: {saved_model.config.problem_type}')

# Test the saved model (the reloaded copy is used as loaded, on CPU inputs)
test_input = saved_tokenizer('I feel happy today', return_tensors='pt', truncation=True, padding=True)
with torch.no_grad():
    test_output = saved_model(**test_input)
    predicted_label = torch.argmax(test_output.logits, dim=1).item()
    confidence = torch.softmax(test_output.logits, dim=1)[0][predicted_label].item()

print(f'\n🧪 SAVED MODEL TEST:')
print(f'Input: "I feel happy today"')
print(f'Predicted: {saved_model.config.id2label[predicted_label]} (confidence: {confidence:.3f})')

# Verify configuration persistence: label count, both label mappings and the
# problem type must match what was configured before training.
config_correct = (
    saved_model.config.num_labels == len(emotions) and
    saved_model.config.id2label == {i: emotion for i, emotion in enumerate(emotions)} and
    saved_model.config.label2id == {emotion: i for i, emotion in enumerate(emotions)} and
    saved_model.config.problem_type == 'single_label_classification'
)

if config_correct:
    print('\n✅ CONFIGURATION PERSISTENCE VERIFIED!')
    print('✅ Model will work correctly in deployment')
    print('✅ No more 8.3% vs 75% discrepancy!')
else:
    print('\n❌ CONFIGURATION PERSISTENCE FAILED!')
    print('❌ Model may have issues in deployment')

print(f'\n🎉 COMPREHENSIVE TRAINING COMPLETED!')
print(f'📁 Model saved to: {model_save_path}')
# Format each metric only when it is numeric; applying ':.4f' to the 'N/A'
# placeholder would raise ValueError when a metric is missing.
for title, metric_key in (('Final F1 Score', 'eval_f1'), ('Final Accuracy', 'eval_accuracy')):
    metric_value = eval_results.get(metric_key)
    shown = f'{metric_value:.4f}' if isinstance(metric_value, (int, float)) else 'N/A'
    print(f'📊 {title}: {shown}')