# Results Analysis & Interactive Demo

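**Final verification of all project goals.** This notebook loads the final RLHF checkpoint, checks the cost, time, and persona-consistency targets against the saved training summary and evaluation results, plots a summary figure, and runs a short scripted chatbot demo.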

In [None]:
!pip install -q transformers datasets peft wandb matplotlib seaborn pandas

In [None]:
import sys; sys.path.append('../')
import torch, wandb, json
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from src.model.base import load_base_model, load_tokenizer
from src.utils.metrics import MetricsTracker
from src.eval.persona import PersonaEvaluator

In [None]:
# Open a Weights & Biases run for this analysis session.
wandb.init(
    project='persona-chatbot-rlhf',
    name='analysis-demo',
    tags=['demo'],
)

# Report the first CUDA device's name when one is visible, otherwise "CPU".
if torch.cuda.is_available():
    device_label = torch.cuda.get_device_name(0)
else:
    device_label = "CPU"
print(f'GPU: {device_label}')

## 1. Load Best Model (RLHF)

In [None]:
# Model weights and tokenizer are both read from the same final RLHF checkpoint.
rlhf_checkpoint = '../models/rlhf/checkpoint-final'

model = load_base_model({'name': rlhf_checkpoint, 'device_map': 'auto'})
tokenizer = load_tokenizer({'name': rlhf_checkpoint})

# Put the model in evaluation mode before it is used for generation.
model.eval()
print('✅ RLHF model loaded')

## 2. Verify All Project Goals

In [None]:
# Verify the three project goals against saved artifacts and log the outcome.
#
# Inputs:
#   ../models/sft/summary.json          -> sft_summary['savings'] with 'cost_%' / 'time_%'
#   ../outputs/evaluation_results.csv   -> rows indexed by the first CSV column,
#                                          with a 'persona_consistency' column
# Names left in the notebook namespace for later cells:
#   sft_summary, eval_results, cost_savings, time_savings, persona_consistency,
#   cost_met, time_met, persona_met, all_met

# Minimum value that counts as meeting each goal (lower bound of the target range).
COST_TARGET_PCT = 75      # compared against a 0-100 percentage
TIME_TARGET_PCT = 60      # compared against a 0-100 percentage
PERSONA_TARGET = 0.85     # compared against a 0-1 fraction

# Load training summary
with open('../models/sft/summary.json', 'r', encoding='utf-8') as f:
    sft_summary = json.load(f)

# Load evaluation results
eval_results = pd.read_csv('../outputs/evaluation_results.csv', index_col=0)

# Pull every metric up front so a missing key fails before anything is printed.
cost_savings = sft_summary['savings']['cost_%']
time_savings = sft_summary['savings']['time_%']
persona_consistency = eval_results.loc['rlhf', 'persona_consistency']

# bool(...) normalizes numpy booleans (from the pandas lookup) to plain Python
# bools, so `all_met` and the logged values do not depend on numpy scalar types.
cost_met = bool(cost_savings >= COST_TARGET_PCT)
time_met = bool(time_savings >= TIME_TARGET_PCT)
persona_met = bool(persona_consistency >= PERSONA_TARGET)
all_met = cost_met and time_met and persona_met

# (title, target text, formatted achieved value, goal met?)
goal_report = [
    ('Cost Reduction', '75-80%', f'{cost_savings:.1f}%', cost_met),
    ('Time Reduction', '60-70%', f'{time_savings:.1f}%', time_met),
    ('Persona Consistency', '85%+', f'{persona_consistency:.1%}', persona_met),
]

rule = '=' * 60
print('🎯 PROJECT GOALS VERIFICATION')
print(rule)

for number, (title, target, achieved, met) in enumerate(goal_report, start=1):
    if number > 1:
        print()  # blank line between goals, none before the first
    print(f'{number}. {title}:')
    print(f'   Target: {target}')
    print(f'   Achieved: {achieved}')
    print(f'   Status: {"✅ ACHIEVED" if met else "❌ NOT MET"}')

# Overall
print(f'\n{rule}')
print(f'OVERALL: {"✅ ALL GOALS ACHIEVED" if all_met else "⚠️ SOME GOALS NOT MET"}')
print(rule)

wandb.log({'goal_cost_met': cost_met, 'goal_time_met': time_met, 'goal_persona_met': persona_met, 'all_goals_met': all_met})

## 3. Cost-Benefit Summary

In [None]:
# Two-panel summary figure:
#   left  - cost/time savings with their minimum targets as dashed lines
#   right - persona consistency for the baseline, SFT and RLHF rows
# Uses cost_savings, time_savings and eval_results from the goal verification cell.
# All figure operations go through the explicit `fig`/axes handles instead of
# pyplot's implicit "current figure".
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
ax_savings, ax_models = axes

# Cost & Time Savings
savings_data = {'Cost Savings': cost_savings, 'Time Savings': time_savings}
ax_savings.bar(list(savings_data), list(savings_data.values()), color=['green', 'blue'])
ax_savings.axhline(y=75, color='r', linestyle='--', label='Cost Target (75%)')
ax_savings.axhline(y=60, color='orange', linestyle='--', label='Time Target (60%)')
ax_savings.set_ylabel('Savings (%)')
ax_savings.set_title('LoRA Efficiency Gains')
ax_savings.legend()
ax_savings.set_ylim(0, 100)

# Model Comparison: display label -> row label in eval_results
stage_rows = {'Baseline': 'baseline', 'SFT': 'sft', 'RLHF': 'rlhf'}
models = list(stage_rows)
scores = [eval_results.loc[row, 'persona_consistency'] for row in stage_rows.values()]
ax_models.bar(models, scores, color=['gray', 'orange', 'green'])
ax_models.axhline(y=0.85, color='r', linestyle='--', label='Target (85%)')
ax_models.set_ylabel('Persona Consistency')
ax_models.set_title('Model Performance')
ax_models.legend()
ax_models.set_ylim(0, 1)

fig.tight_layout()
fig.savefig('../outputs/project_goals_summary.png', dpi=300)
# Log the figure before plt.show(), which may release it in some backends.
wandb.log({'goals_summary': wandb.Image(fig)})
plt.show()
print('✅ Visualization saved')

## 4. Interactive Chatbot Demo

In [None]:
def chat(persona_traits, user_message, history=None, *, max_new_tokens=100, temperature=0.9, top_p=0.9):
    """Generate one persona-conditioned reply from the loaded model.

    Relies on the notebook-level ``model`` and ``tokenizer``.

    Args:
        persona_traits: Iterable of persona statements; joined with ' | '
            in the prompt.
        user_message: The latest user utterance.
        history: Optional sequence of earlier dialogue lines. Only the last
            three entries are placed in the prompt, joined with ' [SEP] '.
            The sequence is not modified. ``None`` (the default) or an empty
            sequence means no prior context.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``model.generate``.
        temperature: Sampling temperature, forwarded to ``model.generate``.
        top_p: Nucleus-sampling cutoff, forwarded to ``model.generate``.

    Returns:
        The decoded continuation only (prompt tokens are sliced off), with
        surrounding whitespace stripped. Sampling is enabled, so repeated
        calls with the same arguments can return different text.
    """
    # Format prompt. `history=None` replaces the former `history=[]` default,
    # which would have been a single list shared across calls.
    persona_text = ' | '.join(persona_traits)
    context_str = ' [SEP] '.join(history[-3:]) if history else ''
    prompt = f'[PERSONA] {persona_text} [DIALOGUE] {context_str} User: {user_message} [RESPONSE]'

    # Generate
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    prompt_len = inputs['input_ids'].shape[-1]
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The first `prompt_len` positions of the output are sliced off as the prompt.
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return response.strip()

print('✅ Chat function ready')

In [None]:
# Scripted demo: three user turns against one fixed persona, printing each
# reply together with the evaluator's per-reply consistency score.
persona = ['I am a software engineer', 'I love hiking', 'I have two dogs']
divider = '=' * 70

print('🤖 CHATBOT DEMO')
print('Persona: ' + ', '.join(persona))
print(divider)

# Alternating 'User: ...' / 'Bot: ...' lines, passed back to chat() as history.
conversation = []
questions = [
    'Hi! What do you like to do in your free time?',
    'That sounds fun! What kind of dogs do you have?',
    'Do you take them hiking with you?',
]

persona_eval = PersonaEvaluator()

for q in questions:
    print(f'\nUser: {q}')
    response = chat(persona, q, conversation)
    print(f'Bot: {response}')

    # Check consistency of this single reply against the persona traits
    consistency = persona_eval.calculate_consistency_score(response, persona)
    print(f'Consistency: {consistency:.1%}')

    # Record the turn only after generating, so each reply sees earlier turns only.
    conversation.extend([f'User: {q}', f'Bot: {response}'])

print(f'\n{divider}')
print('Demo complete!')

In [None]:
# Close the W&B run, then print the two-line closing banner.
wandb.finish()
print('\n✅ PROJECT COMPLETE!', 'All notebooks executed successfully.', sep='\n')