# Comprehensive Analysis and Results

Este notebook realiza a análise final comparando todas as abordagens: Baseline, Differential Privacy e Federated Learning.

## O que este notebook faz:
1. Carrega todos os resultados (baseline, DP, FL)
2. Cria visualizações comparativas
3. Analisa trade-offs entre privacidade e performance
4. Gera tabelas de resultados
5. Cria dashboard final
6. Salva relatório completo

**Pré-requisitos**: Execute todos os notebooks anteriores (00-05).


In [None]:
# ============================================================================
# STEP 1: Load All Results
# ============================================================================
#
# Loads the per-technique result files found under RESULTS_PATH and merges
# them into `all_results`, which the remaining cells of this notebook read.

print("="*70)
print("COMPREHENSIVE ANALYSIS")
print("="*70)

# Import modules
from src.evaluation.metrics import load_evaluation_results, compare_models, create_results_summary
from src.evaluation.visualization import (
    plot_tradeoff_curve, plot_comparison_bars, plot_privacy_analysis,
    create_summary_dashboard
)
import pandas as pd
import numpy as np
import os

# Define paths
RESULTS_PATH = '/content/drive/MyDrive/mhealth-data/results'

print("Loading all results...")


def _load_results_file(label, filename):
    """Load one results file from RESULTS_PATH.

    Args:
        label: Human-readable technique name used in the status messages.
        filename: File name inside RESULTS_PATH.

    Returns:
        Whatever `load_evaluation_results` returns for the file, or an empty
        dict when the file does not exist.
    """
    path = f'{RESULTS_PATH}/{filename}'
    if not os.path.exists(path):
        # Say so explicitly: a silently skipped file is indistinguishable
        # from a technique that simply produced no models.
        print(f"⚠️ {label} results not found: {path}")
        return {}
    loaded = load_evaluation_results(path)
    print(f"✅ {label} results loaded: {len(loaded)} models")
    return loaded


baseline_results = _load_results_file('Baseline', 'baseline_results.json')
dp_results = _load_results_file('DP', 'dp_results.json')
fl_results = _load_results_file('FL', 'fl_results.json')

# Combine all results. Later files win on a name clash (same precedence as
# before), but the clash is now reported instead of silently dropping a model.
all_results = {}
for _label, _loaded in (('Baseline', baseline_results), ('DP', dp_results), ('FL', fl_results)):
    _clashes = [name for name in _loaded if name in all_results]
    if _clashes:
        print(f"⚠️ {_label} results overwrite existing entries: {_clashes}")
    all_results.update(_loaded)

print(f"\nTotal models analyzed: {len(all_results)}")
techniques_found = {entry.get('privacy_technique', 'Unknown') for entry in all_results.values()}
print(f"Techniques: {techniques_found}")


In [None]:
# ============================================================================
# STEP 2: Create Results Summary Table
# ============================================================================
#
# Builds the per-model summary table from `all_results`, stores it as CSV and
# prints mean/std/count aggregates grouped by privacy technique.

print("\n" + "="*70)
print("CREATING RESULTS SUMMARY")
print("="*70)

if not all_results:
    print("❌ No results found. Please run training notebooks first.")
else:
    summary_csv = f'{RESULTS_PATH}/comprehensive_results_summary.csv'
    detailed_csv = f'{RESULTS_PATH}/detailed_results_summary.csv'

    # The helper receives the CSV path as its second argument
    # (presumably it writes the table there as well -- confirm in src.evaluation.metrics).
    summary_df = create_results_summary(all_results, summary_csv)

    print("Results Summary Table:")
    print(summary_df.to_string(index=False))

    # Second copy of the table, written without the index column.
    summary_df.to_csv(detailed_csv, index=False)
    print(f"\n✅ Detailed summary saved to: {detailed_csv}")

    print("\n" + "-"*50)
    print("COMPARISON BY TECHNIQUE")
    print("-"*50)

    # Per-technique aggregates: mean and std of both metrics, plus model count.
    aggregations = {
        'Accuracy': ['mean', 'std'],
        'F1-Score': ['mean', 'std'],
        'Model': 'count',
    }
    by_technique = summary_df.groupby('Privacy_Technique')
    technique_comparison = by_technique.agg(aggregations).round(4)

    print(technique_comparison)


In [None]:
# ============================================================================
# STEP 3: Create Comprehensive Visualizations
# ============================================================================
#
# Calls the project plotting helpers on `all_results`; every figure is given
# a save path under RESULTS_PATH.

print("\n" + "="*70)
print("CREATING COMPREHENSIVE VISUALIZATIONS")
print("="*70)

if not all_results:
    print("❌ No results to visualize")
else:
    # 1. Privacy vs. performance overview across every loaded model
    print("Creating privacy analysis...")
    plot_privacy_analysis(
        all_results,
        title='Comprehensive Privacy Analysis',
        save_path=f'{RESULTS_PATH}/privacy_analysis_comprehensive.png',
    )

    # 2. Side-by-side bars for accuracy and F1
    print("Creating model comparison...")
    plot_comparison_bars(
        all_results,
        metrics=['accuracy', 'f1_score'],
        title='Comprehensive Model Comparison',
        save_path=f'{RESULTS_PATH}/model_comparison_comprehensive.png',
    )

    # 3. Trade-off curve, drawn only when at least one DP model was loaded
    dp_models = {
        name: entry
        for name, entry in all_results.items()
        if entry.get('privacy_technique') == 'DP'
    }
    if dp_models:
        print("Creating DP trade-off curves...")
        plot_tradeoff_curve(
            dp_models,
            metric='accuracy',
            privacy_param='epsilon',
            title='Differential Privacy: Privacy vs. Performance Trade-off',
            save_path=f'{RESULTS_PATH}/dp_tradeoff_comprehensive.png',
        )

    # 4. Single-figure dashboard
    print("Creating summary dashboard...")
    create_summary_dashboard(
        all_results,
        save_path=f'{RESULTS_PATH}/summary_dashboard.png',
    )

    print("✅ All visualizations created successfully!")


In [None]:
# ============================================================================
# STEP 4: Statistical Analysis
# ============================================================================
#
# For each dataset, prints the baseline accuracy/F1 and, for every DP and FL
# model trained on that dataset, its metrics together with the signed
# difference from the baseline (negative = worse, positive = better).

print("\n" + "="*70)
print("STATISTICAL ANALYSIS")
print("="*70)


def _select_by_technique(results_by_model, technique):
    """Return the entries of `results_by_model` whose 'privacy_technique' equals `technique`."""
    return {
        name: entry
        for name, entry in results_by_model.items()
        if entry.get('privacy_technique') == technique
    }


def _print_model_deltas(heading, models, baseline_acc, baseline_f1, param_key, param_label):
    """Print each model's accuracy/F1 with its signed change versus the baseline.

    Args:
        heading: Line printed once above the model list.
        models: Mapping of model name to its results entry; nothing is
            printed when it is empty.
        baseline_acc: Baseline accuracy the models are compared against.
        baseline_f1: Baseline F1-score the models are compared against.
        param_key: Key of the technique parameter to display (e.g. 'epsilon').
        param_label: Label printed in front of that parameter's value.
    """
    if not models:
        return
    print(heading)
    for model_name, entry in models.items():
        acc = entry['metrics']['accuracy']
        f1 = entry['metrics']['f1_score']
        param = entry.get(param_key, 'N/A')
        # Use a signed delta instead of a hard-coded "-" prefix: a model that
        # beats the baseline used to be rendered as "(--0.0123)".
        print(
            f"    {model_name}: Acc={acc:.4f} ({acc - baseline_acc:+.4f}), "
            f"F1={f1:.4f} ({f1 - baseline_f1:+.4f}), {param_label}={param}"
        )


if all_results:
    # Baseline entries are the ones tagged with the string 'None'.
    baseline_models = _select_by_technique(all_results, 'None')
    dp_models = _select_by_technique(all_results, 'DP')
    fl_models = _select_by_technique(all_results, 'FL')

    print("Statistical Analysis Results:")
    print("-" * 50)

    # Analyze each dataset
    for dataset in ['sleep_edf', 'wesad']:
        print(f"\n{dataset.upper()} Dataset Analysis:")

        # The first baseline entry for this dataset is the reference point.
        dataset_baseline = next(
            (entry for entry in baseline_models.values() if entry.get('dataset') == dataset),
            None,
        )
        if dataset_baseline is None:
            # Without a reference there is nothing to compare against; say so
            # rather than leaving an empty section.
            print("  No baseline results found for this dataset; skipping comparison.")
            continue

        baseline_acc = dataset_baseline['metrics']['accuracy']
        baseline_f1 = dataset_baseline['metrics']['f1_score']
        print(f"  Baseline: Accuracy={baseline_acc:.4f}, F1={baseline_f1:.4f}")

        dataset_dp = {k: v for k, v in dp_models.items() if v.get('dataset') == dataset}
        _print_model_deltas("  DP Models:", dataset_dp, baseline_acc, baseline_f1, 'epsilon', 'ε')

        dataset_fl = {k: v for k, v in fl_models.items() if v.get('dataset') == dataset}
        _print_model_deltas("  FL Models:", dataset_fl, baseline_acc, baseline_f1, 'n_clients', 'Clients')

    print("\n✅ Statistical analysis completed!")

else:
    print("❌ No results for statistical analysis")


In [None]:
# ============================================================================
# STEP 5: Generate Final Report
# ============================================================================
#
# Assembles a JSON report (project metadata, the summary table when it
# exists, and a few headline findings) and writes it to RESULTS_PATH.

import json

print("\n" + "="*70)
print("GENERATING FINAL REPORT")
print("="*70)

# Create final report
report = {
    'project': 'Privacy-Preserving Health Data Analysis',
    'datasets': ['Sleep-EDF', 'WESAD'],
    'techniques': ['Baseline', 'Differential Privacy', 'Federated Learning'],
    'total_models': len(all_results),
    # summary_df only exists when the summary cell ran with results loaded.
    'summary': summary_df.to_dict('records') if 'summary_df' in locals() else [],
    'key_findings': []
}

# Add key findings
if all_results:
    # Find best performing models
    best_accuracy = max(all_results.items(), key=lambda x: x[1]['metrics']['accuracy'])
    best_f1 = max(all_results.items(), key=lambda x: x[1]['metrics']['f1_score'])

    report['key_findings'].extend([
        f"Best Accuracy: {best_accuracy[0]} ({best_accuracy[1]['metrics']['accuracy']:.4f})",
        f"Best F1-Score: {best_f1[0]} ({best_f1[1]['metrics']['f1_score']:.4f})"
    ])

    # Rebuild the per-technique subsets here rather than relying on variables
    # left behind by earlier cells: those are only assigned inside their own
    # `if all_results:` branch, so skipping a cell used to raise NameError.
    dp_models = {k: v for k, v in all_results.items() if v.get('privacy_technique') == 'DP'}
    fl_models = {k: v for k, v in all_results.items() if v.get('privacy_technique') == 'FL'}

    # Analyze privacy trade-offs
    if dp_models:
        dp_accuracies = [results['metrics']['accuracy'] for results in dp_models.values()]
        avg_dp_accuracy = np.mean(dp_accuracies)
        report['key_findings'].append(f"Average DP Accuracy: {avg_dp_accuracy:.4f}")

    if fl_models:
        fl_accuracies = [results['metrics']['accuracy'] for results in fl_models.values()]
        avg_fl_accuracy = np.mean(fl_accuracies)
        report['key_findings'].append(f"Average FL Accuracy: {avg_fl_accuracy:.4f}")

# Save final report (explicit encoding so the file does not depend on the
# platform default)
with open(f'{RESULTS_PATH}/final_report.json', 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=2)

print("Final Report Generated:")
print("-" * 30)
print(f"Project: {report['project']}")
print(f"Datasets: {', '.join(report['datasets'])}")
print(f"Techniques: {', '.join(report['techniques'])}")
print(f"Total Models: {report['total_models']}")
print("\nKey Findings:")
for finding in report['key_findings']:
    print(f"  • {finding}")

print(f"\n✅ Final report saved to: {RESULTS_PATH}/final_report.json")


In [None]:
# ============================================================================
# STEP 6: Project Completion Summary
# ============================================================================
#
# Prints a fixed wrap-up message: what was done, which files are expected
# under RESULTS_PATH, and suggested next steps. Nothing is computed here.

print("\n" + "="*70)
print("PROJECT COMPLETION SUMMARY")
print("="*70)

print("🎉 Privacy-Preserving Health Data Analysis Project Complete!")

print("\nWhat was accomplished:")
for _line in (
    "✅ Preprocessed Sleep-EDF and WESAD datasets",
    "✅ Trained baseline LSTM models",
    "✅ Implemented Differential Privacy with multiple epsilon values",
    "✅ Implemented Federated Learning with multiple client configurations",
    "✅ Comprehensive evaluation and comparison",
    "✅ Statistical analysis of privacy-performance trade-offs",
    "✅ Generated visualizations and final report",
):
    print(_line)

print(f"\nResults saved in: {RESULTS_PATH}")
print("Files generated:")
for _line in (
    "  • comprehensive_results_summary.csv",
    "  • detailed_results_summary.csv",
    "  • privacy_analysis_comprehensive.png",
    "  • model_comparison_comprehensive.png",
    "  • dp_tradeoff_comprehensive.png",
    "  • summary_dashboard.png",
    "  • final_report.json",
):
    print(_line)

print("\nNext steps for your thesis:")
for _line in (
    "1. Review the results and visualizations",
    "2. Write the methodology section using the implemented code",
    "3. Analyze the trade-offs between privacy and performance",
    "4. Discuss implications for mobile health applications",
    "5. Create conclusions and recommendations",
):
    print(_line)

print("\n🚀 Your project is ready for thesis writing!")
print("All code is modular and well-documented for reproducibility.")
