# Modelv2 Output Visualization

This notebook analyzes and visualizes the outputs from the modelv2 pipeline, including:
- Score distributions and analysis
- Feature weights analysis
- Signal correlation analysis
- Performance metrics
- Time series analysis of scores

## Data Sources:
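- Model outputs: `../model_outputs/{model_name}/` (each model directory provides `scores.csv` and `weights.csv`)
- Signal data: `binarySignalsPart00.csv` in the `data_directory` set in the configuration (e.g. `../covenant/SampleCovenantData/`)
- Configuration: `../config_local.yaml`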

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import yaml
import warnings
from datetime import datetime, timedelta
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, roc_curve
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Silence only deprecation-style chatter from the libraries. A blanket 'ignore'
# would also hide numerical warnings (e.g. a divide-by-zero in the separation
# metric) and other runtime warnings that flag untrustworthy results below.
for noisy_category in (FutureWarning, DeprecationWarning):
    warnings.filterwarnings('ignore', category=noisy_category)

# Set style for better plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Configure display options: never truncate columns, let pandas pick the width
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

print("Libraries imported successfully!")

## 1. Configuration and Data Loading

In [None]:
# Read the pipeline settings from the YAML file one level above the notebook
config_path = Path('../config_local.yaml')
with config_path.open('r') as config_file:
    config = yaml.safe_load(config_file)

# Echo every top-level setting; mapping values are shown as plain dicts
print("Configuration loaded:")
for key, value in config.items():
    shown_value = dict(value) if isinstance(value, dict) else value
    print(f"  {key}: {shown_value}")

In [None]:
# Define paths
base_path = Path('../')
model_outputs_path = base_path / 'model_outputs'
data_path = Path(config['data_directory'])

# Available models. Path.iterdir() yields entries in arbitrary order, so sort the
# names to keep the model order (and with it plot colours and legends) stable
# from one run to the next.
available_models = sorted(d.name for d in model_outputs_path.iterdir() if d.is_dir())
print(f"Available models: {available_models}")

# Load model data: one entry per model directory that has both output files
model_data = {}
for model_name in available_models:
    model_path = model_outputs_path / model_name
    scores_file = model_path / 'scores.csv'
    weights_file = model_path / 'weights.csv'

    missing_files = [f.name for f in (scores_file, weights_file) if not f.exists()]
    if missing_files:
        # Say why a listed model is absent from the analysis instead of
        # dropping it without a trace.
        print(f"Skipping {model_name}: missing {', '.join(missing_files)}")
        continue

    scores = pd.read_csv(scores_file)
    weights = pd.read_csv(weights_file)

    # Convert dates so the time series section can use the .dt accessor
    scores['signal_date'] = pd.to_datetime(scores['signal_date'])

    model_data[model_name] = {
        'scores': scores,
        'weights': weights
    }
    print(f"Loaded {model_name}: {len(scores):,} records")

print(f"\nTotal models loaded: {len(model_data)}")

## 2. Score Analysis and Distribution

In [None]:
def analyze_scores(model_name, scores_df):
    """Print headline score statistics for one model and return the per-class scores.

    Args:
        model_name: Label used in the printed banner (upper-cased).
        scores_df: DataFrame with a ``fail`` column (compared against 0 and 1)
            and a ``score`` column.

    Returns:
        dict with ``failed_scores`` and ``non_failed_scores`` (the ``score``
        values of the rows where ``fail`` is 1 / 0) and ``separation``: the
        difference of the two class means divided by the square root of the
        average of the two class variances.
    """
    fail_flags = scores_df['fail']
    all_scores = scores_df['score']

    print(f"\n=== {model_name.upper()} SCORE ANALYSIS ===")

    # Record counts and the overall failure rate
    print(f"Total records: {len(scores_df):,}")
    print(f"Failed companies: {fail_flags.sum():,}")
    print(f"Non-failed companies: {(fail_flags == 0).sum():,}")
    print(f"Failure rate: {fail_flags.mean():.3%}")

    # Split the scores by outcome
    failed_scores = all_scores[fail_flags == 1]
    non_failed_scores = all_scores[fail_flags == 0]

    print("\nScore Statistics:")
    groups = (
        ('Overall', all_scores),
        ('Failed', failed_scores),
        ('Non-failed', non_failed_scores),
    )
    for label, subset in groups:
        print(f"  {label} - Mean: {subset.mean():.4f}, Std: {subset.std():.4f}")

    # Gap between the class means, scaled by the pooled standard deviation
    mean_gap = failed_scores.mean() - non_failed_scores.mean()
    pooled_std = np.sqrt((failed_scores.var() + non_failed_scores.var()) / 2)
    separation = mean_gap / pooled_std
    print(f"  Separation: {separation:.4f}")

    return {
        'failed_scores': failed_scores,
        'non_failed_scores': non_failed_scores,
        'separation': separation
    }

# Run the score analysis once per loaded model, keyed by model name
score_analysis = {
    model_name: analyze_scores(model_name, data['scores'])
    for model_name, data in model_data.items()
}

In [None]:
# Plot score distributions: one panel per model, failed vs non-failed densities overlaid
if not model_data:
    # plt.subplots cannot build a grid with zero columns, so report the
    # situation instead of failing with a traceback.
    print("No models loaded - skipping score distribution plots.")
else:
    # squeeze=False always returns a 2-D array of Axes, so a single model
    # needs no special-casing; take the only row.
    fig, axes_grid = plt.subplots(1, len(model_data),
                                  figsize=(6*len(model_data), 5), squeeze=False)
    axes = axes_grid[0]

    for i, (model_name, data) in enumerate(model_data.items()):
        scores_df = data['scores']
        ax = axes[i]

        # Split once; the subsets feed both the curves and the annotated means
        failed = scores_df[scores_df['fail'] == 1]
        non_failed = scores_df[scores_df['fail'] == 0]

        # Plot distributions
        sns.kdeplot(data=failed, x='score',
                    label='Failed', fill=True, common_norm=False, ax=ax)
        sns.kdeplot(data=non_failed, x='score',
                    label='Non-failed', fill=True, common_norm=False, ax=ax)

        ax.set_title(f'{model_name.upper()} Score Distribution')
        ax.set_xlabel('Score')
        ax.set_ylabel('Density')
        ax.legend()

        # Add statistics in the top-left corner (axes-relative coordinates)
        failed_mean = failed['score'].mean()
        non_failed_mean = non_failed['score'].mean()
        ax.text(0.02, 0.98, f'Failed mean: {failed_mean:.3f}\nNon-failed mean: {non_failed_mean:.3f}',
                transform=ax.transAxes, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    plt.tight_layout()
    plt.show()

## 3. Smoothed P(failure|Score) Analysis

In [None]:
def calculate_smoothed_failure_probability(scores_df, window_size=100, min_periods=None):
    """Estimate P(failure | score) as a centred rolling mean of ``fail`` over score-sorted rows.

    Args:
        scores_df: DataFrame with ``score`` and ``fail`` columns.
        window_size: Number of score-adjacent rows averaged for each point.
        min_periods: Minimum number of observations a window needs to yield a
            value. ``None`` (the default) keeps the pandas default for fixed
            windows, which requires a full window: the rows nearest the lowest
            and highest scores come out as NaN, and so does every row when
            there are fewer than ``window_size`` rows. Pass a smaller value
            (e.g. ``1``) to get estimates from the partial windows at both
            ends of the score range.

    Returns:
        Tuple ``(scores, probabilities)``: two Series of equal length, ordered
        by ascending score and sharing a fresh 0..n-1 index.
    """
    # Sort by score so neighbouring rows have similar scores
    df_sorted = scores_df.sort_values('score').reset_index(drop=True)

    # Rolling mean of the failure flag, centred on each row
    rolling_avg = df_sorted['fail'].rolling(
        window=window_size, min_periods=min_periods, center=True
    ).mean()

    return df_sorted['score'], rolling_avg

# Overlay every model's smoothed failure curve on a single chart
colors = ['blue', 'red', 'green', 'orange', 'purple']
overlay_fig, overlay_ax = plt.subplots(figsize=(12, 8))

for i, (model_name, data) in enumerate(model_data.items()):
    scores_df = data['scores']
    scores, probs = calculate_smoothed_failure_probability(scores_df, window_size=100)

    # Cycle through the palette when there are more models than colours
    overlay_ax.plot(scores, probs,
                    color=colors[i % len(colors)],
                    linewidth=2,
                    label=model_name.upper())

overlay_ax.set_xlabel('Score')
overlay_ax.set_ylabel('P(failure)')
overlay_ax.set_title('Smoothed P(failure | Score) - Window Size: 100')
overlay_ax.legend()
overlay_ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Same curves, one panel per smoothing window
window_sizes = [50, 100, 200]
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

for panel, window_size in zip(axes, window_sizes):
    for j, (model_name, data) in enumerate(model_data.items()):
        scores_df = data['scores']
        scores, probs = calculate_smoothed_failure_probability(scores_df, window_size)

        panel.plot(scores, probs,
                   color=colors[j % len(colors)],
                   linewidth=2,
                   label=model_name.upper())

    panel.set_xlabel('Score')
    panel.set_ylabel('P(failure)')
    panel.set_title(f'Window Size: {window_size}')
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Feature Weights Analysis

In [None]:
def analyze_weights(model_name, weights_df):
    """Print a summary of one model's feature weights and return them as a Series.

    Args:
        model_name: Label used in the printed banner (upper-cased).
        weights_df: DataFrame with ``Feature`` and ``Weight`` columns.

    Returns:
        The ``Weight`` column as a Series indexed by ``Feature``.
    """
    print(f"\n=== {model_name.upper()} WEIGHTS ANALYSIS ===")

    weights = weights_df.set_index('Feature')['Weight']

    # Feature counts by weight sign
    print(f"Total features: {len(weights)}")
    sign_masks = (
        ('Positive', weights > 0),
        ('Negative', weights < 0),
        ('Zero', weights == 0),
    )
    for label, mask in sign_masks:
        print(f"{label} weights: {mask.sum()}")

    print("\nWeight Statistics:")
    summary = (
        ('Mean', weights.mean()),
        ('Std', weights.std()),
        ('Min', weights.min()),
        ('Max', weights.max()),
    )
    for label, stat in summary:
        print(f"  {label}: {stat:.4f}")

    # Five largest, then five smallest weights
    extremes = (
        ('Positive', weights.nlargest(5)),
        ('Negative', weights.nsmallest(5)),
    )
    for heading, selection in extremes:
        print(f"\nTop 5 {heading} Weights:")
        for feature, weight in selection.items():
            print(f"  {feature}: {weight:.4f}")

    return weights

# Summarise the feature weights of every loaded model, keyed by model name
weights_analysis = {
    model_name: analyze_weights(model_name, data['weights'])
    for model_name, data in model_data.items()
}

In [None]:
# Plot weights comparison

# Build one column per model. Concatenating along axis=1 takes the union of the
# feature indexes, so a feature used by only some models is kept (NaN for the
# others). Assigning the Series one at a time into an empty DataFrame would
# align every later model to the first model's features and drop the rest.
model_names = list(weights_analysis)
if model_names:
    weights_df = pd.concat([weights_analysis[name] for name in model_names],
                           axis=1, keys=model_names)
else:
    weights_df = pd.DataFrame()

if weights_df.empty:
    # Nothing to draw; a heatmap of an empty frame would raise
    print("No feature weights loaded - skipping weights comparison plots.")
else:
    fig, axes = plt.subplots(2, 1, figsize=(12, 10))

    # Heatmap of every feature x model weight, colour scale centred on zero
    sns.heatmap(weights_df, annot=True, cmap='RdBu_r', center=0, ax=axes[0])
    axes[0].set_title('Feature Weights Comparison (Heatmap)')

    # Bar plot for the 15 features with the largest absolute weight in any model
    top_features = weights_df.abs().max(axis=1).nlargest(15).index
    weights_subset = weights_df.loc[top_features]
    weights_subset.plot(kind='bar', ax=axes[1])
    axes[1].set_title('Top 15 Feature Weights Comparison')
    axes[1].set_xlabel('Features')
    axes[1].set_ylabel('Weight')
    axes[1].legend()
    axes[1].tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

## 5. Signal Analysis

In [None]:
def load_and_analyze_signals():
    """Read the binary signal file, print an overview, and return it with its features.

    Reads ``binarySignalsPart00.csv`` from the notebook-level ``data_path``.

    Returns:
        ``None`` when the file does not exist; otherwise a tuple
        ``(signals_df, feature_cols, signal_freq)`` where ``feature_cols`` is
        every column other than ``companyid`` and ``signal_date`` and
        ``signal_freq`` is the per-feature column mean, sorted descending.
    """
    signals_file = data_path / 'binarySignalsPart00.csv'
    if not signals_file.exists():
        print(f"Signals file not found: {signals_file}")
        return None

    # Parse signal_date while reading; the file stores dates as month/day/year
    signals_df = pd.read_csv(signals_file, parse_dates=['signal_date'], date_format='%m/%d/%Y')

    # Everything that is not an identifier column is treated as a signal
    id_columns = ('companyid', 'signal_date')
    feature_cols = [col for col in signals_df.columns if col not in id_columns]

    n_records = len(signals_df)
    n_companies = signals_df['companyid'].nunique()
    first_date = signals_df['signal_date'].min()
    last_date = signals_df['signal_date'].max()

    print("Signal Analysis:")
    print(f"  Total records: {n_records:,}")
    print(f"  Unique companies: {n_companies:,}")
    print(f"  Date range: {first_date} to {last_date}")
    print(f"  Features: {len(feature_cols)}")

    # Mean of each signal column, most frequent signal first
    signal_freq = signals_df[feature_cols].mean().sort_values(ascending=False)

    print("\nSignal Frequencies (Top 10):")
    for feature, freq in signal_freq.iloc[:10].items():
        print(f"  {feature}: {freq:.3%}")

    return signals_df, feature_cols, signal_freq

# Load signals; fall back to three Nones when the file is unavailable
signals_data = load_and_analyze_signals()
signals_df, feature_cols, signal_freq = (
    signals_data if signals_data is not None else (None, None, None)
)

In [None]:
if signals_df is not None:
    # Signal frequencies: per-signal bars on the left, histogram of the rates on the right
    freq_fig, (bar_ax, hist_ax) = plt.subplots(1, 2, figsize=(12, 6))

    signal_freq.plot(kind='bar', ax=bar_ax)
    bar_ax.set_title('Signal Frequencies')
    bar_ax.set_xlabel('Signals')
    bar_ax.set_ylabel('Frequency')
    plt.setp(bar_ax.get_xticklabels(), rotation=45)

    signal_freq.hist(bins=20, alpha=0.7, ax=hist_ax)
    hist_ax.set_title('Distribution of Signal Frequencies')
    hist_ax.set_xlabel('Frequency')
    hist_ax.set_ylabel('Count')

    plt.tight_layout()
    plt.show()

    # Correlation analysis needs at least two signals
    if len(feature_cols) > 1:
        corr_matrix = signals_df[feature_cols].corr()

        corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', center=0, ax=corr_ax)
        corr_ax.set_title('Signal Correlation Matrix')
        plt.tight_layout()
        plt.show()

        # Walk the strict upper triangle so each unordered pair is seen once
        column_names = corr_matrix.columns
        corr_values = corr_matrix.to_numpy()
        upper_rows, upper_cols = np.triu_indices(len(column_names), k=1)
        high_corr_pairs = [
            (column_names[r], column_names[c], corr_values[r, c])
            for r, c in zip(upper_rows, upper_cols)
            if abs(corr_values[r, c]) > 0.5
        ]

        if high_corr_pairs:
            print("\nHighly Correlated Signal Pairs (|correlation| > 0.5):")
            # Strongest relationships first, regardless of sign
            ranked_pairs = sorted(high_corr_pairs, key=lambda pair: abs(pair[2]), reverse=True)
            for first, second, corr_val in ranked_pairs:
                print(f"  {first} - {second}: {corr_val:.3f}")

## 6. Performance Metrics

In [None]:
def calculate_performance_metrics(scores_df):
    """Compute discrimination and score-spread metrics for one model.

    Args:
        scores_df: DataFrame with a ``fail`` label column and a ``score`` column.

    Returns:
        dict with, in order: ``roc_auc``, ``pr_auc`` (area under the
        precision-recall curve via ``auc(recall, precision)``), ``separation``
        (difference of class means over the square root of the average class
        variance), the mean and standard deviation of the failed and
        non-failed scores, ``score_range`` (max minus min) and ``score_std``.
    """
    labels = scores_df['fail']
    scores = scores_df['score']

    # Ranking quality: area under the ROC and precision-recall curves
    roc_auc = roc_auc_score(labels, scores)
    precision, recall, _ = precision_recall_curve(labels, scores)
    pr_auc = auc(recall, precision)

    # Per-class score samples
    failed_scores = scores[labels == 1]
    non_failed_scores = scores[labels == 0]

    # Gap between the class means, scaled by the pooled standard deviation
    mean_gap = failed_scores.mean() - non_failed_scores.mean()
    pooled_std = np.sqrt((failed_scores.var() + non_failed_scores.var()) / 2)
    separation = mean_gap / pooled_std

    return {
        'roc_auc': roc_auc,
        'pr_auc': pr_auc,
        'separation': separation,
        'failed_mean': failed_scores.mean(),
        'non_failed_mean': non_failed_scores.mean(),
        'failed_std': failed_scores.std(),
        'non_failed_std': non_failed_scores.std(),
        'score_range': scores.max() - scores.min(),
        'score_std': scores.std()
    }

# Calculate metrics for all models: collect one Series per model, then lay the
# table out with one row per model and one column per metric
metrics_by_model = {}
for model_name, data in model_data.items():
    metrics_by_model[model_name] = pd.Series(calculate_performance_metrics(data['scores']))

metrics_df = pd.DataFrame(metrics_by_model).T
print("Performance Metrics:")
print(metrics_df.round(4))

In [None]:
# Plot ROC and PR curves side by side
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
roc_ax, pr_ax = axes

# One pass per model: draw its ROC curve on the left and its PR curve on the right
for model_name, data in model_data.items():
    scores_df = data['scores']
    y_true = scores_df['fail']
    y_score = scores_df['score']
    model_label = model_name.upper()

    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = roc_auc_score(y_true, y_score)
    roc_ax.plot(fpr, tpr, linewidth=2,
                label=f'{model_label} (AUC: {roc_auc:.3f})')

    precision, recall, _ = precision_recall_curve(y_true, y_score)
    pr_auc = auc(recall, precision)
    pr_ax.plot(recall, precision, linewidth=2,
               label=f'{model_label} (AUC: {pr_auc:.3f})')

# Diagonal reference line for a random classifier
roc_ax.plot([0, 1], [0, 1], 'k--', alpha=0.5)

panel_text = (
    (roc_ax, 'False Positive Rate', 'True Positive Rate', 'ROC Curves'),
    (pr_ax, 'Recall', 'Precision', 'Precision-Recall Curves'),
)
for panel, x_label, y_label, panel_title in panel_text:
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)
    panel.set_title(panel_title)
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 7. Time Series Analysis

In [None]:
def analyze_time_series(scores_df):
    """Plot a 2x2 panel of yearly and quarterly score / failure-rate summaries.

    Args:
        scores_df: DataFrame with a datetime ``signal_date`` column plus
            ``score`` and ``fail`` columns. The input is not modified.

    Returns:
        Tuple ``(yearly_stats, failure_rate)``: a DataFrame with ``year``,
        ``mean`` and ``std`` of the score, and a Series of the mean of
        ``fail`` indexed by year.
    """
    # Work on a copy that carries the calendar components of signal_date
    signal_dates = scores_df['signal_date'].dt
    dated = scores_df.assign(
        year=signal_dates.year,
        month=signal_dates.month,
        quarter=signal_dates.quarter,
    )

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    (trend_ax, rate_ax), (quarter_ax, scatter_ax) = axes

    # 1. Mean score per year with a +/- one standard deviation band
    yearly_stats = dated.groupby('year')['score'].agg(['mean', 'std']).reset_index()
    band_low = yearly_stats['mean'] - yearly_stats['std']
    band_high = yearly_stats['mean'] + yearly_stats['std']
    trend_ax.plot(yearly_stats['year'], yearly_stats['mean'], marker='o')
    trend_ax.fill_between(yearly_stats['year'], band_low, band_high, alpha=0.3)
    trend_ax.set_title('Average Score by Year')
    trend_ax.set_xlabel('Year')
    trend_ax.set_ylabel('Score')
    trend_ax.grid(True, alpha=0.3)

    # 2. Share of failed records per year
    failure_rate = dated.groupby('year')['fail'].mean()
    rate_ax.plot(failure_rate.index, failure_rate.values, marker='o', color='red')
    rate_ax.set_title('Failure Rate by Year')
    rate_ax.set_xlabel('Year')
    rate_ax.set_ylabel('Failure Rate')
    rate_ax.grid(True, alpha=0.3)

    # 3. Mean score per calendar quarter (all years pooled)
    quarterly_stats = dated.groupby('quarter')['score'].mean()
    quarter_ax.bar(quarterly_stats.index, quarterly_stats.values)
    quarter_ax.set_title('Average Score by Quarter')
    quarter_ax.set_xlabel('Quarter')
    quarter_ax.set_ylabel('Average Score')
    quarter_ax.grid(True, alpha=0.3)

    # 4. One point per year: mean score against failure rate, labelled with the year
    yearly_score_fail = dated.groupby('year').agg({
        'score': 'mean',
        'fail': 'mean'
    }).reset_index()

    scatter_ax.scatter(yearly_score_fail['score'], yearly_score_fail['fail'])
    yearly_points = zip(yearly_score_fail['year'],
                        yearly_score_fail['score'],
                        yearly_score_fail['fail'])
    for year, mean_score, fail_share in yearly_points:
        scatter_ax.annotate(str(int(year)),
                            (mean_score, fail_share),
                            xytext=(5, 5), textcoords='offset points')
    scatter_ax.set_xlabel('Average Score')
    scatter_ax.set_ylabel('Failure Rate')
    scatter_ax.set_title('Score vs Failure Rate by Year')
    scatter_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return yearly_stats, failure_rate

# Draw the time series panel for every loaded model, with a banner per model
for model_name in model_data:
    print(f"\n=== {model_name.upper()} TIME SERIES ANALYSIS ===")
    yearly_stats, failure_rate = analyze_time_series(model_data[model_name]['scores'])

## 8. Summary and Insights

In [None]:
print("=== MODELV2 VISUALIZATION SUMMARY ===\n")

# Model comparison summary: headline metrics for every model present in metrics_df
print("Model Performance Summary:")
summary_fields = (
    ('ROC AUC', 'roc_auc'),
    ('PR AUC', 'pr_auc'),
    ('Separation', 'separation'),
    ('Score Range', 'score_range'),
)
for model_name in model_data:
    if model_name not in metrics_df.index:
        continue
    print(f"\n{model_name.upper()}:")
    for label, column in summary_fields:
        print(f"  {label}: {metrics_df.loc[model_name, column]:.3f}")

# Fixed reading guide for the sections above
print("\n=== KEY INSIGHTS ===")
key_insights = [
    "1. The smoothed P(failure|Score) curves show the relationship between model scores and failure probability",
    "2. Higher separation between failed and non-failed distributions indicates better model performance",
    "3. ROC AUC and PR AUC provide quantitative measures of model discrimination ability",
    "4. Feature weights reveal which signals are most important for each model variant",
    "5. Time series analysis shows how model performance varies over time",
    "6. Signal correlation analysis helps understand feature relationships and potential redundancy",
]
for insight in key_insights:
    print(insight)