# Gating Model - Expert Ensemble
Reference: how-can-we-prevent-road-rage (merging outputs)

This notebook implements the gating model that combines outputs from all expert models.

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import VotingRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Reached only after the imports above have executed without raising.
print("Gating Model - Expert Ensemble - Ready for implementation")

## Expert Model Integration

Combine outputs from:
1. Behavior Expert
2. Geographic Expert
3. Contextual Expert

In [None]:
class ExpertEnsemble:
    """
    Blend the behavior, geographic and contextual expert outputs into a
    single risk score using fixed weights.
    """

    def __init__(self):
        # Share of the combined risk attributed to each expert.
        self.expert_weights = dict(behavior=0.4, geographic=0.3, contextual=0.3)

        # Exclusive upper bounds of the low / moderate / high bands;
        # any score at or above the 'high' bound is "Very High Risk".
        self.risk_thresholds = dict(low=30, moderate=60, high=80)

    def combine_expert_scores(self, behavior_score, geo_risk, context_risk):
        """
        Return the weighted risk for one driver.

        ``behavior_score`` is inverted (``100 - score``) before weighting so
        that all three inputs are on a "higher is riskier" scale.

        Returns a dict with ``combined_risk``, ``risk_category`` and the
        per-expert risk values under ``expert_contributions``.
        """
        weights = self.expert_weights
        behavior_risk = 100 - behavior_score

        # Terms are added in behavior, geographic, contextual order.
        behavior_part = behavior_risk * weights['behavior']
        geographic_part = geo_risk * weights['geographic']
        contextual_part = context_risk * weights['contextual']
        combined_risk = behavior_part + geographic_part + contextual_part

        contributions = {
            'behavior': behavior_risk,
            'geographic': geo_risk,
            'contextual': context_risk,
        }
        return {
            'combined_risk': combined_risk,
            'risk_category': self._categorize_risk(combined_risk),
            'expert_contributions': contributions,
        }

    def _categorize_risk(self, risk_score):
        """Map a risk score to its band label."""
        bands = (
            ('low', "Low Risk"),
            ('moderate', "Moderate Risk"),
            ('high', "High Risk"),
        )
        for threshold_key, label in bands:
            if risk_score < self.risk_thresholds[threshold_key]:
                return label
        return "Very High Risk"

# Smoke-test the ensemble on one hand-picked driver
ensemble = ExpertEnsemble()
sample_inputs = {'behavior_score': 85, 'geo_risk': 45, 'context_risk': 55}
test_result = ensemble.combine_expert_scores(**sample_inputs)
print(f"Ensemble test result: {test_result}")

## Premium Calculation

Convert risk scores to insurance premium adjustments

In [None]:
def calculate_premium_adjustment(risk_score, base_premium=1000):
    """
    Price a policy from a combined risk score.

    The score is matched against ascending bands; each band carries a
    multiplier applied to ``base_premium`` and a tier name. Scores of 85 or
    more (and anything that matches no band) get the 50% surcharge tier.

    Returns a dict with the base and adjusted premium, the multiplier, the
    savings relative to base (negative for a surcharge) and the tier.
    """
    # (exclusive upper bound, multiplier, tier name)
    pricing_bands = (
        (30, 0.8, "Preferred"),        # 20% discount
        (50, 0.9, "Standard Plus"),    # 10% discount
        (70, 1.0, "Standard"),         # standard rate
        (85, 1.2, "Substandard"),      # 20% surcharge
    )

    adjustment_factor, tier = 1.5, "High Risk"  # 50% surcharge
    for upper_bound, multiplier, tier_name in pricing_bands:
        if risk_score < upper_bound:
            adjustment_factor, tier = multiplier, tier_name
            break

    adjusted_premium = base_premium * adjustment_factor

    return {
        'base_premium': base_premium,
        'adjusted_premium': adjusted_premium,
        'adjustment_factor': adjustment_factor,
        'savings': base_premium - adjusted_premium,
        'tier': tier,
    }

# Price the driver scored by the ensemble smoke test
combined_risk_for_pricing = test_result['combined_risk']
premium_result = calculate_premium_adjustment(combined_risk_for_pricing)
print(f"Premium calculation: {premium_result}")

## Model Performance Analysis

Analyze expert model contributions and performance

In [None]:
def analyze_expert_contributions(expert_scores_df):
    """
    Summarise how the three expert score columns relate to each other.

    Expects ``behavior``, ``geographic`` and ``contextual`` columns.

    Returns a dict with the pairwise correlation matrix under
    ``correlations`` and each column's standard deviation under
    ``variability`` (used here as a simplified importance proxy).
    """
    expert_columns = ['behavior', 'geographic', 'contextual']

    pairwise_corr = expert_scores_df[expert_columns].corr()
    spread = {name: expert_scores_df[name].std() for name in expert_columns}

    return {'correlations': pairwise_corr, 'variability': spread}

# Synthetic stand-in scores: 100 normal draws per expert, (column, mean, std dev)
sample_spec = (
    ('behavior', 75, 15),
    ('geographic', 50, 20),
    ('contextual', 45, 18),
)
sample_data = pd.DataFrame({
    column: np.random.normal(mean, std_dev, 100)
    for column, mean, std_dev in sample_spec
})

analysis_result = analyze_expert_contributions(sample_data)
print("Expert contribution analysis completed")
print(f"Correlations:\n{analysis_result['correlations']}")

## Visualization

Visualize expert model outputs and ensemble results

In [None]:
def plot_expert_distributions(expert_scores_df):
    """
    Draw a 2x2 figure: one histogram per expert score column plus a
    heatmap of their pairwise correlations, then show it.
    """
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # (column, target axes, panel title) for the three histogram panels
    histogram_panels = (
        ('behavior', axes[0, 0], 'Behavior Scores'),
        ('geographic', axes[0, 1], 'Geographic Risk Scores'),
        ('contextual', axes[1, 0], 'Contextual Risk Scores'),
    )
    for column, panel, title in histogram_panels:
        expert_scores_df[column].hist(ax=panel, bins=20, alpha=0.7)
        panel.set_title(title)

    # Bottom-right panel: correlation heatmap
    heatmap_panel = axes[1, 1]
    correlation_matrix = expert_scores_df[['behavior', 'geographic', 'contextual']].corr()
    sns.heatmap(correlation_matrix, annot=True, ax=heatmap_panel,
                cmap='coolwarm', center=0)
    heatmap_panel.set_title('Expert Score Correlations')

    plt.tight_layout()
    plt.show()

    print("Expert distribution plots created")

# Render the 2x2 diagnostic figure for the synthetic sample data
plot_expert_distributions(sample_data)

## Model Optimization

Optimize expert weights based on performance data

In [None]:
def optimize_expert_weights(expert_scores, actual_claims):
    """
    Optimize expert weights based on claim prediction accuracy.

    Searches for weights in [0, 1] that sum to 1 and maximise the Pearson
    correlation between the weighted blend of the three expert columns and
    ``actual_claims`` (SLSQP, started from the default weights).

    Args:
        expert_scores: Mapping or DataFrame with ``behavior``,
            ``geographic`` and ``contextual`` entries of equal length.
        actual_claims: Observed outcome per row, same length as the scores.

    Returns:
        ``[behavior, geographic, contextual]`` weights as a list of floats.
        The default ``[0.4, 0.3, 0.3]`` is returned unchanged when there are
        fewer than two observations or the optimiser does not converge.

    Raises:
        ValueError: If the score columns and claims differ in length.
    """
    from scipy.optimize import minimize

    behavior = np.asarray(expert_scores['behavior'], dtype=float)
    geographic = np.asarray(expert_scores['geographic'], dtype=float)
    contextual = np.asarray(expert_scores['contextual'], dtype=float)
    claims = np.asarray(actual_claims, dtype=float)

    lengths = {behavior.size, geographic.size, contextual.size, claims.size}
    if len(lengths) != 1:
        raise ValueError("expert score columns and actual_claims must have the same length")

    def objective(weights):
        # Calculate combined scores with given weights
        combined_scores = (
            behavior * weights[0] +
            geographic * weights[1] +
            contextual * weights[2]
        )

        # Return negative correlation (to maximize). A constant input makes
        # the correlation undefined (NaN); treat that as the worst outcome.
        with np.errstate(invalid='ignore', divide='ignore'):
            correlation = np.corrcoef(combined_scores, claims)[0, 1]
        return -correlation if not np.isnan(correlation) else 1

    # Constraints: weights sum to 1
    constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
    bounds = [(0, 1), (0, 1), (0, 1)]

    # Initial weights
    initial_weights = [0.4, 0.3, 0.3]

    if claims.size < 2:
        # Correlation is undefined for fewer than two observations.
        print("Weight optimization skipped (need at least two observations)")
        return initial_weights

    result = minimize(objective, initial_weights, method='SLSQP',
                      bounds=bounds, constraints=constraints)

    if not result.success or not np.all(np.isfinite(result.x)):
        print(f"Weight optimization did not converge ({result.message}); keeping initial weights")
        return initial_weights

    # Remove tiny numerical violations of the bounds / sum constraint.
    weights = np.clip(result.x, 0, 1)
    total = weights.sum()
    if total <= 0:
        print("Weight optimization produced degenerate weights; keeping initial weights")
        return initial_weights
    weights = weights / total

    print(f"Weight optimization finished (correlation: {-result.fun:.3f})")
    return [float(w) for w in weights]

# Test optimization framework.
# The claims here are uniform random numbers, a stand-in for real claims data,
# so the printed weights carry no real-world meaning.
optimized_weights = optimize_expert_weights(sample_data, np.random.random(100))
print(f"Optimized weights: {optimized_weights}")

## Comprehensive CSV-Based Gating Implementation

Implementation of a production-ready gating mechanism that combines the CSV outputs from all three expert models.

In [None]:
class ComprehensiveGatingMechanism:
    """
    Production-ready gating mechanism that combines CSV outputs from expert models.

    Each of the three experts (behavior, geographic, contextual) supplies one
    row per ``driver_id``. Their scores are blended with fixed weights,
    nudged by rule-based feature adjustments, clipped to [0, 100], and mapped
    to a risk category and a premium factor.
    """

    # Experts that combine_expert_scores reads by name.
    _REQUIRED_EXPERTS = ('behavior', 'geographic', 'contextual')

    def __init__(self, expert_weights=None, feature_importance_weights=None):
        """
        Args:
            expert_weights: Optional dict with ``behavior``, ``geographic``
                and ``contextual`` weights. A falsy value selects the defaults.
            feature_importance_weights: Optional dict of feature-group
                weights. Stored on the instance; no method of this class
                reads it.
        """
        # Default expert weights based on domain knowledge
        self.expert_weights = expert_weights or {
            'behavior': 0.45,      # Highest weight - most predictive
            'geographic': 0.30,    # Medium weight - location matters
            'contextual': 0.25     # Lower weight - situational factors
        }

        # Feature importance weights for final scoring
        self.feature_importance_weights = feature_importance_weights or {
            'acceleration_features': 0.25,
            'driving_patterns': 0.20,
            'location_risk': 0.20,
            'temporal_factors': 0.15,
            'environmental_factors': 0.10,
            'vehicle_dynamics': 0.10
        }

        # Risk thresholds for categorization: (inclusive lower, exclusive upper).
        # Insertion order matters: _categorize_risk scans the bands in order.
        self.risk_thresholds = {
            'very_low': (0, 20),
            'low': (20, 40),
            'moderate': (40, 60),
            'high': (60, 80),
            'very_high': (80, 100)
        }

        # Premium adjustment factors
        self.premium_factors = {
            'very_low': 0.75,    # 25% discount
            'low': 0.85,         # 15% discount
            'moderate': 1.0,     # Standard rate
            'high': 1.25,        # 25% surcharge
            'very_high': 1.60    # 60% surcharge
        }

    def load_expert_outputs(self, behavior_csv=None, geographic_csv=None, contextual_csv=None):
        """
        Load CSV outputs from all three expert models.

        Loading is best-effort: a path that is falsy is skipped, and a file
        that cannot be read is reported on stdout and left out of the result.

        Returns:
            Dict mapping expert name to its DataFrame, containing only the
            experts that were loaded successfully.
        """
        sources = (
            ('behavior', 'Behavior', behavior_csv),
            ('geographic', 'Geographic', geographic_csv),
            ('contextual', 'Contextual', contextual_csv),
        )

        expert_data = {}
        for expert_name, label, csv_path in sources:
            if not csv_path:
                continue
            try:
                frame = pd.read_csv(csv_path)
            except Exception as e:
                # Deliberately broad: one unreadable file must not prevent
                # the remaining experts from loading.
                print(f"❌ Error loading {expert_name} data: {e}")
            else:
                expert_data[expert_name] = frame
                print(f"✅ {label} model data loaded: {len(frame)} records")

        return expert_data

    def create_synthetic_expert_data(self, n_drivers=1000, seed=42):
        """
        Create synthetic expert model outputs for demonstration.

        Uses a private ``RandomState`` so the caller's global NumPy random
        state is left untouched. With the default seed the draws are the
        same as seeding the global generator with 42 and drawing in the
        same order.

        Args:
            n_drivers: Number of drivers; ids run from 1 to ``n_drivers``.
            seed: Seed for the private random generator.

        Returns:
            Dict with ``behavior``, ``geographic`` and ``contextual``
            DataFrames, each with one row per driver.
        """
        rng = np.random.RandomState(seed)

        # Generate driver IDs
        driver_ids = range(1, n_drivers + 1)

        # NOTE: the order of the draws below determines the generated values;
        # do not reorder columns without expecting different data.

        # Behavior Expert Output (from comprehensive_telematics_analysis.ipynb structure)
        behavior_data = pd.DataFrame({
            'driver_id': driver_ids,
            'behavior_score': rng.normal(75, 15, n_drivers).clip(0, 100),
            'base_score': rng.normal(70, 12, n_drivers).clip(0, 100),
            'feature_adjustment': rng.normal(5, 8, n_drivers),
            'model_confidence': rng.uniform(0.6, 0.95, n_drivers),
            'predicted_class': rng.choice(['SLOW', 'NORMAL', 'AGGRESSIVE'], n_drivers, p=[0.2, 0.6, 0.2]),
            'risk_category': rng.choice(['LOW_RISK', 'MODERATE_RISK', 'HIGH_RISK', 'VERY_HIGH_RISK'],
                                        n_drivers, p=[0.3, 0.4, 0.25, 0.05]),
            'acceleration_std': rng.uniform(0.1, 2.5, n_drivers),
            'braking_intensity': rng.uniform(0.5, 3.0, n_drivers),
            'speed_variance': rng.uniform(5, 25, n_drivers),
            'phone_usage_rate': rng.uniform(0, 0.3, n_drivers)
        })

        # Geographic Expert Output (from geographic_risk_scoring_model.ipynb structure)
        geographic_data = pd.DataFrame({
            'driver_id': driver_ids,
            'geographic_risk_score': rng.normal(45, 20, n_drivers).clip(0, 100),
            'latitude': rng.uniform(51.4, 51.6, n_drivers),  # London area
            'longitude': rng.uniform(-0.3, 0.1, n_drivers),
            'accident_frequency': rng.poisson(2, n_drivers),
            'road_condition_risk': rng.uniform(20, 80, n_drivers),
            'environmental_risk_score': rng.uniform(10, 70, n_drivers),
            'proximity_risk': rng.uniform(15, 85, n_drivers),
            'population_density_risk': rng.uniform(25, 75, n_drivers),
            'risk_category': rng.choice(['Very Low', 'Low', 'Medium', 'High', 'Very High'],
                                        n_drivers, p=[0.1, 0.25, 0.4, 0.2, 0.05])
        })

        # Contextual Expert Output (from contextual_risk_scoring_model.ipynb structure)
        contextual_data = pd.DataFrame({
            'driver_id': driver_ids,
            'contextual_risk_score': rng.normal(40, 18, n_drivers).clip(0, 100),
            'temporal_risk': rng.uniform(20, 80, n_drivers),
            'weather_risk': rng.uniform(10, 90, n_drivers),
            'traffic_risk': rng.uniform(15, 85, n_drivers),
            'day_of_week_risk': rng.uniform(25, 75, n_drivers),
            'hour_risk': rng.uniform(20, 95, n_drivers),
            'seasonal_risk': rng.uniform(30, 70, n_drivers),
            'holiday_factor': rng.uniform(0.8, 1.5, n_drivers),
            'rush_hour_indicator': rng.choice([0, 1], n_drivers, p=[0.7, 0.3])
        })

        return {
            'behavior': behavior_data,
            'geographic': geographic_data,
            'contextual': contextual_data
        }

    def combine_expert_scores(self, expert_data):
        """
        Advanced combination of expert scores with feature-level integration.

        Only drivers present in every supplied expert frame are scored. When
        a frame holds several rows for one ``driver_id``, the first is used.

        Args:
            expert_data: Dict with ``behavior``, ``geographic`` and
                ``contextual`` DataFrames, each having a ``driver_id`` column.

        Returns:
            DataFrame with one row per common driver, sorted by driver id.

        Raises:
            KeyError: If one of the three required experts is missing.
        """
        missing = [name for name in self._REQUIRED_EXPERTS if name not in expert_data]
        if missing:
            raise KeyError(f"Missing expert outputs: {', '.join(missing)}")

        # Ensure all datasets have the same drivers
        common_drivers = set(expert_data['behavior']['driver_id'])
        for expert_name, data in expert_data.items():
            if expert_name != 'behavior':
                common_drivers &= set(data['driver_id'])

        print(f"Processing {len(common_drivers)} common drivers across all experts")

        # Index each frame by driver once so each lookup below is a hash
        # probe instead of a full-column scan. Dropping duplicates first
        # keeps the first row per driver.
        by_driver = {
            name: expert_data[name].drop_duplicates(subset='driver_id').set_index('driver_id')
            for name in self._REQUIRED_EXPERTS
        }

        combined_results = [
            self._build_driver_result(
                driver_id,
                by_driver['behavior'].loc[driver_id],
                by_driver['geographic'].loc[driver_id],
                by_driver['contextual'].loc[driver_id],
            )
            for driver_id in sorted(common_drivers)
        ]

        return pd.DataFrame(combined_results)

    def _build_driver_result(self, driver_id, behavior_row, geo_row, context_row):
        """Score one driver and return the flat result record."""
        # Extract individual scores
        behavior_score = behavior_row['behavior_score']
        geo_risk = geo_row['geographic_risk_score']
        context_risk = context_row['contextual_risk_score']

        # Convert behavior score to risk scale (invert)
        behavior_risk = 100 - behavior_score

        # Calculate weighted ensemble score
        ensemble_risk = (
            behavior_risk * self.expert_weights['behavior'] +
            geo_risk * self.expert_weights['geographic'] +
            context_risk * self.expert_weights['contextual']
        )

        # Rule-based adjustment, then clamp to the 0-100 score range
        feature_adjustment = self._calculate_feature_adjustments(behavior_row, geo_row, context_row)
        final_risk_score = np.clip(ensemble_risk + feature_adjustment, 0, 100)

        confidence_score = self._calculate_confidence(behavior_row, geo_row, context_row)
        risk_category = self._categorize_risk(final_risk_score)
        premium_info = self._calculate_premium_adjustment(final_risk_score)

        return {
            'driver_id': int(driver_id),
            'final_risk_score': round(final_risk_score, 2),
            'ensemble_risk_score': round(ensemble_risk, 2),
            'feature_adjustment': round(feature_adjustment, 2),
            'confidence_score': round(confidence_score, 3),
            'risk_category': risk_category,
            'premium_factor': premium_info['factor'],
            'premium_tier': premium_info['tier'],

            # Individual expert contributions
            'behavior_risk': round(behavior_risk, 2),
            'geographic_risk': round(geo_risk, 2),
            'contextual_risk': round(context_risk, 2),
            'behavior_score_original': round(behavior_score, 2),

            # Key features from each expert
            'model_confidence': round(behavior_row['model_confidence'], 3),
            'predicted_driving_class': behavior_row['predicted_class'],
            'acceleration_std': round(behavior_row['acceleration_std'], 3),
            'braking_intensity': round(behavior_row['braking_intensity'], 3),
            'location_latitude': round(geo_row['latitude'], 6),
            'location_longitude': round(geo_row['longitude'], 6),
            'accident_frequency': int(geo_row['accident_frequency']),
            'road_condition_risk': round(geo_row['road_condition_risk'], 2),
            'temporal_risk': round(context_row['temporal_risk'], 2),
            'weather_risk': round(context_row['weather_risk'], 2),
            'traffic_risk': round(context_row['traffic_risk'], 2),
            'rush_hour_indicator': int(context_row['rush_hour_indicator']),

            # Expert weights used
            'weight_behavior': self.expert_weights['behavior'],
            'weight_geographic': self.expert_weights['geographic'],
            'weight_contextual': self.expert_weights['contextual']
        }

    def _calculate_feature_adjustments(self, behavior_row, geo_row, context_row):
        """
        Calculate feature-based adjustments to the ensemble score.

        Each rule that fires adds a fixed number of risk points; the result
        is the sum of all fired rules (0 when none fire).
        """
        adjustment = 0

        # Behavior-based adjustments
        if behavior_row['acceleration_std'] > 2.0:
            adjustment += 5  # High acceleration variance = higher risk
        if behavior_row['braking_intensity'] > 2.5:
            adjustment += 4  # Harsh braking = higher risk
        if behavior_row['phone_usage_rate'] > 0.2:
            adjustment += 6  # High phone usage = higher risk

        # Geographic-based adjustments
        if geo_row['accident_frequency'] > 3:
            adjustment += 3  # High accident area = higher risk
        if geo_row['road_condition_risk'] > 70:
            adjustment += 2  # Poor road conditions = higher risk

        # Contextual-based adjustments
        if context_row['weather_risk'] > 75:
            adjustment += 3  # Bad weather conditions = higher risk
        if context_row['rush_hour_indicator'] == 1:
            adjustment += 2  # Rush hour driving = higher risk
        if context_row['holiday_factor'] > 1.3:
            adjustment += 1  # Holiday periods = slightly higher risk

        return adjustment

    def _calculate_confidence(self, behavior_row, geo_row, context_row):
        """
        Calculate confidence in the ensemble prediction.

        The behavior model's own confidence is scaled by the mean of three
        quality factors and clipped to [0, 1].
        """
        # Base confidence from behavior model
        base_confidence = behavior_row['model_confidence']

        confidence_factors = [
            # Data completeness factor: assume good data completeness
            0.9,
            # Geographic data quality: a negative count is treated as invalid
            0.95 if geo_row['accident_frequency'] >= 0 else 0.8,
            # Contextual data consistency: weather risk must lie in 0-100
            0.9 if 0 <= context_row['weather_risk'] <= 100 else 0.7,
        ]

        # Calculate final confidence
        final_confidence = base_confidence * np.mean(confidence_factors)
        return np.clip(final_confidence, 0, 1)

    def _categorize_risk(self, risk_score):
        """
        Categorize risk level based on score.

        Returns the title-cased band name (e.g. ``"Very Low"``). Scores that
        fall in no band, including exactly 100, return ``"Very High"``.
        """
        for category, (min_val, max_val) in self.risk_thresholds.items():
            if min_val <= risk_score < max_val:
                return category.replace('_', ' ').title()
        return "Very High"  # Fallback for edge cases

    def _calculate_premium_adjustment(self, risk_score):
        """
        Calculate premium adjustment based on risk score.

        Returns a dict with the premium ``factor`` and the ``tier`` name for
        the score's risk category.
        """
        # Convert the display label back to its dictionary key form
        category = self._categorize_risk(risk_score).lower().replace(' ', '_')
        factor = self.premium_factors.get(category, 1.0)

        tier_mapping = {
            'very_low': 'Preferred Plus',
            'low': 'Preferred',
            'moderate': 'Standard',
            'high': 'Substandard',
            'very_high': 'High Risk'
        }

        return {
            'factor': factor,
            'tier': tier_mapping.get(category, 'Standard')
        }

# Announce the comprehensive gating mechanism and list its features
banner_lines = (
    "Comprehensive Gating Mechanism initialized!",
    "Features:",
    "- CSV-based expert model integration",
    "- Driver ID indexing with natural numbers",
    "- Advanced feature-level combination",
    "- Confidence scoring",
    "- Premium calculation",
    "- Comprehensive feature set export",
)
print(*banner_lines, sep="\n")

: 

In [None]:
# Build the gating mechanism and synthetic inputs to run it on
gating_mechanism = ComprehensiveGatingMechanism()

print("Generating synthetic expert model outputs...")
expert_data = gating_mechanism.create_synthetic_expert_data(n_drivers=500)

# One summary line per expert: row count and column count
print("\nExpert Data Summary:")
for expert_name, data in expert_data.items():
    driver_total, feature_total = data.shape
    print(f"  {expert_name.title()} Expert: {driver_total} drivers, {feature_total} features")

# First rows of a few key columns from each expert: (heading, expert, columns)
print("\nSample Expert Outputs:")
sample_views = (
    ("\n1. Behavior Expert Sample:", 'behavior',
     ['driver_id', 'behavior_score', 'predicted_class', 'risk_category']),
    ("\n2. Geographic Expert Sample:", 'geographic',
     ['driver_id', 'geographic_risk_score', 'latitude', 'longitude', 'risk_category']),
    ("\n3. Contextual Expert Sample:", 'contextual',
     ['driver_id', 'contextual_risk_score', 'temporal_risk', 'weather_risk']),
)
for heading, expert_key, preview_columns in sample_views:
    print(heading)
    print(expert_data[expert_key][preview_columns].head())

In [None]:
# Run the gating mechanism over the expert data and summarise the outcome
print("Executing Comprehensive Gating Mechanism...")
final_scores_df = gating_mechanism.combine_expert_scores(expert_data)
driver_count = len(final_scores_df)

print("\nGating mechanism completed!")
print(f"Generated comprehensive scores for {driver_count} drivers")

# Headline statistics
risk_scores = final_scores_df['final_risk_score']
print("\nFinal Scoring Summary:")
print(f"  Average Risk Score: {risk_scores.mean():.2f}")
print(f"  Risk Score Range: {risk_scores.min():.2f} - {risk_scores.max():.2f}")
print(f"  Average Confidence: {final_scores_df['confidence_score'].mean():.3f}")

# Counts and percentage share per risk category and per premium tier
risk_dist = final_scores_df['risk_category'].value_counts()
tier_dist = final_scores_df['premium_tier'].value_counts()
for heading, distribution in (("Risk Category Distribution", risk_dist),
                              ("Premium Tier Distribution", tier_dist)):
    print(f"\n{heading}:")
    for label, count in distribution.items():
        percentage = (count / driver_count) * 100
        print(f"  {label}: {count} drivers ({percentage:.1f}%)")

# First ten rows of the key result columns
print("\nSample Final Results:")
sample_columns = ['driver_id', 'final_risk_score', 'risk_category', 'premium_tier',
                  'behavior_risk', 'geographic_risk', 'contextual_risk', 'confidence_score']
print(final_scores_df[sample_columns].head(10))

In [None]:
# Feature Importance and Analysis
# Progress message for the correlation-based feature ranking defined below.
print("Analyzing Feature Importance and Correlations...")

def analyze_feature_importance(df):
    """
    Analyze feature importance for the final risk score.

    Importance is the absolute Pearson correlation of each numeric (or
    boolean) column with ``final_risk_score``. Identifier and label columns
    are excluded, as is any other non-numeric column, so extra text columns
    in ``df`` do not break the correlation step. Constant columns have an
    undefined correlation and appear as NaN at the end of the result.

    Args:
        df: Scored drivers; must contain a numeric ``final_risk_score``.

    Returns:
        Series of absolute correlations indexed by feature name, sorted in
        descending order, without ``final_risk_score`` itself.
    """
    # Known identifier / label columns that are never treated as features
    excluded = {'driver_id', 'risk_category', 'premium_tier', 'predicted_driving_class'}
    feature_columns = [col for col in df.columns if col not in excluded]

    # Keep only columns that a Pearson correlation can be computed on
    numeric_df = df[feature_columns].select_dtypes(include=['number', 'bool'])

    # Calculate correlations with final risk score
    correlations = numeric_df.corr()['final_risk_score'].abs().sort_values(ascending=False)

    # Remove self-correlation
    correlations = correlations.drop('final_risk_score')

    print("Top 15 Most Important Features:")
    for i, (feature, correlation) in enumerate(correlations.head(15).items(), 1):
        print(f"  {i:2d}. {feature:<25} | Correlation: {correlation:.3f}")

    return correlations

# Rank the output columns by absolute correlation with final_risk_score
feature_importance = analyze_feature_importance(final_scores_df)

# Create comprehensive feature set for application use
def create_application_feature_set(df, top_n_features=20):
    """
    Build the application-facing feature table from the scored drivers.

    Selects a fixed list of columns from ``df`` (in the group order below)
    and appends four derived columns: ``risk_score_normalized``,
    ``is_high_risk``, ``is_preferred_customer`` and ``expert_agreement``.

    ``top_n_features`` is accepted but not used by this function.

    Returns a new DataFrame; ``df`` is not modified.
    """
    feature_groups = {
        # Core identification and scoring
        'core': ['driver_id', 'final_risk_score', 'ensemble_risk_score',
                 'confidence_score', 'risk_category', 'premium_factor', 'premium_tier'],
        # Expert contributions
        'expert': ['behavior_risk', 'geographic_risk', 'contextual_risk',
                   'behavior_score_original', 'model_confidence'],
        # Behavioral
        'behavioral': ['predicted_driving_class', 'acceleration_std', 'braking_intensity'],
        # Geographic
        'geographic': ['location_latitude', 'location_longitude',
                       'accident_frequency', 'road_condition_risk'],
        # Contextual
        'contextual': ['temporal_risk', 'weather_risk', 'traffic_risk', 'rush_hour_indicator'],
        # Adjustments and weights
        'meta': ['feature_adjustment', 'weight_behavior',
                 'weight_geographic', 'weight_contextual'],
    }
    selected_columns = [column for group in feature_groups.values() for column in group]

    final_feature_set = df[selected_columns].copy()

    # Derived columns for application use
    risk = final_feature_set['final_risk_score']
    expert_risks = final_feature_set[['behavior_risk', 'geographic_risk', 'contextual_risk']]

    final_feature_set['risk_score_normalized'] = risk / 100
    final_feature_set['is_high_risk'] = (risk >= 60).astype(int)
    final_feature_set['is_preferred_customer'] = (risk <= 30).astype(int)
    # Row-wise spread of the three expert risks
    final_feature_set['expert_agreement'] = expert_risks.std(axis=1)

    return final_feature_set

# Build the application-ready table and report its size
application_features = create_application_feature_set(final_scores_df)
driver_total, feature_total = application_features.shape
memory_mb = application_features.memory_usage(deep=True).sum() / 1024**2

print("\nApplication Feature Set Created:")
print(f"  Total Features: {feature_total}")
print(f"  Total Drivers: {driver_total}")
print(f"  Memory Usage: {memory_mb:.2f} MB")

# For each category, count how many of its listed features are present
print("\nApplication Feature Categories:")
feature_categories = {
    'Core Scoring': ['driver_id', 'final_risk_score', 'ensemble_risk_score', 'confidence_score'],
    'Risk Classification': ['risk_category', 'premium_factor', 'premium_tier'],
    'Expert Contributions': ['behavior_risk', 'geographic_risk', 'contextual_risk'],
    'Behavioral Metrics': ['predicted_driving_class', 'acceleration_std', 'braking_intensity'],
    'Geographic Factors': ['location_latitude', 'location_longitude', 'accident_frequency'],
    'Contextual Factors': ['temporal_risk', 'weather_risk', 'traffic_risk'],
    'Derived Features': ['risk_score_normalized', 'is_high_risk', 'is_preferred_customer', 'expert_agreement']
}

present_columns = application_features.columns
for category, features in feature_categories.items():
    available_count = sum(1 for feature in features if feature in present_columns)
    print(f"  {category}: {available_count} features")

# First ten rows of the key application columns
print("\nApplication Feature Set Sample:")
sample_app_features = ['driver_id', 'final_risk_score', 'risk_category', 'premium_tier',
                       'confidence_score', 'behavior_risk', 'geographic_risk', 'contextual_risk',
                       'is_high_risk', 'expert_agreement']
print(application_features[sample_app_features].head(10))

: 

In [None]:
# Define the missing visualization function
def create_comprehensive_visualizations(final_scores_df, feature_importance):
    """
    Draw and show a 2x3 dashboard of the gating results.

    Panels: risk score histogram, risk category pie, confidence-vs-risk
    scatter, top-10 feature importance bars, premium factor histogram, and
    the mean risk per expert.

    ``final_scores_df`` supplies the score columns; ``feature_importance``
    is a Series whose first ten entries are plotted.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Set style
    plt.style.use('default')
    sns.set_palette("husl")

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Comprehensive Gating Mechanism Analysis', fontsize=16, fontweight='bold')
    (score_ax, category_ax, scatter_ax), (importance_ax, premium_ax, expert_ax) = axes

    risk_scores = final_scores_df['final_risk_score']

    # 1. Risk Score Distribution
    score_ax.hist(risk_scores, bins=30, alpha=0.7, color='skyblue', edgecolor='black')
    score_ax.set_title('Risk Score Distribution')
    score_ax.set_xlabel('Final Risk Score')
    score_ax.set_ylabel('Frequency')
    score_ax.grid(True, alpha=0.3)

    # 2. Risk Category Distribution
    category_counts = final_scores_df['risk_category'].value_counts()
    category_ax.pie(category_counts.values, labels=category_counts.index,
                    autopct='%1.1f%%', startangle=90)
    category_ax.set_title('Risk Category Distribution')

    # 3. Confidence Score vs Risk Score
    scatter_ax.scatter(final_scores_df['confidence_score'], risk_scores, alpha=0.6, s=20)
    scatter_ax.set_title('Confidence vs Risk Score')
    scatter_ax.set_xlabel('Confidence Score')
    scatter_ax.set_ylabel('Final Risk Score')
    scatter_ax.grid(True, alpha=0.3)

    # 4. Feature Importance
    leading_features = feature_importance.head(10)
    bar_positions = range(len(leading_features))
    importance_ax.barh(bar_positions, leading_features.values)
    importance_ax.set_yticks(bar_positions)
    importance_ax.set_yticklabels(leading_features.index, fontsize=8)
    importance_ax.set_title('Top 10 Feature Importance')
    importance_ax.set_xlabel('Correlation with Risk Score')

    # 5. Premium Factor Distribution
    premium_ax.hist(final_scores_df['premium_factor'], bins=20, alpha=0.7,
                    color='lightgreen', edgecolor='black')
    premium_ax.set_title('Premium Factor Distribution')
    premium_ax.set_xlabel('Premium Factor')
    premium_ax.set_ylabel('Frequency')
    premium_ax.grid(True, alpha=0.3)

    # 6. Expert Contribution Comparison
    mean_expert_risk = final_scores_df[['behavior_risk', 'geographic_risk', 'contextual_risk']].mean()
    expert_ax.bar(mean_expert_risk.index, mean_expert_risk.values,
                  color=['coral', 'lightblue', 'lightgreen'])
    expert_ax.set_title('Average Expert Contributions')
    expert_ax.set_ylabel('Average Risk Score')
    expert_ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    print("Comprehensive visualizations created successfully!")

# Export Results and Model Deployment Preparation
# Progress message only; nothing is written at this point.
print("Preparing data export and model deployment...")

def _json_default(value):
    """
    Fallback encoder for values the json module cannot serialize natively

    NumPy scalars become Python scalars, NumPy arrays become lists and
    pandas Series become dicts. Any other type raises TypeError, which is
    what json itself does for unsupported objects.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, pd.Series):
        return value.to_dict()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def export_comprehensive_results(final_scores_df, application_features, gating_mechanism):
    """
    Export all results to CSV files for production use

    Writes five timestamped files using relative filenames (so they land in
    the current working directory): the complete scores CSV, the application
    feature CSV, a summary-statistics CSV, a JSON model configuration and a
    plain-text deployment guide. A progress line is printed after each file.

    Parameters:
        final_scores_df: DataFrame; the columns 'final_risk_score',
            'confidence_score' and 'premium_factor' are read for the summary.
        application_features: DataFrame written out as-is; its column count
            is recorded in the configuration and the guide.
        gating_mechanism: object exposing `expert_weights` (with the keys
            'behavior', 'geographic' and 'contextual'),
            `feature_importance_weights`, `risk_thresholds` and
            `premium_factors`.

    Returns:
        dict mapping 'complete_results', 'application_features',
        'summary_stats', 'model_config' and 'deployment_guide' to the
        filename written for each.
    """
    import json
    from datetime import datetime

    # Read the clock exactly once so the filename stamp, the configuration's
    # creation_date and the guide's "Generated" line always agree.
    generated_at = datetime.now()
    timestamp = generated_at.strftime("%Y%m%d_%H%M%S")

    # Values reused by the summary table, the configuration and the guide.
    driver_count = len(final_scores_df)
    feature_count = application_features.shape[1]
    avg_risk = final_scores_df['final_risk_score'].mean()
    avg_confidence = final_scores_df['confidence_score'].mean()

    # 1. Export complete final scores dataset
    complete_filename = f"comprehensive_gating_results_{timestamp}.csv"
    final_scores_df.to_csv(complete_filename, index=False)
    print(f"Complete gating results exported: {complete_filename}")

    # 2. Export application-ready feature set
    app_filename = f"application_feature_set_{timestamp}.csv"
    application_features.to_csv(app_filename, index=False)
    print(f"Application feature set exported: {app_filename}")

    # 3. Export summary statistics
    # The percentage rows use fixed cut-offs on final_risk_score
    # (>= 60 counts as high risk, <= 30 as preferred).
    summary_filename = f"gating_summary_statistics_{timestamp}.csv"
    summary_stats = pd.DataFrame({
        'Metric': ['Total Drivers', 'Average Risk Score', 'Average Confidence',
                  'High Risk Drivers (%)', 'Preferred Customers (%)',
                  'Average Premium Factor'],
        'Value': [
            driver_count,
            avg_risk,
            avg_confidence,
            (final_scores_df['final_risk_score'] >= 60).mean() * 100,
            (final_scores_df['final_risk_score'] <= 30).mean() * 100,
            final_scores_df['premium_factor'].mean()
        ],
    })
    summary_stats.to_csv(summary_filename, index=False)
    print(f"Summary statistics exported: {summary_filename}")

    # 4. Export model configuration
    config_filename = f"gating_model_config_{timestamp}.json"
    model_config = {
        'expert_weights': gating_mechanism.expert_weights,
        'feature_importance_weights': gating_mechanism.feature_importance_weights,
        'risk_thresholds': gating_mechanism.risk_thresholds,
        'premium_factors': gating_mechanism.premium_factors,
        'model_version': '1.0',
        'creation_date': generated_at.isoformat(),
        'total_drivers_processed': driver_count,
        'feature_count': feature_count
    }

    # Explicit UTF-8 keeps the output independent of the platform locale;
    # the fallback encoder keeps NumPy/pandas values from aborting the export.
    with open(config_filename, 'w', encoding='utf-8') as f:
        json.dump(model_config, f, indent=2, default=_json_default)
    print(f"Model configuration exported: {config_filename}")

    # 5. Create deployment guide
    deployment_guide = f"""
COMPREHENSIVE GATING MECHANISM - DEPLOYMENT GUIDE
=================================================
Generated: {generated_at.strftime('%Y-%m-%d %H:%M:%S')}

EXPORTED FILES:
1. {complete_filename} - Complete gating results with all features
2. {app_filename} - Application-ready feature set
3. {summary_filename} - Summary statistics
4. {config_filename} - Model configuration

KEY FEATURES FOR APPLICATION:
- driver_id: Natural number index (1 to N)
- final_risk_score: Primary risk score (0-100)
- risk_category: Risk classification (Very Low, Low, Moderate, High, Very High)
- premium_factor: Insurance premium adjustment factor
- confidence_score: Model confidence (0-1)
- expert contributions: Individual expert risk scores

MODEL PERFORMANCE:
- Total Drivers Processed: {driver_count:,}
- Average Risk Score: {avg_risk:.2f}
- Average Confidence: {avg_confidence:.3f}
- Feature Count: {feature_count}

INTEGRATION NOTES:
1. Load application_feature_set CSV for production scoring
2. Use driver_id as primary key for customer lookup
3. Apply premium_factor to base insurance rates
4. Monitor confidence_score for model reliability
5. Retrain when confidence drops below 0.7

EXPERT WEIGHTS USED:
- Behavior Expert: {gating_mechanism.expert_weights['behavior']:.1%}
- Geographic Expert: {gating_mechanism.expert_weights['geographic']:.1%}
- Contextual Expert: {gating_mechanism.expert_weights['contextual']:.1%}
"""

    guide_filename = f"deployment_guide_{timestamp}.txt"
    with open(guide_filename, 'w', encoding='utf-8') as f:
        f.write(deployment_guide)
    print(f"Deployment guide created: {guide_filename}")

    return {
        'complete_results': complete_filename,
        'application_features': app_filename,
        'summary_stats': summary_filename,
        'model_config': config_filename,
        'deployment_guide': guide_filename
    }

# Run the export and keep the mapping of artifact type to written filename
exported_files = export_comprehensive_results(final_scores_df, application_features, gating_mechanism)

# Completion banner: one print call, one message per output line
print(
    "\nGATING MECHANISM IMPLEMENTATION COMPLETED!",
    "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
    f"Processed {len(final_scores_df)} drivers with natural number IDs",
    "Combined outputs from 3 expert models",
    f"Generated comprehensive feature set with {application_features.shape[1]} features",
    "Exported production-ready CSV files",
    "Created deployment documentation",
    sep="\n",
)

# Headline numbers read straight from the final scores table
print("\nQUICK STATS:")
print(
    "  Risk Score Range: "
    f"{final_scores_df['final_risk_score'].min():.1f} - "
    f"{final_scores_df['final_risk_score'].max():.1f}"
)
print(f"  Average Confidence: {final_scores_df['confidence_score'].mean():.3f}")
print(f"  Risk Categories: {final_scores_df['risk_category'].nunique()} levels")
print(f"  Premium Tiers: {final_scores_df['premium_tier'].nunique()} tiers")

# One line per exported artifact, with the key shown as a title-cased label
print("\nEXPORTED FILES:")
for file_type, filename in exported_files.items():
    print("  {}: {}".format(file_type.replace('_', ' ').title(), filename))

# Closing hand-off messages
print(
    "\nReady for production deployment!",
    "Use the application_feature_set CSV for real-time scoring in your insurance application.",
    sep="\n",
)

In [None]:
# Create visualizations
# Announce the step, then render the multi-panel figure from the final scores
# table and the feature-importance values.
print("Creating comprehensive visualizations...")
create_comprehensive_visualizations(final_scores_df, feature_importance)