In [1]:
# ==============================================
# STEP 1: LOAD & PREPARE ACTUAL MARGIN DATA
# ==============================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.neighbors import NearestNeighbors

print("="*80)
print("STEP 1: USING ACTUAL mt_marge DATA")
print("="*80)

# Load and prepare data
df = pd.read_csv('cleaned_quote_data.csv')
df['dt_creation_devis'] = pd.to_datetime(df['dt_creation_devis'])

# Create product categories
df['product_category'] = 'Other'
df.loc[df['famille_equipement_produit'].str.contains('chaudi√®re', case=False, na=False), 'product_category'] = 'Gas Boiler'
df.loc[df['famille_equipement_produit'].str.contains('pompe √† chaleur', case=False, na=False), 'product_category'] = 'Heat Pump'

analysis_df = df[df['product_category'].isin(['Gas Boiler', 'Heat Pump'])].copy()

# Define wins
win_statuses = ['Valid√©, sign√©, r√©alis√© et conforme', 
                'Valid√©,sign√© mais abandonn√©',
                "Valid√©, sign√© - en attente d'intervention"]
analysis_df['is_win'] = analysis_df['statut_devis'].isin(win_statuses)

# Use ACTUAL margin data
analysis_df['actual_margin'] = analysis_df['mt_marge'].fillna(0)
analysis_df['actual_revenue'] = analysis_df['mt_apres_remise_ht_devis']


STEP 1: USING ACTUAL mt_marge DATA


In [2]:
print(analysis_df[['dt_creation_devis', 'dt_signature_devis']].dtypes)

dt_creation_devis     datetime64[ns]
dt_signature_devis            object
dtype: object


In [3]:
if 'dt_creation_devis' in analysis_df.columns and 'dt_signature_devis' in analysis_df.columns:
    
    # Convert only if it's not already datetime
    if analysis_df['dt_signature_devis'].dtype != 'datetime64[ns]':
        # Try to infer format automatically
        analysis_df['dt_signature_devis'] = pd.to_datetime(
            analysis_df['dt_signature_devis'], 
            errors='coerce'
        )
        # Or specify format if you know it (more efficient and reliable)
        # analysis_df['dt_signature_devis'] = pd.to_datetime(
        #     analysis_df['dt_signature_devis'], 
        #     format='%Y-%m-%d',  # Adjust format to match your data
        #     errors='coerce'
        # )
    
    # Calculate days to close (only for rows where both dates are valid)
    mask = analysis_df['dt_signature_devis'].notna() & analysis_df['dt_creation_devis'].notna()
    analysis_df.loc[mask, 'days_to_close'] = (
        analysis_df.loc[mask, 'dt_signature_devis'] - 
        analysis_df.loc[mask, 'dt_creation_devis']
    ).dt.days
    
    # Flag quick closes
    analysis_df['quick_close'] = analysis_df['days_to_close'] <= 3

In [4]:
# ==============================================
# EMERGENCY VS CONSIDERED ANALYSIS
# ==============================================

print("="*80)
print("EMERGENCY REPLACEMENT vs CONSIDERED UPGRADE ANALYSIS")
print("="*80)

# Create proxy indicators from existing data
analysis_df = analysis_df.copy()

# 1. Time-based indicators
if 'dt_creation_devis' in analysis_df.columns and 'dt_signature_devis' in analysis_df.columns:
    # Calculate days to close for won deals
    analysis_df['days_to_close'] = (analysis_df['dt_signature_devis'] - analysis_df['dt_creation_devis']).dt.days
    
    # Flag quick closes (potential emergencies)
    analysis_df['quick_close'] = analysis_df['days_to_close'] <= 3
    
    # Extract month for seasonality
    analysis_df['month'] = analysis_df['dt_creation_devis'].dt.month
    analysis_df['winter_month'] = analysis_df['month'].isin([1, 2, 11, 12])  # Jan, Feb, Nov, Dec
    
    print(f"\n‚è±Ô∏è Time-based Indicators:")
    print(f"  Quotes with quick close (‚â§3 days): {analysis_df['quick_close'].sum():,} ({analysis_df['quick_close'].mean()*100:.1f}%)")
    print(f"  Quotes in winter months: {analysis_df['winter_month'].sum():,} ({analysis_df['winter_month'].mean()*100:.1f}%)")

# 2. Behavioral indicators
# Count quotes per customer
quote_counts = analysis_df.groupby('numero_compte').size()
analysis_df['total_quotes_per_customer'] = analysis_df['numero_compte'].map(quote_counts)

analysis_df['single_quote_customer'] = analysis_df['total_quotes_per_customer'] == 1
analysis_df['multiple_quote_customer'] = analysis_df['total_quotes_per_customer'] > 1

print(f"\nüîÑ Behavioral Indicators:")
print(f"  Single-quote customers: {analysis_df['single_quote_customer'].sum():,} quotes ({analysis_df['single_quote_customer'].mean()*100:.1f}%)")
print(f"  Multiple-quote customers: {analysis_df['multiple_quote_customer'].sum():,} quotes ({analysis_df['multiple_quote_customer'].mean()*100:.1f}%)")

# 3. Create composite emergency score
emergency_indicators = []

if 'quick_close' in analysis_df.columns:
    emergency_indicators.append('quick_close')
if 'winter_month' in analysis_df.columns:
    emergency_indicators.append('winter_month')
emergency_indicators.append('single_quote_customer')

analysis_df['emergency_score'] = analysis_df[emergency_indicators].sum(axis=1) / len(emergency_indicators)

print(f"\nüìä Emergency Score Distribution:")
for score in sorted(analysis_df['emergency_score'].unique()):
    count = (analysis_df['emergency_score'] == score).sum()
    pct = count / len(analysis_df) * 100
    print(f"  Score {score:.1f}: {count:,} quotes ({pct:.1f}%)")

# Define thresholds
analysis_df['likely_emergency'] = analysis_df['emergency_score'] >= 0.7
analysis_df['likely_considered'] = analysis_df['emergency_score'] <= 0.3

print(f"\nüéØ Estimated Segmentation:")
print(f"  Likely Emergency Replacements: {analysis_df['likely_emergency'].sum():,} quotes ({analysis_df['likely_emergency'].mean()*100:.1f}%)")
print(f"  Likely Considered Upgrades: {analysis_df['likely_considered'].sum():,} quotes ({analysis_df['likely_considered'].mean()*100:.1f}%)")
print(f"  Uncertain/Mixed: {len(analysis_df) - analysis_df['likely_emergency'].sum() - analysis_df['likely_considered'].sum():,} quotes")

# 4. Compare performance by segment
print("\n" + "="*80)
print("PERFORMANCE BY ESTIMATED SEGMENT")
print("="*80)

segments = ['likely_emergency', 'likely_considered']
for segment in segments:
    segment_data = analysis_df[analysis_df[segment]]
    
    if len(segment_data) > 0:
        print(f"\nüìä {segment.replace('_', ' ').title()}:")
        print(f"  Count: {len(segment_data):,} quotes")
        
        # Conversion rate
        conv_rate = segment_data['is_win'].mean() * 100
        print(f"  Conversion rate: {conv_rate:.1f}%")
        
        # Average margin
        avg_margin = segment_data['actual_margin'].mean()
        print(f"  Average margin: {avg_margin:,.0f}‚Ç¨")
        
        # Expected value
        ev = avg_margin * (conv_rate/100)
        print(f"  Expected value: {ev:,.0f}‚Ç¨")
        
        # Product mix
        for product in ['Gas Boiler', 'Heat Pump']:
            product_pct = (segment_data['product_category'] == product).mean() * 100
            print(f"  {product}: {product_pct:.1f}%")

EMERGENCY REPLACEMENT vs CONSIDERED UPGRADE ANALYSIS

‚è±Ô∏è Time-based Indicators:
  Quotes with quick close (‚â§3 days): 1,812 (11.1%)
  Quotes in winter months: 6,713 (41.2%)

üîÑ Behavioral Indicators:
  Single-quote customers: 8,720 quotes (53.5%)
  Multiple-quote customers: 7,587 quotes (46.5%)

üìä Emergency Score Distribution:
  Score 0.0: 4,154 quotes (25.5%)
  Score 0.3: 7,609 quotes (46.7%)
  Score 0.7: 3,996 quotes (24.5%)
  Score 1.0: 548 quotes (3.4%)

üéØ Estimated Segmentation:
  Likely Emergency Replacements: 548 quotes (3.4%)
  Likely Considered Upgrades: 4,154 quotes (25.5%)
  Uncertain/Mixed: 11,605 quotes

PERFORMANCE BY ESTIMATED SEGMENT

üìä Likely Emergency:
  Count: 548 quotes
  Conversion rate: 99.3%
  Average margin: 2,954‚Ç¨
  Expected value: 2,932‚Ç¨
  Gas Boiler: 77.0%
  Heat Pump: 23.0%

üìä Likely Considered:
  Count: 4,154 quotes
  Conversion rate: 19.5%
  Average margin: 4,246‚Ç¨
  Expected value: 829‚Ç¨
  Gas Boiler: 57.9%
  Heat Pump: 42.1%


In [5]:
# ==============================================
# BUSINESS IMPLICATIONS OF SEGMENTATION
# ==============================================

print("\n" + "="*80)
print("BUSINESS IMPLICATIONS")
print("="*80)

# If we can segment...
print("\nüîç What Segmentation Reveals:")

# Calculate if segments behave differently
if 'likely_emergency' in analysis_df.columns and 'likely_considered' in analysis_df.columns:
    emergency_data = analysis_df[analysis_df['likely_emergency']]
    considered_data = analysis_df[analysis_df['likely_considered']]
    
    if len(emergency_data) > 0 and len(considered_data) > 0:
        print(f"\n1. CONVERSION DIFFERENCES:")
        emergency_conv = emergency_data['is_win'].mean()
        considered_conv = considered_data['is_win'].mean()
        print(f"   Emergency: {emergency_conv:.1%}")
        print(f"   Considered: {considered_conv:.1%}")
        print(f"   Difference: {(considered_conv - emergency_conv)*100:+.1f} percentage points")
        
        print(f"\n2. PRODUCT PREFERENCES:")
        for product in ['Gas Boiler', 'Heat Pump']:
            emergency_pct = (emergency_data['product_category'] == product).mean()
            considered_pct = (considered_data['product_category'] == product).mean()
            print(f"   {product}:")
            print(f"     Emergency: {emergency_pct:.1%}")
            print(f"     Considered: {considered_pct:.1%}")
        
        print(f"\n3. MARGIN DIFFERENCES:")
        emergency_margin = emergency_data['actual_margin'].mean()
        considered_margin = considered_data['actual_margin'].mean()
        print(f"   Emergency: {emergency_margin:,.0f}‚Ç¨")
        print(f"   Considered: {considered_margin:,.0f}‚Ç¨")
        print(f"   Difference: {considered_margin - emergency_margin:+,.0f}‚Ç¨")
        
        print(f"\n4. EXPECTED VALUE:")
        emergency_ev = emergency_margin * emergency_conv
        considered_ev = considered_margin * considered_conv
        print(f"   Emergency: {emergency_ev:,.0f}‚Ç¨")
        print(f"   Considered: {considered_ev:,.0f}‚Ç¨")
        print(f"   Advantage: {'Emergency' if emergency_ev > considered_ev else 'Considered'}")


BUSINESS IMPLICATIONS

üîç What Segmentation Reveals:

1. CONVERSION DIFFERENCES:
   Emergency: 99.3%
   Considered: 19.5%
   Difference: -79.7 percentage points

2. PRODUCT PREFERENCES:
   Gas Boiler:
     Emergency: 77.0%
     Considered: 57.9%
   Heat Pump:
     Emergency: 23.0%
     Considered: 42.1%

3. MARGIN DIFFERENCES:
   Emergency: 2,954‚Ç¨
   Considered: 4,246‚Ç¨
   Difference: +1,292‚Ç¨

4. EXPECTED VALUE:
   Emergency: 2,932‚Ç¨
   Considered: 829‚Ç¨
   Advantage: Emergency
