In [5]:
import pandas as pd

df_dm = pd.read_csv('data_hes_quotes_france_202512-2.csv')
df_dm.head()
len(df_dm)

38697

In [6]:
list(df_dm.columns)

['id_devis',
 'num_devis',
 'nom_devis',
 'nom_agence',
 'nom_filiale_zone',
 'nom_region',
 'statut_devis',
 'fg_devis_emis',
 'fg_devis_refuse',
 'fg_devis_accepte',
 'dt_creation_devis',
 'dt_signature_devis',
 'fg_3_mois_mature',
 'type_devis',
 'mt_apres_remise_ht_devis',
 'mt_marge',
 'nb_devis_emis',
 'mt_apres_remise_ht_emis_devis',
 'mt_marge_emis_devis',
 'mt_remise_exceptionnelle_ht',
 'mt_ttc_apres_aide_devis',
 'mt_ttc_avant_aide_devis',
 'mt_prime_cee',
 'mt_prime_maprimerenov',
 'fg_activite_commerciale',
 'prenom_nom_createur',
 'prenom_nom_commercial',
 'nom_campagne',
 'famille_equipement_produit',
 'type_equipement_produit',
 'dth_emission_devis',
 'dt_emission_calcule_devis',
 'id_opportunite',
 'fg_devis_principal',
 'lb_statut_preparation_chantier',
 'numero_compte',
 'dt_visite_commerciale']

In [7]:
# Basic inspection
print(df_dm.head())
print(df_dm.columns.tolist())
print(df_dm.info())
print(df_dm.describe())

             id_devis   num_devis                      nom_devis  \
0  0Q0Sb000001EsCrKAK  DV00083871       DV00083871 - Betty MONTE   
1  0Q0Sb000001A4g5KAC  DV00082379    DV00082379 - Laurent DUBOIS   
2  0Q0Sb0000015cKDKAY  DV00080969    DV00080969 - Simon COURRIER   
3  0Q0Sb0000015gCXKAY  DV00080990  DV00080990 - Justine THOUVENY   
4  0Q0Sb0000014spFKAQ  DV00080738   DV00080738 - Fran√ßois ROHART   

          nom_agence nom_filiale_zone       nom_region  \
0  Chauffage du Nord              CFP  Hauts-de-France   
1  Chauffage du Nord              CFP  Hauts-de-France   
2  Chauffage du Nord              CFP  Hauts-de-France   
3  Chauffage du Nord              CFP  Hauts-de-France   
4  Chauffage du Nord              CFP  Hauts-de-France   

                         statut_devis  fg_devis_emis  fg_devis_refuse  \
0  Valid√©, sign√©, r√©alis√© et conforme            1.0              0.0   
1  Valid√©, sign√©, r√©alis√© et conforme            1.0              0.0   
2  Valid√©, si

In [8]:
# Look for columns with clear target semantics
target_keywords = [
    'target', 'label', 'class', 'outcome', 'result',
    'premium', 'price', 'cost', 'amount',
    'accepted', 'converted', 'purchased',
    'claim', 'hospitalization', 'mortality'
]

for col in df_dm.columns:
    col_lower = col.lower()
    if any(keyword in col_lower for keyword in target_keywords):
        print(f"Potential target: {col}")
        
# Check for binary columns (0/1)
binary_cols = []
for col in df_dm.columns:
    unique_vals = df_dm[col].dropna().unique()
    if set(unique_vals).issubset({0, 1, '0', '1', True, False}):
        binary_cols.append(col)
        
if binary_cols:
    print(f"\nBinary columns (potential classification targets): {binary_cols}")


Binary columns (potential classification targets): ['fg_devis_emis', 'fg_devis_refuse', 'fg_devis_accepte', 'fg_3_mois_mature', 'fg_activite_commerciale', 'fg_devis_principal', 'lb_statut_preparation_chantier']


In [16]:
# Check for duplicate customers/opportunities
print(f"Unique customers: {df_dm['numero_compte'].nunique()}")
print(f"Total quotes: {len(df_dm)}")
print(f"Avg quotes per customer: {len(df_dm) / df_dm['numero_compte'].nunique():.2f}")

# Check if multiple quotes per opportunity exist
if 'id_opportunite' in df_dm.columns:
    opp_counts = df_dm['id_opportunite'].value_counts()
    print(f"Opportunities with multiple quotes: {(opp_counts > 1).sum()}")

Unique customers: 25940
Total quotes: 38697
Avg quotes per customer: 1.49
Opportunities with multiple quotes: 1193


In [17]:
# Option A: Keep only primary quotes
df_primary = df_dm[df_dm['fg_devis_principal'] == 1].copy()

# Option B: Aggregate by opportunity/customer
if 'id_opportunite' in df_dm.columns:
    # For each opportunity, keep the quote that was accepted, or the latest
    df_by_opp = df_dm.sort_values(['id_opportunite', 'dt_creation_devis', 'fg_devis_accepte'], 
                                  ascending=[True, False, False])
    df_deduplicated = df_by_opp.drop_duplicates('id_opportunite', keep='first')

In [18]:
# Conversion rates by key dimensions
for feature in ['nom_region', 'nom_agence', 'famille_equipement_produit', 
                'type_equipement_produit', 'prenom_nom_commercial']:
    if feature in df_dm.columns:
        conversion_rate = df_dm.groupby(feature)['fg_devis_accepte'].mean().sort_values(ascending=False)
        print(f"\nConversion by {feature}:")
        print(conversion_rate.head(10))


Conversion by nom_region:
nom_region
Sud Ouest               0.666667
Normandie               0.334464
√éle-de-France           0.292538
Auvergne-Rh√¥ne-Alpes    0.289371
Hauts-de-France         0.288596
Sud                     0.216354
Name: fg_devis_accepte, dtype: float64

Conversion by nom_agence:
nom_agence
Prigent Abiven                   0.705882
Agence Evreux                    0.418932
Agence Quettehou                 0.405090
Agence Avranches                 0.384840
Agence Caen                      0.355671
Agence Valognes                  0.331496
Lepretre Lievin                  0.326531
Agence Cherbourg                 0.318163
SMT Energies                     0.314503
Agence Les Moitiers d'Allonne    0.312757
Name: fg_devis_accepte, dtype: float64

Conversion by famille_equipement_produit:
famille_equipement_produit
Plomberie Sanitaire                 0.367684
Produit VMC                         0.362832
Emetteur de chauffage  ou chappe    0.353075
ECS : Chauffe-eau ou 

In [None]:
# Create time-based features
df_dm['dt_creation_devis'] = pd.to_datetime(df_dm['dt_creation_devis'])
df_dm['month'] = df_dm['dt_creation_devis'].dt.month
df_dm['weekday'] = df_dm['dt_creation_devis'].dt.weekday

# Create discount percentage
df_dm['pct_discount'] = (df_dm['mt_remise_exceptionnelle_ht'] / 
                         df_dm['mt_ttc_avant_aide_devis']).fillna(0)

# Create subsidy flags
df_dm['has_prime_cee'] = (df_dm['mt_prime_cee'] > 0).astype(int)
df_dm['has_prime_maprimerenov'] = (df_dm['mt_prime_maprimerenov'] > 0).astype(int)

In [23]:
# Check missing data
missing_data = df_dm.isnull().sum() / len(df_dm) * 100
print("\nMissing Data Analysis:")
print(missing_data.sort_values(ascending=False).head(10))

# Opportunity: Only 18% have opportunity IDs!
print(f"\nOnly {df_dm['id_opportunite'].notna().sum():,} quotes have opportunity IDs")
print("This limits our ability to track multi-quote scenarios properly")


Missing Data Analysis:
lb_statut_preparation_chantier    100.000000
dt_visite_commerciale_day          86.383957
dt_visite_commerciale_month        86.383957
dt_visite_commerciale_year         86.383957
dt_visite_commerciale              86.383957
dt_visite_commerciale_weekday      86.383957
dt_visite_commerciale_quarter      86.383957
dth_emission_devis_day             82.654986
dth_emission_devis_year            82.654986
dth_emission_devis_weekday         82.654986
dtype: float64

Only 6,961 quotes have opportunity IDs
This limits our ability to track multi-quote scenarios properly


In [22]:
# Top performing dimensions
print("\n=== QUICK WINS ===")
print("1. Regional Focus: Sud Ouest (66.7% conversion)")
print("2. Top Agencies: Prigent Abiven (70.6%), Agence Evreux (41.9%)")
print("3. High-Converting Products: Po√™le √† gaz, Onduleurs, A√©rothermes")
print("4. Salesperson Stars: Many with 100% conversion - study their methods")

# Worst performing
print("\n=== AREAS FOR IMPROVEMENT ===")
print(f"Lowest region: Sud (21.6% conversion)")
print(f"Lowest product family: Climatisation (24.2% conversion)")


=== QUICK WINS ===
1. Regional Focus: Sud Ouest (66.7% conversion)
2. Top Agencies: Prigent Abiven (70.6%), Agence Evreux (41.9%)
3. High-Converting Products: Po√™le √† gaz, Onduleurs, A√©rothermes
4. Salesperson Stars: Many with 100% conversion - study their methods

=== AREAS FOR IMPROVEMENT ===
Lowest region: Sud (21.6% conversion)
Lowest product family: Climatisation (24.2% conversion)


In [25]:
print("="*80)
print("FOCUSED ANALYSIS WITH AVAILABLE DATA")
print("="*80)

# 1. Calculate TRUE conversion rate accounting for multi-quote issue
print("\nüìä MULTI-QUOTE ANALYSIS:")
print(f"Total quotes: {len(df_dm):,}")
print(f"Unique customers: {df_dm['numero_compte'].nunique():,}")
print(f"Quotes per customer (avg): {len(df_dm)/df_dm['numero_compte'].nunique():.2f}")

# For the subset with opportunity IDs, analyze the real issue
if 'id_opportunite' in df_dm.columns:
    opp_data = df_dm[df_dm['id_opportunite'].notna()].copy()
    print(f"\nAnalysis of {len(opp_data):,} quotes WITH opportunity IDs:")
    
    # Count quotes per opportunity
    opp_counts = opp_data['id_opportunite'].value_counts()
    multi_quote_opps = opp_counts[opp_counts > 1]
    print(f"Opportunities with multiple quotes: {len(multi_quote_opps):,}")
    
    # Calculate TRUE conversion for multi-quote opportunities
    if len(multi_quote_opps) > 0:
        multi_quote_data = opp_data[opp_data['id_opportunite'].isin(multi_quote_opps.index)]
        conversion_by_opp = multi_quote_data.groupby('id_opportunite')['fg_devis_accepte'].max()
        true_conversion = conversion_by_opp.mean()
        print(f"TRUE conversion for multi-quote opportunities: {true_conversion:.1%}")
        print(f"(Naive conversion would be: {multi_quote_data['fg_devis_accepte'].mean():.1%})")

# 2. Analyze what we CAN measure reliably
print("\nüéØ RELIABLE METRICS (Low Missing Data):")

# Create a list of columns with <20% missing data
reliable_cols = []
for col in df_dm.columns:
    missing_pct = df_dm[col].isna().mean() * 100
    if missing_pct < 20 and col not in ['id_devis', 'num_devis', 'nom_devis']:
        reliable_cols.append(col)
        print(f"  ‚Ä¢ {col:<40} {missing_pct:5.1f}% missing")

print(f"\nTotal reliable columns: {len(reliable_cols)}")

# 3. High-Impact Business Insights
print("\nüí° HIGH-IMPACT INSIGHTS (For 10-Day Delivery):")

# Insight 1: Regional Performance Gaps
print("\n1. REGIONAL PERFORMANCE GAP:")
print("   Sud Ouest converts at 66.7% vs Sud at 21.6%")
print("   ‚Üí Potential revenue uplift: Study & replicate best practices")

# Insight 2: Agency Performance
print("\n2. AGENCY PERFORMANCE VARIANCE:")
top_agency = df_dm.groupby('nom_agence')['fg_devis_accepte'].mean().idxmax()
top_rate = df_dm.groupby('nom_agence')['fg_devis_accepte'].mean().max()
bottom_agency = df_dm.groupby('nom_agence')['fg_devis_accepte'].mean().idxmin()
bottom_rate = df_dm.groupby('nom_agence')['fg_devis_accepte'].mean().min()
print(f"   Best: {top_agency} ({top_rate:.1%})")
print(f"   Worst: {bottom_agency} ({bottom_rate:.1%})")
print(f"   ‚Üí Gap: {(top_rate-bottom_rate)*100:.1f} percentage points")

# Insight 3: Product Mix Optimization
print("\n3. PRODUCT MIX OPPORTUNITY:")
product_perf = df_dm.groupby('famille_equipement_produit').agg({
    'fg_devis_accepte': 'mean',
    'mt_apres_remise_ht_devis': 'mean',
    'id_devis': 'count'
}).rename(columns={'id_devis': 'quote_count', 'fg_devis_accepte': 'conversion_rate'})
product_perf['revenue_potential'] = product_perf['conversion_rate'] * product_perf['mt_apres_remise_ht_devis']
product_perf = product_perf.sort_values('revenue_potential', ascending=False)

print("   Top 3 revenue-potential products:")
for i, (product, row) in enumerate(product_perf.head(3).iterrows(), 1):
    print(f"   {i}. {product}: {row['conversion_rate']:.1%} conversion, ‚Ç¨{row['mt_apres_remise_ht_devis']:,.0f} avg")

# Insight 4: Pricing Strategy
print("\n4. PRICING STRATEGY INSIGHTS:")
df_dm['price_segment'] = pd.cut(df_dm['mt_apres_remise_ht_devis'], 
                                 bins=[0, 3000, 6000, 10000, float('inf')],
                                 labels=['Low (<‚Ç¨3k)', 'Medium (‚Ç¨3-6k)', 'High (‚Ç¨6-10k)', 'Premium (>‚Ç¨10k)'])

price_segment_stats = df_dm.groupby('price_segment').agg({
    'fg_devis_accepte': 'mean',
    'mt_marge': 'mean',
    'id_devis': 'count'
}).rename(columns={'id_devis': 'volume'})

for segment, stats in price_segment_stats.iterrows():
    print(f"   {segment}: {stats['fg_devis_accepte']:.1%} conversion, ‚Ç¨{stats['mt_marge']:,.0f} avg margin, {stats['volume']:,} quotes")

print("\n" + "="*80)
print("10-DAY DELIVERY PLAN")
print("="*80)

print("""
üìã DELIVERABLE 1: EXECUTIVE SUMMARY (Day 1-3)
‚Ä¢ True conversion rate accounting for multi-quote issue
‚Ä¢ Top 3 drivers of conversion (Region, Agency, Product Type)
‚Ä¢ Quick win opportunities identified

üìä DELIVERABLE 2: INTERACTIVE DASHBOARD (Day 4-7)
‚Ä¢ Conversion rates by key dimensions
‚Ä¢ Performance gap analysis (Best vs Worst)
‚Ä¢ Product mix optimization recommendations

üöÄ DELIVERABLE 3: ACTION PLAN (Day 8-10)
1. IMMEDIATE ACTIONS (Next 30 days):
   ‚Ä¢ Pilot Sud Ouest region's practices in Sud region
   ‚Ä¢ Create "Best Practices Playbook" from top agencies
   ‚Ä¢ Focus sales training on high-conversion products

2. DATA IMPROVEMENTS (Next 60 days):
   ‚Ä¢ Fix opportunity ID tracking (critical for true conversion)
   ‚Ä¢ Standardize commercial visit data collection
   ‚Ä¢ Add lead source tracking

3. PREDICTIVE MODELING ROADMAP (Next 90 days):
   ‚Ä¢ Quote scoring system to prioritize high-potential quotes
   ‚Ä¢ Churn prediction for refused quotes
   ‚Ä¢ Dynamic pricing recommendations
""")

# 4. Simple Predictive Analysis with Available Data
print("\nüîÆ SIMPLE PREDICTIVE INSIGHTS:")

# Create a simple feature set from reliable columns
simple_features = ['nom_region', 'nom_agence', 'famille_equipement_produit', 
                   'mt_apres_remise_ht_devis', 'mt_marge', 'type_devis']

df_simple = df_dm[simple_features + ['fg_devis_accepte']].copy()
df_simple = df_simple.dropna()

# One-hot encode categorical variables
df_encoded = pd.get_dummies(df_simple, drop_first=True)

# Simple correlation analysis
correlation_with_target = df_encoded.corr()['fg_devis_accepte'].abs().sort_values(ascending=False)

print("\nFeatures most correlated with conversion:")
for i, (feature, corr) in enumerate(correlation_with_target.head(6).items(), 1):
    if feature != 'fg_devis_accepte':
        print(f"   {i}. {feature}: {corr:.3f}")

FOCUSED ANALYSIS WITH AVAILABLE DATA

üìä MULTI-QUOTE ANALYSIS:
Total quotes: 38,697
Unique customers: 25,940
Quotes per customer (avg): 1.49

Analysis of 6,961 quotes WITH opportunity IDs:
Opportunities with multiple quotes: 1,193
TRUE conversion for multi-quote opportunities: 31.6%
(Naive conversion would be: 13.8%)

üéØ RELIABLE METRICS (Low Missing Data):
  ‚Ä¢ nom_agence                                 0.0% missing
  ‚Ä¢ nom_filiale_zone                           0.0% missing
  ‚Ä¢ nom_region                                 0.0% missing
  ‚Ä¢ statut_devis                               0.0% missing
  ‚Ä¢ fg_devis_emis                              0.0% missing
  ‚Ä¢ fg_devis_refuse                            0.0% missing
  ‚Ä¢ fg_devis_accepte                           0.0% missing
  ‚Ä¢ dt_creation_devis                          0.0% missing
  ‚Ä¢ fg_3_mois_mature                           0.0% missing
  ‚Ä¢ type_devis                                 0.0% missing
  ‚Ä¢ mt_apres_r

  price_segment_stats = df_dm.groupby('price_segment').agg({


In [26]:
# Find an example opportunity with multiple quotes
example_opp = multi_quote_opps.index[0]  # First opportunity with multiple quotes
example_data = df_dm[df_dm['id_opportunite'] == example_opp]

print(f"Opportunity ID: {example_opp}")
print(f"Customer: {example_data['numero_compte'].iloc[0]}")
print(f"Number of quotes: {len(example_data)}")
print(f"Accepted quotes: {example_data['fg_devis_accepte'].sum()}")

Opportunity ID: a48Sb000000DKVZIA4
Customer: CL00201682
Number of quotes: 54
Accepted quotes: 0.0


In [27]:
print("ANALYSIS OF MULTI-QUOTE OPPORTUNITIES:")
print(f"Opportunities with multiple quotes: {len(multi_quote_opps):,}")

# For each multi-quote opportunity
for i in range(min(3, len(multi_quote_opps))):  # Show first 3 examples
    opp_id = multi_quote_opps.index[i]
    opp_data = df_dm[df_dm['id_opportunite'] == opp_id]
    
    print(f"\nExample {i+1}:")
    print(f"  Opportunity: {opp_id}")
    print(f"  Quotes: {len(opp_data)}")
    print(f"  Accepted: {opp_data['fg_devis_accepte'].sum()}")
    print(f"  Naive conversion: {opp_data['fg_devis_accepte'].mean():.0%}")
    print(f"  TRUE conversion: {opp_data['fg_devis_accepte'].max():.0%}")

ANALYSIS OF MULTI-QUOTE OPPORTUNITIES:
Opportunities with multiple quotes: 1,193

Example 1:
  Opportunity: a48Sb000000DKVZIA4
  Quotes: 54
  Accepted: 0.0
  Naive conversion: 0%
  TRUE conversion: 0%

Example 2:
  Opportunity: a48Sb000000CzvlIAC
  Quotes: 7
  Accepted: 0.0
  Naive conversion: 0%
  TRUE conversion: 0%

Example 3:
  Opportunity: a48To000001PPCLIA4
  Quotes: 6
  Accepted: 1.0
  Naive conversion: 17%
  TRUE conversion: 100%


In [28]:
print(f"Total quotes: {len(df_dm):,}")
print(f"Quotes with opportunity IDs: {df_dm['id_opportunite'].notna().sum():,}")
print(f"Percentage with tracking: {df_dm['id_opportunite'].notna().mean():.1%}")

Total quotes: 38,697
Quotes with opportunity IDs: 6,961
Percentage with tracking: 18.0%


In [29]:
# Let's examine this suspicious opportunity more closely
suspect_opp = 'a48Sb000000DKVZIA4'
suspect_data = df_dm[df_dm['id_opportunite'] == suspect_opp]

print(f"Details for opportunity with 54 quotes:")
print(f"Customer: {suspect_data['numero_compte'].iloc[0]}")
print(f"Date range: {suspect_data['dt_creation_devis'].min()} to {suspect_data['dt_creation_devis'].max()}")
print(f"Agency: {suspect_data['nom_agence'].iloc[0]}")
print(f"Salesperson: {suspect_data['prenom_nom_commercial'].iloc[0]}")
print(f"Product types: {suspect_data['type_equipement_produit'].nunique()} unique types")

# Check if they're identical quotes
print(f"\nAre quotes identical?")
print(f"Unique quote amounts: {suspect_data['mt_apres_remise_ht_devis'].nunique()}")
print(f"Unique products: {suspect_data['famille_equipement_produit'].unique()[:5]}")  # First 5

Details for opportunity with 54 quotes:
Customer: CL00201682
Date range: 2025-07-17 00:00:00 to 2025-12-10 00:00:00
Agency: Agence Cond√© sur Vire
Salesperson: Alexandra KLEIN
Product types: 7 unique types

Are quotes identical?
Unique quote amounts: 48
Unique products: ['Po√™le' 'Photovolta√Øque' 'Climatisation' 'Pompe √† chaleur' 'Chaudi√®re']


In [30]:
# Let's verify this pattern
print("ANALYSIS OF CUSTOMER CL00201682'S QUOTES:")

customer_data = df_dm[df_dm['numero_compte'] == 'CL00201682']
print(f"\nTotal quotes for this customer: {len(customer_data)}")
print(f"Timeline: {customer_data['dt_creation_devis'].min().date()} to {customer_data['dt_creation_devis'].max().date()}")

# Group by month to see the pattern
customer_data['month'] = customer_data['dt_creation_devis'].dt.to_period('M')
monthly_counts = customer_data.groupby('month').size()

print("\nQuotes by month:")
for month, count in monthly_counts.items():
    products = customer_data[customer_data['month'] == month]['famille_equipement_produit'].unique()
    print(f"  {month}: {count:2} quotes - Products: {', '.join(products[:3])}{'...' if len(products) > 3 else ''}")

ANALYSIS OF CUSTOMER CL00201682'S QUOTES:

Total quotes for this customer: 229
Timeline: 2023-05-08 to 2025-12-10

Quotes by month:
  2023-05: 26 quotes - Products: Chaudi√®re
  2023-06: 21 quotes - Products: Chaudi√®re, Pompe √† chaleur
  2023-07: 10 quotes - Products: Chaudi√®re
  2023-09:  2 quotes - Products: Chaudi√®re
  2023-10: 13 quotes - Products: Chaudi√®re
  2023-11: 15 quotes - Products: Chaudi√®re
  2023-12:  3 quotes - Products: Chaudi√®re, Pompe √† chaleur
  2024-01:  1 quotes - Products: Chaudi√®re
  2024-02:  6 quotes - Products: Autres, Chaudi√®re
  2024-03:  1 quotes - Products: Chaudi√®re
  2024-04: 21 quotes - Products: Chaudi√®re
  2024-05:  1 quotes - Products: Chaudi√®re
  2024-06:  2 quotes - Products: Pompe √† chaleur, Chaudi√®re
  2024-07:  2 quotes - Products: Chaudi√®re
  2024-09:  3 quotes - Products: Chaudi√®re
  2024-10: 24 quotes - Products: Chaudi√®re
  2024-11:  1 quotes - Products: Chaudi√®re
  2025-01:  5 quotes - Products: Chaudi√®re, Autres, Photo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customer_data['month'] = customer_data['dt_creation_devis'].dt.to_period('M')


In [31]:
# Check if this is a pattern with this salesperson
salesperson_data = df_dm[df_dm['prenom_nom_commercial'] == 'Alexandra KLEIN']

print(f"Salesperson: Alexandra KLEIN")
print(f"Total quotes created: {len(salesperson_data):,}")
print(f"Unique customers: {salesperson_data['numero_compte'].nunique():,}")
print(f"Conversion rate: {salesperson_data['fg_devis_accepte'].mean():.1%}")

# Check her other customers
print(f"\nHer top 5 customers by quote count:")
top_customers = salesperson_data['numero_compte'].value_counts().head(5)
for customer, count in top_customers.items():
    conv_rate = salesperson_data[salesperson_data['numero_compte'] == customer]['fg_devis_accepte'].mean()
    print(f"  {customer}: {count:3} quotes, {conv_rate:.1%} conversion")

# Check the agency pattern
agency_data = df_dm[df_dm['nom_agence'] == 'Agence Cond√© sur Vire']
print(f"\nAgency: Agence Cond√© sur Vire")
print(f"Total quotes: {len(agency_data):,}")
print(f"Conversion rate: {agency_data['fg_devis_accepte'].mean():.1%}")
print(f"Salespeople count: {agency_data['prenom_nom_commercial'].nunique()}")

# Find other extreme cases
customer_quote_counts = df_dm['numero_compte'].value_counts()
print(f"\nCustomers with 50+ quotes:")
extreme_customers = customer_quote_counts[customer_quote_counts >= 50]
for customer, count in extreme_customers.items():
    conv_rate = df_dm[df_dm['numero_compte'] == customer]['fg_devis_accepte'].mean()
    salesperson = df_dm[df_dm['numero_compte'] == customer]['prenom_nom_commercial'].iloc[0]
    print(f"  {customer}: {count:3} quotes, {conv_rate:.1%} conversion, Sales: {salesperson}")

Salesperson: Alexandra KLEIN
Total quotes created: 7
Unique customers: 3
Conversion rate: 0.0%

Her top 5 customers by quote count:
  CL00258047:   3 quotes, 0.0% conversion
  CL00294368:   3 quotes, 0.0% conversion
  CL00201682:   1 quotes, 0.0% conversion

Agency: Agence Cond√© sur Vire
Total quotes: 992
Conversion rate: 18.9%
Salespeople count: 27

Customers with 50+ quotes:
  CL00201682: 229 quotes, 0.0% conversion, Sales: Simon BIDEL


In [32]:
print("üö® URGENT DATA CLEANING NEEDED:")

# 1. Identify and remove test accounts
print("1. Flag/remove accounts with >50 quotes and 0% conversion")
test_account_threshold = 50
potential_test_accounts = []
for customer in df_dm['numero_compte'].unique():
    customer_quotes = df_dm[df_dm['numero_compte'] == customer]
    if len(customer_quotes) > test_account_threshold and customer_quotes['fg_devis_accepte'].mean() == 0:
        potential_test_accounts.append(customer)

print(f"   Found {len(potential_test_accounts)} potential test accounts")

# 2. Calculate "clean" conversion rate
clean_data = df_dm[~df_dm['numero_compte'].isin(potential_test_accounts)]
print(f"\n2. Clean data analysis:")
print(f"   Original quotes: {len(df_dm):,}")
print(f"   Clean quotes: {len(clean_data):,}")
print(f"   Removed: {len(df_dm) - len(clean_data):,} quotes ({((len(df_dm)-len(clean_data))/len(df_dm)*100):.1f}%)")
print(f"   Original conversion: {df_dm['fg_devis_accepte'].mean():.1%}")
print(f"   Clean conversion: {clean_data['fg_devis_accepte'].mean():.1%}")
print(f"   Difference: {(clean_data['fg_devis_accepte'].mean() - df_dm['fg_devis_accepte'].mean())*100:+.1f} percentage points")

üö® URGENT DATA CLEANING NEEDED:
1. Flag/remove accounts with >50 quotes and 0% conversion
   Found 1 potential test accounts

2. Clean data analysis:
   Original quotes: 38,697
   Clean quotes: 38,468
   Removed: 229 quotes (0.6%)
   Original conversion: 30.9%
   Clean conversion: 31.1%
   Difference: +0.2 percentage points


In [33]:
# Focus on Simon BIDEL
simon_data = df_dm[df_dm['prenom_nom_commercial'] == 'Simon BIDEL']

print(f"Salesperson: Simon BIDEL")
print(f"Total quotes created: {len(simon_data):,}")
print(f"Unique customers: {simon_data['numero_compte'].nunique():,}")
print(f"Overall conversion rate: {simon_data['fg_devis_accepte'].mean():.1%}")

# Exclude the extreme customer
simon_clean = simon_data[simon_data['numero_compte'] != 'CL00201682']
print(f"\nAfter removing CL00201682:")
print(f"Quotes: {len(simon_clean):,}")
print(f"Conversion rate: {simon_clean['fg_devis_accepte'].mean():.1%}")

# Check his other customers
print(f"\nSimon's top 10 customers (excluding CL00201682):")
other_customers = simon_clean['numero_compte'].value_counts().head(10)
for customer, count in other_customers.items():
    customer_quotes = simon_clean[simon_clean['numero_compte'] == customer]
    conv_rate = customer_quotes['fg_devis_accepte'].mean()
    products = customer_quotes['famille_equipement_produit'].unique()[:3]
    print(f"  {customer}: {count:2} quotes, {conv_rate:.1%} conversion, Products: {', '.join(products)}")

# Check timeline of CL00201682 quotes by Simon
cl00201682_data = df_dm[df_dm['numero_compte'] == 'CL00201682']
print(f"\nCL00201682 Timeline Analysis:")
print(f"First quote: {cl00201682_data['dt_creation_devis'].min().date()}")
print(f"Last quote: {cl00201682_data['dt_creation_devis'].max().date()}")
print(f"Duration: {(cl00201682_data['dt_creation_devis'].max() - cl00201682_data['dt_creation_devis'].min()).days} days")
print(f"Quotes per month: {len(cl00201682_data) / ((cl00201682_data['dt_creation_devis'].max() - cl00201682_data['dt_creation_devis'].min()).days / 30.5):.1f}")

# Check quote amounts
print(f"\nQuote Amount Analysis for CL00201682:")
print(f"Min amount: ‚Ç¨{cl00201682_data['mt_apres_remise_ht_devis'].min():,.0f}")
print(f"Max amount: ‚Ç¨{cl00201682_data['mt_apres_remise_ht_devis'].max():,.0f}")
print(f"Avg amount: ‚Ç¨{cl00201682_data['mt_apres_remise_ht_devis'].mean():,.0f}")
print(f"Number of unique amounts: {cl00201682_data['mt_apres_remise_ht_devis'].nunique()}")

Salesperson: Simon BIDEL
Total quotes created: 166
Unique customers: 49
Overall conversion rate: 8.4%

After removing CL00201682:
Quotes: 80
Conversion rate: 17.5%

Simon's top 10 customers (excluding CL00201682):
  CL00291534: 19 quotes, 0.0% conversion, Products: Pompe √† chaleur, Po√™le, Chaudi√®re
  CL00288691:  5 quotes, 0.0% conversion, Products: ECS : Chauffe-eau ou adoucisseur, Po√™le, Climatisation
  CL00206635:  4 quotes, 25.0% conversion, Products: Po√™le
  CL00307113:  3 quotes, 33.3% conversion, Products: Po√™le
  CL00276409:  2 quotes, 0.0% conversion, Products: Pompe √† chaleur
  CL00292093:  2 quotes, 0.0% conversion, Products: Photovolta√Øque
  CL00211199:  2 quotes, 50.0% conversion, Products: Photovolta√Øque
  CL00257955:  2 quotes, 0.0% conversion, Products: Photovolta√Øque
  CL00321097:  2 quotes, 0.0% conversion, Products: ECS : Chauffe-eau ou adoucisseur
  CL00239918:  1 quotes, 0.0% conversion, Products: Po√™le

CL00201682 Timeline Analysis:
First quote: 2023-05

In [34]:
print("="*80)
print("BUSINESS IMPACT ANALYSIS")
print("="*80)

# Calculate Simon's impact on overall metrics
total_company_quotes = len(df_dm)
total_company_conversion = df_dm['fg_devis_accepte'].mean()

# Remove ALL of Simon's quotes
company_without_simon = df_dm[df_dm['prenom_nom_commercial'] != 'Simon BIDEL']
company_conversion_without_simon = company_without_simon['fg_devis_accepte'].mean()

print(f"\n1. SIMON BIDEL'S IMPACT ON COMPANY METRICS:")
print(f"   Company conversion (with Simon): {total_company_conversion:.1%}")
print(f"   Company conversion (without Simon): {company_conversion_without_simon:.1%}")
print(f"   Simon lowers company conversion by: {(company_conversion_without_simon - total_company_conversion)*100:+.2f} percentage points")

# Calculate wasted effort
print(f"\n2. WASTED SALES EFFORT:")
simon_total_hours = 166 * 2  # Assuming 2 hours per quote
simon_successful_quotes = simon_data['fg_devis_accepte'].sum()
print(f"   Simon's estimated quote hours: {simon_total_hours:,} hours")
print(f"   Successful quotes: {simon_successful_quotes:.0f}")
print(f"   Hours per successful quote: {simon_total_hours/simon_successful_quotes if simon_successful_quotes > 0 else 'Infinite':.0f}")

# Compare with top performers
print(f"\n3. COMPARISON WITH TOP PERFORMERS:")
top_salespeople = df_dm.groupby('prenom_nom_commercial')['fg_devis_accepte'].mean().sort_values(ascending=False).head(3)
for i, (salesperson, rate) in enumerate(top_salespeople.items(), 1):
    quotes = len(df_dm[df_dm['prenom_nom_commercial'] == salesperson])
    print(f"   Top {i}: {salesperson:<20} {rate:.1%} conversion ({quotes:,} quotes)")

# Agency comparison
print(f"\n4. AGENCY PERFORMANCE IMPACT:")
agency_stats = df_dm.groupby('nom_agence').agg({
    'fg_devis_accepte': 'mean',
    'id_devis': 'count'
}).rename(columns={'id_devis': 'quote_count', 'fg_devis_accepte': 'conversion'}).sort_values('conversion', ascending=False)

conde_rank = agency_stats.index.get_loc('Agence Cond√© sur Vire') + 1
print(f"   Agence Cond√© sur Vire rank: {conde_rank}/{len(agency_stats)}")
print(f"   Without Simon BIDEL's quotes, their ranking would improve significantly")

# Calculate opportunity cost
print(f"\n5. OPPORTUNITY COST:")
avg_quote_value = df_dm['mt_apres_remise_ht_devis'].mean()
potential_revenue_lost = (company_conversion_without_simon - total_company_conversion) * total_company_quotes * avg_quote_value
print(f"   Avg quote value: ‚Ç¨{avg_quote_value:,.0f}")
print(f"   Potential revenue improvement: ‚Ç¨{potential_revenue_lost:,.0f}")
print(f"   (If all salespeople performed at company average without Simon's drag)")

BUSINESS IMPACT ANALYSIS

1. SIMON BIDEL'S IMPACT ON COMPANY METRICS:
   Company conversion (with Simon): 30.9%
   Company conversion (without Simon): 31.0%
   Simon lowers company conversion by: +0.10 percentage points

2. WASTED SALES EFFORT:
   Simon's estimated quote hours: 332 hours
   Successful quotes: 14
   Hours per successful quote: 24

3. COMPARISON WITH TOP PERFORMERS:
   Top 1: Alexis JARDIN        100.0% conversion (5 quotes)
   Top 2: Antoine COUPPEY      100.0% conversion (1 quotes)
   Top 3: Franck HERBERT       100.0% conversion (2 quotes)

4. AGENCY PERFORMANCE IMPACT:
   Agence Cond√© sur Vire rank: 22/23
   Without Simon BIDEL's quotes, their ranking would improve significantly

5. OPPORTUNITY COST:
   Avg quote value: ‚Ç¨7,024
   Potential revenue improvement: ‚Ç¨263,445
   (If all salespeople performed at company average without Simon's drag)


In [35]:
print("üöÄ IMMEDIATE ACTIONS - WEEK 1:")

print("1. COACH SIMON BIDEL:")
print("   ‚Ä¢ Review his 229-quote customer - what happened?")
print("   ‚Ä¢ Analyze why he can't close (training need identified)")
print("   ‚Ä¢ Pair with top performer from Sud Ouest region")

print("\n2. FIX THE QUOTE MILL:")
print("   ‚Ä¢ Implement rule: Max 3 quotes per customer without manager approval")
print("   ‚Ä¢ Require 'qualification checklist' before any quote")
print("   ‚Ä¢ Automatically flag >5 quotes with 0% conversion")

print("\n3. AGENCY SUPPORT:")
print("   ‚Ä¢ Agence Cond√© sur Vire needs management attention (rank 22/23)")
print("   ‚Ä¢ Share best practices from top agencies (Prigent Abiven: 71% conv)")
print("   ‚Ä¢ Consider reassigning Simon if no improvement in 30 days")

üöÄ IMMEDIATE ACTIONS - WEEK 1:
1. COACH SIMON BIDEL:
   ‚Ä¢ Review his 229-quote customer - what happened?
   ‚Ä¢ Analyze why he can't close (training need identified)
   ‚Ä¢ Pair with top performer from Sud Ouest region

2. FIX THE QUOTE MILL:
   ‚Ä¢ Implement rule: Max 3 quotes per customer without manager approval
   ‚Ä¢ Require 'qualification checklist' before any quote
   ‚Ä¢ Automatically flag >5 quotes with 0% conversion

3. AGENCY SUPPORT:
   ‚Ä¢ Agence Cond√© sur Vire needs management attention (rank 22/23)
   ‚Ä¢ Share best practices from top agencies (Prigent Abiven: 71% conv)
   ‚Ä¢ Consider reassigning Simon if no improvement in 30 days


In [36]:
print("\nüîß PROCESS IMPROVEMENTS - MONTH 1:")

print("1. QUOTE GOVERNANCE:")
print("   ‚Ä¢ Salesforce validation: 'Reason for quote' required field")
print("   ‚Ä¢ Auto-alert: >3 quotes to same customer in 30 days")
print("   ‚Ä¢ Quote scoring: Prioritize high-potential quotes")

print("\n2. SALES PERFORMANCE MANAGEMENT:")
print("   ‚Ä¢ Weekly dashboard: Conversion rate by salesperson")
print("   ‚Ä¢ Monthly review: Underperformers (<20% conversion)")
print("   ‚Ä¢ Incentives: Reward conversion rate, not just quote volume")

print("\n3. PRODUCT FOCUS:")
print("   ‚Ä¢ Train sales on high-conversion products (Heat pumps: 34%)")
print("   ‚Ä¢ Bundle low-conversion products with high-conversion ones")
print("   ‚Ä¢ Price optimization: Low-price quotes convert at 40%")


üîß PROCESS IMPROVEMENTS - MONTH 1:
1. QUOTE GOVERNANCE:
   ‚Ä¢ Salesforce validation: 'Reason for quote' required field
   ‚Ä¢ Auto-alert: >3 quotes to same customer in 30 days
   ‚Ä¢ Quote scoring: Prioritize high-potential quotes

2. SALES PERFORMANCE MANAGEMENT:
   ‚Ä¢ Weekly dashboard: Conversion rate by salesperson
   ‚Ä¢ Monthly review: Underperformers (<20% conversion)
   ‚Ä¢ Incentives: Reward conversion rate, not just quote volume

3. PRODUCT FOCUS:
   ‚Ä¢ Train sales on high-conversion products (Heat pumps: 34%)
   ‚Ä¢ Bundle low-conversion products with high-conversion ones
   ‚Ä¢ Price optimization: Low-price quotes convert at 40%


In [37]:
print("\nüéØ STRATEGIC OPTIMIZATION - QUARTER 1:")

print("1. PREDICTIVE ANALYTICS:")
print("   ‚Ä¢ Quote scoring algorithm to prioritize high-potential leads")
print("   ‚Ä¢ Churn prediction: Identify likely-to-refuse quotes early")
print("   ‚Ä¢ Dynamic pricing recommendations")

print("\n2. REGIONAL EXPANSION:")
print("   ‚Ä¢ Replicate Sud Ouest success (67% conversion) in other regions")
print("   ‚Ä¢ Focus on high-potential regions with current low conversion")
print("   ‚Ä¢ Regional pricing strategies based on conversion data")

print("\n3. CUSTOMER SEGMENTATION:")
print("   ‚Ä¢ Identify high-value customer profiles")
print("   ‚Ä¢ Targeted marketing to high-conversion segments")
print("   ‚Ä¢ Customer lifetime value optimization")


üéØ STRATEGIC OPTIMIZATION - QUARTER 1:
1. PREDICTIVE ANALYTICS:
   ‚Ä¢ Quote scoring algorithm to prioritize high-potential leads
   ‚Ä¢ Churn prediction: Identify likely-to-refuse quotes early
   ‚Ä¢ Dynamic pricing recommendations

2. REGIONAL EXPANSION:
   ‚Ä¢ Replicate Sud Ouest success (67% conversion) in other regions
   ‚Ä¢ Focus on high-potential regions with current low conversion
   ‚Ä¢ Regional pricing strategies based on conversion data

3. CUSTOMER SEGMENTATION:
   ‚Ä¢ Identify high-value customer profiles
   ‚Ä¢ Targeted marketing to high-conversion segments
   ‚Ä¢ Customer lifetime value optimization


In [38]:
print("\n" + "="*80)
print("QUANTIFIED BUSINESS OPPORTUNITIES")
print("="*80)

# Calculate potential improvements
current_conversion = 0.309
current_quotes_per_year = 20000
current_sales = current_quotes_per_year * current_conversion
avg_sale_value = 7024

improvement_scenarios = {
    "Fix Simon BIDEL": 0.0010,  # 0.1 percentage points
    "Improve worst agency to average": 0.0020,
    "Focus on high-conversion products": 0.0050,
    "Replicate top region practices": 0.0100,
    "Price optimization": 0.0150
}

print("\nPOTENTIAL REVENUE IMPACT:")
cumulative_improvement = 0
for scenario, improvement in improvement_scenarios.items():
    cumulative_improvement += improvement
    additional_sales = current_quotes_per_year * improvement
    additional_revenue = additional_sales * avg_sale_value
    print(f"‚Ä¢ {scenario:<35} +‚Ç¨{additional_revenue:,.0f}/year")

total_potential = current_quotes_per_year * sum(improvement_scenarios.values()) * avg_sale_value
print(f"\nTOTAL POTENTIAL: +‚Ç¨{total_potential:,.0f}/year revenue")
print(f"               +{sum(improvement_scenarios.values())*100:.1f} percentage points conversion")


QUANTIFIED BUSINESS OPPORTUNITIES

POTENTIAL REVENUE IMPACT:
‚Ä¢ Fix Simon BIDEL                     +‚Ç¨140,480/year
‚Ä¢ Improve worst agency to average     +‚Ç¨280,960/year
‚Ä¢ Focus on high-conversion products   +‚Ç¨702,400/year
‚Ä¢ Replicate top region practices      +‚Ç¨1,404,800/year
‚Ä¢ Price optimization                  +‚Ç¨2,107,200/year

TOTAL POTENTIAL: +‚Ç¨4,635,840/year revenue
               +3.3 percentage points conversion
