In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

from etl.util import prepare_dataset_without_leakage
from ml_features.features import prepare_features
from ml_features.customer_features import create_customer_features
from ml_features.sequence_features import create_sequence_features
from ml_training.train_rf import train_rf
from ml_evaluation.dashboard import model_evaluation_report

# --- Experiment tracking ---------------------------------------------------
# A Weights & Biases run is opened here, before any wandb.log call below.
# Only the project and the run name are passed; no config is supplied.
import wandb

wandb.init(
    name="clean-run",
    project="france-hvac",
)

# --- Warning handling ------------------------------------------------------
# Installs a blanket "ignore" filter for Python warnings.
import warnings

warnings.filterwarnings(action='ignore')

# --- Load the quote data ---------------------------------------------------
df_quotes = pd.read_csv('cleaned_quote_data.csv')
# The quote creation column is converted to datetime after loading.
df_quotes['dt_creation_devis'] = pd.to_datetime(df_quotes['dt_creation_devis'])

print(f"\nüìä Original quote data: {df_quotes.shape[0]:,} quotes")

# --- Build the two feature tables ------------------------------------------
enhanced_customers = create_customer_features(df_quotes)
sequence_df = create_sequence_features(df_quotes, window_days=90)
# Left join: every sequence row is kept and the per-customer columns are
# attached through the shared 'numero_compte' key.
# NOTE(review): the customer features are built from the full df_quotes;
# confirm they carry no information from after each sequence observation.
sequence_df = sequence_df.merge(enhanced_customers, how='left', on='numero_compte')

# --- Customer-level inputs / target ----------------------------------------
y_customer = enhanced_customers['converted']
X_customer = enhanced_customers.drop(
    columns=['numero_compte', 'converted'],
    errors='ignore',
)
X_customer_clean, y_customer_clean = prepare_features(
    X_customer, y_customer, "Customer Features"
)

# --- Sequence-level inputs / target ----------------------------------------
# 'converted' (brought in by the merge above) is dropped from the inputs
# together with the key, the target and 'current_product_family'.
y_sequence = sequence_df['current_converted']
X_sequence = sequence_df.drop(
    columns=['numero_compte', 'current_converted', 'current_product_family', 'converted'],
    errors='ignore',
)
X_sequence_clean, y_sequence_clean = prepare_features(
    X_sequence, y_sequence, "Sequence Features"
)

print(f"\n{'=' * 80}")
print("MODEL TRAINING WITH PROPER VISUALIZATIONS")
print('=' * 80)

# ROC curve helper: draws the curve, logs it to wandb, returns the AUC.
def plot_and_log_roc(y_true, y_pred_proba, model_name):
    """Plot a ROC curve for one model and log it to wandb as an image.

    Args:
        y_true: True labels, passed unchanged to ``roc_curve`` and
            ``roc_auc_score``.
        y_pred_proba: Predicted scores for the positive class, passed
            unchanged to the same two functions.
        model_name: Used in the plot title and as the prefix of the wandb
            key ``"<model_name>/roc_curve"``.

    Returns:
        The AUC value computed by ``roc_auc_score``.

    Note:
        Calls ``wandb.log``; presumably requires an active wandb run.
    """
    # The thresholds returned by roc_curve are not needed for the plot.
    fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
    auc = roc_auc_score(y_true, y_pred_proba)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {auc:.3f})')
        # Diagonal reference line from (0, 0) to (1, 1).
        ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title(f'ROC Curve - {model_name}')
        ax.legend(loc="lower right")
        ax.grid(True, alpha=0.3)

        # Log to wandb
        wandb.log({f"{model_name}/roc_curve": wandb.Image(fig)})
    finally:
        # Close the figure even when plotting or logging raises, so a failed
        # call does not leave an open figure registered with pyplot.
        plt.close(fig)

    return auc

# Confusion-matrix helper: draws a heatmap, logs it to wandb, returns the matrix.
def plot_and_log_confusion_matrix(y_true, y_pred, model_name):
    """Plot a confusion-matrix heatmap for one model and log it to wandb.

    Args:
        y_true: True labels, passed unchanged to ``confusion_matrix``.
        y_pred: Predicted labels, passed unchanged to ``confusion_matrix``.
        model_name: Used in the plot title and as the prefix of the wandb
            key ``"<model_name>/confusion_matrix"``.

    Returns:
        The matrix returned by ``confusion_matrix``.

    Note:
        Calls ``wandb.log``; presumably requires an active wandb run.
    """
    # NOTE(review): confusion_matrix is called without ``labels=``, while the
    # heatmap uses two fixed tick labels; confirm both classes always occur
    # and that the non-converting class sorts first.
    cm = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                    xticklabels=['No Convert', 'Convert'],
                    yticklabels=['No Convert', 'Convert'])
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        ax.set_title(f'Confusion Matrix - {model_name}')

        # Log to wandb
        wandb.log({f"{model_name}/confusion_matrix": wandb.Image(fig)})
    finally:
        # Close the figure even when plotting or logging raises, so a failed
        # call does not leave an open figure registered with pyplot.
        plt.close(fig)

    return cm

# ---- Experiment 1: model on customer-level features ----
print("\nüß™ CUSTOMER FEATURES MODEL")
result_customer = train_rf(X_customer_clean, y_customer_clean, 'customer_model')

# Hold out 20% of the rows (seed 42) and score the returned model on them.
# NOTE(review): train_rf is defined elsewhere; whether the data it trained on
# excludes this hold-out cannot be confirmed from this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X_customer_clean,
    y_customer_clean,
    random_state=42,
    test_size=0.2,
)
# Column 1 of predict_proba is used as the positive-class score.
y_pred_proba = result_customer['model'].predict_proba(X_test)[:, 1]
y_pred = result_customer['model'].predict(X_test)

# Plots first (ROC, then confusion matrix), scalar metrics afterwards.
auc_customer = plot_and_log_roc(y_test, y_pred_proba, "customer_model")
plot_and_log_confusion_matrix(y_test, y_pred, "customer_model")

wandb.log(dict(
    customer_auc=auc_customer,
    customer_accuracy=accuracy_score(y_test, y_pred),
    customer_f1=f1_score(y_test, y_pred, zero_division=0),
))

print(f"  ‚úì Customer AUC: {auc_customer:.3f}")
print("  ‚úì ROC & Confusion Matrix logged to wandb")

# ---- Experiment 2: model on sequence features ----
print("\nüß™ SEQUENCE FEATURES MODEL")
result_sequence = train_rf(X_sequence_clean, y_sequence_clean, 'sequence_model')

# Hold out 20% of the rows (seed 42) and score the returned model on them.
# NOTE(review): train_rf is defined elsewhere; whether the data it trained on
# excludes this hold-out cannot be confirmed from this cell.
X_train_seq, X_test_seq, y_train_seq, y_test_seq = train_test_split(
    X_sequence_clean,
    y_sequence_clean,
    random_state=42,
    test_size=0.2,
)
# Column 1 of predict_proba is used as the positive-class score.
y_pred_proba_seq = result_sequence['model'].predict_proba(X_test_seq)[:, 1]
y_pred_seq = result_sequence['model'].predict(X_test_seq)

# Plots first (ROC, then confusion matrix), scalar metrics afterwards.
auc_sequence = plot_and_log_roc(y_test_seq, y_pred_proba_seq, "sequence_model")
plot_and_log_confusion_matrix(y_test_seq, y_pred_seq, "sequence_model")

wandb.log(dict(
    sequence_auc=auc_sequence,
    sequence_accuracy=accuracy_score(y_test_seq, y_pred_seq),
    sequence_f1=f1_score(y_test_seq, y_pred_seq, zero_division=0),
))

print(f"  ‚úì Sequence AUC: {auc_sequence:.3f}")
print("  ‚úì ROC & Confusion Matrix logged to wandb")

# ---- Bar chart comparing the two AUC values ----
print("\nüìä CREATING MODEL COMPARISON")
models = ['Customer', 'Sequence']
aucs = [auc_customer, auc_sequence]

fig, ax = plt.subplots(figsize=(8, 6))
bars = ax.bar(models, aucs, color=['skyblue', 'lightcoral'])
# The y-axis is fixed to [0, 1].
ax.set(ylabel='AUC Score', title='Model AUC Comparison', ylim=[0, 1])

# Write each AUC value, centred, just above the top of its bar.
for rect, score in zip(bars, aucs):
    label_x = rect.get_x() + rect.get_width() / 2.
    ax.text(label_x, rect.get_height(), f'{score:.3f}', ha='center', va='bottom')

# Send the chart to wandb as an image, then release the figure.
wandb.log({"model_comparison": wandb.Image(fig)})
plt.close(fig)

# ---- Run-level summary ----
# "best_model" is "Sequence" only when its AUC is strictly higher; a tie
# reports "Customer". "improvement" is sequence AUC minus customer AUC.
wandb.run.summary.update({
    "best_model": "Sequence" if auc_sequence > auc_customer else "Customer",
    "best_auc": max(auc_customer, auc_sequence),
    "improvement": auc_sequence - auc_customer,
})

print(f"\n{'=' * 80}")
print("RESULTS SUMMARY")
print('=' * 80)
print(f"Customer Model AUC: {auc_customer:.3f}")
print(f"Sequence Model AUC: {auc_sequence:.3f}")
print(f"Improvement: {auc_sequence - auc_customer:.3f}")

# Close the wandb run before printing the final messages.
wandb.finish()
print("\n‚úÖ Clean wandb run complete!")
# NOTE(review): the URL below contains the literal placeholder
# "your-username" rather than the account the run was logged under.
print("üìä Check: wandb.ai/your-username/france-hvac")

[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /home/valeriya/.netrc.
[34m[1mwandb[0m: Currently logged in as: [33mvslovik[0m ([33mhomeserve[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



üìä Original quote data: 38,333 quotes
Creating enhanced customer features...
  Total customers: 25,930
‚úì Created features for 25,930 customers
‚úì New features: ['numero_compte', 'total_quotes', 'converted', 'avg_days_between_quotes', 'std_days_between_quotes', 'max_days_between_quotes', 'engagement_density', 'price_trajectory', 'unique_product_families', 'product_consistency']...
Creating sequence features (this may take a moment)...
  Total customers: 25,930
‚úì Created 10,723 sequence observations
‚úì Features include: recent patterns leading up to each quote

üîß ENCODING & PREPARING FOR MODELING...
  Preparing Customer Features...
  Features: 14, Samples: 25930

üîß ENCODING & PREPARING FOR MODELING...
  Preparing Sequence Features...
  Features: 22, Samples: 10723

MODEL TRAINING WITH PROPER VISUALIZATIONS

üß™ CUSTOMER FEATURES MODEL
‚úì Model saved: customer_model.pkl
‚úì AUC: 0.675
  ‚úì Customer AUC: 0.680
  ‚úì ROC & Confusion Matrix logged to wandb

üß™ SEQUENCE FE

0,1
customer_accuracy,‚ñÅ
customer_auc,‚ñÅ
customer_f1,‚ñÅ
sequence_accuracy,‚ñÅ
sequence_auc,‚ñÅ
sequence_f1,‚ñÅ

0,1
best_auc,0.786
best_model,Sequence
customer_accuracy,0.63941
customer_auc,0.67981
customer_f1,0.55855
improvement,0.10619
sequence_accuracy,0.74172
sequence_auc,0.786
sequence_f1,0.55821



‚úÖ Clean wandb run complete!
üìä Check: wandb.ai/your-username/france-hvac
