In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

from etl.util import prepare_dataset_without_leakage
from ml_features.features import prepare_features
from ml_features.customer_features import create_customer_features
from ml_features.sequence_features import create_sequence_features
from ml_training.train_rf import train_rf
from ml_evaluation.dashboard import model_evaluation_report
import warnings
warnings.filterwarnings('ignore')


import wandb
import os

In [2]:
# Start the W&B run that the rest of the notebook logs into.
# (A module-level `global wandb_run` statement used to precede this; names
# assigned at the top level of a cell are already global, so it did nothing.)
wandb_run = wandb.init(
    project="france-hvac",
    name="feature_engineering",
    config={"note": "Clean run"},
    # Only use settings that actually exist
    settings=wandb.Settings(
        _disable_stats=True,   # This one is confirmed to work
        silent=True,           # Reduce output
    ),
)

# Keep the run id so later cells can re-attach with id=run_id, resume="allow".
run_id = wandb_run.id
print(f"Run ID: {run_id}")



# run = wandb.init(
#     project="france-hvac",
#     name="feature_engineering",
#     config={
#         "_wandb": {
#             "python": os.environ['WANDB_PYTHON'],
#         }
#     },
#     settings=wandb.Settings(
#         _executable=os.environ['WANDB_PYTHON'],
#         _disable_meta=False,  # Keep meta but override values
#         disable_git=False,
#         silent=False
#     )
# )

# ======================================================

[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /home/valeriya/.netrc.


Run ID: 6u5az6pc


In [3]:
# --- Load the cleaned quotes and parse the quote-creation timestamp ---
df_quotes = pd.read_csv('cleaned_quote_data.csv')
df_quotes['dt_creation_devis'] = pd.to_datetime(df_quotes['dt_creation_devis'])

print(f"\nüìä Original quote data: {len(df_quotes):,} quotes")

# --- Feature tables ---
# One row per customer, then one row per sequence observation (90-day window)
# with the customer-level columns joined on the account number.
# NOTE(review): if the customer aggregates are computed over a customer's whole
# history, joining them onto per-quote rows may leak future information into
# the sequence model -- verify against create_customer_features.
enhanced_customers = create_customer_features(df_quotes)
sequence_df = create_sequence_features(df_quotes, window_days=90).merge(
    enhanced_customers, on='numero_compte', how='left'
)

# --- Customer-level modelling matrix ---
y_customer = enhanced_customers['converted']
X_customer = enhanced_customers.drop(
    columns=['numero_compte', 'converted'], errors='ignore'
)
X_customer_clean, y_customer_clean = prepare_features(
    X_customer, y_customer, "Customer Features"
)

# --- Sequence-level modelling matrix ---
y_sequence = sequence_df['current_converted']
X_sequence = sequence_df.drop(
    columns=['numero_compte', 'current_converted', 'current_product_family', 'converted'],
    errors='ignore',
)
X_sequence_clean, y_sequence_clean = prepare_features(
    X_sequence, y_sequence, "Sequence Features"
)


üìä Original quote data: 38,333 quotes
Creating enhanced customer features...
  Total customers: 25,930
‚úì Created features for 25,930 customers
‚úì New features: ['numero_compte', 'total_quotes', 'converted', 'avg_days_between_quotes', 'std_days_between_quotes', 'max_days_between_quotes', 'engagement_density', 'price_trajectory', 'unique_product_families', 'product_consistency']...
Creating sequence features (this may take a moment)...
  Total customers: 25,930
‚úì Created 10,723 sequence observations
‚úì Features include: recent patterns leading up to each quote

üîß ENCODING & PREPARING FOR MODELING...
  Preparing Customer Features...
  Features: 14, Samples: 25930

üîß ENCODING & PREPARING FOR MODELING...
  Preparing Sequence Features...
  Features: 22, Samples: 10723


In [4]:
# Section banner: blank line, rule, heading, rule.
print(
    "\n" + "=" * 80,
    "MODEL TRAINING WITH PROPER VISUALIZATIONS",
    "=" * 80,
    sep="\n",
)

# Function to create and log ROC curve
def plot_and_log_roc(y_true, y_pred_proba, model_name):
    """Draw a model's ROC curve, log it to wandb as an image, and return the AUC.

    Args:
        y_true: Ground-truth labels, passed to sklearn's ``roc_curve``.
        y_pred_proba: Predicted scores for the positive class.
        model_name: Used in the plot title and as the wandb key prefix
            (``"<model_name>/roc_curve"``).

    Returns:
        The value of ``roc_auc_score(y_true, y_pred_proba)``.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_pred_proba)
    auc_score = roc_auc_score(y_true, y_pred_proba)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(
        false_pos_rate,
        true_pos_rate,
        color='darkorange',
        lw=2,
        label=f'ROC curve (AUC = {auc_score:.3f})',
    )
    # Dashed diagonal as the chance-level reference.
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set(
        xlabel='False Positive Rate',
        ylabel='True Positive Rate',
        title=f'ROC Curve - {model_name}',
    )
    ax.legend(loc="lower right")
    ax.grid(True, alpha=0.3)

    # The figure is only needed for the logged image, so close it afterwards.
    wandb.log({f"{model_name}/roc_curve": wandb.Image(fig)})
    plt.close(fig)

    return auc_score

# Function to create and log confusion matrix
def plot_and_log_confusion_matrix(y_true, y_pred, model_name):
    """Draw the confusion matrix as a heatmap, log it to wandb, and return it.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        model_name: Used in the plot title and as the wandb key prefix
            (``"<model_name>/confusion_matrix"``).

    Returns:
        The array returned by ``confusion_matrix(y_true, y_pred)``.
    """
    cm = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=['No Convert', 'Convert'],
                yticklabels=['No Convert', 'Convert'])
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(f'Confusion Matrix - {model_name}')

    # Log to wandb. This call was missing, so the figure was closed without
    # ever being logged; the key mirrors the "<model_name>/roc_curve" key.
    wandb.log({f"{model_name}/confusion_matrix": wandb.Image(fig)})
    plt.close(fig)

    return cm

# Test 1: Customer features
print("\nüß™ CUSTOMER FEATURES MODEL")
# train_rf is a project helper; the result is indexed with 'model' below.
result_customer = train_rf(X_customer_clean, y_customer_clean, 'customer_model')

# Create predictions for visualizations
# NOTE(review): this 80/20 split is drawn here, independently of whatever
# hold-out train_rf used internally. The saved output shows train_rf reporting
# AUC 0.675 while the AUC computed from this split is 0.680, so the two test
# sets presumably differ -- confirm that X_test does not overlap the rows the
# model was fitted on, otherwise the metrics below are optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X_customer_clean, y_customer_clean, test_size=0.2, random_state=42
)
y_pred = result_customer['model'].predict(X_test)


MODEL TRAINING WITH PROPER VISUALIZATIONS

üß™ CUSTOMER FEATURES MODEL
‚úì Model saved: customer_model.pkl
‚úì AUC: 0.675


In [5]:
# Column 1 of predict_proba on the customer test split (presumably the
# "converted" class -- confirm the model's class order); used as the ROC score.
y_pred_proba = result_customer['model'].predict_proba(X_test)[:, 1]

In [7]:
# import wandb
# import plotly.graph_objects as go

# print(run_id)
# wandb.init(project="france-hvac", id=run_id, resume="allow")

# # Create figure with FIXED height
# fig = go.Figure()

# # Add your ROC curve
# fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines'))

# # CRITICAL: Set exact figure dimensions
# fig.update_layout(
#     height=500,  # Fixed height in pixels - prevents scroll
#     width=800,   # Fixed width
#     autosize=False  # Disable auto-sizing
# )

# # Log to wandb
# wandb.log({"roc_fixed": fig})

In [8]:
# import wandb
# import plotly.io as pio
# print(run_id)
# wandb.init(project="france-hvac", id=run_id, resume="allow")

# # Create figure with your preferred size
# fig = go.Figure()
# # ... add traces ...

# # Convert to HTML with explicit size
# html = pio.to_html(
#     fig, 
#     full_html=False,
#     include_plotlyjs='cdn',
#     default_width='900px',   # ‚Üê Controls display width
#     default_height='650px'   # ‚Üê Controls display height
# )

# # Wrap in a div for more control
# wrapped_html = f'''
# <div style="
#     width: 100%;
#     max-width: 1000px;
#     margin: 0 auto;
#     padding: 20px;
#     background: white;
#     border-radius: 10px;
#     box-shadow: 0 4px 6px rgba(0,0,0,0.1);
# ">
#     {html}
# </div>
# '''

# wandb.log({
#     "roc_custom": wandb.Html(wrapped_html, inject=False)
# })

In [9]:
# import wandb
# import plotly.graph_objects as go
# import plotly.io as pio

# print(run_id)
# wandb.init(project="france-hvac", id=run_id, resume="allow")

# # Create Plotly figure
# fig = go.Figure()

# # Add model ROC
# fig.add_trace(go.Scatter(
#     x=fpr,
#     y=tpr,
#     mode='lines',
#     name=f'Model (AUC={roc_auc:.3f})',
#     line=dict(color='blue', width=3)
# ))

# # Add random classifier
# fig.add_trace(go.Scatter(
#     x=[0, 1],
#     y=[0, 1],
#     mode='lines',
#     name='Random (AUC=0.5)',
#     line=dict(color='gray', dash='dash', width=2)
# ))

# # Customize
# # fig.update_layout(
# #     title=f"ROC Curve",
# #     xaxis_title="False Positive Rate",
# #     yaxis_title="True Positive Rate",
# #     width=800,
# #     height=600,
# #     showlegend=True
# # )

# fig.update_layout(
#     # Add border around entire figure
#     paper_bgcolor='white',
    
#     # Add shadow effect
#     shapes=[
#         dict(
#             type="rect",
#             xref="paper",
#             yref="paper",
#             x0=0,
#             y0=0,
#             x1=1,
#             y1=1,
#             line=dict(color="black", width=2),
#             fillcolor="rgba(0,0,0,0)"
#         )
#     ],
    
#     # Customize grid
#     xaxis=dict(
#         gridcolor='lightgray',
#         griddash='dot',  # Dotted grid lines
#         gridwidth=0.5,
#         showline=True,
#         linecolor='black',
#         linewidth=2
#     ),
#     yaxis=dict(
#         gridcolor='lightgray',
#         griddash='dot',
#         gridwidth=0.5,
#         showline=True,
#         linecolor='black', 
#         linewidth=2
#     )
# )

# # Log as Plotly - NO TABLE!
# wandb.log({"roc_plotly": fig})

In [None]:
import wandb
import plotly.graph_objects as go
import plotly.io as pio
from sklearn.metrics import roc_curve, auc

# Re-attach to the run started earlier: same project and id, resume="allow".
print(run_id)
wandb.init(project="france-hvac", id=run_id, resume="allow")

# Calculate ROC points and the area under them for the customer test split.
# The third value returned by roc_curve (the thresholds) is not needed here.
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

# ============ CREATE FULLY CUSTOMIZED PLOT ============
fig = go.Figure()

# 1. Model ROC curve: thick solid line with a compact FPR/TPR hover label.
fig.add_trace(go.Scatter(
    x=fpr,
    y=tpr,
    mode='lines',
    name=f'Model (AUC = {roc_auc:.3f})',
    line=dict(color='#FF6B6B', width=4, dash='solid'),
    # The trailing <extra></extra> hides the trace name in the hover box.
    hovertemplate='FPR: %{x:.3f}<br>TPR: %{y:.3f}<br><extra></extra>',
    showlegend=True,
))

# 2-3. Reference curves: the chance diagonal, then the ideal path through the
# top-left corner. Added in this order so trace indices stay 0, 1, 2.
for ref_name, ref_x, ref_y, ref_line in (
    ('Random (AUC = 0.500)', [0, 1], [0, 1],
     dict(color='#4ECDC4', width=3, dash='dash')),
    ('Perfect (AUC = 1.000)', [0, 0, 1], [0, 1, 1],
     dict(color='#45B7D1', width=2, dash='dot')),
):
    fig.add_trace(go.Scatter(
        x=ref_x,
        y=ref_y,
        mode='lines',
        name=ref_name,
        line=ref_line,
        showlegend=True,
    ))

# ============ CUSTOMIZE LAYOUT ============
def _roc_axis(label):
    """Axis settings shared by both ROC axes; only the bold title text differs."""
    return dict(
        title=dict(text=f"<b>{label}</b>", font=dict(size=16, color="#34495E")),
        range=[-0.02, 1.02],  # extend slightly beyond 0-1
        gridcolor='lightgray',
        gridwidth=1,
        zerolinecolor='gray',
        showline=True,
        linewidth=2,
        linecolor='black',
        tickfont=dict(size=12),
    )


def _background_rect(x0, x1, y0, y1, fill):
    """Borderless rectangle in data coordinates, drawn below the traces."""
    return dict(
        type="rect",
        xref="x", yref="y",
        x0=x0, x1=x1,
        y0=y0, y1=y1,
        fillcolor=fill,
        line=dict(width=0),
        layer="below",
    )


fig.update_layout(
    # Fixed size in pixels
    width=900,
    height=700,

    # Centered title
    title=dict(
        text="<b>ROC Curve Analysis</b>",
        font=dict(family="Arial", size=24, color="#2C3E50"),
        x=0.5,
        xanchor="center",
    ),

    # Axes
    xaxis=_roc_axis("False Positive Rate"),
    yaxis=_roc_axis("True Positive Rate"),

    # Legend in the top-left corner of the plot area
    legend=dict(
        x=0.02,
        y=0.98,
        bgcolor='rgba(255, 255, 255, 0.8)',
        bordercolor='gray',
        borderwidth=1,
        font=dict(size=14),
    ),

    # Background and margins
    plot_bgcolor='white',
    paper_bgcolor='white',
    margin=dict(l=80, r=80, t=100, b=80),

    # Hover behavior
    hovermode='x unified',

    # Shaded reference areas: the low-FPR / high-TPR corner, then the whole unit square
    shapes=[
        _background_rect(0, 0.2, 0.8, 1, "rgba(0, 255, 0, 0.1)"),
        _background_rect(0, 1, 0, 1, "rgba(128, 128, 128, 0.05)"),
    ],

    # Boxed AUC label with an arrow
    annotations=[
        dict(
            x=0.6,
            y=0.3,
            text=f"<b>AUC = {roc_auc:.3f}</b>",
            showarrow=True,
            arrowhead=2,
            ax=50,
            ay=-40,
            font=dict(size=16, color="#FF6B6B"),
            bgcolor="white",
            bordercolor="#FF6B6B",
            borderwidth=2,
            borderpad=4,
        )
    ],
)

# ============ CUSTOMIZE MODEBAR CONTROLS ============
# Adjust which buttons the figure's modebar shows.
fig.update_layout(
    modebar=dict(
        # Buttons removed from the modebar
        remove=[
            'lasso2d', 
            'select2d',
            'hoverClosestCartesian',
            'hoverCompareCartesian',
            'toggleSpikelines'
        ],
        # Buttons added to the modebar (shape-drawing tools); this does not
        # restrict the modebar to only these buttons.
        add=[
            'drawline',
            'drawopenpath',
            'drawclosedpath',
            'drawcircle',
            'drawrect',
            'eraseshape'
        ]
    )
)

# ============ ADD CUSTOM BUTTONS ============
# Each button uses the "update" method: args[0] restyles the traces
# (visibility of model / random / perfect, in that order) and args[1] changes
# the layout.
# Layout changes use dotted attribute paths ("yaxis.type", "title.text") so a
# click only touches that attribute. A nested dict such as
# {"yaxis": {"type": "log"}} or a bare {"title": "..."} replaces the whole
# container and drops the axis title, range, line styling and title font that
# the layout configures.
fig.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            x=0.95,
            y=1.15,
            showactive=True,
            buttons=[
                dict(
                    label="Log Scale",
                    method="update",
                    args=[
                        {"visible": [True, True, True]},
                        # The fixed linear range would be read as log10
                        # exponents on a log axis, so let plotly pick the range.
                        {"yaxis.type": "log", "yaxis.autorange": True},
                    ],
                ),
                dict(
                    label="Linear Scale",
                    method="update",
                    args=[
                        {"visible": [True, True, True]},
                        # Restore the padded 0-1 range used by the base layout.
                        {
                            "yaxis.type": "linear",
                            "yaxis.autorange": False,
                            "yaxis.range": [-0.02, 1.02],
                        },
                    ],
                ),
                dict(
                    label="Hide Perfect",
                    method="update",
                    args=[
                        {"visible": [True, True, False]},
                        {"title.text": "ROC Curve (Perfect Hidden)"},
                    ],
                ),
            ],
        )
    ]
)

# ============ LOG TO WANDB ============
# Log the Plotly figure object itself under "plots/roc"; the scalar AUC entry
# is left disabled.
wandb.log({
    "plots/roc": fig,
    # "roc_auc": roc_auc
})

print("‚úÖ Custom Plotly graph logged to wandb")

In [None]:
import wandb
import pandas as pd
import json
from sklearn.metrics import roc_curve, auc
# Re-attach to the run started earlier in the notebook.
wandb.init(project="france-hvac", id=run_id, resume="allow")

# Calculate ROC points and AUC for the customer test split.
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

# One row per ROC point, wrapped in a wandb Table.
roc_df = pd.DataFrame({
    'False Positive Rate': fpr,
    'True Positive Rate': tpr
})
table = wandb.Table(dataframe=roc_df)

# wandb.plot_table(vega_spec_name, data_table, fields, ...) takes the name of
# a chart preset plus a mapping from that preset's fields to table columns.
# This cell used to pass an inline Vega-Lite spec as `fields` under a preset
# name that does not exist. Use the built-in line chart, as elsewhere in this
# notebook; the "roc_vega" key is kept so existing dashboards find the panel.
wandb.log({
    "roc_vega": wandb.plot.line(
        table,
        x="False Positive Rate",
        y="True Positive Rate",
        title="ROC Curve",
    ),
    "auc": roc_auc
})

In [23]:
from sklearn.metrics import confusion_matrix
# Re-attach to the run started earlier in the notebook.
wandb.init(project="france-hvac", id=run_id, resume="allow")
# Calculate confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Prepare data: one record per (actual, predicted) cell.
# The fixed range(2) assumes a 2x2 matrix, i.e. exactly two classes.
cm_data = []
for i in range(2):
    for j in range(2):
        cm_data.append({
            "actual": f"Actual {i}",
            "predicted": f"Predicted {j}",
            "count": cm[i, j]
        })

cm_df = pd.DataFrame(cm_data)
cm_table = wandb.Table(dataframe=cm_df)

# Vega spec for heatmap: predicted on x, actual on y, cell count as color.
cm_vega_spec = {
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "width": 400,
    "height": 350,
    "data": {"name": "table"},
    "mark": "rect",
    "encoding": {
        "x": {
            "field": "predicted",
            "type": "nominal",
            "title": "Predicted"
        },
        "y": {
            "field": "actual",
            "type": "nominal",
            "title": "Actual"
        },
        "color": {
            "field": "count",
            "type": "quantitative",
            "scale": {"scheme": "blues"}
        }
    }
}

# NOTE(review): wandb.plot_table is documented as
# plot_table(vega_spec_name, data_table, fields, ...), where the first argument
# names a chart preset and `fields` maps preset fields to table columns. Here
# cm_vega_spec lands in `fields` and "confusion_matrix" is used as the preset
# name, so this panel probably does not render the heatmap -- confirm against
# the wandb docs and consider wandb.plot.confusion_matrix or logging cm_table.
wandb.log({
    "confusion_matrix": wandb.plot_table("confusion_matrix", cm_table, cm_vega_spec)
})

0,1
auc,‚ñÅ

0,1
auc,0.67981


In [24]:
import wandb
import pandas as pd
import numpy as np
from sklearn.metrics import roc_curve, auc, confusion_matrix
# Re-attach to the run started earlier in the notebook.
wandb.init(project="france-hvac", id=run_id, resume="allow")


def log_combined_visualizations(y_true, y_pred, y_pred_proba, model_name=""):
    """Log a ROC line chart, confusion-matrix counts, AUC and accuracy to wandb.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, used for the confusion matrix and accuracy.
        y_pred_proba: Predicted positive-class scores, used for the ROC curve.
        model_name: Prefix for the logged keys (``"<model_name>_roc"``,
            ``"_cm"``, ``"_auc"``, ``"_accuracy"``) and the ROC chart title.

    Returns:
        The ROC AUC computed from ``y_true`` and ``y_pred_proba``.
    """
    # Calculate metrics
    fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
    roc_auc = auc(fpr, tpr)
    cm = confusion_matrix(y_true, y_pred)

    # ===== ROC CURVE =====
    # Long-format table: the model's ROC points followed by the two end points
    # of the chance diagonal, distinguished by the "series" column.
    model_series = f"Model (AUC={roc_auc:.3f})"
    random_series = "Random (AUC=0.500)"
    roc_df = pd.DataFrame({
        "fpr": list(fpr) + [0, 1],
        "tpr": list(tpr) + [0, 1],
        "series": [model_series] * len(fpr) + [random_series] * 2,
    })
    roc_plot = wandb.plot.line(
        wandb.Table(dataframe=roc_df),
        x="fpr",
        y="tpr",
        stroke="series",
        title=f"{model_name} ROC Curve"
    )

    # ===== CONFUSION MATRIX =====
    # One record per (actual, predicted) cell, logged as a table.
    # A wandb.plot.line_series attempt used to sit here behind a bare `except`.
    # It never read `cm` and was called with four keys for two y-series, so the
    # table was the only output that could carry the counts.
    cm_df = pd.DataFrame([
        {"actual": f"Actual {i}", "predicted": f"Predicted {j}", "count": cm[i, j]}
        for i in range(cm.shape[0])
        for j in range(cm.shape[1])
    ])
    cm_table = wandb.Table(dataframe=cm_df)

    # Log everything. Accuracy is the diagonal (correct predictions) over the
    # total, which equals (TN + TP) / total in the binary case.
    wandb.log({
        f"{model_name}_roc": roc_plot,
        f"{model_name}_cm": cm_table,
        f"{model_name}_auc": roc_auc,
        f"{model_name}_accuracy": cm.trace() / cm.sum()
    })

    return roc_auc


# Usage
log_combined_visualizations(y_test, y_pred, y_pred_proba, "customer_model")

0,1
auc,0.67981


0.6798052050347146

In [None]:
# def plot_multiclass_roc_cm(y_true, y_pred, y_pred_proba, class_names):
#     """For multiclass problems"""
#     from sklearn.metrics import roc_curve, auc, confusion_matrix
#     import plotly.express as px
    
#     n_classes = len(class_names)
    
#     fig = make_subplots(
#         rows=1, cols=2,
#         subplot_titles=('ROC Curves', 'Confusion Matrix'),
#         specs=[[{'type': 'scatter'}, {'type': 'heatmap'}]]
#     )
    
#     # ROC curves for each class
#     for i, class_name in enumerate(class_names):
#         fpr, tpr, _ = roc_curve(y_true == i, y_pred_proba[:, i])
#         roc_auc = auc(fpr, tpr)
        
#         fig.add_trace(
#             go.Scatter(
#                 x=fpr, y=tpr,
#                 name=f'{class_name} (AUC={roc_auc:.2f})',
#                 mode='lines'
#             ),
#             row=1, col=1
#         )
    
#     # Add diagonal
#     fig.add_trace(
#         go.Scatter(x=[0, 1], y=[0, 1], mode='lines', 
#                   name='Random', line=dict(dash='dash', color='gray')),
#         row=1, col=1
#     )
    
#     # Confusion Matrix
#     cm = confusion_matrix(y_true, y_pred)
#     fig.add_trace(
#         go.Heatmap(
#             z=cm,
#             x=class_names,
#             y=class_names,
#             text=cm,
#             texttemplate='%{text}',
#             colorscale='Blues'
#         ),
#         row=1, col=2
#     )
    
#     fig.update_layout(width=1000, height=500)
    
#     return fig

In [14]:

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Re-attach to the run started earlier in the notebook.
wandb.init(project="france-hvac", id=run_id, resume="allow")

# Recompute the inputs here so the cell does not rely on `fpr` / `tpr` (or the
# `go` import) left behind by whichever other cell happened to run last.
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
cm = confusion_matrix(y_test, y_pred)

# Minimal combined plot: ROC on the left, confusion matrix on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=("ROC Curve", "Confusion Matrix"))

# ROC
fig.add_trace(go.Scatter(x=fpr, y=tpr, name='ROC'), row=1, col=1)
fig.add_trace(go.Scatter(x=[0,1], y=[0,1], name='Random', line=dict(dash='dash')), row=1, col=1)
fig.update_xaxes(title_text="False Positive Rate", row=1, col=1)
fig.update_yaxes(title_text="True Positive Rate", row=1, col=1)

# Confusion Matrix (rows of cm are actual classes, columns are predicted)
fig.add_trace(go.Heatmap(z=cm, x=['0','1'], y=['0','1'], text=cm, texttemplate='%{text}'), row=1, col=2)
fig.update_xaxes(title_text="Predicted", row=1, col=2)
fig.update_yaxes(title_text="Actual", row=1, col=2)

fig.update_layout(width=900, height=400)
wandb.log({"simple_combined": fig})

In [15]:
# Log ROC and Confusion Matrix for customer model
# plot_and_log_roc returns the AUC; the confusion-matrix helper's return value
# (the matrix itself) is not used here.
auc_customer = plot_and_log_roc(y_test, y_pred_proba, "customer_model")
plot_and_log_confusion_matrix(y_test, y_pred, "customer_model")

# Log single metric (clean, no automatic charts) -- currently disabled
# wandb.log({
#     "customer_auc": auc_customer,
#     "customer_accuracy": accuracy_score(y_test, y_pred),
#     "customer_f1": f1_score(y_test, y_pred, zero_division=0)
# })

print(f"  ‚úì Customer AUC: {auc_customer:.3f}")
print(f"  ‚úì ROC & Confusion Matrix logged to wandb")

# Test 2: Sequence features
print("\nüß™ SEQUENCE FEATURES MODEL")
result_sequence = train_rf(X_sequence_clean, y_sequence_clean, 'sequence_model')

# Create predictions for sequence model
# NOTE(review): as with the customer model, this split is drawn independently
# of train_rf. The saved output shows train_rf reporting AUC 0.738 while this
# split yields 0.786, so X_test_seq presumably overlaps rows the model was
# trained on -- confirm, since that would inflate the AUC reported below.
X_train_seq, X_test_seq, y_train_seq, y_test_seq = train_test_split(
    X_sequence_clean, y_sequence_clean, test_size=0.2, random_state=42
)
y_pred_seq = result_sequence['model'].predict(X_test_seq)
# Column 1 of predict_proba is used as the ROC score.
y_pred_proba_seq = result_sequence['model'].predict_proba(X_test_seq)[:, 1]

# Log ROC and Confusion Matrix for sequence model
auc_sequence = plot_and_log_roc(y_test_seq, y_pred_proba_seq, "sequence_model")
plot_and_log_confusion_matrix(y_test_seq, y_pred_seq, "sequence_model")

# Log single metric -- currently disabled
# wandb.log({
#     "sequence_auc": auc_sequence,
#     "sequence_accuracy": accuracy_score(y_test_seq, y_pred_seq),
#     "sequence_f1": f1_score(y_test_seq, y_pred_seq, zero_division=0)
# })

print(f"  ‚úì Sequence AUC: {auc_sequence:.3f}")
print(f"  ‚úì ROC & Confusion Matrix logged to wandb")

# Create comparison bar chart (only one extra chart)
print("\nüìä CREATING MODEL COMPARISON")
fig, ax = plt.subplots(figsize=(8, 6))
models = ['Customer', 'Sequence']
aucs = [auc_customer, auc_sequence]

bars = ax.bar(models, aucs, color=['skyblue', 'lightcoral'])
ax.set_ylabel('AUC Score')
ax.set_title('Model AUC Comparison')
ax.set_ylim([0, 1])

# Add value labels on bars.
# The loop variable is deliberately not called `auc`: at cell level that name
# would rebind the sklearn.metrics.auc function that other cells import and
# call, and re-running those cells would then fail on a float.
for bar, auc_value in zip(bars, aucs):
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height,
            f'{auc_value:.3f}', ha='center', va='bottom')

# Log comparison chart, then close the figure since only the image is needed.
wandb.log({"model_comparison": wandb.Image(fig)})
plt.close(fig)

# # Log final summary
# wandb.run.summary["best_model"] = "Sequence" if auc_sequence > auc_customer else "Customer"
# wandb.run.summary["best_auc"] = max(auc_customer, auc_sequence)
# wandb.run.summary["improvement"] = auc_sequence - auc_customer

# Final text summary: banner, both AUCs, and the sequence-minus-customer gap.
print(
    "\n" + "=" * 80,
    "RESULTS SUMMARY",
    "=" * 80,
    f"Customer Model AUC: {auc_customer:.3f}",
    f"Sequence Model AUC: {auc_sequence:.3f}",
    f"Improvement: {auc_sequence - auc_customer:.3f}",
    sep="\n",
)


  ‚úì Customer AUC: 0.680
  ‚úì ROC & Confusion Matrix logged to wandb

üß™ SEQUENCE FEATURES MODEL
‚úì Model saved: sequence_model.pkl
‚úì AUC: 0.738
  ‚úì Sequence AUC: 0.786
  ‚úì ROC & Confusion Matrix logged to wandb

üìä CREATING MODEL COMPARISON

RESULTS SUMMARY
Customer Model AUC: 0.680
Sequence Model AUC: 0.786
Improvement: 0.106
