In [1]:
# NOTE(review): run tracking through the project's wandbhelper module is
# disabled below (presumably Weights & Biases -- confirm). Either re-enable it
# or delete this cell. The run_name says "price_match_simulation" while the
# rest of this notebook runs the cross-sell simulation -- confirm before reuse.
# from wandbhelper.util import init_wandb, log_all_plots, log_plot_as_image

# wandb_run_id = init_wandb(run_name="price_match_simulation")
# print(f"Run ID {wandb_run_id}")

In [2]:
import pandas as pd
import pickle
import time

from ml_simulation.dataset_split import customer_split
from ml_simulation.util import HiddenPrints
from ml_features.features import create_features
from ml_training.train_xgb import train_xgb
from ml_simulation__cross_sell.sample import sample_cross_sell_customers
from ml_simulation__cross_sell.data import get_cross_sell_compute_function 
from ml_simulation__cross_sell.widget import show_cross_sell_widget
    
import warnings
# Suppress every warning category from here on so cell outputs stay compact.
warnings.filterwarnings('ignore')

# Read the cleaned quote extract and convert the 'dt_creation_devis' column
# from its CSV text form to pandas datetimes in a single chained expression.
df_quotes = pd.read_csv('cleaned_quote_data.csv').assign(
    dt_creation_devis=lambda quotes: pd.to_datetime(quotes['dt_creation_devis'])
)

In [3]:
# Split the quotes by customer and keep the two partitions the later cells
# use: 'train' for model fitting and 'simulation' for the cross-sell demo.
split_result = customer_split(df_quotes)
df_train, df_sim = split_result['train'], split_result['simulation']


SPLIT CUSTOMERS: TRAIN vs SIMULATION: TRAINING SIZE 0.95
Split: 22708 train, 1180 sim customers


In [4]:
# Toggle for the model-building cell: True rebuilds features from df_train and
# retrains via train_xgb; False loads the saved model and feature list from
# 'simulation_poc.pkl'.
TRAIN = False

In [5]:
# Model building
# Produces `model` and `feature_names` for the simulation cells, either by
# retraining (TRAIN is True) or by loading the previously saved artifact.
if TRAIN:
    # HiddenPrints presumably silences the console output of create_features
    # -- TODO confirm against ml_simulation.util.
    with HiddenPrints():
        X_train = create_features(df_train)

    # Separate the target, then drop it and the 'numero_compte' column from
    # the feature matrix passed to the trainer.
    y_train = X_train['converted']
    X_train = X_train.drop(['numero_compte', 'converted'], axis=1)

    # The trainer's own result is the single source of truth for the feature
    # list (the earlier copy taken from X_train.columns was overwritten before
    # use, so it has been removed).
    result = train_xgb(X_train, y_train, "simulation_poc")
    model = result['model']
    feature_names = result['features']

    print(f"Model trained: {len(feature_names)} features")
else:
    # SECURITY: unpickling can execute arbitrary code. Only load
    # 'simulation_poc.pkl' when it comes from a trusted training run.
    with open('simulation_poc.pkl', 'rb') as file:
        model_data = pickle.load(file)

    # The artifact is a mapping holding the fitted model and its feature list.
    model = model_data['model']
    feature_names = model_data['features']

In [6]:
# Sampling
# The seed is taken from the wall clock (milliseconds, modulo 10,000,000), so
# it differs from run to run. It is printed so that a given selection can be
# reproduced later by passing the same value as random_state.
sample_seed = int(time.time() * 1000) % 10000000
print(f"Sampling seed: {sample_seed}")
selected_ids = sample_cross_sell_customers(df_sim, random_state=sample_seed)


=== CROSS-SELL SAMPLING: HEAT PUMP → STOVE ===
Scanning 200 customers...
✅ Found 12 eligible heat pump owners in cold regions

🎯 SELECTED 5 CROSS-SELL CANDIDATES:
  • CL00209916 - Normandie
  • CL00063658 - Normandie
  • CL00183311 - Normandie
  • CL00212369 - Normandie
  • CL00068855 - Normandie

Selected IDs: ['CL00209916', 'CL00063658', 'CL00183311', 'CL00212369', 'CL00068855']


In [7]:
# Simulation
# Build the compute callback from the model, its feature list, the simulation
# data and the sampled customer IDs.
compute = get_cross_sell_compute_function(
    model,
    feature_names,
    df_sim,
    selected_ids,
)

# Kept as the cell's last expression so the returned widget is displayed.
show_cross_sell_widget(compute, selected_ids)

Initialized Simulation with 5 sampled IDs.


VBox(children=(HBox(children=(Dropdown(description='Scénario :', layout=Layout(width='420px'), options=('Situa…