In [1]:
import pandas as pd
import pickle
import time

from ml_simulation.dataset_split import customer_split
from ml_simulation.util import HiddenPrints
from ml_training.train_xgb import train_xgb
from ml_simulation__follow_up.sample import sample_follow_up_customers
from ml_simulation__follow_up.data import get_simulation_compute_function
from ml_simulation__follow_up.widget import show_follow_up_widget
    
import warnings
# Silence every warning for the rest of the kernel session to keep cell output clean.
# NOTE(review): with no category/module argument this hides all warnings, including
# deprecation notices from the libraries used below; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Load the cleaned quote export and parse the quote-creation column as datetimes.
df_quotes = (
    pd.read_csv('cleaned_quote_data.csv')
    .assign(dt_creation_devis=lambda quotes: pd.to_datetime(quotes['dt_creation_devis']))
)

In [2]:
# Partition the quotes into a training frame and a simulation frame.
# The split itself (and the summary it prints) is done by customer_split.
split_result = customer_split(df_quotes)
df_train, df_sim = split_result['train'], split_result['simulation']


SPLIT CUSTOMERS: TRAIN vs SIMULATION: TRAINING SIZE 0.95
Split: 22708 train, 1180 sim customers


In [3]:
# Toggle for the model-building cell: True retrains the XGBoost model from df_train,
# False loads the previously pickled model from 'simulation_poc.pkl'.
TRAIN = False

In [4]:
# Model building
# Produces two names used by the simulation cell: `model` and `feature_names`.
# TRAIN=True retrains from df_train; TRAIN=False loads the pickled artefact.
if TRAIN:
    # NOTE(review): `create_features` is not imported in the cells visible here, so
    # this branch raises NameError on a fresh kernel -- add the import before
    # switching TRAIN to True.
    with HiddenPrints():  # presumably suppresses the helper's stdout -- confirm
        X_train = create_features(df_train)

    # Separate the target, then drop the target and the customer identifier
    # so neither is used as a model input.
    y_train = X_train['converted']
    X_train = X_train.drop(['numero_compte', 'converted'], axis=1)

    # The feature list is taken from the training result rather than from
    # X_train.columns, so it matches exactly what the trainer reports.
    result = train_xgb(X_train, y_train, "simulation_poc")
    model = result['model']
    feature_names = result['features']

    print(f"Model trained: {len(feature_names)} features")
else:
    # SECURITY: pickle.load can execute arbitrary code while deserialising;
    # only load 'simulation_poc.pkl' when it comes from a trusted source.
    with open('simulation_poc.pkl', 'rb') as file:
        model_data = pickle.load(file)
    # The file handle is no longer needed once the payload is in memory.
    model = model_data['model']
    feature_names = model_data['features']

In [5]:
# Sampling
# Set SAMPLE_SEED to an integer to replay a previous draw; leave it as None to
# draw a fresh sample on every run (seed derived from the wall clock, as before).
SAMPLE_SEED = None
if SAMPLE_SEED is not None:
    sample_seed = SAMPLE_SEED
else:
    # Millisecond timestamp folded into [0, 10_000_000) so each run differs.
    sample_seed = int(time.time() * 1000) % 10000000
# Record the seed next to the sample so the selection can be reproduced later.
print(f"Sampling seed: {sample_seed}")
selected_ids = sample_follow_up_customers(df_sim, random_state=sample_seed)

Non-converted customers   : 923
Among them with 1 quote    : 691
     customer_id              product     price
2886  CL00068862        Climatisation   9725.35
2902  CL00066986                Poêle   8325.30
5742  CL00277083     Appareil hybride  14569.97
6025  CL00226674  Plomberie Sanitaire   2079.43
3588  CL00063554            Chaudière   4155.00
['CL00068862', 'CL00066986', 'CL00277083', 'CL00226674', 'CL00063554']


In [6]:
# Simulation
# Build the compute object for the sampled customers from the model, its feature
# list and the simulation frame (presumably a callable the widget invokes per
# scenario -- confirm against ml_simulation__follow_up.data).
compute = get_simulation_compute_function(model, feature_names, df_sim, selected_ids)
# Render the interactive follow-up widget for the same customer ids.
show_follow_up_widget(compute, selected_ids)

VBox(children=(HBox(children=(Dropdown(description='Scénario :', layout=Layout(width='380px'), options=('Actue…