In [1]:
# !pip install seaborn
# !pip install scikit-learn
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns

import mlflow
import mlflow.sklearn # Wrapper pour scikit-learn

from lightgbm.sklearn import LGBMClassifier
from sklearn.metrics import f1_score, PrecisionRecallDisplay, precision_recall_curve#, plot_precision_recall_curve



In [2]:
os.chdir("c:\\Users\\vnarv\\purchase-predict\\")

In [3]:
os.getcwd()

'c:\\Users\\vnarv\\purchase-predict'

In [12]:
path_data = "data/05_model_input"
X_train = pd.read_csv(path_data + "/X_train.csv")
X_test = pd.read_csv(path_data + "/X_test.csv")
y_train = pd.read_csv(path_data + "/y_train.csv").values.flatten()
y_test = pd.read_csv(path_data + "/y_test.csv").values.flatten()

In [5]:
# Hyper-paramètres des modèles
hyp_params = {
    "num_leaves": 60,
    "min_child_samples": 10,
    "max_depth": 12,
    "n_estimators": 100,
    "learning_rate": 0.1
}

In [13]:
# Identification de l'interface MLflow
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")
mlflow.set_experiment("purchase-predict")


mlflow-artifacts:/306273990621708128/269038045a6d4bd099a59525e561a949/artifacts


In [26]:
def save_pr_curve(X, y, model):
    plt.figure(figsize=(16,11))
    prec, recall, _ = precision_recall_curve(y, model.predict_proba(X)[:,1], pos_label=1)
    pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot(ax=plt.gca())
    plt.title("PR Curve", fontsize=16)
    plt.gca().xaxis.set_major_formatter(mtick.PercentFormatter(1, 0))
    plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1, 0))
    print(os.getcwd())
    plt.savefig(os.path.expanduser("~/purchase-predict/notebooks/plots/pr_curve.png"))
    plt.close()

def train_model(params):
    
    with mlflow.start_run() as run:
        model = LGBMClassifier(**params, objective="binary", verbose=-1)
        model.fit(X_train, y_train)

        score = f1_score(y_test, model.predict(X_test))
        save_pr_curve(X_test, y_test, model)

        mlflow.log_params(hyp_params)
        mlflow.log_metric("f1", score)
        mlflow.log_artifact(os.path.expanduser("~/purchase-predict/notebooks/plots/pr_curve.png"), artifact_path="plots")
        mlflow.sklearn.log_model(model, "model")

In [27]:
train_model({**hyp_params, **{'n_estimators': 200, 'learning_rate': 0.05}})
train_model({**hyp_params, **{'n_estimators': 500, 'learning_rate': 0.025}})
train_model({**hyp_params, **{'n_estimators': 1000, 'learning_rate': 0.01}})

c:\Users\vnarv\purchase-predict
c:\Users\vnarv\purchase-predict
c:\Users\vnarv\purchase-predict
