In [13]:
import pandas as pd
import numpy as np
import xgboost as xgb
import mlflow
import logging
import mlflow.sklearn
from sklearn.metrics import roc_auc_score, fbeta_score
from sklearn.model_selection import train_test_split
from urllib.parse import urlparse

try:
    path = ''
    X = pd.read_csv(path + "train_encoded.csv")
    train_old = pd.read_csv(path + 'train_sample.csv')

except Exception as e:
    logging.exception(
        "Unable to download training & test CSV, check path. Error: %s", e
    )

## The predicted column is "TARGET" which is a scalar Binary [0, 1]
X = X.merge(train_old[['SK_ID_CURR', 'TARGET']], on='SK_ID_CURR', how='left')
y = X['TARGET']
del train_old
X = X.dropna(subset=['TARGET'])
X = X.drop(columns=["TARGET"], axis=1)
y.dropna(inplace=True)
X.fillna(X.median(), inplace=True)
indices=X['SK_ID_CURR']
X = X.set_index('SK_ID_CURR')

# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

learning_rates = [0.483, 0.484, 0.485, 0.486, 0.487]
n_estimators = [98, 99, 100, 101, 102]
max_depths = [2, 3, 4]

for learning_rate in learning_rates:
    for n_estimator in n_estimators:
        for max_depth in max_depths:
            with mlflow.start_run(nested = True) as run:
                hyperparameters = {
                    "learning_rate": learning_rate,
                    "n_estimators": n_estimator,
                    "max_depth": max_depth
                }
                
                # log the hyperparameters
                mlflow.log_params(hyperparameters)
                
                # create the XGBoost model
                model = xgb.XGBClassifier(**hyperparameters)
                model.fit(X_train, y_train)
                
                # make predictions on the test set
                y_pred = model.predict(X_test)
                
                # calculate the AUC score
                auc = roc_auc_score(y_test, y_pred)
                mlflow.log_metric("AUC", auc)
                
                # calculate the FBeta score
                fbeta = fbeta_score(y_test, y_pred, beta=0)
                mlflow.log_metric("FBeta0.0", fbeta)

    # log the model to the run
    mlflow.sklearn.log_model(model, "model", registered_model_name="XGBoostCreditScore")

    #tracking_url = mlflow.get_tracking_uri()

    #print(tracking_url)

Registered model 'XGBoostCreditScore' already exists. Creating a new version of this model...
2023/02/08 17:13:07 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: XGBoostCreditScore, version 11
Created version '11' of model 'XGBoostCreditScore'.
Registered model 'XGBoostCreditScore' already exists. Creating a new version of this model...
2023/02/08 17:13:31 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: XGBoostCreditScore, version 12
Created version '12' of model 'XGBoostCreditScore'.
Registered model 'XGBoostCreditScore' already exists. Creating a new version of this model...
2023/02/08 17:13:51 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: XGBoostCreditScore, version 13
Created version '13' of model 'XGBoost