In [16]:
import mlflow
import mlflow.xgboost
import xgboost
import pandas as pd
import xgboost as xgb
from sklearn.metrics import roc_auc_score, fbeta_score
from mlflow.tracking import MlflowClient

def train_xgboost(train_data, test_data, max_depth, learning_rate, n_estimators, gamma, subsample, colsample_bytree):
    with mlflow.start_run():
        # Train the XGBoost model
        model = xgb.XGBClassifier(max_depth: (3, 5),
                                  learning_rate: (0.01, 0.2),
                                  n_estimators: (50, 200),
                                  gamma: (0, 1),
                                  subsample: (0.5, 1),
                                  colsample_bytree: (0.5, 1),
                                  objective="binary:logistic",
                                  n_jobs=-1)
        model.fit(train_data.drop("target", axis=1), train_data["target"])

        # Make predictions on the test data
        predictions = model.predict(test_data.drop("target", axis=1))

        # Log the model to the tracking server
        mlflow.xgboost.log_model(model, "model")

        # Calculate and log the AUC score
        auc_score = roc_auc_score(test_data["target"], predictions)
        mlflow.log_metric("auc_score", auc_score)

        # Calculate and log the F1 Beta score
        fbeta_score = fbeta_score(test_data["target"], predictions, beta=0.5)
        mlflow.log_metric("fbeta_score", fbeta_score)

        return fbeta_score

if __name__ == "__main__":
    train_data = pd.read_csv("train_encoded.csv")
    test_data = pd.read_csv("test_encoded.csv")
    
    mlflow.run(".",
               parameters={
                   "max_depth": (3, 5),
                   "learning_rate": (0.01, 0.2),
                   "n_estimators": (50, 200),
                   "gamma": (0, 1),
                   "subsample": (0.5, 1),
                   "colsample_bytree": (0.5, 1)
               },
               experiment_name="xgboost_hyperparameter_tuning",
               version=None,
               entry_point="train_xgboost",
               backend="local")
    
    # Find the best run based on the F1 Beta score
    client = MlflowClient()
    experiment = client.get_experiment_by_name("xgboost_hyperparameter_tuning")

SyntaxError: invalid syntax (1889776034.py, line 12)

In [17]:
# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.

import os
#import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn

#import logging

#logging.basicConfig(level=logging.WARN)
#logger = logging.getLogger(__name__)


def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


if __name__ == "__main__":
    #warnings.filterwarnings("ignore")
    np.random.seed(40)

    try:
        path = ''
        train = pd.read_csv(path + "train_encoded.csv")
        test = pd.read_csv(path + "test_encoded.csv")
        train_old = pd.read_csv(path + 'train_sample.csv')
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV, check path. Error: %s", e
        )

    ## The predicted column is "TARGET" which is a scalar Binary [0, 1]
    train = train.merge(train_old[['SK_ID_CURR', 'TARGET']], on='SK_ID_CURR', how='left')
    train_y = train['TARGET']
    del train_old
    train = train.dropna(subset=['TARGET'])
    train = train.drop(columns=["TARGET"], axis=1)
    train_y.dropna(inplace=True)
    train.fillna(train.median(), inplace=True)
    test.fillna(test.median(), inplace=True)
    indices=train['SK_ID_CURR']
    test = test.set_index('SK_ID_CURR')
    train = train.set_index('SK_ID_CURR')

    #alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    #l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    with mlflow.start_run():
        
        model = xgb.XGBClassifier(max_depth: (3, 5),
                                  learning_rate: (0.01, 0.2),
                                  n_estimators: (50, 200),
                                  gamma: (0, 1),
                                  subsample: (0.5, 1),
                                  colsample_bytree: (0.5, 1),
                                  objective="binary:logistic",
                                  n_jobs=-1)
        model.fit(train_x.drop("TARGET", axis=1), train_x["TARGET"])

        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha={:f}, l1_ratio={:f}):".format(alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        mlflow.log_param("max_depth", max_depth)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("n_estimators", max_depth)
        mlflow.log_param("gamma", learning_rate)
        mlflow.log_param("subsample", max_depth)
        mlflow.log_param("colsample_bytree", learning_rate)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        # Model registry does not work with file store
        if tracking_url_type_store != "file":

            # Register the model
            # There are other ways to use the Model Registry, which depends on the use case,
            # please refer to the doc for more information:
            # https://mlflow.org/docs/latest/model-registry.html#api-workflow
            mlflow.sklearn.log_model(lr, "model", registered_model_name="ElasticnetWineModel")
        else:
            mlflow.sklearn.log_model(lr, "model")
            
# mlflow.run(".",
#                parameters={
#                    "max_depth": (3, 5),
#                    "learning_rate": (0.01, 0.2),
#                    "n_estimators": (50, 200),
#                    "gamma": (0, 1),
#                    "subsample": (0.5, 1),
#                    "colsample_bytree": (0.5, 1)
#                },
#                experiment_name="xgboost_hyperparameter_tuning",
#                version=None,
#                entry_point="train_xgboost",
#                backend="local")
    
    # Find the best run based on the F1 Beta score
    client = MlflowClient()
    experiment = client.get_experiment_by_name("xgboost_hyperparameter_tuning")

SyntaxError: invalid syntax (2283505627.py, line 63)