In [16]:
import mlflow
import mlflow.xgboost
import xgboost
import pandas as pd
import xgboost as xgb
from sklearn.metrics import roc_auc_score, fbeta_score
from mlflow.tracking import MlflowClient

def train_xgboost(train_data, test_data, max_depth, learning_rate, n_estimators, gamma, subsample, colsample_bytree):
    with mlflow.start_run():
        # Train the XGBoost model
        model = xgb.XGBClassifier(max_depth: (3, 5),
                                  learning_rate: (0.01, 0.2),
                                  n_estimators: (50, 200),
                                  gamma: (0, 1),
                                  subsample: (0.5, 1),
                                  colsample_bytree: (0.5, 1),
                                  objective="binary:logistic",
                                  n_jobs=-1)
        model.fit(train_data.drop("target", axis=1), train_data["target"])

        # Make predictions on the test data
        predictions = model.predict(test_data.drop("target", axis=1))

        # Log the model to the tracking server
        mlflow.xgboost.log_model(model, "model")

        # Calculate and log the AUC score
        auc_score = roc_auc_score(test_data["target"], predictions)
        mlflow.log_metric("auc_score", auc_score)

        # Calculate and log the F1 Beta score
        fbeta_score = fbeta_score(test_data["target"], predictions, beta=0.5)
        mlflow.log_metric("fbeta_score", fbeta_score)

        return fbeta_score

if __name__ == "__main__":
    train_data = pd.read_csv("train_encoded.csv")
    test_data = pd.read_csv("test_encoded.csv")
    
    mlflow.run(".",
               parameters={
                   "max_depth": (3, 5),
                   "learning_rate": (0.01, 0.2),
                   "n_estimators": (50, 200),
                   "gamma": (0, 1),
                   "subsample": (0.5, 1),
                   "colsample_bytree": (0.5, 1)
               },
               experiment_name="xgboost_hyperparameter_tuning",
               version=None,
               entry_point="train_xgboost",
               backend="local")
    
    # Find the best run based on the F1 Beta score
    client = MlflowClient()
    experiment = client.get_experiment_by_name("xgboost_hyperparameter_tuning")

SyntaxError: invalid syntax (1889776034.py, line 12)

In [17]:
    try:
        path = ''
        train = pd.read_csv(path + "train_encoded.csv")
        test = pd.read_csv(path + "test_encoded.csv")
        train_old = pd.read_csv(path + 'train_sample.csv')
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV, check path. Error: %s", e
        )

    ## The predicted column is "TARGET" which is a scalar Binary [0, 1]
    train = train.merge(train_old[['SK_ID_CURR', 'TARGET']], on='SK_ID_CURR', how='left')
    train_y = train['TARGET']
    del train_old
    train = train.dropna(subset=['TARGET'])
    train = train.drop(columns=["TARGET"], axis=1)
    train_y.dropna(inplace=True)
    train.fillna(train.median(), inplace=True)
    test.fillna(test.median(), inplace=True)
    indices=train['SK_ID_CURR']
    test = test.set_index('SK_ID_CURR')
    train = train.set_index('SK_ID_CURR')

    
import pandas as pd
import numpy as np
import xgboost as xgb
import mlflow
import mlflow.sklearn
from sklearn.metrics import roc_auc_score, fbeta_score
from sklearn.model_selection import train_test_split

# load your data
data = pd.read_csv("your_data.csv")

# split data into features and target
X = data.drop(columns=["TARGET"])
y = data["TARGET"]

# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# set the hyperparameters for the XGBoost model
hyperparameters = {
    "learning_rate": 0.1,
    "n_estimators": 100,
    "max_depth": 3
}

# start a new MLFlow run
with mlflow.start_run() as run:
    # log the hyperparameters
    mlflow.log_params(hyperparameters)

    # create the XGBoost model
    model = xgb.XGBClassifier(**hyperparameters)
    model.fit(X_train, y_train)

    # make predictions on the test set
    y_pred = model.predict(X_test)

    # calculate the AUC score
    auc = roc_auc_score(y_test, y_pred)
    print("AUC score:", auc)
    mlflow.log_metric("AUC", auc)

    # calculate the FBeta score
    fbeta = fbeta_score(y_test, y_pred, beta=0.5)
    print("FBeta score (beta=0.5):", fbeta)
    mlflow.log_metric("FBeta (beta=0.5)", fbeta)

    # log the model to the run
    mlflow.sklearn.log_model(model, "model")

SyntaxError: invalid syntax (2283505627.py, line 63)