In [1]:
import numpy as np 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import warnings
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
warnings.filterwarnings('ignore')
from imblearn.combine import SMOTETomek
import mlflow
import mlflow.sklearn
import mlflow.xgboost

In [2]:
# Synthetic binary classification data with a 90/10 class weighting and no
# label noise (flip_y=0); 2 informative features, the other 8 are redundant.
x, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

In [3]:
# Hold out 30% of the samples for evaluation; stratifying on y keeps the
# class ratio the same in both splits.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [4]:
# Number of samples in the training split.
len(x_train)

700

In [5]:
# (samples, features) of the training split.
x_train.shape

(700, 10)

In [6]:
# Rebalance the training split with SMOTETomek; the test split is not touched.
smt = SMOTETomek(random_state=42)
resampled_split = smt.fit_resample(x_train, y_train)
x_train_res, y_train_res = resampled_split

# Per-class sample counts after resampling.
np.unique(y_train_res, return_counts=True)

(array([0, 1]), array([619, 619], dtype=int64))

In [7]:
# (samples, features) of the training split after SMOTETomek resampling.
x_train_res.shape

(1238, 10)

In [8]:
# Experiment grid. Each entry is a 5-tuple:
#   (run name, hyper-parameters, unfitted estimator, (train X, train y), (test X, test y))
# The estimators are created with defaults here; the hyper-parameter dict is what
# gets applied to the estimator and logged to MLflow by the later cells.
models = [
    (
        "Logistic_Regression",
        {"C": 1, "solver": 'liblinear'},
        LogisticRegression(),
        (x_train, y_train),
        (x_test, y_test),
    ),
    (
        "Random Forest",
        # random_state pins the forest's bootstrap / feature sampling so that the
        # reported metrics are identical on every Restart & Run All.
        {"n_estimators": 30, "max_depth": 3, "random_state": 42},
        RandomForestClassifier(),
        (x_train, y_train),
        (x_test, y_test),
    ),
    (
        "XGBClassifier",
        # NOTE(review): use_label_encoder is deprecated in recent xgboost
        # releases - confirm whether it is still needed for the pinned version.
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(),
        (x_train, y_train),
        (x_test, y_test),
    ),
    (
        # Same XGBoost configuration, but trained on the SMOTETomek-resampled split.
        "XGBClassifier With SMOTE",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(),
        (x_train_res, y_train_res),
        (x_test, y_test),
    ),
]

In [9]:
# Fit every configured model and collect its test-set classification report.
# reports[i] corresponds to models[i].
reports = []
for model_name, params, model, train_set, test_set in models:
    # Unpack into loop-local names. Rebinding x_train / y_train / x_test / y_test
    # here would overwrite the notebook-level splits: after the last iteration
    # x_train would be the SMOTE-resampled data, and re-running the cell that
    # builds `models` would silently train every model on it.
    fit_x, fit_y = train_set
    eval_x, eval_y = test_set

    model.set_params(**params)
    model.fit(fit_x, fit_y)
    y_pred = model.predict(eval_x)

    # output_dict=True returns a nested dict (per-class and averaged metrics)
    # instead of a formatted string, so individual values can be logged later.
    report = classification_report(eval_y, y_pred, output_dict=True)
    reports.append(report)

In [10]:
# Display the collected classification reports (one dict per trained model).
reports

[{'0': {'precision': 0.9454545454545454,
   'recall': 0.9629629629629629,
   'f1-score': 0.9541284403669724,
   'support': 270.0},
  '1': {'precision': 0.6,
   'recall': 0.5,
   'f1-score': 0.5454545454545454,
   'support': 30.0},
  'accuracy': 0.9166666666666666,
  'macro avg': {'precision': 0.7727272727272727,
   'recall': 0.7314814814814814,
   'f1-score': 0.749791492910759,
   'support': 300.0},
  'weighted avg': {'precision': 0.9109090909090909,
   'recall': 0.9166666666666666,
   'f1-score': 0.9132610508757297,
   'support': 300.0}},
 {'0': {'precision': 0.96415770609319,
   'recall': 0.9962962962962963,
   'f1-score': 0.9799635701275046,
   'support': 270.0},
  '1': {'precision': 0.9523809523809523,
   'recall': 0.6666666666666666,
   'f1-score': 0.7843137254901961,
   'support': 30.0},
  'accuracy': 0.9633333333333334,
  'macro avg': {'precision': 0.9582693292370712,
   'recall': 0.8314814814814815,
   'f1-score': 0.8821386478088503,
   'support': 300.0},
  'weighted avg': {'pr

In [19]:
# Select the tracking server BEFORE the experiment: set_experiment looks up
# (or creates) the experiment against whichever tracking URI is active when it
# is called, so the reverse order resolves the experiment in the default store
# on a fresh kernel.
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("Anomaly Detection2")

# Guard against stale notebook state: every model needs its own report.
assert len(models) == len(reports), "models and reports are out of sync; re-run the training cell"

# One MLflow run per model: hyper-parameters, headline test metrics, fitted model.
for (model_name, params, model, *_), report in zip(models, reports):
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(params)
        mlflow.log_metrics({
            "accuracy": report['accuracy'],
            "recall_class_1": report['1']['recall'],
            "recall_class_0": report['0']['recall'],
            # NOTE(review): key is spelled "marco" (sic). Kept unchanged so new
            # runs stay comparable with runs already logged under this name.
            "f1_score_marco": report['macro avg']['f1-score'],
        })

        # Pick the MLflow flavor from the estimator type rather than from the
        # free-text run name.
        if isinstance(model, XGBClassifier):
            mlflow.xgboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")

2024/08/15 17:14:39 INFO mlflow.tracking.fluent: Experiment with name 'Anomaly Detection2' does not exist. Creating a new experiment.
2024/08/15 17:14:44 INFO mlflow.tracking._tracking_service.client: üèÉ View run Logistic_Regression at: http://127.0.0.1:5000//#/experiments/121706232950516761/runs/ee2ad8626aec4b83a94b5d411132a3c9.
2024/08/15 17:14:44 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000//#/experiments/121706232950516761.
2024/08/15 17:14:45 INFO mlflow.tracking._tracking_service.client: üèÉ View run Random Forest at: http://127.0.0.1:5000//#/experiments/121706232950516761/runs/69f45a260ffd4d5b8dd9be9110e9cf4e.
2024/08/15 17:14:45 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000//#/experiments/121706232950516761.
2024/08/15 17:14:47 INFO mlflow.tracking._tracking_service.client: üèÉ View run XGBClassifier at: http://127.0.0.1:5000//#/experiments/121706232950516761/runs/0157136171414c

# Register the model

In [29]:
# Register the "model" artifact of a chosen run in the MLflow Model Registry.
model_name = "XGB-Smote"

# The run ID is pasted in by hand; strip stray whitespace/newlines so the
# runs:/ URI is well-formed, and fail early with a clear message if it is empty.
run_id = input("Enter Run ID:").strip()
if not run_id:
    raise ValueError("A run ID is required to register the model.")

model_uri = f"runs:/{run_id}/model"
result = mlflow.register_model(model_uri, model_name)

Enter Run ID: df739dbb915e442aa61bc745fdf18c6d


Successfully registered model 'XGB-Smote'.
2024/08/15 17:30:53 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGB-Smote, version 1
Created version '1' of model 'XGB-Smote'.


In [31]:
# Run ID entered above when registering 'XGB-Smote' version 1: df739dbb915e442aa61bc745fdf18c6d

In [30]:
# Load the registered model by explicit version number and sanity-check it
# by predicting on the held-out test split.
model_name = "XGB-Smote"
model_version = 1
model_uri = "models:/{}/{}".format(model_name, model_version)

loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(x_test)

# First four predictions only.
y_pred[:4]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

array([0, 0, 0, 0])

In [32]:
# Load the registered model through its "challenger" alias instead of a version
# number. The alias is not created anywhere in this notebook, so it must
# already exist in the registry when this cell runs.
model_name = "XGB-Smote"
model_version = 1  # not part of the alias-based URI below
model_uri = "models:/{}@challenger".format(model_name)

loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(x_test)

# First four predictions only.
y_pred[:4]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

array([0, 0, 0, 0])

In [33]:
# Alias used above to reference the registered 'XGB-Smote' model version: challenger

In [34]:
# Copy the model version behind the "challenger" alias into a separate
# registered model that represents production.
prod_model = 'anomaly-detection-prod'
dev_model_uri = "models:/{}@challenger".format(model_name)

client = mlflow.MlflowClient()

# Last expression: the returned ModelVersion is displayed as the cell output.
client.copy_model_version(
    src_model_uri=dev_model_uri,
    dst_name=prod_model,
)

Successfully registered model 'anomaly-detection-prod'.
Copied version '1' of model 'XGB-Smote' to version '1' of model 'anomaly-detection-prod'.


<ModelVersion: aliases=[], creation_timestamp=1723723659120, current_stage='None', description='', last_updated_timestamp=1723723659120, name='anomaly-detection-prod', run_id='df739dbb915e442aa61bc745fdf18c6d', run_link='', source='models:/XGB-Smote/1', status='READY', status_message='', tags={}, user_id='', version='1'>

In [36]:
# Load the production model through its "champion" alias and sanity-check it on
# the test split. The alias is not assigned anywhere in this notebook, so it
# must already exist in the registry when this cell runs.
model_uri = "models:/{}@champion".format(prod_model)

loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(x_test)

# First four predictions only.
y_pred[:4]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

array([0, 0, 0, 0])