In [16]:
# ! pip install mlflow

In [17]:
import pandas as pd
import imblearn
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score,precision_recall_curve,auc
import mlflow
from urllib.parse import urlparse
from markupsafe import escape
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [18]:
clean_test= pd.read_csv('clean_test.csv')
clean_train= pd.read_csv('clean_train.csv')
clean_valid= pd.read_csv('clean_valid.csv')

In [19]:
## checking the blance of traing data
clean_train.target.value_counts()

0    2892
1     452
Name: target, dtype: int64

In [20]:
### our data is imbalance with more value of 0 and less value of 1 so we will use smote oversampling for balancing the dataset

In [21]:
X_train = clean_train.loc[:, clean_train.columns != 'target']

In [22]:
X_train=  X_train.values
Y_train = clean_train['target'].values

In [23]:
## our dataset in imbalnce making this balance using smote
oversample = SMOTE(random_state=42)
X_over, y_over = oversample.fit_resample(X_train, Y_train) ## balnced dataset


2023/02/27 08:03:09 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '6a485ca6fe304238a5b36dc823eade29', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [24]:
X_vaild=clean_valid.loc[:, clean_valid.columns != 'target'].values
Y_valid = clean_valid['target'].values

In [25]:
X_test=clean_test.loc[:, clean_test.columns != 'target'].values
Y_test = clean_test['target'].values

**Model 1 : logistic Regression model**

In [26]:
def eval_metrics(actual, pred):
    precision, recall, thresholds = precision_recall_curve(actual, pred)
    auc_precision_recall = auc(recall, precision)
    return (auc_precision_recall)

In [27]:
mlflow.sklearn.autolog()

n_estimators = 200
max_depth = 5
clf = LogisticRegression(random_state=101)
clf.fit(X_over, y_over)

y_pred = clf.predict(X_vaild)
aucpr = eval_metrics(Y_valid, y_pred)
acc = accuracy_score(Y_valid, y_pred)
conf_1=confusion_matrix(Y_valid, y_pred)

with mlflow.start_run(run_name="logistic Regression"):
  
    
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("AUCPR",aucpr)
    mlflow.log_dict(np.array(conf_1).tolist(), "confusion_matrix.json")
    mlflow.sklearn.log_model(clf, "model")

    print("\nRandom Classifier Model (no_of_estimator={:f}, max_depth={:f}):".format(n_estimators, max_depth))
    print(f"Accuracy: {acc}")
    print(f"AUCPR: {aucpr} ")
    print(f"Confusion Matrix: {conf_1} \n \n")
    

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    mlflow.sklearn.log_model(
        sk_model=clf,
        artifact_path="sklearn-model",
        registered_model_name="Logistic Regression"
    )
    if tracking_url_type_store != "file":
      mlflow.sklearn.log_model(clf, "model", registered_model_name="Logistic Regression")
    else:
      mlflow.sklearn.log_model(clf, "model")

2023/02/27 08:03:11 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '8b961f6272d743e2abc08e38cb777840', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow



Random Classifier Model (no_of_estimator=200.000000, max_depth=5.000000):
Accuracy: 0.7874439461883408
AUCPR: 0.38424645799502405 
Confusion Matrix: [[819 156]
 [ 81  59]] 
 



Registered model 'Logistic Regression' already exists. Creating a new version of this model...
2023/02/27 08:03:19 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: Logistic Regression, version 2
Created version '2' of model 'Logistic Regression'.


In [28]:
## printing 1st model version of logistic regression
mlflow.tracking.MlflowClient().get_model_version("Logistic Regression","1")

<ModelVersion: creation_timestamp=1677484937019, current_stage='None', description=None, last_updated_timestamp=1677484937019, name='Logistic Regression', run_id='37ecd3ac7d5d4bef8979735fd6bc4c7c', run_link=None, source='file:///content/mlruns/0/37ecd3ac7d5d4bef8979735fd6bc4c7c/artifacts/sklearn-model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [29]:
# ! mlflow ui

## Model 2 :multinomial Naive Bayes

In [30]:
mlflow.sklearn.autolog()

clf = GaussianNB()
clf.fit(X_over, y_over)

y_pred = clf.predict(X_vaild)
aucpr = eval_metrics(Y_valid, y_pred)
acc = accuracy_score(Y_valid, y_pred)
conf_2=confusion_matrix(Y_valid, y_pred)

with mlflow.start_run(run_name="Multinomial Naive Bayes"):

    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("AUCPR",aucpr)
    mlflow.sklearn.log_model(clf, "model")
    mlflow.log_dict(np.array(conf_2).tolist(), "confusion_matrix.json")

    print("\nMultinomial Naive Bayes")
    print(f"Accuracy: {acc}")
    print(f"AUCPR: {aucpr}")
    print(f"Confusion Matrix: {conf_2} \n\n")
    

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    mlflow.sklearn.log_model(
        sk_model=clf,
        artifact_path="sklearn-model",
        registered_model_name="gaussian-nb-model"
    )
    if tracking_url_type_store != "file":
      mlflow.sklearn.log_model(clf, "model", registered_model_name="gaussian-nb-model")
    else:
      mlflow.sklearn.log_model(clf, "model")

2023/02/27 08:03:22 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '8571dc2b526c46728b1381a776229c3d', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow



Multinomial Naive Bayes
Accuracy: 0.5354260089686098
AUCPR: 0.42076306923711515
Confusion Matrix: [[508 467]
 [ 51  89]] 




Successfully registered model 'gaussian-nb-model'.
2023/02/27 08:03:31 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: gaussian-nb-model, version 1
Created version '1' of model 'gaussian-nb-model'.


In [31]:
## printing 1st model version of gaussian naive bayes
mlflow.tracking.MlflowClient().get_model_version("gaussian-nb-model","1")

<ModelVersion: creation_timestamp=1677485011704, current_stage='None', description=None, last_updated_timestamp=1677485011704, name='gaussian-nb-model', run_id='9db27ca639134277be156fd6b073c234', run_link=None, source='file:///content/mlruns/0/9db27ca639134277be156fd6b073c234/artifacts/sklearn-model', status='READY', status_message=None, tags={}, user_id=None, version=1>

**Model 03 : multilayer perceptron**

In [32]:
mlflow.sklearn.autolog()

clf = MLPClassifier(random_state=101,learning_rate='adaptive')
clf.fit(X_over, y_over)

y_pred = clf.predict(X_vaild)
aucpr = eval_metrics(Y_valid, y_pred)
acc = accuracy_score(Y_valid, y_pred)
conf_3=confusion_matrix(Y_valid, y_pred)

with mlflow.start_run(run_name="Multilayer Perceptron"):

    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("AUCPR",aucpr)
    mlflow.sklearn.log_model(clf, "model")
    
    print("\nMultilayer Perceptron")
    print(f"Accuracy: {acc}")
    print(f"AUCPR: {aucpr}")
    print(f"Confusion Matrix {conf_3} \n\n")
    

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    mlflow.sklearn.log_model(
        sk_model=clf,
        artifact_path="sklearn-model",
        registered_model_name="multilayer-perceptron-model"
    )
    if tracking_url_type_store != "file":
      mlflow.sklearn.log_model(clf, "model", registered_model_name="multilayer-perceptron-model")
    else:
      mlflow.sklearn.log_model(clf, "model")

2023/02/27 08:03:33 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '747a9a634bbd49b184c634d4e1ddf53b', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow



Multilayer Perceptron
Accuracy: 0.7103139013452915
AUCPR: 0.4409002843302636
Confusion Matrix [[709 266]
 [ 57  83]] 




Successfully registered model 'multilayer-perceptron-model'.
2023/02/27 08:03:46 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: multilayer-perceptron-model, version 1
Created version '1' of model 'multilayer-perceptron-model'.


In [35]:
## geting 1st model version of multilayer perceptron
mlflow.tracking.MlflowClient().get_model_version("multilayer-perceptron-model","1")


<ModelVersion: creation_timestamp=1677485026775, current_stage='None', description=None, last_updated_timestamp=1677485026775, name='multilayer-perceptron-model', run_id='47fa9983ff264df7a8d48ca281b712c7', run_link=None, source='file:///content/mlruns/0/47fa9983ff264df7a8d48ca281b712c7/artifacts/sklearn-model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [None]:
! mlflow ui

In [41]:
import os

In [42]:
os.listdir('/content')

['.config',
 'clean_test.csv',
 'mlruns',
 'clean_train.csv',
 'clean_valid.csv',
 'sample_data']