In [5]:
# %pip install --pre mlflow

    Import Libraries

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

    import Dataset

In [7]:
training_data_01 =  pd.read_csv('./final_training_data.csv')


    Split the Final Dataset into Features and Target Variable

In [8]:
X = training_data_01.drop('PromotionStatus', axis=1)   # Features
y = training_data_01['PromotionStatus']                # Target Variable

    Split the Dataset into Training and Test Set

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [10]:
# %pip install imbalanced-learn
# 
X_test.shape
X_train.shape


(8000, 17)

    OverSampling

In [11]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


In [12]:
X_train_resampled.shape

(14262, 17)

    Experiment-1 
    RandomForest Classifier

In [52]:

params = {"random_state":42 ,"n_estimators": 100,"max_depth":10}

model = RandomForestClassifier(**params)  
model.fit(X_train_resampled, y_train_resampled)

y_pred = model.predict(X_test)


print("Confusion Matrix\n",confusion_matrix(y_test, y_pred))
print("\nClassification Report\n",classification_report(y_test, y_pred))

Confusion Matrix
 [[ 239   50    0]
 [  48 1084   57]
 [   0   80  442]]

Classification Report
               precision    recall  f1-score   support

           0       0.83      0.83      0.83       289
           1       0.89      0.91      0.90      1189
           2       0.89      0.85      0.87       522

    accuracy                           0.88      2000
   macro avg       0.87      0.86      0.87      2000
weighted avg       0.88      0.88      0.88      2000



In [53]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(model, X, y, cv=5) 
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())

Cross-validation scores: [0.863  0.843  0.868  0.86   0.8665]
Mean accuracy: 0.8600999999999999


    Create dictionary of metrics

In [None]:
report_dict = classification_report(y_test, y_pred, output_dict=True)
report_dict

In [55]:
import mlflow

    Log the Model

In [56]:
mlflow.set_experiment("First Experiment")
mlflow.set_tracking_uri("http://127.0.0.1:5000/")


with mlflow.start_run(run_name="Random Forest Classifier with SMOTE"):
    mlflow.set_tag("Training Info", "Trained with Project final Data")
    # mlflow.log_params({'random_state': 42})
    
    mlflow.log_params(params=params)

    mlflow.log_metrics({
        'accuracy': report_dict['accuracy'],
        'recall_class_0': report_dict['0']['recall'],
        'recall_class_1': report_dict['1']['recall'],
        'recall_class_2': report_dict['2']['recall'],
        'precision_class_0':report_dict['0']['precision'],
        'precision_class_1':report_dict['1']['precision'],
        'precision_class_2':report_dict['2']['precision'],
        'f1_score_macro': report_dict['macro avg']['f1-score']
    })
    # mlflow.log_metric('Cross-validation scores',scores)
    mlflow.log_metric('cross_val_mean_accuracy', scores.mean())

    model_info=  mlflow.sklearn.log_model(model, "Random Forest Classifier with SMOTE")

2024/10/29 12:01:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest Classifier with SMOTE at: http://127.0.0.1:5000/#/experiments/140846681390198027/runs/78cc9fa06282497b96b8be4d2dca8f5f.
2024/10/29 12:01:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/140846681390198027.


    Load our saved model as a Python Function


    Load Model from the Experiments

In [57]:
# loaded_model_1 = mlflow.pyfunc.load_model("file:///c:/Assessments/final_project/Data_Science/mlruns/140846681390198027/6d3fc1e2dac64a0db9e272d9b3fd82cc/artifacts/Random Forest Classifier")
loaded_model_2 = mlflow.pyfunc.load_model(model_info.model_uri)
print(model_info.model_uri) 

runs:/78cc9fa06282497b96b8be4d2dca8f5f/Random Forest Classifier with SMOTE


    Use our model to predict 

In [58]:
y_pred = loaded_model_2.predict(X_test)

print("Confusion Matrix\n",confusion_matrix(y_test, y_pred))
print("\nClassification Report\n",classification_report(y_test, y_pred))


Confusion Matrix
 [[ 239   50    0]
 [  48 1084   57]
 [   0   80  442]]

Classification Report
               precision    recall  f1-score   support

           0       0.83      0.83      0.83       289
           1       0.89      0.91      0.90      1189
           2       0.89      0.85      0.87       522

    accuracy                           0.88      2000
   macro avg       0.87      0.86      0.87      2000
weighted avg       0.88      0.88      0.88      2000



    Register Out Model

In [51]:
#Method-1
model_name = "Random Forest Classifier with SMOTE"
result = mlflow.register_model(
    model_info.model_uri, model_name
)

Successfully registered model 'Random Forest Classifier with SMOTE'.
2024/10/29 11:57:23 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Random Forest Classifier with SMOTE, version 1
Created version '1' of model 'Random Forest Classifier with SMOTE'.


In [68]:
#Method-2
model_name = "Random Forest Classifier with SMOTE"
run_id = input("Enter Run ID")
model_uri = f"runs:/{run_id}/{model_name}"

result = mlflow.register_model(
            model_uri, model_name
)


Registered model 'Random Forest Classifier with SMOTE' already exists. Creating a new version of this model...
2024/10/29 12:17:48 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Random Forest Classifier with SMOTE, version 2
Created version '2' of model 'Random Forest Classifier with SMOTE'.


    Load Models from the Registry

In [None]:
model_name = "Random Forest Classifier with SMOTE"
model_version = 2 
model_uri = f"models:/{model_name}/{model_version}"
# model_uri = f"models:/{model_name}@challenger"


loaded_model_3= mlflow.sklearn.load_model(model_uri)
y_pred = loaded_model_3.predict(X_test)

# print(y[:4])

print("Confusion Matrix\n",confusion_matrix(y_test, y_pred))
print("\nClassification Report\n",classification_report(y_test, y_pred))

print(loaded_model_3)

    Development Environment to Production

In [71]:

dev_model_uri = f"models:/{model_name}@challenger"
prod_model = "Employee Promotion Prediction"
client = mlflow.MlflowClient()
client.copy_model_version(src_model_uri= dev_model_uri ,dst_name = prod_model )

Successfully registered model 'Employee Promotion Prediction'.
Copied version '2' of model 'Random Forest Classifier with SMOTE' to version '1' of model 'Employee Promotion Prediction'.


<ModelVersion: aliases=[], creation_timestamp=1730184923906, current_stage='None', description='', last_updated_timestamp=1730184923906, name='Employee Promotion Prediction', run_id='78cc9fa06282497b96b8be4d2dca8f5f', run_link='', source='models:/Random Forest Classifier with SMOTE/2', status='READY', status_message='', tags={}, user_id='', version='1'>

    Set version and alias 

In [112]:


vr = client.get_model_version("Random Forest Classifier with SMOTE", 2)
# print(vr)
client.set_registered_model_alias("Random Forest Classifier with SMOTE", "challenger",vr.version)
# client.delete_registered_model_alias("Random Forest Classifier with SMOTE", "challenger")

client.get_model_version_by_alias("Random Forest Classifier with SMOTE", "challenger")


<ModelVersion: aliases=['challenger'], creation_timestamp=1730184468931, current_stage='None', description='', last_updated_timestamp=1730184468931, name='Random Forest Classifier with SMOTE', run_id='78cc9fa06282497b96b8be4d2dca8f5f', run_link='', source=('file:///c:/Assessments/final_project/Data_Science/mlruns/140846681390198027/78cc9fa06282497b96b8be4d2dca8f5f/artifacts/Random '
 'Forest Classifier with SMOTE'), status='READY', status_message='', tags={}, user_id='', version='2'>

    Experiment-2 
    Logistic Regression Model

In [22]:
training_data_01 =  pd.read_csv('./final_training_data.csv')

X = training_data_01.drop('PromotionStatus', axis=1) 
y = training_data_01['PromotionStatus'] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [23]:
# from imblearn.over_sampling import SMOTE

# smote = SMOTE(random_state=42)
# X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [24]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_resampled = scaler.fit_transform(X_train_resampled)
X_test = scaler.transform(X_test)




In [25]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=15000)  
model.fit(X_train_resampled, y_train_resampled)
y_pred = model.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[ 287    2    0]
 [   7 1180    2]
 [   0    2  520]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       289
           1       1.00      0.99      0.99      1189
           2       1.00      1.00      1.00       522

    accuracy                           0.99      2000
   macro avg       0.99      0.99      0.99      2000
weighted avg       0.99      0.99      0.99      2000



    Log different Models
    Track Experiments using MLFLOW

In [26]:
models = [
    (
        "Random Forest Classifier",
        {"random_state":42, "max_depth":10,"n_estimators" :100},
        RandomForestClassifier(),
        (X_train_resampled,y_train_resampled),
        (X_test,y_test)
     ) ,
     
     (
        "Logistic Regression Model with StandardScaler",
        {"max_iter":1000 },
        LogisticRegression(),
        (X_train_resampled,y_train_resampled),
        (X_test,y_test)    
     )

]

In [27]:
reports = []

for model_name,params, model, train_set, test_set in models:
    X_train = train_set[0]
    y_train = train_set[1]
    X_test = test_set[0]
    y_test = test_set[1]

    model.set_params(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test,y_pred, output_dict=True)
    reports.append(report)

In [28]:
reports

[{'0': {'precision': 0.8327526132404182,
   'recall': 0.8269896193771626,
   'f1-score': 0.8298611111111112,
   'support': 289.0},
  '1': {'precision': 0.8929159802306426,
   'recall': 0.911690496215307,
   'f1-score': 0.9022055763628797,
   'support': 1189.0},
  '2': {'precision': 0.8857715430861723,
   'recall': 0.8467432950191571,
   'f1-score': 0.8658178256611165,
   'support': 522.0},
  'accuracy': 0.8825,
  'macro avg': {'precision': 0.8704800455190776,
   'recall': 0.861807803537209,
   'f1-score': 0.8659615043783692,
   'support': 2000.0},
  'weighted avg': {'precision': 0.8823576756058485,
   'recall': 0.8825,
   'f1-score': 0.882254598200839,
   'support': 2000.0}},
 {'0': {'precision': 0.9761904761904762,
   'recall': 0.9930795847750865,
   'f1-score': 0.9845626072041166,
   'support': 289.0},
  '1': {'precision': 0.9966216216216216,
   'recall': 0.992430613961312,
   'f1-score': 0.9945217024863042,
   'support': 1189.0},
  '2': {'precision': 0.9961685823754789,
   'recall':

In [29]:
import mlflow

mlflow.set_experiment("Log Differetn Models")
mlflow.set_tracking_uri("http://127.0.0.1:5000/")

for i, element in enumerate(models):
    model_name = element[0]
    params = element[1]
    model = element[2]
    report = reports[i]

    with mlflow.start_run(run_name=model_name):
        # mlflow.set_tag("Training Info", "Trained with Project final Data")
        mlflow.log_params(params)

        mlflow.log_metrics({
        'accuracy': report['accuracy'],
        'recall_class_0': report['0']['recall'],
        'recall_class_1': report['1']['recall'],
        'recall_class_2': report['2']['recall'],
        'precision_class_0':report['0']['precision'],
        'precision_class_1':report['1']['precision'],
        'precision_class_2':report['2']['precision'],
        'f1_score_macro': report['macro avg']['f1-score']
        })


        # mlflow.log_metric('accuracy',report['accuracy'])
        # mlflow.log_metric('Cross-validation scores',scores)
        # mlflow.log_metric('cross_val_mean_accuracy', scores.mean())

        model_info  =  mlflow.sklearn.log_model(model, model_name)



2024/10/29 11:13:34 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest Classifier at: http://127.0.0.1:5000/#/experiments/887698436236583196/runs/7096e08a354748349509bd97fe85bddc.
2024/10/29 11:13:34 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/887698436236583196.
2024/10/29 11:13:41 INFO mlflow.tracking._tracking_service.client: 🏃 View run Logistic Regression Model with StandardScaler at: http://127.0.0.1:5000/#/experiments/887698436236583196/runs/24b16c0cf29c492482baddcbbe5a09f1.
2024/10/29 11:13:41 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/887698436236583196.
