Importing Dependencies

In [6]:
import joblib
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import(
    StratifiedKFold,
    cross_validate
)
warnings.filterwarnings("ignore")

Load The Data

In [4]:
X_train = np.load('../artifacts/X_train.npz')['arr_0']
Y_train = np.load('../artifacts/Y_train.npz')['arr_0']
X_test = np.load('../artifacts/X_test.npz')['arr_0']
Y_test = np.load('../artifacts/Y_test.npz')['arr_0']

In [7]:
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'Decision Tree': DecisionTreeClassifier()
    
}

In [8]:
cv = StratifiedKFold(
    n_splits=5, shuffle=True, random_state=42
)

In [12]:
for model_name, model in models.items():
    print(f"Cross-validating {model_name}...")
    
    cv_results = cross_validate(
        model, 
        X_train, 
        Y_train,
        cv=cv,
        scoring = 'f1',
        return_estimator=True,
        return_train_score=True
    )

    test_scores = cv_results['test_score']
    best_index = np.argmax(test_scores)
    
    estimator = cv_results['estimator'][best_index]
    print(estimator)
    
        

Cross-validating Logistic Regression...
LogisticRegression()
Cross-validating Random Forest...
RandomForestClassifier()
Cross-validating Decision Tree...
DecisionTreeClassifier()
