In [8]:
import pandas as pd
import numpy as np
from model_tuner import *

In [60]:
# fetch_california_housing is only referenced by the commented-out
# California housing cells below.
from sklearn.datasets import fetch_california_housing
import sklearn
# Echo the installed scikit-learn version as this cell's output.
sklearn.__version__

'1.4.1.post1'

In [24]:
# # Load the California housing dataset
# california_housing = fetch_california_housing(as_frame=True)

In [28]:
# df = california_housing["frame"]

In [30]:
# Imported explicitly so this cell does not rely on whatever names
# `from model_tuner import *` (or a stale kernel) happens to provide.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Keep the raw scikit-learn Bunch under its own name (mirrors `breast_sk`
# in the disabled variant below) instead of rebinding `iris` from a Bunch
# to a DataFrame.
iris_sk = load_iris()
iris = pd.DataFrame(
    data=np.c_[iris_sk["data"], iris_sk["target"]],
    columns=iris_sk["feature_names"] + ["target"],
)
target = "target"
features = [col for col in iris.columns if col != target]

X = iris[features].values  # independent variables
# Dependent variable; cast back to int because the labels were stored in
# the same array as the float feature columns.
y = iris[target].values.astype(int)

# Alternative dataset (disabled): breast cancer.
# breast_sk = load_breast_cancer()
# breast = pd.DataFrame(
#     data=np.c_[breast_sk.data, breast_sk.target],
# )
# breast.columns = list(breast_sk.feature_names) + ["target"]
# features = [col for col in breast.columns if col != "target"]
# target = "target"

# X = breast[features].values  # independent variables
# y = breast[target].values.astype(int)  # dependent variable

lr = LogisticRegression(class_weight="balanced", C=1, max_iter=1000)

estimator_name = "lr"
# Grid of candidate C values for the cross-validated search: 10 points
# log-spaced from 1e-4 to 1, keyed as "<estimator_name>__C" (i.e. "lr__C").
tuned_parameters = [{estimator_name + "__C": np.logspace(-4, 0, 10)}]

In [32]:
# Switches handed to Model below as `kfold=` and `calibrate=`.
kfold, calibrate = True, True

model = Model(
    # What is being tuned.
    name="Iris_model",
    estimator=lr,
    estimator_name=estimator_name,
    # Search space and the metrics it is scored on.
    grid=tuned_parameters,
    scoring=["roc_auc_ovr", "precision_macro"],
    randomized_grid=False,
    n_iter=3,  # presumably only relevant when randomized_grid=True -- TODO confirm
    # Splitting / resampling options.
    kfold=kfold,
    n_splits=2,
    stratify=True,
    random_state=3,
    # Calibration switch.
    calibrate=calibrate,
)

# Run the search over `tuned_parameters` first, then fit on the same data.
model.grid_search_param_tuning(X, y)
model.fit(X, y)

# Tuning hyper-parameters for roc_auc_ovr
Fitting 2 folds for each of 10 candidates, totalling 20 fits
[CV] END .......................................lr__C=0.0001; total time=   0.0s
[CV] END .......................................lr__C=0.0001; total time=   0.0s
[CV] END ........................lr__C=0.0002782559402207126; total time=   0.0s
[CV] END ........................lr__C=0.0002782559402207126; total time=   0.0s
[CV] END .........................lr__C=0.000774263682681127; total time=   0.0s
[CV] END .........................lr__C=0.000774263682681127; total time=   0.0s
[CV] END .........................lr__C=0.002154434690031882; total time=   0.0s
[CV] END .........................lr__C=0.002154434690031882; total time=   0.0s
[CV] END .........................lr__C=0.005994842503189409; total time=   0.0s
[CV] END .........................lr__C=0.005994842503189409; total time=   0.0s
[CV] END .........................lr__C=0.016681005372000592; total time=   0.0s
[CV] E

In [58]:
# Calibrate before inspecting anything: the calibrated branches below read
# `calibrated_classifiers_` / `.estimator.estimator`, which presumably only
# exist once calibrateModel has run -- TODO confirm against model_tuner.
if model.calibrate:
    model.calibrateModel(X, y)


def _print_ranked_coefficients(importance, feature_names):
    """Print one "Feature: <name>, Score: <coef>" line per feature.

    Features are listed from the largest coefficient to the smallest
    (ordered by signed value, not by magnitude).

    Parameters
    ----------
    importance : 1-D array of coefficients, one entry per feature.
    feature_names : sequence of names aligned with ``importance``.

    Returns
    -------
    numpy.ndarray
        Feature indices in the order they were printed.
    """
    order = np.argsort(importance)[::-1]
    for feat_idx in order:
        print(
            "Feature: %s, Score: %.5f"
            % (feature_names[feat_idx], importance[feat_idx])
        )
    return order


# `steps[1][1]` is the estimator object of the second step of each fitted
# object -- presumably the logistic regression named "lr"; verify if the
# step layout ever changes.
# NOTE(review): `coef_[0]` reports only the first row of `coef_`; for a
# multi-class target such as iris that looks like the coefficients of a
# single class rather than an overall importance -- confirm this is intended.
if kfold:
    print(model.xval_output["train_score"], model.xval_output["test_score"])
    for fold_idx, fold_estimator in enumerate(model.xval_output["estimator"]):
        print("\n" + str(fold_idx) + " Fold: ")
        if model.calibrate:
            # NOTE(review): the outer fold index is reused to pick one of
            # this estimator's calibrated classifiers. That only works while
            # there are at least as many calibrated classifiers as outer
            # folds, and the choice looks arbitrary -- confirm the intent.
            importance = (
                fold_estimator.calibrated_classifiers_[fold_idx]
                .estimator.steps[1][1]
                .coef_[0]
            )
        else:
            importance = fold_estimator.steps[1][1].coef_[0]

        # A distinct loop variable inside the helper keeps the fold index
        # from being overwritten by the feature index.
        sort_imp_indx = _print_ranked_coefficients(importance, features)
else:
    if model.calibrate:
        importance = model.estimator.estimator.steps[1][1].coef_[0]
    else:
        importance = model.estimator.steps[1][1].coef_[0]
    # Summarize feature importance for the single fitted model.
    sort_imp_indx = _print_ranked_coefficients(importance, features)


[0.97893333 0.96666667] [0.95786667 0.97893333]

0 Fold: 
Feature: sepal width (cm), Score: 0.66715
Feature: sepal length (cm), Score: -0.95095
Feature: petal width (cm), Score: -1.34057
Feature: petal length (cm), Score: -1.65491

1 Fold: 
Feature: sepal width (cm), Score: 0.85867
Feature: sepal length (cm), Score: -0.94016
Feature: petal width (cm), Score: -1.49184
Feature: petal length (cm), Score: -1.54397
