In [46]:
# this will execute utils.py in your notebook’s namespace
%run ./utils.py


In [55]:
from pathlib import Path
import pickle
# in another notebook
import pandas as pd
from utils import preprocess_credit_card_data

test_df = pd.read_pickle('test_df.pkl')
print(test_df.head())

# list the notebook files (relative to this notebook)
nb_files = [
    Path('CC_NN.ipynb'),
    Path('CC_logistic_regression.ipynb'),
    Path('svm') / 'CC_svm.ipynb',
]

best_models = []
for nb in nb_files:
    pkl = nb.with_suffix('.pkl')          # swaps .ipynb → .pkl
    with open(pkl, 'rb') as f:
        best_models.append(pickle.load(f))

# now best_models is [best_nn_model, best_logistic_model, best_svm_model]
nn_model = best_models[0]
log_regr = best_models[1]
svm = best_models[2]


# 1) SVM (you already have this)
svm_params = svm[['feature_method','degree','n_components','gamma']].iloc[0]
print("SVM params:\n", svm_params, "\n")

# 2) Logistic Regression
# pick the columns that correspond to your hyperparameters
log_params = log_regr[['penalty','C']].iloc[0]
print("Logistic Regression params:\n", log_params, "\n")

# 3) Neural Network
# pick the columns that correspond to your NN hyperparameters
nn_params = nn_model[['activation','alpha','hidden_layer_sizes','learning_rate_init']].iloc[0]
print("Neural Net params:\n", nn_params)




      ID  LIMIT_BAL  SEX  EDUCATION  MARRIAGE  AGE  PAY_0  PAY_2  PAY_3  \
0   2982   100000.0    1          2         1   43      1     -2     -2   
1  12658   290000.0    1          2         1   38      0      0      0   
2  21153   110000.0    1          2         1   48      0      0      0   
3  26048   170000.0    2          3         1   44      0      0      0   
4  21402   120000.0    2          2         3   59      2      2      2   

   PAY_4  ...  BILL_AMT4  BILL_AMT5  BILL_AMT6  PAY_AMT1  PAY_AMT2  PAY_AMT3  \
0     -2  ...        0.0      551.0      551.0       0.0       0.0       0.0   
1      0  ...    82168.0    80299.0    77324.0    3530.0    3019.0    2818.0   
2      0  ...   136859.0    60276.0    48652.0    5000.0    4087.0   64142.0   
3      0  ...   147611.0    28697.0   107142.0    6936.0    8266.0    5792.0   
4      2  ...    86910.0    87893.0    86370.0       0.0    7898.0    3300.0   

   PAY_AMT4  PAY_AMT5  PAY_AMT6  default.payment.next.month  
0     

In [56]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.decomposition   import PCA
from sklearn.svm             import SVC, LinearSVC
from sklearn.linear_model    import LogisticRegression
from sklearn.neural_network  import MLPClassifier
from sklearn.base            import clone
from sklearn.metrics.pairwise import rbf_kernel

def test_models_on_df(models, params_list, test_df, target_col='target'):
    """
    models      : list of fitted estimator objects [nn_model, log_model, svm_model]
    params_list : list of pd.Series of hyper-params matching each model
    test_df     : the DataFrame containing both features and the target
    target_col  : name of the label column in test_df
    ---
    returns a DataFrame with columns:
      model_name, accuracy, precision, recall, f1
    """
    # split out features / target
    y_true = test_df[target_col]
    X      = test_df.drop(columns=[target_col])

    results = []
    for model, p in zip(models, params_list):
        # build a fresh copy so we don't pollute the original
        clf = clone(model)

        # we need to apply the SAME feature transform that was used in training:
        fm = p.get('feature_method', None)

        if fm == 'polynomial':
            poly = PolynomialFeatures(degree=int(p['degree']), include_bias=False)
            X_proc = poly.fit_transform(X)

        elif fm == 'pca':
            pca = PCA(n_components=int(p['n_components']))
            X_proc = pca.fit_transform(X)

        elif fm == 'rbf':
            # RBF: compute kernel against training “basis” from the fitted SVM
            # we assume the saved model has a `.support_` attribute
            X_basis = clf.support_vectors_
            X_proc  = rbf_kernel(X, X_basis, gamma=p['gamma'])

        else:
            # linear / no extra transform
            X_proc = X.values  # as numpy array

        # now predict & score
        y_pred = clf.predict(X_proc)

        results.append({
            'model_name':        clf.__class__.__name__,
            'feature_method':    fm or 'linear',
            'accuracy':          accuracy_score(y_true, y_pred),
            'precision':         precision_score(y_true, y_pred),
            'recall':            recall_score(y_true, y_pred),
            'f1':                f1_score(y_true, y_pred)
        })

    return pd.DataFrame(results)



In [57]:

print(test_df.shape)
print(test_df.head())

(250, 25)
      ID  LIMIT_BAL  SEX  EDUCATION  MARRIAGE  AGE  PAY_0  PAY_2  PAY_3  \
0   2982   100000.0    1          2         1   43      1     -2     -2   
1  12658   290000.0    1          2         1   38      0      0      0   
2  21153   110000.0    1          2         1   48      0      0      0   
3  26048   170000.0    2          3         1   44      0      0      0   
4  21402   120000.0    2          2         3   59      2      2      2   

   PAY_4  ...  BILL_AMT4  BILL_AMT5  BILL_AMT6  PAY_AMT1  PAY_AMT2  PAY_AMT3  \
0     -2  ...        0.0      551.0      551.0       0.0       0.0       0.0   
1      0  ...    82168.0    80299.0    77324.0    3530.0    3019.0    2818.0   
2      0  ...   136859.0    60276.0    48652.0    5000.0    4087.0   64142.0   
3      0  ...   147611.0    28697.0   107142.0    6936.0    8266.0    5792.0   
4      2  ...    86910.0    87893.0    86370.0       0.0    7898.0    3300.0   

   PAY_AMT4  PAY_AMT5  PAY_AMT6  default.payment.next.mont

In [58]:


y_test = test_df['default.payment.next.month']
X_test = test_df.drop(columns=['default.payment.next.month'])

model_names = ['NeuralNet','LogisticReg','SVM']

In [59]:
print(svm_params)

feature_method    polynomial
degree                     3
n_components               5
gamma                    0.1
Name: 20, dtype: object


In [61]:
print('nn_model:', nn_model)
print('log model:', log_regr)
print('svm:', svm)


nn_model:     accuracy_validation  accuracy_train  precision_validation  \
11                0.728        0.973226              0.448276   

    recall_validation  f1_validation  f1_train  precision_train  recall_train  \
11           0.419355       0.433333  0.942529         0.942529      0.942529   

   activation  alpha hidden_layer_sizes  learning_rate_init  
11       relu  0.001           (15, 15)                0.01  
log model:    feature_method  degree  n_components  gamma  accuracy_validation  \
55     polynomial       3            10    0.5                0.764   

    accuracy_train  precision_validation  recall_validation  f1_validation  \
55        0.904953              0.534884           0.370968       0.438095   

    f1_train  precision_train  recall_train     C penalty  
55  0.750877         0.963964      0.614943  0.01      l2  
svm:    feature_method  degree  n_components  gamma  accuracy_validation  \
20     polynomial       3             5    0.1                 0.

In [54]:
nn_params  = nn_model.iloc[0][['activation', 'alpha', 'hidden_layer_sizes', 'learning_rate_init']]
log_params = log_regr.iloc[0][['penalty', 'C']]
svm_params = svm.iloc[0][['feature_method', 'degree', 'n_components', 'gamma', 'C']]


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

# Create estimators using extracted parameters
nn_estimator = MLPClassifier(
    hidden_layer_sizes = tuple(nn_params['hidden_layer_sizes']),
    activation         = nn_params['activation'],
    alpha              = nn_params['alpha'],
    learning_rate_init = nn_params['learning_rate_init'],
    max_iter           = 500,
    random_state       = 42
)

log_estimator = LogisticRegression(
    penalty = log_params['penalty'],
    C       = log_params['C'],
    solver  = 'liblinear' if log_params['penalty'] == 'l1' else 'lbfgs',
    max_iter = 1000
)

svm_estimator = SVC(
    kernel = 'poly' if svm_params['feature_method'] == 'polynomial' else 'rbf',
    degree = int(svm_params['degree']),
    gamma  = svm_params['gamma'],
    C      = svm_params['C']
)


In [None]:
nn_estimator = MLPClassifier(
    activation         = nn_params['activation'],
    alpha              = nn_params['alpha'],
    hidden_layer_sizes = tuple(nn_params['hidden_layer_sizes']),
    learning_rate_init = nn_params['learning_rate_init'],
    max_iter           = 500,
    random_state       = 42
)

TypeError: Cannot clone object '    accuracy_validation  accuracy_train  precision_validation  \
11                0.728        0.973226              0.448276   

    recall_validation  f1_validation  f1_train  precision_train  recall_train  \
11           0.419355       0.433333  0.942529         0.942529      0.942529   

   activation  alpha hidden_layer_sizes  learning_rate_init  
11       relu  0.001           (15, 15)                0.01  ' (type <class 'pandas.core.frame.DataFrame'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.