In [2]:
# 1. Import pandas
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures
 
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.base            import clone

from sklearn.model_selection import ParameterGrid
from sklearn.metrics         import accuracy_score, precision_recall_fscore_support
from utils import grid_evaluate, preprocess_credit_card_data





# 2. Load the saved CSVs
X_train = pd.read_csv('X_train.csv')
X_test = pd.read_csv('X_test.csv')
y_train = pd.read_csv('y_train.csv')
y_test = pd.read_csv('y_test.csv')

# 3. (Optional but important) - if y_train and y_test are DataFrames, squeeze to make them Series
y_train = y_train.squeeze()
y_test = y_test.squeeze()

# 4. Verify shapes
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


(747, 31)
(250, 31)
(747,)
(250,)


In [3]:


def grid_evaluate(
    estimator,
    param_grid,
    X_train, X_test,
    y_train, y_test
):
    """
    Brute-force “no CV” grid search over both feature transforms and classifier hyper-params.
    param_grid keys can include:
      - 'feature_method': [None, 'polynomial', 'pca', 'rbf']
      - 'degree':         [2,3]       (for polynomial)
      - 'n_components':   [5,10]      (for PCA)
      - 'gamma':          [0.1,0.5]   (for RBF)
      + any estimator params (e.g. 'C', 'penalty', etc.)
    """
    rows = []
    for params in ParameterGrid(param_grid):
        # 1) pull out transform params
        fm    = params.pop('feature_method', None)
        deg   = params.pop('degree',         None)
        ncomp = params.pop('n_components',   None)
        gam   = params.pop('gamma',          None)

        # 2) fit+transform on train, transform on test
        if fm == 'polynomial':
            poly = PolynomialFeatures(degree=deg, include_bias=False)
            X_tr = poly.fit_transform(X_train)
            X_te = poly.transform(X_test)

        elif fm == 'pca':
            pca = PCA(n_components=ncomp)
            X_tr = pca.fit_transform(X_train)
            X_te = pca.transform(X_test)

        elif fm == 'rbf':
            X_tr = rbf_kernel(X_train, X_train, gamma=gam)
            X_te = rbf_kernel(X_test,  X_train, gamma=gam)

        else:
            X_tr, X_te = X_train, X_test

        # 3) train & predict
        clf = clone(estimator).set_params(**params)
        clf.fit(X_tr, y_train)
        y_pred = clf.predict(X_te)

        # 4) metrics
        acc  = accuracy_score(y_test, y_pred)
        prec, rec, f1, _ = precision_recall_fscore_support(
            y_test, y_pred, average='binary', zero_division=0
        )

        # 5) record
        record = {
            'feature_method': fm,
            'degree':         deg,
            'n_components':   ncomp,
            'gamma':          gam,
            'accuracy':       acc,
            'precision':      prec,
            'recall':         rec,
            'f1_score':       f1,
        }
        record.update(params)  # remaining clf params
        rows.append(record)

    return pd.DataFrame(rows)



In [None]:
from sklearn.linear_model import LogisticRegression

base_clf = LogisticRegression(max_iter=5000)
param_grid = {
    'feature_method': [None, 'polynomial', 'pca', 'rbf'],
    'degree':         [2, 3],
    'n_components':   [5, 10],
    'gamma':          [0.1, 0.5],   
    'C':              [0.1, 1.0, 10.0],
    'penalty':        ['l2']
}

results = grid_evaluate(
    estimator  = base_clf,
    param_grid = param_grid,
    X_train    = X_train,
    X_test     = X_test,
    y_train    = y_train,
    y_test     = y_test
)

# best by F1
best = results.sort_values('f1_score', ascending=False)
print(best)
print(best.iloc[0])



   feature_method  degree  n_components  gamma  accuracy  precision    recall  \
48           None       3             5    0.1     0.796   0.703704  0.306452   
49           None       3            10    0.1     0.796   0.703704  0.306452   
50           None       3             5    0.5     0.796   0.703704  0.306452   
51           None       3            10    0.5     0.796   0.703704  0.306452   
35           None       2            10    0.5     0.796   0.703704  0.306452   
..            ...     ...           ...    ...       ...        ...       ...   
47            rbf       2            10    0.5     0.752   0.500000  0.016129   
31            rbf       3            10    0.5     0.752   0.000000  0.000000   
30            rbf       3             5    0.5     0.752   0.000000  0.000000   
14            rbf       2             5    0.5     0.752   0.000000  0.000000   
15            rbf       2            10    0.5     0.752   0.000000  0.000000   

    f1_score    C penalty  