In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PNN import PNN
from GRNN import GRNN
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

from sklearn.datasets import load_diabetes, load_digits
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error

## Base Functions

In [4]:
def evaluvate_on_kfold(X, y, model):
    
    if isinstance(model, GRNN):
        score_func = mean_squared_error
    else:
        score_func = accuracy_score
        
    kf = KFold(n_splits=3)

    scores = []
    
    for i, (train_index, test_index) in enumerate(kf.split(X)):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        
        model.fit(X_train, y_train)
        preds = [model.predict([x_test]) for x_test in X_test]
        
        scores.append(score_func(y_true=y_test, y_pred=preds))

    return np.mean(scores)

In [5]:
from sklearn.model_selection import ParameterGrid

def grid_search_cv(X, y, model_class, sigma_range, tau_range):
    model_configs = [
        {"name": "No Regularization", "regularization": None},
        {"name": "L1 Dropout Regularization", "regularization": ['dropout', 'l1']},
        {"name": "L2 Dropout Regularization", "regularization": ['dropout', 'l2']},
        {"name": "L1 Regularization", "regularization": 'l1'},
        {"name": "L2 Regularization", "regularization": 'l2'},
    ]

    score_name = 'MSE' if model_class == GRNN else 'Accuracy'
    results = []

    for config in model_configs:
        print(f"----- Fitting {config['name']} model -----")

        best_score = float("inf") if model_class == GRNN else 0
        best_sigma = None
        best_tau = None

        if config["regularization"] is None:
            param_grid = ParameterGrid({'sigma': sigma_range})
        else:
            param_grid = ParameterGrid({'sigma': sigma_range, 'tau': tau_range})

        for params in param_grid:
            sigma = params['sigma']
            tau = params.get('tau', None)

            if model_class == GRNN:
                model = GRNN(kernel="gaussian", sigma=sigma,
                             regularization=config["regularization"], tau=tau)
            else:
                model = PNN(kernel="gaussian", sigma=sigma,
                            regularization=config["regularization"], tau=tau,
                            n_classes=2, losses=[1]*2)

            try:
                score = evaluvate_on_kfold(X, y, model)
                if (model_class == GRNN and score < best_score) or (model_class != GRNN and score > best_score):
                    best_score = score
                    best_sigma = sigma
                    best_tau = tau
            except (ZeroDivisionError, RuntimeError):
                continue

        results.append({
            "Model": config["name"],
            "Best Sigma": best_sigma,
            "Best Tau": best_tau,
            score_name: best_score
        })

    return results

### GRNN - Diabetes Dataset

In [7]:
data = load_diabetes()

In [8]:
X = data['data']
y = data['target']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [9]:
results = grid_search_cv(
    X, y, GRNN,
    tau_range=[round(x * 0.01, 2) for x in range(1, 11)],
    sigma_range=[round(x * 0.01, 2) for x in range(1, 11)]
)

----- Fitting No Regularization model -----
----- Fitting L1 Dropout Regularization model -----
----- Fitting L2 Dropout Regularization model -----
----- Fitting L1 Regularization model -----
----- Fitting L2 Regularization model -----


In [10]:
results_df = pd.DataFrame(results)
results_df['Impovement %'] = np.round((results_df['MSE'].iloc[0] / results_df['MSE']) * 100, 2) - 100
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,MSE,Impovement %
0,No Regularization,0.05,,3228.996154,0.0
1,L1 Dropout Regularization,0.05,0.09,3216.995714,0.37
2,L2 Dropout Regularization,0.07,0.1,4035.168238,-19.98
3,L1 Regularization,0.08,0.03,3212.496162,0.51
4,L2 Regularization,0.06,0.07,3218.981511,0.31


### GRNN - Boston Housing

In [102]:
df = fetch_openml(name="boston", version=1)

In [104]:
X = df['data'].astype(np.float64).to_numpy()
y = df['target'].astype(np.float64).to_numpy()
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
results = grid_search_cv(X, y, GRNN,
                         tau_range=[round(x * 0.1, 2) for x in range(201, 301)],
                         sigma_range=[round(x, 2) for x in [i * 0.5 for i in range(20, 31)]])

----- Fitting No Regularization model -----
----- Fitting L1 Dropout Regularization model -----
----- Fitting L2 Dropout Regularization model -----
----- Fitting L1 Regularization model -----


In [None]:
results_df = pd.DataFrame(results)
results_df['Impovement %'] = np.round((results_df['MSE'].iloc[0] / results_df['MSE']) * 100, 2) - 100
results_df

### PNN - Adult income classification

In [15]:
train = pd.read_csv('./data/adult_income_train.csv').astype(np.float64).sample(600, random_state=30)
test = pd.read_csv('./data/adult_income_test.csv').astype(np.float64).sample(150, random_state=30)

X_train, X_test = train.drop(columns=['class']).to_numpy(), test.drop(columns=['class']).to_numpy()
y_train, y_test = train['class'].to_numpy(), test['class'].to_numpy()

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [16]:
model_configs = [
    {"name": "No Regularization", "regularization": None},
    {"name": "L1 Regularization", "regularization": 'l1'},
    {"name": "L2 Regularization", "regularization": 'l2'}
]

results = []

for config in model_configs:

    else:
        tau_range = [0.1, 0.2, 0.5, 1.0, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0]
    
    sigma_range = [0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 4.0]
    param_grid = {"sigma": sigma_range, "tau": tau_range}
    
    best_accuracy = 0
    best_params = {}
    
    for params in ParameterGrid(param_grid):
        sigma = params["sigma"]
        tau = params["tau"]
    
        model = PNN(kernel="gaussian", sigma=sigma,
                    regularization=config["regularization"],
                    tau=tau if config["regularization"] is not None else None,
                    n_classes=2, losses=[1]*2)
        model.fit(X_train, y_train)

        try:
            preds = [model.predict([x_test]) for x_test in X_test]
            accuracy = accuracy_score(y_test, preds)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = {"sigma": sigma, "tau": tau}
        except ZeroDivisionError:
            continue


    results.append({
        "Model": config["name"],
        "Best Sigma": best_params["sigma"],
        "Best Tau": best_params["tau"],
        "Accuracy": best_accuracy
    })

In [17]:
results_df = pd.DataFrame(results)
results_df['Impovement %'] = np.round((results_df['Accuracy'] / results_df['Accuracy'].iloc[0]) * 100, 2) - 100
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,Accuracy,Impovement %
0,No Regularization,0.25,0.1,0.813333,0.0
1,L1 Regularization,2.0,0.2,0.826667,1.64
2,L2 Regularization,0.5,1.0,0.82,0.82
3,L1 Dropout Regularization,0.5,11.0,0.833333,2.46
4,L2 Dropout Regularization,0.5,15.0,0.833333,2.46
