In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PNN import PNN
from GRNN import GRNN
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler

from sklearn.datasets import load_iris, load_diabetes, load_digits, fetch_california_housing
from sklearn.datasets import fetch_openml
from torchvision.transforms import ToTensor
from torchvision.datasets import FashionMNIST
from torchvision.datasets import CIFAR10
from torch import squeeze
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error

### GRNN - Boston Housing

In [9]:
df = fetch_openml(name="boston", version=1)

In [10]:
X = df['data'].astype(np.float64).to_numpy()
y = df['target'].to_numpy()
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=42, test_size=0.25)

In [12]:
model_configs = [
    {"name": "No Regularization", "regularization": None},
    {"name": "L1 Regularization", "regularization": 'l1'},
    {"name": "L2 Regularization", "regularization": 'l2'},
    {"name": "L1 Dropout Regularization", "regularization": ['dropout', 'l1']},
    {"name": "L2 Dropout Regularization", "regularization": ['dropout', 'l2']},
]

results = []

for config in model_configs:

    if config["regularization"] is not None and isinstance(config["regularization"], list) and 'dropout' in config["regularization"]:
        tau_range = list(range(1, 16))
    else:
        tau_range = [0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 2.5, 5.0]
    
    sigma_range = [0.1, 0.2, 0.5, 1.0, 2.0]
    param_grid = {"sigma": sigma_range, "tau": tau_range}
    
    best_mse = float("inf")
    best_params = {}
    
    for params in ParameterGrid(param_grid):
        sigma = params["sigma"]
        tau = params["tau"]
    
        model = GRNN(kernel="gaussian", sigma=sigma,
                     regularization=config["regularization"],
                     tau=tau if config["regularization"] is not None else None)
        model.fit(X_train, y_train)

        try:
            preds = [model.predict([x_test]) for x_test in X_test]
            mse = mean_squared_error(y_test, preds)
            if mse < best_mse:
                best_mse = mse
                best_params = {"sigma": sigma, "tau": tau}
        except ZeroDivisionError:
            continue


    results.append({
        "Model": config["name"],
        "Best Sigma": best_params["sigma"],
        "Best Tau": best_params["tau"],
        "MSE": best_mse
    })

In [13]:
results_df = pd.DataFrame(results)
results_df['Impovement %'] = np.round((results_df['MSE'].iloc[0] / results_df['MSE']) * 100, 2) - 100
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,MSE,Impovement %
0,No Regularization,0.5,0.01,17.244074,0.0
1,L1 Regularization,2.0,0.1,16.774283,2.8
2,L2 Regularization,0.5,0.5,16.75263,2.93
3,L1 Dropout Regularization,0.5,15.0,16.203154,6.42
4,L2 Dropout Regularization,0.5,4.0,16.953243,1.72


### PNN - Adult income classification

In [15]:
train = pd.read_csv('./data/adult_income_train.csv').astype(np.float64).sample(600, random_state=30)
test = pd.read_csv('./data/adult_income_test.csv').astype(np.float64).sample(150, random_state=30)

X_train, X_test = train.drop(columns=['class']).to_numpy(), test.drop(columns=['class']).to_numpy()
y_train, y_test = train['class'].to_numpy(), test['class'].to_numpy()

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [16]:
model_configs = [
    {"name": "No Regularization", "regularization": None},
    {"name": "L1 Regularization", "regularization": 'l1'},
    {"name": "L2 Regularization", "regularization": 'l2'},
    {"name": "L1 Dropout Regularization", "regularization": ['dropout', 'l1']},
    {"name": "L2 Dropout Regularization", "regularization": ['dropout', 'l2']},
]

results = []

for config in model_configs:

    if config["regularization"] is not None and isinstance(config["regularization"], list) and 'dropout' in config["regularization"]:
        tau_range = list(range(4, 16))
    else:
        tau_range = [0.1, 0.2, 0.5, 1.0, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0]
    
    sigma_range = [0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 4.0]
    param_grid = {"sigma": sigma_range, "tau": tau_range}
    
    best_accuracy = 0
    best_params = {}
    
    for params in ParameterGrid(param_grid):
        sigma = params["sigma"]
        tau = params["tau"]
    
        model = PNN(kernel="gaussian", sigma=sigma,
                    regularization=config["regularization"],
                    tau=tau if config["regularization"] is not None else None,
                    n_classes=2, losses=[1]*2)
        model.fit(X_train, y_train)

        try:
            preds = [model.predict([x_test]) for x_test in X_test]
            accuracy = accuracy_score(y_test, preds)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = {"sigma": sigma, "tau": tau}
        except ZeroDivisionError:
            continue


    results.append({
        "Model": config["name"],
        "Best Sigma": best_params["sigma"],
        "Best Tau": best_params["tau"],
        "Accuracy": best_accuracy
    })

In [17]:
results_df = pd.DataFrame(results)
results_df['Impovement %'] = np.round((results_df['Accuracy'] / results_df['Accuracy'].iloc[0]) * 100, 2) - 100
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,Accuracy,Impovement %
0,No Regularization,0.25,0.1,0.813333,0.0
1,L1 Regularization,2.0,0.2,0.826667,1.64
2,L2 Regularization,0.5,1.0,0.82,0.82
3,L1 Dropout Regularization,0.5,11.0,0.833333,2.46
4,L2 Dropout Regularization,0.5,15.0,0.833333,2.46
