In [31]:
import numpy as np
import torch
import pandas as pd
from PNN import TrainablePNN
from GRNN import TrainableGRNN
from PNN import PNN
from GRNN import GRNN
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer, load_diabetes
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from ucimlrepo import fetch_ucirepo
from sklearn.utils import shuffle

# Base functions

In [2]:
def evaluvate_on_kfold(X, y, model):
    """Return the mean 3-fold cross-validated score of ``model`` on ``(X, y)``.

    The metric is chosen from the class name of ``model`` itself: a class whose
    name contains ``'GRNN'`` is scored with ``mean_squared_error``, anything
    else with ``accuracy_score``.

    Args:
        X: Feature container that supports integer-array indexing
            (folds are selected with ``X[train_index]``).
        y: Targets, indexed the same way as ``X``.
        model: Object exposing ``fit(X, y)`` and ``predict(samples)``; it is
            refitted on every fold.

    Returns:
        The mean of the per-fold scores, as computed by ``np.mean``.
    """
    # Decide from the model that is actually being evaluated. Reading the
    # notebook-global ``model_class`` here would fail on a fresh kernel and
    # could silently use a stale metric after another cell reassigns it.
    if 'GRNN' in type(model).__name__:
        score_func = mean_squared_error
    else:
        score_func = accuracy_score

    kf = KFold(n_splits=3)

    scores = []

    for train_index, test_index in kf.split(X):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        model.fit(X_train, y_train)
        # Each held-out sample is predicted on its own, wrapped as a one-row batch.
        preds = [model.predict([x_test]) for x_test in X_test]

        scores.append(score_func(y_true=y_test, y_pred=preds))

    return np.mean(scores)

In [3]:
def grid_search_cv(X, y, model_class, sigma_range, tau_range, n_classes=2, losses=None):
    """Two-stage grid search: sigma for the base model, then tau per regularizer.

    Stage 1 scores ``model_class(kernel="gaussian", sigma=...)`` for every value
    in ``sigma_range`` with ``evaluvate_on_kfold`` and keeps the best sigma.
    Stage 2 fixes that sigma and, separately for ``'l1'`` and ``'l2'``, scores
    ``TrainableGRNN`` / ``TrainablePNN`` for every value in ``tau_range``.

    A class whose name contains ``'GRNN'`` is treated as a regressor (lower
    score wins, starting from ``inf``); anything else as a classifier (higher
    score wins, starting from ``0``). Candidates that raise
    ``ZeroDivisionError`` are skipped.

    Args:
        X, y: Data passed through to ``evaluvate_on_kfold``.
        model_class: Base model class (its ``__name__`` selects the branch).
        sigma_range: Iterable of sigma candidates.
        tau_range: Iterable of tau candidates.
        n_classes: Forwarded to classifier constructors only.
        losses: Forwarded to classifier constructors only; defaults to
            ``[1] * n_classes``.

    Returns:
        A list of three dicts (base, L1, L2) with the keys ``"Model"``,
        ``"Best Sigma"``, ``"Best Tau"`` and ``"Score"``.
    """
    is_regression = 'GRNN' in model_class.__name__
    if losses is None:
        losses = [1] * n_classes

    # Only the classifier constructors receive the class-related arguments.
    class_kwargs = {} if is_regression else {"n_classes": n_classes, "losses": losses}
    worst_score = float('inf') if is_regression else 0

    def search(candidates, build_model):
        """Return (best candidate, best score) over ``candidates``."""
        winner, winner_score = None, worst_score
        for candidate in candidates:
            try:
                current = evaluvate_on_kfold(X, y, build_model(candidate))
            except ZeroDivisionError:
                continue
            improved = current < winner_score if is_regression else current > winner_score
            if improved:
                winner, winner_score = candidate, current
        return winner, winner_score

    # Stage 1: sigma of the non-trainable base model.
    best_sigma, best_score = search(
        sigma_range,
        lambda sigma: model_class(kernel="gaussian", sigma=sigma, **class_kwargs),
    )

    results = [{
        "Model": f"{model_class.__name__}",
        "Best Sigma": best_sigma,
        "Best Tau": None,
        "Score": best_score
    }]

    print(f"Best sigma found: {best_sigma} (Score: {best_score:.4f})")

    # Stage 2: tau of the trainable variant, once per regularization type.
    for reg in ('l1', 'l2'):
        def build_trainable(tau, reg=reg):
            if is_regression:
                return TrainableGRNN(sigma=best_sigma, regularization=reg, tau=tau)
            return TrainablePNN(sigma=best_sigma, regularization=reg, tau=tau,
                                n_classes=n_classes, losses=losses)

        best_tau, best_score = search(tau_range, build_trainable)

        results.append({
            "Model": f"{model_class.__name__} with {reg.upper()}",
            "Best Sigma": best_sigma,
            "Best Tau": best_tau,
            "Score": best_score
        })

    return results

In [4]:
# Accumulates one results DataFrame per dataset; the list is concatenated in
# the "Data joining" section.
all_results_data = []

# Breast cancer

In [5]:
# Load the scikit-learn breast-cancer dataset and standardise its features.
data = load_breast_cancer()
X = data['data']
# Despite the `_train` suffix this is the full target vector; the folds are cut
# later inside evaluvate_on_kfold.
y_train = data['target']

scaler = StandardScaler()
# NOTE(review): the scaler is fitted on all rows before cross-validation, so the
# held-out folds contribute to the scaling statistics — confirm this is intended.
X_train = scaler.fit_transform(X)
print(len(X_train))

569


In [6]:
# Classification task: search sigma for the plain PNN, then tau for its L1/L2 variants.
model_class = PNN
# Both grids nominally run from 0.1 to 5.0 in steps of 0.1; rounding to two
# decimals cleans up the float values produced by np.arange.
sigma_range = np.round(np.arange(0.1, 5.1, 0.1), 2)
tau_range = np.round(np.arange(0.1, 5.1, 0.1), 2)

evals = grid_search_cv(X_train, y_train, model_class, sigma_range, tau_range, n_classes=2, losses=None)

Best sigma found: 0.8 (Score: 0.9561)


  input = torch.tensor(input, dtype=torch.float32)


In [7]:
# Turn the grid-search rows into a table and annotate it for the combined report.
results_df = pd.DataFrame(evals)
# Accuracy gain relative to the baseline row (row 0), in percent. Rounding is
# applied after subtracting 100 so the stored value is the 2-decimal number
# itself; rounding first and subtracting afterwards leaves float residue.
results_df['Impovement %'] = np.round((results_df['Score'] / results_df['Score'].iloc[0]) * 100 - 100, 2)
results_df['Dataset'] = 'Breast Cancer'
results_df['Instances'] = len(X_train)
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,Score,Impovement %,Dataset,Instances
0,PNN,0.8,,0.956094,0.0,Breast Cancer,569
1,PNN with L1,0.8,4.9,0.959612,0.37,Breast Cancer,569
2,PNN with L2,0.8,2.2,0.959612,0.37,Breast Cancer,569


In [8]:
# Keep this dataset's table for the final report (re-running the cell appends it again).
all_results_data.append(results_df)

# Suspicious firms

In [9]:
# Load the audit-risk CSV from the local ./data directory.
df = pd.read_csv('./data/audit_risk.csv')

# Discard every row that contains a missing value.
df = df.dropna()

# LOCATION_ID is excluded from the features.
df = df.drop(columns=['LOCATION_ID'])

# 'Risk' is the target column; all remaining columns are used as features.
X = df.drop(columns=['Risk']).values
y_train = df['Risk'].values

# NOTE(review): scaling statistics are computed on all rows, before the CV split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X)
print(len(X_train))

775


In [10]:
# Classification task: search sigma for the plain PNN, then tau for its L1/L2 variants.
model_class = PNN
# Same nominal 0.1 … 5.0 grid (step 0.1) for both hyper-parameters.
sigma_range = np.round(np.arange(0.1, 5.1, 0.1), 2)
tau_range = np.round(np.arange(0.1, 5.1, 0.1), 2)

evals = grid_search_cv(X_train, y_train, model_class, sigma_range, tau_range, n_classes=2, losses=None)

Best sigma found: 0.5 (Score: 0.9548)


In [11]:
# Turn the grid-search rows into a table and annotate it for the combined report.
results_df = pd.DataFrame(evals)
# Accuracy gain relative to the baseline row (row 0), in percent. Rounding is
# applied after subtracting 100 so the stored value is the 2-decimal number
# itself; rounding first and subtracting afterwards leaves float residue.
results_df['Impovement %'] = np.round((results_df['Score'] / results_df['Score'].iloc[0]) * 100 - 100, 2)
results_df['Dataset'] = 'Suspicious firms'
results_df['Instances'] = len(X_train)
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,Score,Impovement %,Dataset,Instances
0,PNN,0.5,,0.95484,0.0,Suspicious firms,775
1,PNN with L1,0.5,1.6,0.936797,-1.89,Suspicious firms,775
2,PNN with L2,0.5,3.2,0.936797,-1.89,Suspicious firms,775


In [12]:
# Keep this dataset's table for the final report (re-running the cell appends it again).
all_results_data.append(results_df)

# Diabetes

In [13]:
# Load the scikit-learn diabetes regression dataset and standardise its features.
data = load_diabetes()

X = data['data']
# Full target vector; the folds are cut later inside evaluvate_on_kfold.
y_train = data['target']
# NOTE(review): scaling statistics are computed on all rows, before the CV split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X)
print(len(X_train))

442


In [14]:
# Regression task: search sigma for the plain GRNN, then tau for its L1/L2 variants.
model_class = GRNN
# Same nominal 0.1 … 5.0 grid (step 0.1) for both hyper-parameters.
sigma_range = np.round(np.arange(0.1, 5.1, 0.1), 2)
tau_range = np.round(np.arange(0.1, 5.1, 0.1), 2)

# n_classes / losses are accepted here but grid_search_cv does not forward them to GRNN models.
evals = grid_search_cv(X_train, y_train, model_class, sigma_range, tau_range, n_classes=2, losses=None)

Best sigma found: 1.0 (Score: 3219.3482)


In [15]:
# Turn the grid-search rows into a table and annotate it for the combined report.
results_df = pd.DataFrame(evals)
# MSE is lower-is-better, so the ratio is baseline / score: a positive value
# means the regularized model has the smaller error. Rounding is applied after
# subtracting 100 so the stored value carries no float residue.
results_df['Impovement %'] = np.round((results_df['Score'].iloc[0] / results_df['Score']) * 100 - 100, 2)
results_df['Dataset'] = 'Diabetes'
results_df['Instances'] = len(X_train)
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,Score,Impovement %,Dataset,Instances
0,GRNN,1.0,,3219.348204,0.0,Diabetes,442
1,GRNN with L1,1.0,4.0,3091.181332,4.15,Diabetes,442
2,GRNN with L2,1.0,3.0,3101.525256,3.8,Diabetes,442


In [16]:
# Keep this dataset's table for the final report (re-running the cell appends it again).
all_results_data.append(results_df)

# Concrete Compressive Strength

In [17]:
# Read the concrete-strength workbook straight from the UCI archive (needs network access).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
df = pd.read_excel(url)

# Every column except the last is a feature; the last column is the target.
X = df.iloc[:, :-1].values
y_train = df.iloc[:, -1].values

# NOTE(review): scaling statistics are computed on all rows, before the CV split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X)
print(len(X_train))

1030


In [18]:
# Regression task: search sigma for the plain GRNN, then tau for its L1/L2 variants.
model_class = GRNN
# Same nominal 0.1 … 5.0 grid (step 0.1) for both hyper-parameters.
sigma_range = np.round(np.arange(0.1, 5.1, 0.1), 2)
tau_range = np.round(np.arange(0.1, 5.1, 0.1), 2)

# n_classes / losses are accepted here but grid_search_cv does not forward them to GRNN models.
evals = grid_search_cv(X_train, y_train, model_class, sigma_range, tau_range, n_classes=2, losses=None)

Best sigma found: 0.6 (Score: 142.1832)


In [19]:
# Turn the grid-search rows into a table and annotate it for the combined report.
results_df = pd.DataFrame(evals)
# MSE is lower-is-better, so the ratio is baseline / score: a positive value
# means the regularized model has the smaller error. Rounding is applied after
# subtracting 100 so the stored value carries no float residue.
results_df['Impovement %'] = np.round((results_df['Score'].iloc[0] / results_df['Score']) * 100 - 100, 2)
results_df['Dataset'] = 'Concrete Compressive Strength'
results_df['Instances'] = len(X_train)
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,Score,Impovement %,Dataset,Instances
0,GRNN,0.6,,142.183214,0.0,Concrete Compressive Strength,1030
1,GRNN with L1,0.6,1.3,135.971447,4.57,Concrete Compressive Strength,1030
2,GRNN with L2,0.6,3.1,136.932485,3.83,Concrete Compressive Strength,1030


In [20]:
# Keep this dataset's table for the final report (re-running the cell appends it again).
all_results_data.append(results_df)

# Stock portfolio performance

In [27]:
# Fetch UCI repository dataset id=390 via ucimlrepo.
stock_portfolio_performance = fetch_ucirepo(id=390)
  
# Features: every feature column except the last six.
X = stock_portfolio_performance.data.features.iloc[:, :-6]
# Target: 'Excess Return'. The last character of each value is dropped before
# the float conversion — presumably a trailing '%' sign (TODO confirm).
y_train = stock_portfolio_performance.data.targets['Excess Return'].apply(lambda x: float(x[:-1])).to_numpy()

# NOTE(review): scaling statistics are computed on all rows, before the CV split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X)
print(len(X_train))

315


In [22]:
# Regression task: search sigma for the plain GRNN, then tau for its L1/L2 variants.
model_class = GRNN
# Same nominal 0.1 … 5.0 grid (step 0.1) for both hyper-parameters.
sigma_range = np.round(np.arange(0.1, 5.1, 0.1), 2)
tau_range = np.round(np.arange(0.1, 5.1, 0.1), 2)

# n_classes / losses are accepted here but grid_search_cv does not forward them to GRNN models.
evals = grid_search_cv(X_train, y_train, model_class, sigma_range, tau_range, n_classes=2, losses=None)

Best sigma found: 5.0 (Score: 6.7776)


In [23]:
# Turn the grid-search rows into a table and annotate it for the combined report.
results_df = pd.DataFrame(evals)
# MSE is lower-is-better, so the ratio is baseline / score: a positive value
# means the regularized model has the smaller error. Rounding is applied after
# subtracting 100 so the stored value carries no float residue.
results_df['Impovement %'] = np.round((results_df['Score'].iloc[0] / results_df['Score']) * 100 - 100, 2)
results_df['Dataset'] = 'Stock portfolio performance'
results_df['Instances'] = len(X_train)
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,Score,Impovement %,Dataset,Instances
0,GRNN,5.0,,6.777578,0.0,Stock portfolio performance,315
1,GRNN with L1,5.0,4.8,6.232009,8.75,Stock portfolio performance,315
2,GRNN with L2,5.0,3.2,6.243142,8.56,Stock portfolio performance,315


In [24]:
# Keep this dataset's table for the final report (re-running the cell appends it again).
all_results_data.append(results_df)

# Parkinsons

In [62]:
# Fetch UCI repository dataset id=174 via ucimlrepo.
parkinsons = fetch_ucirepo(id=174) 
  
X = parkinsons.data.features 
# Target: the 'status' column.
y_train = parkinsons.data.targets['status'].to_numpy()

# NOTE(review): scaling statistics are computed on all rows, before the CV split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X)

# Shuffle rows with a fixed seed before evaluation.
# NOTE(review): evaluvate_on_kfold builds KFold(n_splits=3) without a shuffle
# argument, so this is presumably what mixes the rows across folds — confirm.
X_train, y_train = shuffle(X_train, y_train, random_state=42)

print(len(X_train))

195


In [63]:
# Classification task: search sigma for the plain PNN, then tau for its L1/L2 variants.
model_class = PNN
# Same nominal 0.1 … 5.0 grid (step 0.1) for both hyper-parameters.
sigma_range = np.round(np.arange(0.1, 5.1, 0.1), 2)
tau_range = np.round(np.arange(0.1, 5.1, 0.1), 2)

evals = grid_search_cv(X_train, y_train, model_class, sigma_range, tau_range, n_classes=2, losses=None)

Best sigma found: 0.6 (Score: 0.9538)


In [65]:
# Turn the grid-search rows into a table and annotate it for the combined report.
results_df = pd.DataFrame(evals)
# Accuracy gain relative to the baseline row (row 0), in percent. Rounding is
# applied after subtracting 100 so the stored value is the 2-decimal number
# itself; rounding first and subtracting afterwards leaves float residue.
results_df['Impovement %'] = np.round((results_df['Score'] / results_df['Score'].iloc[0]) * 100 - 100, 2)
results_df['Dataset'] = 'Parkinsons'
results_df['Instances'] = len(X_train)
results_df

Unnamed: 0,Model,Best Sigma,Best Tau,Score,Impovement %,Dataset,Instances
0,PNN,0.6,,0.953846,0.0,Parkinsons,195
1,PNN with L1,0.6,1.5,0.948718,-0.54,Parkinsons,195
2,PNN with L2,0.6,1.9,0.958974,0.54,Parkinsons,195


In [85]:
# Keep this dataset's table for the final report (re-running the cell appends it again).
all_results_data.append(results_df)

# Data joining

In [89]:
# Stack the per-dataset tables into a single frame with a fresh 0..n-1 index.
combined_df = pd.concat(all_results_data, ignore_index=True)

In [90]:
# Display the combined long-format table.
combined_df

Unnamed: 0,Model,Best Sigma,Best Tau,Score,Impovement %,Dataset,Instances
0,PNN,0.8,,0.956094,0.0,Breast Cancer,569
1,PNN with L1,0.8,4.9,0.959612,0.37,Breast Cancer,569
2,PNN with L2,0.8,2.2,0.959612,0.37,Breast Cancer,569
3,PNN,0.5,,0.95484,0.0,Suspicious firms,775
4,PNN with L1,0.5,1.6,0.936797,-1.89,Suspicious firms,775
5,PNN with L2,0.5,3.2,0.936797,-1.89,Suspicious firms,775
6,GRNN,1.0,,3219.348204,0.0,Diabetes,442
7,GRNN with L1,1.0,4.0,3091.181332,4.15,Diabetes,442
8,GRNN with L2,1.0,3.0,3101.525256,3.8,Diabetes,442
9,GRNN,0.6,,142.183214,0.0,Concrete Compressive Strength,1030


In [95]:
# Pivot the long per-model table into one wide row per dataset:
# baseline columns first, then tau / score / improvement for L1 and L2.
def _first_value(frame, column):
    """Return the first value of ``column`` in ``frame``, or None when ``frame`` has no rows."""
    return frame[column].values[0] if not frame.empty else None


results = []

for dataset in combined_df['Dataset'].unique():
    df_dataset = combined_df[combined_df['Dataset'] == dataset]

    for model_type in ('PNN', 'GRNN'):
        base = df_dataset[df_dataset['Model'] == model_type]
        if base.empty:
            # This dataset has no rows for this model family.
            continue

        row = {
            'Dataset': dataset,
            'Score metric': 'MSE' if model_type != 'PNN' else 'Accuracy',
            'Best Sigma': base['Best Sigma'].values[0],
            'Score Base': base['Score'].values[0],  # baseline PNN/GRNN score
        }

        # Same three columns for each regularized variant, L1 before L2.
        for reg in ('L1', 'L2'):
            regularized = df_dataset[df_dataset['Model'] == f'{model_type} with {reg}']
            row[f'Best tau ({reg})'] = _first_value(regularized, 'Best Tau')
            row[f'Score {reg}'] = _first_value(regularized, 'Score')
            row[f'Impovement % ({reg})'] = _first_value(regularized, 'Impovement %')

        row['Instances'] = base['Instances'].values[0]
        results.append(row)

final_df = pd.DataFrame(results)

In [103]:
# 'Score metric' is not part of the exported table.
# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and drop() would otherwise raise KeyError.
final_df = final_df.drop(columns=['Score metric'], errors='ignore')

In [107]:
# Render the summary as a LaTeX tabular.
# escape=True: several column names contain '%', which starts a comment in
# LaTeX; left unescaped it cuts the header row short when the table is compiled.
latex_table = final_df.to_latex(
    index=False,
    escape=True,
    formatters={
        # One decimal for hyper-parameters, three for scores, two for percentages.
        'Best Sigma': '{:.1f}'.format,
        'Best tau (L1)': '{:.1f}'.format,
        'Best tau (L2)': '{:.1f}'.format,
        'Score Base': '{:.3f}'.format,
        'Score L1': '{:.3f}'.format,
        'Score L2': '{:.3f}'.format,
        'Impovement % (L1)': '{:.2f}'.format,
        'Impovement % (L2)': '{:.2f}'.format
    }
)

print(latex_table)

\begin{tabular}{lrrrrrrrrr}
\toprule
Dataset & Best Sigma & Score Base & Best tau (L1) & Score L1 & Impovement % (L1) & Best tau (L2) & Score L2 & Impovement % (L2) & Instances \\
\midrule
Breast Cancer & 0.8 & 0.956 & 4.9 & 0.960 & 0.37 & 2.2 & 0.960 & 0.37 & 569 \\
Suspicious firms & 0.5 & 0.955 & 1.6 & 0.937 & -1.89 & 3.2 & 0.937 & -1.89 & 775 \\
Diabetes & 1.0 & 3219.348 & 4.0 & 3091.181 & 4.15 & 3.0 & 3101.525 & 3.80 & 442 \\
Concrete Compressive Strength & 0.6 & 142.183 & 1.3 & 135.971 & 4.57 & 3.1 & 136.932 & 3.83 & 1030 \\
Stock portfolio performance & 5.0 & 6.778 & 4.8 & 6.232 & 8.75 & 3.2 & 6.243 & 8.56 & 315 \\
Parkinsons & 0.6 & 0.954 & 1.5 & 0.949 & -0.54 & 1.9 & 0.959 & 0.54 & 195 \\
\bottomrule
\end{tabular}

