In [None]:
# Imports
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import joblib

print('Imports ok')

Imports ok


In [None]:
def train_and_evaluate(X, y, label_names=None, prefix='nca',
                       kernels=('linear', 'rbf', 'poly'), test_size=0.2, random_state=42):
    """Train one SVM per kernel on a stratified split and persist the most accurate one.

    The data is split with ``train_test_split`` (stratified on ``y``), features
    are standardised with a ``StandardScaler`` fitted on the training part only,
    and an ``SVC`` is fitted for every kernel in ``kernels``. Accuracy and
    weighted precision / recall / F1 on the held-out part are printed per kernel.
    The model with the highest accuracy (first one wins on ties) and the scaler
    are written to ``{prefix}_svm_best.joblib`` and ``{prefix}_scaler.joblib``
    in the current working directory.

    Args:
        X: feature matrix accepted by ``train_test_split`` / ``StandardScaler``.
        y: class labels, one per row of ``X``; also used for stratification.
        label_names: optional display names for the classes. When given, a
            classification report for the best model is printed. Names are
            assumed to be ordered like the sorted unique values of ``y``
            (presumably what ``LabelEncoder.classes_`` provides - confirm for
            other callers).
        prefix: file-name prefix for the saved model and scaler.
        kernels: SVC kernel names to try, in order.
        test_size: fraction (or count) of samples held out for evaluation.
        random_state: seed used for the split and passed to each ``SVC``.

    Returns:
        ``(results, best_model, scaler)`` where ``results`` maps each kernel
        name to a dict with keys ``'model'``, ``'accuracy'``, ``'precision'``,
        ``'recall'``, ``'f1'`` and ``'y_pred'``.

    Raises:
        ValueError: if ``kernels`` is empty.
    """
    kernels = list(kernels)
    if not kernels:
        raise ValueError('kernels must contain at least one SVC kernel name')

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

    # Fit the scaler on the training part only so no test statistics leak in.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)

    results = {}
    for k in kernels:
        clf = SVC(kernel=k, random_state=random_state, gamma='scale')
        clf.fit(X_train_s, y_train)
        y_pred = clf.predict(X_test_s)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
        results[k] = {'model': clf, 'accuracy': acc, 'precision': prec, 'recall': rec, 'f1': f1, 'y_pred': y_pred}
        print(f'Kernel={k:6s}  Accuracy={acc:.4f}  Precision={prec:.4f}  Recall={rec:.4f}  F1={f1:.4f}')

    # NOTE(review): the winner is chosen on the same held-out split whose
    # metrics are printed above, so its reported score is optimistically biased.
    best_k = max(results, key=lambda kk: results[kk]['accuracy'])
    best_model = results[best_k]['model']

    # Persist the scaler together with the model: both are needed at inference time.
    model_file = f'{prefix}_svm_best.joblib'
    scaler_file = f'{prefix}_scaler.joblib'
    joblib.dump(best_model, model_file)
    joblib.dump(scaler, scaler_file)
    print(f'Saved best model ({best_k}) -> {model_file} and scaler -> {scaler_file}')

    if label_names is not None:
        # Class names may be non-strings (e.g. integer class codes); convert
        # them so the report can format them as text.
        target_names = [str(name) for name in label_names]
        # Pin the report to every class in y so a class that is absent from the
        # test split does not make target_names and the detected labels differ
        # in length. Only done when the name count matches the class count.
        all_labels = np.unique(y)
        report_labels = all_labels if len(all_labels) == len(target_names) else None
        print('Classification report (best):')
        print(classification_report(
            y_test,
            results[best_k]['y_pred'],
            labels=report_labels,
            target_names=target_names,
            zero_division=0,
        ))
    return results, best_model, scaler

In [None]:
# Train SVMs on whichever NCA feature exports are present (2-D and/or 10-D).
# Each export is optional; the variables for a dataset are only defined when
# its CSV file exists in the current working directory.
NCA2_CSV = 'features_nca_2.csv'
NCA10_CSV = 'features_nca_10.csv'


def _extract_labels(df):
    """Return the raw (un-encoded) class labels for an NCA feature table.

    Column priority is 'label', then 'label_code'. When neither exists the
    label is parsed from the 'filename' column as the first 'h<digits>' token
    (case-insensitive, upper-cased); rows without such a token become 'UNKNOWN'.

    Raises:
        KeyError: if none of 'label', 'label_code' or 'filename' is present.
    """
    for col in ('label', 'label_code'):
        if col in df.columns:
            return df[col].values
    if 'filename' not in df.columns:
        raise KeyError(
            "No label source found: expected a 'label', 'label_code' or "
            f"'filename' column, got {list(df.columns)}"
        )
    return (
        df['filename'].astype(str)
        .str.extract(r'(?i)(h\d+)')[0]
        .str.upper()
        .fillna('UNKNOWN')
        .values
    )


def _prepare_nca_dataset(df, tag):
    """Build features and encoded labels from an NCA feature table.

    Feature columns are all columns whose name starts with 'nca' (this also
    covers names starting with 'nca10_'). ``tag`` is only used in the printed
    class summary.

    Returns:
        ``(feature_columns, X, y, encoder, y_encoded)``.

    Raises:
        ValueError: if the table has no column starting with 'nca'.
    """
    feature_columns = [c for c in df.columns if c.startswith('nca')]
    if not feature_columns:
        raise ValueError(
            f"No feature columns starting with 'nca' found; columns are {list(df.columns)}"
        )
    X = df[feature_columns].values
    y = _extract_labels(df)
    encoder = LabelEncoder()
    y_encoded = encoder.fit_transform(y)
    print(f'Classes ({tag}):', list(encoder.classes_))
    return feature_columns, X, y, encoder, y_encoded


files_checked = []

# 2-D NCA features
if os.path.exists(NCA2_CSV):
    df2 = pd.read_csv(NCA2_CSV)
    print(f'Loaded {NCA2_CSV} ->', df2.shape)
    files_checked.append(NCA2_CSV)
    nca_cols, X2, y2, le2, y2_enc = _prepare_nca_dataset(df2, '2D')
    # Class names are passed as strings so numeric label columns are reported correctly.
    res2, best2, scaler2 = train_and_evaluate(
        X2, y2_enc, label_names=[str(c) for c in le2.classes_], prefix='nca2'
    )

# 10-D NCA features (if present)
if os.path.exists(NCA10_CSV):
    df10 = pd.read_csv(NCA10_CSV)
    print(f'Loaded {NCA10_CSV} ->', df10.shape)
    files_checked.append(NCA10_CSV)
    feat_cols, X10, y10, le10, y10_enc = _prepare_nca_dataset(df10, '10D')
    res10, best10, scaler10 = train_and_evaluate(
        X10, y10_enc, label_names=[str(c) for c in le10.classes_], prefix='nca10'
    )

if not files_checked:
    print('No NCA feature files found. Run `nca_analysis.ipynb` to generate features_nca_2.csv / features_nca_10.csv')

print('Files checked:', files_checked)
,


Loaded features_nca_2.csv -> (3277, 4)


''