In [2]:
from classifier import classifier_no_patch
%reload_ext autoreload
%autoreload 2
import numpy
import os
import pathlib
import sklearn.decomposition
import sklearn.ensemble
import sklearn.model_selection
import sklearn.neighbors
import sklearn.neural_network
import sklearn.preprocessing
import sklearn.svm
import sklearn.tree
import sys

# Put the parent directory on the import path (once) so that modules living
# one level above this notebook can be imported.
module_path = os.path.abspath(os.pardir)
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)
    
from output import save_mean_std

In [3]:
# Experiment settings read by the cells below and passed on to the helper functions.
cfg = dict(
    fold=5,                  # number of shuffle splits and of grid-search CV folds
    n_labels=5,
    path_base="../dataset",  # directory the descriptor file is loaded from
    path_out="out",          # root directory under which result folders are created
    test_size=0.2,           # test fraction given to the splitter
    train_size=0.8,          # train fraction given to the splitter
)

In [4]:
# Grid-search space per estimator, keyed by the estimator's class name
# (looked up later via classifier.__class__.__name__).
hyperparams = dict(
    DecisionTreeClassifier=dict(
        criterion=["gini", "entropy"],
        splitter=["best", "random"],
        max_depth=[10, 100, 1000],
    ),
    KNeighborsClassifier=dict(
        n_neighbors=[2, 4, 6, 8, 10],
        weights=["uniform", "distance"],
        metric=["euclidean", "manhattan"],
    ),
    MLPClassifier=dict(
        activation=["identity", "logistic", "tanh", "relu"],
        solver=["adam", "sgd"],
        learning_rate_init=[0.01, 0.001, 0.0001],
        momentum=[0.9, 0.4, 0.1],
    ),
    RandomForestClassifier=dict(
        n_estimators=[200, 400, 600, 800, 1000],
        max_features=["sqrt", "log2"],
        criterion=["gini", "entropy"],
        max_depth=[10, 100, 1000],
    ),
    SVC=dict(
        kernel=["linear", "poly", "rbf", "sigmoid"],
    ),
)
# Stratified shuffle splitter configured from cfg; the fixed random_state
# makes the generated train/test indices repeatable between runs.
index = sklearn.model_selection.StratifiedShuffleSplit(
    n_splits=cfg["fold"],
    train_size=cfg["train_size"],
    test_size=cfg["test_size"],
    random_state=1,
)

In [5]:
# Read the descriptor table from disk as a 2-D float array.
surf_path = os.path.join(cfg["path_base"], "surf64.txt")
surf = numpy.loadtxt(surf_path)

# n_features counts every column of the file, including the final one that is
# split off as y below.
samples, n_features = surf.shape

# All columns but the last are the feature matrix; the last column is the target.
x = surf[:, :-1]
y = surf[:, -1]

# Standardize the features; the scaler is fit on the complete matrix.
scaler = sklearn.preprocessing.StandardScaler()
x_normalized = scaler.fit_transform(x)

print(x.shape, y.shape)

(375, 257) (375,)


In [6]:
# When True, stop after the first (PCA setting, classifier) combination. This is
# exactly what the previous unconditional `break` statements did, so the default
# keeps the existing behaviour. Set it to False to run the full sweep over every
# PCA setting and every classifier.
FIRST_COMBINATION_ONLY = True

# NOTE(review): the PCA projection and the grid search below are both fit on the
# whole dataset, i.e. including the samples that index.split() later hands out
# as test folds — confirm this is intended.
for pca in (128, 256, -1):
    # A non-positive value skips PCA and uses the standardized features as-is.
    x = x_normalized
    if pca > 0:
        x = sklearn.decomposition.PCA(n_components=pca).fit_transform(x)

    # Fresh, unfitted estimators for this PCA setting.
    classifiers = (
        sklearn.tree.DecisionTreeClassifier(random_state=1),
        sklearn.neighbors.KNeighborsClassifier(n_jobs=-1),
        sklearn.neural_network.MLPClassifier(random_state=1),
        sklearn.ensemble.RandomForestClassifier(random_state=1),
        sklearn.svm.SVC(random_state=1, probability=True),
    )

    list_result_classifier = list()
    for classifier in classifiers:
        # The class name selects the hyper-parameter grid and names the output folder.
        classifier_name = classifier.__class__.__name__

        # Exhaustive search over this classifier's grid, scored by accuracy.
        model = sklearn.model_selection.GridSearchCV(
            classifier,
            hyperparams[classifier_name],
            scoring="accuracy",
            cv=cfg["fold"],
        )
        model.fit(x, y)

        best_classifier = model.best_estimator_
        best_params = model.best_params_

        # Accumulators handed to classifier_no_patch for every fold
        # (presumably appended to by it — verify against the helper).
        list_result_fold = list()
        list_time = list()

        path_classifier = os.path.join(cfg["path_out"], "surf", classifier_name)
        pathlib.Path(path_classifier).mkdir(parents=True, exist_ok=True)
        for fold, (index_train, index_test) in enumerate(index.split(x, y)):
            classifier_no_patch(cfg, best_classifier, classifier_name, "surf", fold, index_test, index_train, list_result_fold, list_time, path_classifier, pca, x, y)
        save_mean_std(best_params, cfg, list_result_fold, list_time, path_classifier)

        # Extend in place instead of rebuilding the list on every iteration.
        list_result_classifier.extend(list_result_fold)

        if FIRST_COMBINATION_ONLY:
            break
    if FIRST_COMBINATION_ONLY:
        break

0 DecisionTreeClassifier (300, 128) (75, 128)
1 DecisionTreeClassifier (300, 128) (75, 128)
2 DecisionTreeClassifier (300, 128) (75, 128)
3 DecisionTreeClassifier (300, 128) (75, 128)
4 DecisionTreeClassifier (300, 128) (75, 128)
best_accuracy: 42.9333, best_rule: max

