In [13]:
import os
import pathlib
import time

import numpy
import sklearn.decomposition
import sklearn.ensemble
import sklearn.model_selection
import sklearn.neighbors
import sklearn.neural_network
import sklearn.preprocessing
import sklearn.svm
import sklearn.tree

from old.output import save_fold, save_mean_std
from result import calculate_test
from samples import get_samples_with_patch

In [14]:
# Experiment-wide settings read by the cells below.
cfg = dict(
    fold=5,  # number of splits for the shuffle splitter and `cv` for the grid search
    n_labels=5,
    path_base="dataset",  # feature files are loaded from "../<path_base>/"
    path_out="out",  # root directory under which results are written
    test_size=0.2,
    train_size=0.8,
)

In [15]:
# Grid-search candidates, keyed by estimator class name (the evaluation cell
# looks entries up via `classifier.__class__.__name__`).
hyperparams = dict(
    DecisionTreeClassifier=dict(
        criterion=["gini", "entropy"],
        splitter=["best", "random"],
        max_depth=[10, 100, 1000],
    ),
    KNeighborsClassifier=dict(
        n_neighbors=list(range(2, 11, 2)),  # 2, 4, 6, 8, 10
        weights=["uniform", "distance"],
        metric=["euclidean", "manhattan"],
    ),
    MLPClassifier=dict(
        activation=["identity", "logistic", "tanh", "relu"],
        solver=["adam", "sgd"],
        learning_rate_init=[0.01, 0.001, 0.0001],
        momentum=[0.9, 0.4, 0.1],
    ),
    RandomForestClassifier=dict(
        n_estimators=list(range(200, 1001, 200)),  # 200, 400, ..., 1000
        max_features=["sqrt", "log2"],
        criterion=["gini", "entropy"],
        max_depth=[10, 100, 1000],
    ),
    SVC=dict(
        kernel=["linear", "poly", "rbf", "sigmoid"],
    ),
)
# Seeded stratified shuffle splitter; the evaluation cell draws its
# train/test index pairs from it.
index = sklearn.model_selection.StratifiedShuffleSplit(
    n_splits=cfg["fold"],
    train_size=cfg["train_size"],
    test_size=cfg["test_size"],
    random_state=1,
)

In [16]:
# Load the SURF feature table: every column but the last is a feature,
# the last column is the label.
surf_path = os.path.join("../", cfg["path_base"], "surf64.txt")
surf = numpy.loadtxt(surf_path)
samples, n_features = surf.shape  # n_features counts the label column too
x_surf = surf[:, :-1]
y_surf = surf[:, -1]

In [17]:
# Load the MobileNetV2 features/labels and standardise the features.
n_patch = 3  # presumably matches "patch=3" in the file name below — TODO confirm
mobilenet_path = os.path.join("../", cfg["path_base"], "horizontal+patch=3+mobilenetv2.npz")
mobilenet = numpy.load(mobilenet_path)
y = mobilenet["labels"]
# The scaler is fitted on the full feature matrix (all rows) in one go.
x = sklearn.preprocessing.StandardScaler().fit_transform(mobilenet["features"])

print(x.shape, y.shape)

(1125, 1280) (1125,)


In [18]:
# For each PCA size: reduce the standardised MobileNetV2 features, pick the best
# hyperparameters per classifier with a grid search, then refit/evaluate the best
# estimator on every shuffle split and persist per-fold and aggregated results.
for pca in (128, 256):
    # Keep the projection in its own variable. Overwriting `x` would feed the
    # already-reduced features into the next PCA size and into any re-run of
    # this cell. PCA may select a randomized solver for data of this size, so
    # it is seeded like every other stochastic component here.
    x_pca = sklearn.decomposition.PCA(n_components=pca, random_state=1).fit_transform(x)

    list_result_classifier = []
    candidates = (
        sklearn.tree.DecisionTreeClassifier(random_state=1),
        sklearn.neighbors.KNeighborsClassifier(n_jobs=-1),
        sklearn.neural_network.MLPClassifier(random_state=1),
        sklearn.ensemble.RandomForestClassifier(random_state=1),
        sklearn.svm.SVC(random_state=1, probability=True),
    )
    # NOTE(review): the [4:] slice restricts the run to the last candidate (SVC);
    # drop the slice to evaluate every classifier.
    for classifier in candidates[4:]:
        classifier_name = classifier.__class__.__name__

        # Hyperparameter search is run on the full reduced data set.
        model = sklearn.model_selection.GridSearchCV(classifier, hyperparams[classifier_name], scoring="accuracy", cv=cfg["fold"])
        model.fit(x_pca, y)

        best_classifier = model.best_estimator_
        best_params = model.best_params_

        list_result_fold = []
        list_time = []

        path_classifier = os.path.join(cfg["path_out"], "mobilenetv2", classifier_name)
        pathlib.Path(path_classifier).mkdir(parents=True, exist_ok=True)
        # Split indices are generated from the SURF arrays and then applied to the
        # MobileNetV2 features through get_samples_with_patch — presumably it maps
        # per-image indices to per-patch rows; TODO confirm.
        for fold, (index_train, index_test) in enumerate(index.split(x_surf, y_surf)):
            x_train, y_train = get_samples_with_patch(x_pca, y, index_train, n_patch)
            x_test, y_test = get_samples_with_patch(x_pca, y, index_test, n_patch)

            print(fold, classifier_name, x_train.shape, x_test.shape)

            # The measured time covers both fitting and probability prediction.
            start_time = time.time()
            best_classifier.fit(x_train, y_train)
            y_pred = best_classifier.predict_proba(x_test)
            end_time = time.time()

            path_fold = os.path.join(path_classifier, str(n_patch), str(pca), str(fold))
            pathlib.Path(path_fold).mkdir(parents=True, exist_ok=True)

            result_max_rule, result_prod_rule, result_sum_rule = calculate_test(cfg, classifier, fold, y_pred, y_test, n_patch=n_patch)

            final_time = end_time - start_time

            list_result_fold.extend((result_max_rule, result_prod_rule, result_sum_rule))
            list_time.append(final_time)

            save_fold(classifier_name, "mobilenetv2", final_time, (result_max_rule, result_prod_rule, result_sum_rule), path_fold)
        # NOTE(review): the summary goes to the classifier directory, which does not
        # include the PCA size — verify results are not overwritten across PCA sizes.
        save_mean_std(cfg, list_result_fold, list_time, path_classifier)
        list_result_classifier.extend(list_result_fold)
    # NOTE(review): stops after the first PCA size (128); remove to also run 256.
    break

0 SVC (900, 128) (225, 128)
1 SVC (900, 128) (225, 128)
2 SVC (900, 128) (225, 128)
3 SVC (900, 128) (225, 128)
4 SVC (900, 128) (225, 128)
best_accuracy: 80.8, best_rule: prod

