### Setup

In [1]:
import pickle
from pathlib import Path

import numpy as np
import matplotlib as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import LeavePGroupsOut
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from IPython.display import clear_output

In [2]:
%cd '/content/drive/MyDrive/texture/data'

/content/drive/MyDrive/texture/data


### Utils

In [3]:
def root(name):
    """Return the part of ``name`` that comes before its first ``+``.

    ``name`` is converted with ``str`` first, so path objects are accepted.
    If there is no ``+`` in the string, the whole string is returned.
    """
    prefix, _, _ = str(name).partition("+")
    return prefix

### Selection

In [4]:
def search_lda(root, x, y, g=None, cv=None):
    """Grid-search the shrinkage of a least-squares LDA and pickle the result.

    Parameters
    ----------
    root : str
        Prefix of the output file; the result is written to
        ``"{root}+lda.pickle"``.
    x, y
        Training data and targets, forwarded unchanged to ``fit``.
    g : optional
        Forwarded to ``fit`` as ``groups``.
    cv : optional
        Forwarded to ``GridSearchCV`` as ``cv``.

    Returns
    -------
    None
        The only result is the pickle file written to disk.
    """
    # Candidate shrinkage values: from 0 in steps of 0.2, stopping before 1.2.
    shrinkage_grid = {'shrinkage': np.arange(0, 1.2, .2)}
    classifier = LinearDiscriminantAnalysis(solver='lsqr')

    searcher = GridSearchCV(
        estimator=classifier,
        param_grid=shrinkage_grid,
        n_jobs=-1,
        cv=cv,
    )
    fitted = searcher.fit(x, y, groups=g)

    # Persist whatever ``fit`` returned so the search can be inspected later.
    destination = f"{root}+lda.pickle"
    with open(destination, "wb") as file:
        pickle.dump(fitted, file)

def search_svc(root, x, y, g=None, cv=None):
    """Grid-search the ``C`` parameter of an ``SVC`` and pickle the result.

    Parameters
    ----------
    root : str
        Prefix of the output file; the result is written to
        ``"{root}+svc.pickle"``.
    x, y
        Training data and targets, forwarded unchanged to ``fit``.
    g : optional
        Forwarded to ``fit`` as ``groups``.
    cv : optional
        Forwarded to ``GridSearchCV`` as ``cv``.

    Returns
    -------
    None
        The only result is the pickle file written to disk.
    """
    # Only C is tuned, over the powers of ten 1e-2 .. 1e2.
    #
    # A gamma / kernel grid is currently disabled. Its entries were
    #     'gamma': [1e-5, scale, 1e-3, auto, 1e-1, 1e0],
    #     'kernel': ['rbf', 'linear']
    # with the two data-dependent candidates defined as
    #     auto = 1 / x.shape[1]
    #     scale = 1 / (x.shape[1] * x.var())
    # Those two values are no longer computed on every call, because nothing
    # used them; re-add them here if the grid above is re-enabled.
    opt = GridSearchCV(
        estimator=SVC(),
        param_grid={'C': 10. ** np.arange(-2, 3)},
        n_jobs=-1,
        cv=cv,
    ).fit(x, y, groups=g)

    with open(f"{root}+svc.pickle", "wb") as file:
        pickle.dump(opt, file)

def search(xfile, features=None):
    """Run the LDA and SVC hyperparameter searches for one feature archive.

    Companion files are located from the part of ``xfile``'s path that
    precedes the first ``+`` (as returned by ``root``):

    * ``<root>+target.npz`` -- required, read under key ``'y'``.
    * ``<root>+group.npz``  -- optional, also read under key ``'y'``; when it
      exists, cross-validation uses ``LeavePGroupsOut(n_groups=3)``.

    Parameters
    ----------
    xfile : str or path-like
        ``.npz`` archive holding the feature arrays.
    features : sequence of str, optional
        Keys of the arrays in ``xfile`` to stack horizontally into the design
        matrix. When empty or ``None``, every array in the archive is used,
        in the archive's own key order.

    Returns
    -------
    None
        If ``xfile`` or the target file is missing, a message is printed and
        nothing is searched. Otherwise ``search_lda`` and ``search_svc`` are
        called with ``xfile.stem`` as their output prefix.
    """
    xfile = Path(xfile)
    if not xfile.exists():
        print(f"{xfile.stem} does not exist")
        # wait=True defers the clear until the next output arrives, so the
        # message stays visible in the meantime.
        clear_output(wait=True)
        return

    # NOTE(security): allow_pickle=True lets np.load unpickle object arrays,
    # which can execute arbitrary code. Only use this on archives you trust.
    # The archive is closed as soon as the arrays are in memory, so the file
    # handle is not held open for the duration of the grid searches.
    with np.load(xfile, allow_pickle=True) as xdict:
        keys = features if features else xdict.files
        x = np.hstack([xdict[key] for key in keys])

    yfile = Path(f"{root(xfile)}+target.npz")
    if not yfile.exists():
        print(f"{yfile.stem} does not exist")
        clear_output(wait=True)
        return
    with np.load(yfile) as ydict:
        y = ydict['y']

    gfile = Path(f"{root(xfile)}+group.npz")
    if gfile.exists():
        # NOTE(review): the groups are read under key 'y', the same key as the
        # targets -- confirm this matches how the group file is written.
        with np.load(gfile) as gdict:
            g = gdict['y']
        cv = LeavePGroupsOut(n_groups=3)
    else:
        # No group file: the searches receive cv=None and no groups.
        g = None
        cv = None

    # Execute both hyperparameter searches; each writes its own pickle file.
    search_lda(xfile.stem, x, y, g, cv)
    search_svc(xfile.stem, x, y, g, cv)

    # Replace any previous progress output with the latest status line.
    clear_output()
    print(f'{xfile.stem} trained')

In [5]:
def get_split(lst, n, parts=4):
    """Return the ``n``-th of ``parts`` consecutive chunks of ``lst``.

    The chunk size is ``round(len(lst) / parts)``. The last chunk
    (``n == parts - 1``) additionally takes whatever is left at the end of
    ``lst``, so no trailing items are dropped when the length is not a
    multiple of ``parts``.

    Parameters
    ----------
    lst : sequence
        The sequence to slice. It is sliced directly, not copied beforehand.
    n : int
        Zero-based index of the chunk to return.
    parts : int, optional
        Total number of chunks (default 4).

    Returns
    -------
    A slice of ``lst``; it may be empty.

    Raises
    ------
    ValueError
        If ``parts`` is smaller than 1.
    """
    if parts < 1:
        raise ValueError("parts must be at least 1")

    size = round(len(lst) / parts)
    start = n * size
    # The final chunk absorbs the remainder left over by rounding.
    stop = len(lst) if n == parts - 1 else start + size
    return lst[start:stop]

In [None]:
# Feature archives are every .npz file in the working directory except the
# companion "*target.npz" and "*group.npz" files.
data_dir = Path(".")
companions = {*data_dir.glob("*target.npz"), *data_dir.glob("*group.npz")}
filepaths = sorted(set(data_dir.glob("*.npz")) - companions)

# This run processes only split number 1 of the sorted file list.
for filepath in get_split(filepaths, 1):
    search(filepath)

kthtips2b+vgg19 trained
