In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import cm
from scipy.linalg import svd
from sklearn.neighbors import BallTree
from scipy.stats import pearsonr as rcoef
from scipy import fftpack as fft
import torch
from torchvision import transforms
import torchvision.datasets as datasets
from sklearn.datasets import fetch_lfw_people

from hades import misc
from hades import Hades, hp_grid
from misc import plot_filt, plot

  import pandas.util.testing as tm


In [2]:
def import_save_mnist(data_dir, X_name, z_name, download=False):
    """Export the MNIST training split to two ``.npy`` files.

    Parameters
    ----------
    data_dir : str
        Root directory handed to ``datasets.MNIST``; the ``.npy`` files are
        written into this same directory.
    X_name : str
        File name of the saved image array.
    z_name : str
        File name of the saved label array.
    download : bool, optional
        Forwarded unchanged to ``datasets.MNIST(download=...)``.

    Returns
    -------
    None
        The function only writes files.
    """
    dataset = datasets.MNIST(root=data_dir, download=download, train=True,
                             transform=transforms.ToTensor())
    # The dataset is iterated sample by sample, so no DataLoader is needed.
    images = []  # Image data
    labels = []  # Label
    for image, label in dataset:
        images.append(np.array(image))
        labels.append(label)
    # Per-sample arrays are joined along their leading axis.
    # NOTE(review): presumably each sample is (1, H, W), giving (N, H, W) — confirm.
    X = np.concatenate(images, axis=0)
    z = np.array(labels)
    np.save(data_dir+'/'+X_name, X)
    np.save(data_dir+'/'+z_name, z)
    

def import_save_fmnist(data_dir, X_name, z_name, download=False):
    """Export the Fashion-MNIST training split to two ``.npy`` files.

    Parameters
    ----------
    data_dir : str
        Root directory handed to ``datasets.FashionMNIST``; the ``.npy``
        files are written into this same directory.
    X_name : str
        File name of the saved image array.
    z_name : str
        File name of the saved label array.
    download : bool, optional
        Forwarded unchanged to ``datasets.FashionMNIST(download=...)``.

    Returns
    -------
    None
        The function only writes files.
    """
    dataset = datasets.FashionMNIST(root=data_dir, download=download, train=True,
                                    transform=transforms.ToTensor())
    # The dataset is iterated sample by sample, so no DataLoader is needed.
    images = []  # Image data
    labels = []  # Label
    for image, label in dataset:
        images.append(np.array(image))
        labels.append(label)
    # Per-sample arrays are joined along their leading axis.
    # NOTE(review): presumably each sample is (1, H, W), giving (N, H, W) — confirm.
    X = np.concatenate(images, axis=0)
    z = np.array(labels)
    np.save(data_dir+'/'+X_name, X)
    np.save(data_dir+'/'+z_name, z)
    

def import_save_face(data_dir, X_name, z_name, download=False):
    """Fetch the LFW people dataset and store its images and targets as ``.npy``.

    ``fetch_lfw_people`` is called with ``min_faces_per_person=50`` and
    ``resize=0.5``; the ``images`` attribute is saved under ``X_name`` and the
    ``'target'`` entry under ``z_name``, both inside ``data_dir``.

    NOTE(review): ``download`` is accepted for signature parity with the other
    ``import_save_*`` helpers but is never read in this function.
    """
    faces = fetch_lfw_people(min_faces_per_person=50, resize=0.5)
    face_images = faces.images
    face_labels = faces['target']
    np.save('/'.join((data_dir, X_name)), face_images)
    np.save('/'.join((data_dir, z_name)), face_labels)
    
    
def split_by_class(X, z):
    """Group the rows of ``X`` by their label in ``z``.

    Parameters
    ----------
    X : numpy.ndarray
        Samples, indexed along the first axis.
    z : array-like
        One label per sample. Converted with ``np.asarray`` so plain lists
        work as well as arrays.

    Returns
    -------
    tuple
        ``(X_split, classes)`` where ``classes`` is the sorted array of
        distinct labels and ``X_split[k]`` holds the rows of ``X`` whose
        label equals ``classes[k]``, in their original order.
    """
    z = np.asarray(z)
    # np.unique returns the distinct labels already sorted.
    classes = np.unique(z)
    X_split = [X[np.flatnonzero(z == label)] for label in classes]
    return (X_split, classes)


def dct(X, dct_thr):
    """Return flattened low-order DCT coefficients for a stack of images.

    A discrete cosine transform is applied over axes 1 and 2 of ``X``; only
    the leading ``dct_thr`` x ``dct_thr`` coefficients of each image are kept
    and reshaped into one row of length ``dct_thr * dct_thr`` per image.
    """
    coeffs = fft.dctn(X, axes=(1, 2))
    # Keep only the top-left corner of every coefficient grid.
    corner = coeffs[:, :dct_thr, :dct_thr]
    n_samples = corner.shape[0]
    return corner.reshape(n_samples, dct_thr * dct_thr)


def process_input(X, z, dct_thr):
    """Build the DCT features and the per-class splits used downstream.

    Returns a dict with three entries:
    ``'X_d'``   - output of ``dct(X, dct_thr)``;
    ``'X_sp'``  - ``X`` split by class via ``split_by_class``;
    ``'X_dsp'`` - the DCT features split by class the same way.
    The class arrays returned by ``split_by_class`` are discarded.
    """
    features = dct(X, dct_thr)
    images_by_class = split_by_class(X, z)[0]
    features_by_class = split_by_class(features, z)[0]
    return dict(X_d=features, X_sp=images_by_class, X_dsp=features_by_class)


def gallery_extreme(X_img, X_dsp, scores, classes, n_ext,
                    fig_scale=1.5, save_fig=True,
                    wspace=0.03, hspace=0.03, fig_name='untitled'):
    """Show, per class, the lowest- and highest-scoring images side by side.

    One row is drawn per class. The left ``n_ext`` columns show the images
    with the smallest scores (ascending), the right ``n_ext`` columns the
    images with the largest scores (ascending, so the maximum is rightmost).

    Parameters
    ----------
    X_img : sequence of numpy.ndarray
        ``X_img[i]`` holds the images of row ``i``, indexable by an integer
        index array.
    X_dsp : sequence
        Not used by this function; kept so existing calls stay valid.
    scores : sequence of array-like
        ``scores[i]`` holds one score per image in ``X_img[i]``.
    classes : array-like
        Only its number of elements is used, to set the number of rows.
    n_ext : int
        Number of images shown at each extreme; must be at least 1.
    fig_scale : float, optional
        Size in inches of each grid cell.
    save_fig : bool, optional
        When true, write the figure to ``output/<fig_name>.pdf``.
    wspace, hspace : float, optional
        Spacing passed to ``subplots_adjust``.
    fig_name : str, optional
        Base name of the saved PDF.

    Raises
    ------
    ValueError
        If ``n_ext`` is smaller than 1.
    """
    import os  # local import: only needed to create the output directory

    if n_ext < 1:
        raise ValueError('n_ext must be a positive integer')

    # np.size accepts lists as well as arrays.
    nrows = np.size(classes)
    ncols = 2 * n_ext

    # squeeze=False keeps `ax` two-dimensional even when there is one row.
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, squeeze=False)
    fig.set_size_inches(fig_scale * ncols, fig_scale * nrows)
    fig.subplots_adjust(wspace=wspace, hspace=hspace)

    for axis in ax.flat:
        axis.set_xticks([])
        axis.set_yticks([])

    for i in range(nrows):
        inds_sort = np.argsort(scores[i]).astype(int)
        inds_low = inds_sort[:n_ext]
        inds_high = inds_sort[-n_ext:]

        imgs_low = X_img[i][inds_low]
        imgs_high = X_img[i][inds_high]
        # Iterate over what is actually available so that a class with fewer
        # than n_ext samples leaves trailing cells blank instead of failing.
        for j, img in enumerate(imgs_low):
            ax[i, j].imshow(img, cmap='gray')
        for j, img in enumerate(imgs_high):
            ax[i, j + n_ext].imshow(img, cmap='gray')

    if save_fig:
        os.makedirs('output', exist_ok=True)
        fig.savefig('output/' + fig_name + '.pdf')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [8]:
def runthrough(X_img, z, dct_thr, cls, hp=None, train_prop=1.0):
    """Fit one ``Hades`` model per requested class and score its samples.

    Parameters
    ----------
    X_img : numpy.ndarray
        Image stack passed to ``process_input``.
    z : array-like
        Labels passed to ``process_input``.
    dct_thr : int
        DCT truncation size passed to ``process_input``.
    cls : iterable of int
        Entries are used directly as indices into the per-class feature list
        ``X_dsp``, so each must be a valid position in that list.
    hp : optional
        Forwarded to ``Hades.fit(hp=...)``.
    train_prop : float, optional
        Forwarded to ``Hades.fit(probe_prop=...)``.

    Returns
    -------
    dict
        ``'X_img'`` and ``'z'`` (the inputs), ``'processed_input'`` (the dict
        from ``process_input``), ``'clfs'`` (fitted models) and ``'scores'``
        (one ``score_samples`` result per model). ``clfs[k]`` and
        ``scores[k]`` both correspond to the k-th entry of ``cls``.
    """
    # Process X_d, X_sp, X_dsp
    print('Processing input.')
    processed_input = process_input(X_img, z, dct_thr)
    X_dsp = processed_input['X_dsp']

    # Train classifier
    print('Training classifiers.')
    clfs = [Hades() for _ in cls]
    # Pair each model with its class by position: indexing `clfs` by the
    # class value would only work when cls is exactly 0..len(cls)-1.
    for clf, i in zip(clfs, cls):
        print(f'Doing {i}...')
        clf.fit(X_dsp[i], hp=hp, probe_prop=train_prop)

    # Evaluate singularity score
    print('Scoring...')
    scores = [clf.score_samples(X_dsp[i]) for clf, i in zip(clfs, cls)]

    return {'X_img': X_img,
            'z': z,
            'processed_input': processed_input,
            'clfs': clfs,
            'scores': scores}

In [4]:
# Directory holding the cached .npy datasets. It is assigned here because the
# conversion calls below read it, and no earlier cell defines it.
data_dir = 'data'
# Forwarded as `download=` to the import_save_* helpers.
download_data = False
# Set to True to (re)generate the .npy files from the source datasets.
convert_into_numpy = False
if convert_into_numpy:
    import_save_mnist(data_dir, 'mnist_X.npy', 'mnist_z.npy', download=download_data)
    import_save_fmnist(data_dir, 'fmnist_X.npy', 'fmnist_z.npy', download=download_data)
    import_save_face(data_dir, 'face_X.npy', 'face_z.npy', download=download_data)

# Shared by the figure names of the cells that follow.
timestamp_now = misc.timestamp()

# MNIST

In [None]:
# --- Settings ---------------------------------------------------------------
data_dir, X_name, z_name = 'data', 'mnist_X.npy', 'mnist_z.npy'
cls = np.arange(10)       # class indices to fit and display
dct_thr = 10              # passed to runthrough as the DCT truncation size
hp = hp_grid(k=100)
train_prop = 1.0
# Display settings
fig_n_ext, fig_scale = 5, 1.5

# --- Load the cached arrays and run the pipeline ----------------------------
X_img = np.load('/'.join((data_dir, X_name)))
z = np.load('/'.join((data_dir, z_name)))
output = runthrough(X_img, z, dct_thr, cls, hp=hp, train_prop=train_prop)

# --- Gallery of lowest / highest scoring images per class -------------------
X_sp, X_dsp = (output['processed_input'][key] for key in ('X_sp', 'X_dsp'))
scores = output['scores']
fig_name = f'MNIST {timestamp_now}'
gallery_extreme(X_sp, X_dsp, scores,
                classes=cls, n_ext=fig_n_ext,
                fig_scale=fig_scale, fig_name=fig_name)

In [None]:
# Collect the `fit_time` attribute of every fitted model, in list order.
fit_times = [item.fit_time for item in output['clfs']]
# Leave headroom above the slowest fit.
(y_min, y_max) = (0, 2 * np.max(fit_times))

# Explicit figure/axes so the plot does not depend on pyplot's current state.
fig, ax = plt.subplots()
ax.plot(fit_times, marker='o')
ax.set_ylim([y_min, y_max])
ax.set_xlabel('Classifier index')
ax.set_ylabel('Fit time')
fig.suptitle('Fit time for MNIST, per class')
plt.show()

# Fashion MNIST

In [None]:
# --- Settings ---------------------------------------------------------------
data_dir, X_name, z_name = 'data', 'fmnist_X.npy', 'fmnist_z.npy'
cls = np.arange(10)       # class indices to fit and display
dct_thr = 10              # passed to runthrough as the DCT truncation size
hp = hp_grid(k=100)
train_prop = 1.0
# Display settings
fig_n_ext, fig_scale = 5, 1.5

# --- Load the cached arrays and run the pipeline ----------------------------
X_img = np.load('/'.join((data_dir, X_name)))
z = np.load('/'.join((data_dir, z_name)))
output = runthrough(X_img, z, dct_thr, cls, hp=hp, train_prop=train_prop)

# --- Gallery of lowest / highest scoring images per class -------------------
X_sp, X_dsp = (output['processed_input'][key] for key in ('X_sp', 'X_dsp'))
scores = output['scores']
fig_name = f'FMNIST {timestamp_now}'
gallery_extreme(X_sp, X_dsp, scores,
                classes=cls, n_ext=fig_n_ext,
                fig_scale=fig_scale, fig_name=fig_name)

In [None]:
# Collect the `fit_time` attribute of every fitted model, in list order.
fit_times = [item.fit_time for item in output['clfs']]
# Leave headroom above the slowest fit.
(y_min, y_max) = (0, 2 * np.max(fit_times))

# Explicit figure/axes so the plot does not depend on pyplot's current state.
fig, ax = plt.subplots()
ax.plot(fit_times, marker='o')
ax.set_ylim([y_min, y_max])
ax.set_xlabel('Classifier index')
ax.set_ylabel('Fit time')
fig.suptitle('Fit time for FashionMNIST, per class')
plt.show()