In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from sklearn.manifold import TSNE
from sklearn import datasets, metrics
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

### Load and prepare data

In [None]:
# Download MNIST (70,000 flattened 28x28 digits) from OpenML as NumPy arrays.
x, y = datasets.fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
# OpenML delivers the labels as strings; convert them to integers.
# The `np.int` alias was removed in NumPy 1.24, so use the builtin `int`.
y = y.astype(int)

In [None]:
# Hold out 10,000 samples for testing. A fixed seed keeps the split, and
# everything derived from it, reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=10000, random_state=1
)

In [None]:
# Learn per-feature min/max from the training split only.
scaler = MinMaxScaler()
scaler.fit(x_train)

In [None]:
# Rescale both splits with the scaler fitted on the training data.
# NOTE: this rebinds x_train / x_test, so re-running the cell without
# re-running the split scales the already-scaled arrays a second time.
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

### Embed data

In [None]:
# Embedding switches:
#   USE_MLP          - when True, project inputs through a trained MLP's first layer.
#   USE_TSNE_OR_PCA  - 'TSNE' selects t-SNE; any other value selects PCA.
USE_MLP, USE_TSNE_OR_PCA = False, 'PCA'

In [None]:
if USE_MLP:
    # Train a classifier with one 200-unit hidden layer and report accuracy.
    mlp = MLPClassifier(hidden_layer_sizes=(200,), random_state=1, max_iter=300)
    mlp.fit(x_train, y_train)
    print('MLP train accuracy {0:3.1f} %'.format(100 * mlp.score(x_train, y_train)))
    print('MLP test accuracy {0:3.1f} %'.format(100 * mlp.score(x_test, y_test)))

    def sklearn_mlpclassifier_tranform(self, xs):
        """Project `xs` through the first-layer weight matrix only.

        This is a plain matrix product with `coefs_[0]`: neither the layer's
        bias nor its activation function is applied.
        """
        return xs @ self.coefs_[0]

    # Attach the projection to the class so it can be called as `mlp.transform(...)`.
    MLPClassifier.transform = sklearn_mlpclassifier_tranform
    h_train, h_test = mlp.transform(x_train), mlp.transform(x_test)

In [None]:
# Reduce the chosen representation (raw pixels or MLP projection) to 2-D.
if USE_TSNE_OR_PCA == 'TSNE':
    if USE_MLP:
        tsne_train = h_train
        tsne_test = h_test
    else:
        tsne_train = x_train
        tsne_test = x_test
    # scikit-learn's TSNE has no `transform` method (it cannot project unseen
    # points), so train and test are embedded jointly with `fit_transform`
    # and the result is split back into the two sets.
    # NOTE: newer scikit-learn releases rename `n_iter` to `max_iter`.
    tsne = TSNE(n_components=2, n_iter=300, verbose=3)
    n_train = len(tsne_train)
    z_all = tsne.fit_transform(np.concatenate([tsne_train, tsne_test], axis=0))
    z_train = z_all[:n_train]
    z_test = z_all[n_train:]
else:
    if USE_MLP:
        pca_train = h_train
        pca_test = h_test
    else:
        pca_train = x_train
        pca_test = x_test
    # PCA is fitted on the training set only, then applied to both splits.
    pca = PCA(n_components=2).fit(pca_train)
    z_train = pca.transform(pca_train)
    z_test = pca.transform(pca_test)

In [None]:
def plot_mnist(zs, ys):
    """Scatter-plot 2-D points `zs`, one colour/legend entry per digit 0-9.

    `ys` holds the integer label of each row of `zs`; only the first two
    columns of `zs` are drawn.
    """
    for digit in range(10):
        members = ys == digit  # boolean mask selecting this digit's points
        plt.scatter(zs[members, 0], zs[members, 1], label=digit)
    plt.legend()
    plt.show()

In [None]:
# Visualise the 2-D embedding of the held-out test set, coloured by digit.
plot_mnist(zs=z_test, ys=y_test)

In [None]:
# 100-component PCA features of the scaled pixels, fitted on the training set.
# NOTE: this rebinds `pca`, replacing the 2-component model if the earlier
# embedding cell took its PCA branch.
pca = PCA(n_components=100)
pca.fit(x_train)
f_train, f_test = pca.transform(x_train), pca.transform(x_test)

In [None]:
def euclidean_distance(a, b):
    """Return the pairwise Euclidean distances between rows of `a` and `b`.

    `a` is indexed as (n, d) and broadcast against `b`; for `b` of shape
    (m, d) the result has shape (n, m). The full (n, m, d) difference
    tensor is materialised in memory.
    """
    diff = a[:, np.newaxis, :] - b
    return np.sqrt(np.square(diff).sum(axis=2))


def np_euclidean_distance(a, b):
    """Pairwise Euclidean distances between rows of `a` and `b` via `np.linalg.norm`.

    A length-1 axis is inserted after the first axis of `a` so it broadcasts
    against `b`; the norm is then taken over axis 2.
    """
    pairwise_diff = np.expand_dims(a, 1) - b
    return np.linalg.norm(pairwise_diff, axis=2)


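# NOTE: unfinished draft, kept commented out. It computes distances from one
# anchor sample to every row and takes the argmin, but never returns a value
# and still contains a debugging print.
#def find_nearest_inter(xs, ys, a_idx):
    #a_y = ys[a_idx]
    #a_x = xs[a_idx].reshape(1, -1)
    #y_idxs = np.argwhere(ys == a_y).reshape(-1)
    #inter_xs = xs[y_idxs]
    #ed = euclidean_distance(xs, a_x)
    #ed[a_idx] = np.inf
    #print(xs.shape, ys.shape, a_y, a_x.shape, ed.shape, inter_xs.shape, y_idxs)
    #np.argmin(ed)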

In [None]:
def batch_each_n(xs, ys, n):
    """Draw `n` distinct random samples from each of the labels 0-9.

    Returns `(batch_x, batch_y)` with the per-label picks concatenated in
    label order. Sampling uses NumPy's global random state.

    NOTE: a later cell redefines `batch_each_n` with the signature
    `(xs, ys, p, k)`, which shadows this version once that cell has run.
    """
    picked_x, picked_y = [], []
    for label in range(10):
        candidates = np.argwhere(ys == label).ravel()
        chosen = np.random.choice(candidates, n, replace=False)
        picked_x.append(xs[chosen])
        picked_y.append(ys[chosen])
    return np.concatenate(picked_x, 0), np.concatenate(picked_y)

In [None]:
# First 100 test feature vectors, used as the reference set for the timings below.
b = f_test[0:100]

In [None]:
# Time the hand-written sqrt/sum/square distance implementation.
%timeit euclidean_distance(f_test, b)

In [None]:
# Time the np.linalg.norm-based distance implementation for comparison.
%timeit np_euclidean_distance(f_test, b)

In [None]:
def batch_each_n(xs, ys, p, k, rng=None):
    """Build a class-balanced batch: `k` distinct samples from each of labels 0..p-1.

    Parameters
    ----------
    xs : array indexed by sample along axis 0.
    ys : 1-D integer label array aligned with `xs`.
    p : number of classes to draw from (labels ``0 .. p-1``).
    k : number of samples drawn, without replacement, per class.
    rng : optional ``np.random.Generator`` (or any object with a compatible
        ``choice`` method). When omitted, NumPy's global random state is
        used, exactly as before.

    Returns
    -------
    (batch_x, batch_y) : the per-class picks concatenated in label order,
        i.e. ``p * k`` rows each.

    Raises
    ------
    ValueError
        If some class has fewer than `k` samples.
    """
    sampler = np.random if rng is None else rng
    batch_x = []
    batch_y = []
    for i in range(p):
        all_y_idxs = np.argwhere(ys == i).reshape(-1)
        # Fail with an explicit message rather than NumPy's generic
        # "cannot take a larger sample than population" error.
        if len(all_y_idxs) < k:
            raise ValueError(
                'class {0} has only {1} samples; cannot draw {2} without replacement'
                .format(i, len(all_y_idxs), k)
            )
        y_idxs = sampler.choice(all_y_idxs, k, replace=False)
        batch_x.append(xs[y_idxs])
        batch_y.append(ys[y_idxs])
    return np.concatenate(batch_x, 0), np.concatenate(batch_y)

In [None]:
# Balanced batch of test features: 10 classes x 5 samples each.
batch_x, batch_y = batch_each_n(f_test, y_test, p=10, k=5)

In [None]:
# Pairwise distances between every pair of samples in the batch.
ed = np_euclidean_distance(a=batch_x, b=batch_x)

In [None]:
# Render the batch distance matrix as an image; rows/columns are grouped by class.
plt.imshow(X=ed)

In [None]:
# View the 50x50 matrix as (class, sample, class, sample) blocks; the 10 and 5
# must match the p and k passed to batch_each_n above.
ed4 = ed.reshape(10, 5, 10, 5)

In [None]:
# Confirm the 4-D block layout.
np.shape(ed4)

In [None]:
# Presumably classes-per-batch (P) and samples-per-class (K), mirroring the
# p / k arguments of batch_each_n; confirm against later cells.
P = 3
K = 3