In [1]:
from setup_mnist import MNIST

Using TensorFlow backend.


In [2]:
import numpy as np
from numpy import linalg as LA
import matplotlib.pyplot as plt

In [3]:
# Instantiate the project-local MNIST loader; later cells read its
# train/validation/test arrays and label arrays.
data = MNIST()

In [4]:
# Report the array shape of every split exposed by the loader.
for label, split in (
    ("Train size:", data.train_data),
    ("Validation size:", data.validation_data),
    ("Test size:", data.test_data),
):
    print(label, split.shape)

Train size: (55000, 28, 28, 1)
Validation size: (5000, 28, 28, 1)
Test size: (10000, 28, 28, 1)


In [5]:
# Keep index 0 of the last axis of each image array and apply the affine
# map x -> 2*x + 1 to every pixel.
# NOTE(review): the resulting value range depends on how setup_mnist scales
# the raw pixels — confirm it is the range the model expects.
X_train = 2 * data.train_data[..., 0] + 1
X_test = 2 * data.test_data[..., 0] + 1

# Label arrays are used unchanged.
Y_train, Y_test = data.train_labels, data.test_labels

In [6]:
def evaluate(X_test,Y_test,theta):
    """Return the fraction of samples whose top-scoring class matches the label.

    The score of class ``c`` for a sample is the dot product of the flattened
    sample with the flattened ``theta[c]``; the predicted class is the argmax
    of those scores and the true class is the argmax of the sample's row in
    ``Y_test``.

    Parameters
    ----------
    X_test : ndarray
        Samples, one per index of the leading axis.
    Y_test : ndarray, shape (n_samples, n_classes)
        Per-sample label rows; the label is the argmax of each row.
    theta : ndarray
        Weights whose leading axis indexes classes; ``theta[c]`` must hold as
        many elements as one sample.

    Returns
    -------
    float
        Number of correct predictions divided by ``X_test.shape[0]``.

    Raises
    ------
    ZeroDivisionError
        If ``X_test`` contains no samples.
    """
    n_samples = X_test.shape[0]
    n_classes = Y_test.shape[1]

    # Flatten the per-class weights once, then score every (sample, class)
    # pair with a single matrix product instead of nested Python loops.
    weights = theta[:n_classes].reshape(n_classes, -1)
    samples = X_test.reshape(n_samples, weights.shape[1])
    scores = np.dot(samples, weights.T)

    predicted = np.argmax(scores, axis=1)
    expected = np.argmax(Y_test[:n_samples], axis=1)
    hits = np.count_nonzero(predicted == expected)

    return hits / n_samples

In [7]:
def compute_proximal(gamma, lambda_, theta):
    """Shrink each per-pixel weight vector of ``theta`` towards zero.

    For every position ``(row, col)`` the vector ``theta[:, row, col]`` (one
    entry per class) is treated as a group. With ``threshold = gamma*lambda_``:

    * if the group's Euclidean norm exceeds ``threshold`` the group is scaled
      by ``max(0, 1 - threshold/norm)``;
    * otherwise the whole group is set to zero.

    Parameters
    ----------
    gamma : float
        Step size.
    lambda_ : float
        Factor multiplied with ``gamma`` to form the threshold.
    theta : ndarray, shape (n_classes, width, height)
        Weights to shrink. The array is NOT modified; a new array is returned.

    Returns
    -------
    ndarray
        Array with the same shape as ``theta`` holding the shrunken weights.
    """
    theta = np.asarray(theta)
    if not np.issubdtype(theta.dtype, np.floating):
        # Integer weights would truncate the fractional scale factors.
        theta = theta.astype(float)

    threshold = gamma * lambda_

    # Norm of every group at once: reduce over the class axis.
    norms = LA.norm(theta, axis=0)

    # Groups at or below the threshold keep a scale of 0; the division is only
    # evaluated where norm > threshold, so a zero norm is never divided by
    # when the threshold is non-negative.
    scale = np.zeros_like(norms)
    keep = norms > threshold
    scale[keep] = np.maximum(0, 1 - threshold / norms[keep])

    # Broadcasting applies each group's scale across the class axis and
    # allocates a fresh result, leaving the caller's array untouched.
    return theta * scale

In [8]:
def compute_g(gamma, Xi, Yi, theta):
    """Subgradient of the multi-class hinge loss for a single sample.

    With ``s_c = <Xi, theta[c]>`` and true class ``yi = argmax(Yi)`` the loss is
    ``max(0, max_{c != yi} (s_c + 1 - s_yi))``. When the loss is positive and
    attained by class ``ci`` (first such class on ties), a subgradient with
    respect to ``theta`` is ``+Xi`` in slot ``ci`` and ``-Xi`` in slot ``yi``;
    otherwise it is all zeros.

    Parameters
    ----------
    gamma : float
        Unused; kept so existing call sites keep working.
    Xi : ndarray
        One sample, with the same shape as ``theta[c]``.
    Yi : ndarray, shape (n_classes,)
        Label row for the sample; the true class is its argmax.
    theta : ndarray
        Weights whose leading axis indexes classes.

    Returns
    -------
    ndarray
        Float array with the shape of ``theta``.
    """
    yi = np.argmax(Yi)
    n_classes = Yi.shape[0]
    g = np.zeros(theta.shape)

    # Score every class with one matrix-vector product.
    scores = np.dot(theta[:n_classes].reshape(n_classes, -1), Xi.flatten())

    # Margin violation of each class relative to the true class. The true
    # class itself is excluded by forcing its entry to -inf.
    margins = (scores + 1 - scores[yi]).astype(float)
    margins[yi] = -np.inf
    ci = np.argmax(margins)  # first maximiser, matching a strict '>' scan

    if margins[ci] > 0:
        g[ci] = Xi
        # The loss also decreases as the true-class score grows, so the
        # true-class slot receives the opposite sign.
        g[yi] -= Xi

    return g

In [9]:
# Settings for the optimisation run.
iterations = 10**6        # number of stochastic updates
lambda_ = 0.01            # multiplied by the step size to form the shrinkage threshold
n = len(X_train)          # number of training samples

# Initial weights: all ones, one slab per label column, each slab shaped like
# a single training image.
theta = np.ones((Y_train.shape[1],) + tuple(X_train.shape[1:3]))

In [None]:
# Seed NumPy's global RNG so the sequence of sampled training indices, and
# therefore the whole run, is repeatable when the notebook is re-executed.
np.random.seed(0)

results = []  # test accuracy, recorded once every 1000 updates
for t in range(iterations):
    # Draw one training sample uniformly at random.
    i = np.random.randint(n)
    Xi = X_train[i]
    Yi = Y_train[i]

    # Step size decays as 1/(t+1).
    gamma = 1/(t+1)

    # Step against g, then pass the result through compute_proximal.
    g = compute_g(gamma, Xi, Yi, theta)
    theta = compute_proximal(gamma, lambda_, theta - gamma*g)

    if t%1000 == 0:
        accuracy = evaluate(X_test,Y_test,theta)
        results.append(accuracy)

In [None]:
# Show, per pixel, the sum of |theta| over the leading axis; the colour scale
# is fixed to the range [0, 10].
plt.imshow(np.abs(theta).sum(axis=0), vmin=0, vmax=10)

In [None]:
# Write the same per-pixel |theta| sum to disk, subtracted from 10 and
# rendered with the gray colour map over the range [0, 10].
plt.imsave(
    'l001.png',
    10 - np.abs(theta).sum(axis=0),
    vmin=0,
    vmax=10,
    cmap='gray',
)

In [None]:
# Accuracy was recorded every 1000 updates, so entry k belongs to update 1000*k.
x_axis = np.arange(len(results)) * 1000
plt.plot(x_axis, results)

In [None]:
# Persist the recorded accuracies as a plain-text file.
np.savetxt(fname='lamda001', X=results)