In [1]:
from __future__ import division, print_function
import os, sys, json
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from scipy.spatial import distance as dist
from scipy import stats
from sklearn import preprocessing, manifold, decomposition, random_projection, neighbors, metrics, linear_model
from sklearn.model_selection import cross_val_score

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import matplotlib.pyplot as plt

%matplotlib inline
import seaborn as sns
sns.set_style('whitegrid')
sns.set_context('talk', font_scale=1.2)
from IPython import display
np.random.seed(2018)
tf.set_random_seed(2018)

In [2]:
from utils import *

In [3]:
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
print (mnist.train.num_examples)
print (mnist.test.num_examples)

55000
10000


In [5]:
X_train, X_test = mnist.train.images, mnist.test.images
print (X_train.shape, X_test.shape)
labels_train = mnist.train.labels
n_samples = int(mnist.train.num_examples)
print (labels_train.shape)

(55000, 784) (10000, 784)
(55000,)


In [6]:
# A simple Logistic Regression model to classify the digits
logit = linear_model.LogisticRegression()
logit.fit(X_train, labels_train)
print(metrics.accuracy_score(mnist.test.labels, logit.predict(X_test)))

0.9198


In [8]:
from autoencoder_models.VariationalAutoencoder import VariationalAutoencoder
training_epochs = 20
batch_size = 128
display_step = 1
learning_rate=0.001

# VAE architecture: 784 -> 500 -> 500 -> 2 -> 500 -> 500 -> 784
vae2d = VariationalAutoencoder([784, 500, 500, 2], 
                               learning_rate=learning_rate
                              )


In [9]:
# Training loop
for epoch in range(training_epochs):
    avg_loss = 0.
    total_batch = int(n_samples / batch_size)
    # Loop over all batches
    for i in range(total_batch):
        batch_xs, _ = mnist.train.next_batch(batch_size)
        # Fit training using batch data
        loss = vae2d.partial_fit(batch_xs)
        # Compute average loss
        avg_loss += loss / n_samples * batch_size
    # Display logs per epoch step
    if epoch % display_step == 0:
        print ("Epoch %d: loss = %.4f "% (epoch+1, avg_loss))

Epoch 1: loss = 3047.4703 
Epoch 2: loss = 2485.6395 
Epoch 3: loss = 2354.7739 
Epoch 4: loss = 2268.5700 
Epoch 5: loss = 2197.3673 
Epoch 6: loss = 2144.5467 
Epoch 7: loss = 2096.2754 
Epoch 8: loss = 2055.8172 
Epoch 9: loss = 2026.1778 
Epoch 10: loss = 2000.8083 
Epoch 11: loss = 1978.6501 
Epoch 12: loss = 1959.7616 
Epoch 13: loss = 1942.7579 
Epoch 14: loss = 1929.0888 
Epoch 15: loss = 1913.6448 
Epoch 16: loss = 1904.3606 
Epoch 17: loss = 1892.9023 
Epoch 18: loss = 1882.1108 
Epoch 19: loss = 1870.5142 
Epoch 20: loss = 1865.2764 


In [10]:
# VAE architecture: 784 -> 500 -> 500 -> 20 -> 500 -> 500 -> 784
vae20d = VariationalAutoencoder([784, 500, 500, 20], 
                               learning_rate=learning_rate
                              )


In [11]:
# Training loop
for epoch in range(training_epochs):
    avg_loss = 0.
    total_batch = int(n_samples / batch_size)
    # Loop over all batches
    for i in range(total_batch):
        batch_xs, _ = mnist.train.next_batch(batch_size)
        # Fit training using batch data
        loss = vae20d.partial_fit(batch_xs)
        # Compute average loss
        avg_loss += loss / n_samples * batch_size
    # Display logs per epoch step
    if epoch % display_step == 0:
        print ("Epoch %d: loss = %.4f "% (epoch+1, avg_loss))

Epoch 1: loss = 2449.0908 
Epoch 2: loss = 1146.7827 
Epoch 3: loss = 898.0939 
Epoch 4: loss = 776.1940 
Epoch 5: loss = 708.2477 
Epoch 6: loss = 660.1153 
Epoch 7: loss = 625.4214 
Epoch 8: loss = 596.4360 
Epoch 9: loss = 572.2996 
Epoch 10: loss = 553.3875 
Epoch 11: loss = 536.0296 
Epoch 12: loss = 520.6217 
Epoch 13: loss = 507.4268 
Epoch 14: loss = 493.1958 
Epoch 15: loss = 483.4196 
Epoch 16: loss = 473.3497 
Epoch 17: loss = 462.0246 
Epoch 18: loss = 454.7900 
Epoch 19: loss = 446.3437 
Epoch 20: loss = 438.4803 


In [12]:
vae2d.save('trained_models/VAE_2d')
vae20d.save('trained_models/VAE_20d')

'trained_models/VAE_20d/model.ckpt'