In [18]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time

In [19]:
#use temporarily until tf.train bug is fixed 
#violet's mini batch code
class BatchSampler(object):
    '''
    A (very) simple wrapper to randomly sample batches without replacement.
    '''
    
    def __init__(self, data, targets, batch_size):
        self.num_points = data.shape[0]
        self.features = data.shape[1]
        self.data = data
        self.targets = targets
        self.batch_size = batch_size
        self.indices = np.arange(self.num_points)

    def random_batch_indices(self, m=None):
        if m is None:
            indices = np.random.choice(self.indices, self.batch_size, replace=False)
        else:
            indices = np.random.choice(self.indices, m, replace=False)
        return indices 

    def get_batch(self, m=None):
        '''
        Get a random batch without replacement from the dataset.
        If m is given the batch will be of size m. 
        Otherwise will default to the class initialized value.
        '''
        indices = self.random_batch_indices(m)
        X_batch = np.take(self.data, indices, 0)
        y_batch = self.targets[indices]
        return X_batch, y_batch

In [20]:
#loading data
with np.load("notMNIST.npz") as data :
    Data, Target = data ["images"], data["labels"]
    posClass = 2
    negClass = 9
    dataIndx = (Target==posClass) + (Target==negClass)
    Data = Data[dataIndx]/255.
    Target = Target[dataIndx].reshape(-1, 1)
    Target[Target==posClass] = 1
    Target[Target==negClass] = 0
    np.random.seed(521)
    randIndx = np.arange(len(Data))
    np.random.shuffle(randIndx)
    Data, Target = Data[randIndx], Target[randIndx]
    trainData, trainTarget = Data[:3500], Target[:3500]
    validData, validTarget = Data[3500:3600], Target[3500:3600]
    testData, testTarget = Data[3600:], Target[3600:]

In [21]:
trainData = np.reshape(trainData, [trainData.shape[0], 28*28])
validData = np.reshape(validData, [validData.shape[0], 28*28])
testData = np.reshape(testData, [testData.shape[0], 28*28])

In [22]:
#mse loss function
def mse_loss(yhat, target):
    loss = tf.reduce_mean(tf.pow(tf.subtract(yhat, target), 2))
    loss = tf.div(loss, 2)
    return loss

# ques 1

In [23]:
#hyperparameters

learning_rates = [0.005, 0.001, 0.0001]
# learning_rates = [0.001]
mini_batch = 500
reg_coeff = 0.0
train_iter = 20000
train_loss = []

for learning_rate in learning_rates:
    
    tf.reset_default_graph()
    train_loss_curr = []
    
    x = tf.placeholder(tf.float32, shape = [None, 784])
    y = tf.placeholder(tf.float32, shape = [None, 1])

    
    w = tf.Variable(tf.random_normal([784, 1], mean = 0, stddev = 0.1))
    
    b = tf.cast(tf.Variable(0), dtype = tf.float32)
    yhat = tf.add(tf.matmul(x, w), b)

    mse = mse_loss(yhat, y)
    regul = tf.multiply(reg_coeff, tf.reduce_mean(tf.square(w)))
    cost = tf.add(mse, regul)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    for epoch in range(train_iter):
#         x_batch, y_batch = tf.train.batch([trainData, trainTarget], mini_batch)
        
        trainBatchSampler = BatchSampler(trainData, trainTarget, mini_batch)
        x_batch, y_batch = trainBatchSampler.get_batch()
        
        sess.run(optimizer, feed_dict = {x: x_batch, y:y_batch})
        w_curr, b_curr, c = sess.run([w, b, cost], feed_dict = {x: x_batch, y:y_batch})
        train_loss_curr.append(c)
        
        if epoch%3500 is 0:
            print("epoch:", epoch/3500, " -> curr loss:", c)
            
    train_loss.append(train_loss_curr)
    
print("done")

epoch: 0.0  -> curr loss: 1.57022
epoch: 1.0  -> curr loss: 0.0335307
epoch: 2.0  -> curr loss: 0.0256465
epoch: 3.0  -> curr loss: 0.0215749
epoch: 4.0  -> curr loss: 0.0222005
epoch: 5.0  -> curr loss: 0.0195554
epoch: 0.0  -> curr loss: 1.01632
epoch: 1.0  -> curr loss: 0.0628631
epoch: 2.0  -> curr loss: 0.0505306
epoch: 3.0  -> curr loss: 0.033255
epoch: 4.0  -> curr loss: 0.02985
epoch: 5.0  -> curr loss: 0.0318562
epoch: 0.0  -> curr loss: 0.345789
epoch: 1.0  -> curr loss: 0.174148
epoch: 2.0  -> curr loss: 0.130619
epoch: 3.0  -> curr loss: 0.129413
epoch: 4.0  -> curr loss: 0.117767
epoch: 5.0  -> curr loss: 0.0991093
done


In [24]:
plotx = np.linspace(0, train_iter, train_iter)
ploty = np.array(train_loss)

plt.figure(1)
for i in range(ploty.shape[0]):
    plt.plot(plotx, ploty[i])

plt.savefig("linear_ques1.png")
plt.close()

# ques 2

In [21]:
#hyperparameters

#best learning rate from previous part
learning_rate = 0.005
# mini_batch = 500
mini_batches = [500, 1500, 3500]
reg_coeff = 0.0
train_iter = 20000
train_loss = []

for mini_batch in mini_batches:
    
    tf.reset_default_graph()
    start = time.time()
    train_loss_curr = []
    
    x = tf.placeholder(tf.float32, shape = [None, 784])
    y = tf.placeholder(tf.float32, shape = [None, 1])

    w = tf.Variable(tf.random_normal([784, 1], mean = 0, stddev = 0.1))
        
    b = tf.cast(tf.Variable(0), dtype = tf.float32)
    yhat = tf.add(tf.matmul(x, w), b)

    mse = mse_loss(yhat, y)
    regul = tf.multiply(reg_coeff, tf.reduce_mean(tf.square(w)))
    cost = tf.add(mse, regul)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    for epoch in range(train_iter):
#         x_batch, y_batch = tf.train.batch([trainData, trainTarget], mini_batch)
        
        trainBatchSampler = BatchSampler(trainData, trainTarget, mini_batch)
        x_batch, y_batch = trainBatchSampler.get_batch()
        
        sess.run(optimizer, feed_dict = {x: x_batch, y:y_batch})
        w_curr, b_curr, c = sess.run([w, b, cost], feed_dict = {x: x_batch, y: y_batch})
        
        
        if epoch%3500 is 0:
            print("epoch:", epoch/3500, " -> curr loss:", c)
            
        train_loss_curr.append(c)
        
    end = time.time()    
    train_loss.append(train_loss_curr)
    diff = end - start
    print(diff, " -> ", mini_batch)
    
print("done")

epoch: 0.0  -> curr loss: 0.629209
epoch: 1.0  -> curr loss: 0.189504
epoch: 2.0  -> curr loss: 0.21652
epoch: 3.0  -> curr loss: 0.20764
epoch: 4.0  -> curr loss: 0.204171
epoch: 5.0  -> curr loss: 0.204679
596.16224193573  ->  500
epoch: 0.0  -> curr loss: 0.653711
epoch: 1.0  -> curr loss: 0.196609
epoch: 2.0  -> curr loss: 0.200953
epoch: 3.0  -> curr loss: 0.206736
epoch: 4.0  -> curr loss: 0.200785
epoch: 5.0  -> curr loss: 0.201507
1793.1913492679596  ->  1500
epoch: 0.0  -> curr loss: 0.204779
epoch: 1.0  -> curr loss: 0.201141
epoch: 2.0  -> curr loss: 0.201209
epoch: 3.0  -> curr loss: 0.201211
epoch: 4.0  -> curr loss: 0.201215


KeyboardInterrupt: 

# ques 3

In [25]:
#hyperparameters

learning_rate = 0.005
mini_batch = 500
# reg_coeff = 0.0
reg_coeffs = [0.0, 0.001, 0.1, 1.0]
train_iter = 20000
valid_loss = []
test_loss = []

for reg_coeff in reg_coeffs:
    
    tf.reset_default_graph()
    valid_loss_curr = []
    test_loss_curr = []
    
    
    x = tf.placeholder(tf.float32, shape = [None, 784])
    y = tf.placeholder(tf.float32, shape = [None, 1])

    w = tf.Variable(tf.random_normal([784, 1], mean = 0, stddev = 0.1))
    
    b = tf.cast(tf.Variable(0), dtype = tf.float32)
    yhat = tf.add(tf.matmul(x, w), b)

    mse = mse_loss(yhat, y)
    regul = tf.multiply(reg_coeff, tf.reduce_mean(tf.square(w)))
    cost = tf.add(mse, regul)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    for epoch in range(train_iter):
#         x_batch, y_batch = tf.train.batch([trainData, trainTarget], mini_batch)
        
        trainBatchSampler = BatchSampler(trainData, trainTarget, mini_batch)
        x_batch, y_batch = trainBatchSampler.get_batch()
        
        sess.run(optimizer, feed_dict = {x: x_batch, y:y_batch})
        w_curr, b_curr, c = sess.run([w, b, cost], feed_dict = {x: x_batch, y:y_batch})
        
        if epoch%3500 is 0:
            print("epoch:", epoch/3500, " -> curr loss:", c)
            
        valid_error = sess.run(cost, feed_dict = {x: validData, y: validTarget})
        valid_loss_curr.append(valid_error)
        test_error = sess.run(cost, feed_dict = {x: testData, y: testTarget})
        test_loss_curr.append(test_error)
            
    valid_loss.append(valid_loss_curr)
    test_loss.append(test_loss_curr)
    
print(valid_loss)
print(test_loss)
print("done")

epoch: 0.0  -> curr loss: 0.885442
epoch: 1.0  -> curr loss: 0.0350061


KeyboardInterrupt: 