In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#use temporarily until tf.train bug is fixed 
#violet's mini batch code
class BatchSampler(object):
    '''
    A (very) simple wrapper to randomly sample batches without replacement.
    '''
    
    def __init__(self, data, targets, batch_size):
        self.num_points = data.shape[0]
        self.features = data.shape[1]
        self.data = data
        self.targets = targets
        self.batch_size = batch_size
        self.indices = np.arange(self.num_points)

    def random_batch_indices(self, m=None):
        if m is None:
            indices = np.random.choice(self.indices, self.batch_size, replace=False)
        else:
            indices = np.random.choice(self.indices, m, replace=False)
        return indices 

    def get_batch(self, m=None):
        '''
        Get a random batch without replacement from the dataset.
        If m is given the batch will be of size m. 
        Otherwise will default to the class initialized value.
        '''
        indices = self.random_batch_indices(m)
        X_batch = np.take(self.data, indices, 0)
        y_batch = self.targets[indices]
        return X_batch, y_batch

In [3]:
#loading data
with np.load("notMNIST.npz") as data :
    Data, Target = data ["images"], data["labels"]
    posClass = 2
    negClass = 9
    dataIndx = (Target==posClass) + (Target==negClass)
    Data = Data[dataIndx]/255.
    Target = Target[dataIndx].reshape(-1, 1)
    Target[Target==posClass] = 1
    Target[Target==negClass] = 0
    np.random.seed(521)
    randIndx = np.arange(len(Data))
    np.random.shuffle(randIndx)
    Data, Target = Data[randIndx], Target[randIndx]
    trainData, trainTarget = Data[:3500], Target[:3500]
    validData, validTarget = Data[3500:3600], Target[3500:3600]
    testData, testTarget = Data[3600:], Target[3600:]

In [4]:
trainData = np.reshape(trainData, [trainData.shape[0], 28*28])
validData = np.reshape(validData, [validData.shape[0], 28*28])
testData = np.reshape(testData, [testData.shape[0], 28*28])

In [5]:
#mse loss function
def mse_loss(yhat, target):
    loss = tf.reduce_mean(tf.pow(tf.subtract(yhat, target), 2))
    loss = tf.div(loss, 2)
    return loss

# ques 1

In [6]:
#hyperparameters

learning_rates = [0.005, 0.001, 0.0001]
# learning_rates = [0.005]
mini_batch = 500
reg_coeff = 0.0
train_iter = 20000
train_loss = []

for learning_rate in learning_rates:
    
    tf.reset_default_graph()
    train_loss_curr = []
    
    x = tf.placeholder(tf.float32, shape = [None, 784])
    y = tf.placeholder(tf.float32, shape = [None, 1])

    w = tf.Variable(np.random.randn())
    b = tf.cast(tf.Variable(0), dtype = tf.float32)
    yhat = tf.add(tf.multiply(x, w), b)

    mse = mse_loss(yhat, y)
    regul = tf.multiply(reg_coeff, tf.reduce_mean(tf.square(w)))
    cost = tf.add(mse, regul)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    for epoch in range(train_iter):
#         x_batch, y_batch = tf.train.batch([trainData, trainTarget], mini_batch)
        
        trainBatchSampler = BatchSampler(trainData, trainTarget, mini_batch)
        x_batch, y_batch = trainBatchSampler.get_batch()
        
        sess.run(optimizer, feed_dict = {x: x_batch, y:y_batch})
        w_curr, b_curr, c = sess.run([w, b, cost], feed_dict = {x: x_batch, y:y_batch})
        train_loss_curr.append(c)
        
        if epoch%3500 is 0:
            print("curr loss:", c)
            print("epoch:", epoch/3500)
            
    train_loss.append(train_loss_curr)
    
print("done")

curr loss: 0.335863
epoch: 0.0
curr loss: 0.197385
epoch: 1.0
curr loss: 0.207153
epoch: 2.0
curr loss: 0.196189
epoch: 3.0
curr loss: 0.205951
epoch: 4.0
curr loss: 0.19697
epoch: 5.0
curr loss: 0.199093
epoch: 0.0
curr loss: 0.211201
epoch: 1.0
curr loss: 0.218195
epoch: 2.0
curr loss: 0.191294
epoch: 3.0
curr loss: 0.201303
epoch: 4.0
curr loss: 0.195375
epoch: 5.0
curr loss: 0.542416
epoch: 0.0
curr loss: 0.494644
epoch: 1.0
curr loss: 0.424321
epoch: 2.0
curr loss: 0.397807
epoch: 3.0
curr loss: 0.350537
epoch: 4.0
curr loss: 0.317258
epoch: 5.0
done


In [7]:
plotx = np.linspace(0, train_iter, train_iter)
ploty = np.array(train_loss)

plt.figure(1)
for i in range(ploty.shape[0]):
    plt.plot(plotx, ploty[i])

plt.savefig("linear_ques1.png")
plt.close()

# ques 2

In [8]:
#hyperparameters

#best learning rate from previous part
learning_rate = 0.005
# mini_batch = 500
mini_batches = [500, 1500, 3500]
reg_coeff = 0.0
train_iter = 20000
train_loss = []

for mini_batch in mini_batches:
    
    tf.reset_default_graph()
    train_loss_curr = []
    
    x = tf.placeholder(tf.float32, shape = [None, 784])
    y = tf.placeholder(tf.float32, shape = [None, 1])

    w = tf.Variable(np.random.randn())
    b = tf.cast(tf.Variable(0), dtype = tf.float32)
    yhat = tf.add(tf.multiply(x, w), b)

    mse = mse_loss(yhat, y)
    regul = tf.multiply(reg_coeff, tf.reduce_mean(tf.square(w)))
    cost = tf.add(mse, regul)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    for epoch in range(train_iter):
#         x_batch, y_batch = tf.train.batch([trainData, trainTarget], mini_batch)
        
        trainBatchSampler = BatchSampler(trainData, trainTarget, mini_batch)
        x_batch, y_batch = trainBatchSampler.get_batch()
        
        sess.run(optimizer, feed_dict = {x: x_batch, y:y_batch})
        w_curr, b_curr, c = sess.run([w, b, cost], feed_dict = {x: x_batch, y:y_batch})
        train_loss_curr.append(c)
        
        if epoch%3500 is 0:
            print("curr loss:", c)
            print("epoch:", epoch/3500)
            
    train_loss.append(train_loss_curr)
    
print("done")

curr loss: 0.21795
epoch: 0.0
curr loss: 0.193324
epoch: 1.0
curr loss: 0.201922
epoch: 2.0
curr loss: 0.216989
epoch: 3.0
curr loss: 0.206633
epoch: 4.0
curr loss: 0.204542
epoch: 5.0
curr loss: 0.333588
epoch: 0.0
curr loss: 0.200437
epoch: 1.0
curr loss: 0.196818
epoch: 2.0
curr loss: 0.201036
epoch: 3.0
curr loss: 0.206048
epoch: 4.0
curr loss: 0.201586
epoch: 5.0
curr loss: 1.31265
epoch: 0.0
curr loss: 0.200931
epoch: 1.0
curr loss: 0.201214
epoch: 2.0
curr loss: 0.201211
epoch: 3.0
curr loss: 0.201209
epoch: 4.0
curr loss: 0.201202
epoch: 5.0
done


In [9]:
plotx = np.linspace(0, train_iter, train_iter)
ploty = np.array(train_loss)

plt.figure(2)
for i in range(ploty.shape[0]):
    plt.plot(plotx, ploty[i])

plt.savefig("linear_ques2.png")
plt.close()

# ques 3

In [10]:
#hyperparameters

learning_rate = 0.005
mini_batch = 500
# reg_coeff = 0.0
reg_coeffs = [0.0, 0.001, 0.1, 1.0]
train_iter = 20000
train_loss = []

for reg_coeff in reg_coeffs:
    
    tf.reset_default_graph()
    train_loss_curr = []
    
    x = tf.placeholder(tf.float32, shape = [None, 784])
    y = tf.placeholder(tf.float32, shape = [None, 1])

    w = tf.Variable(np.random.randn())
    b = tf.cast(tf.Variable(0), dtype = tf.float32)
    yhat = tf.add(tf.multiply(x, w), b)

    mse = mse_loss(yhat, y)
    regul = tf.multiply(reg_coeff, tf.reduce_mean(tf.square(w)))
    cost = tf.add(mse, regul)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    for epoch in range(train_iter):
#         x_batch, y_batch = tf.train.batch([trainData, trainTarget], mini_batch)
        
        trainBatchSampler = BatchSampler(trainData, trainTarget, mini_batch)
        x_batch, y_batch = trainBatchSampler.get_batch()
        
        sess.run(optimizer, feed_dict = {x: x_batch, y:y_batch})
        w_curr, b_curr, c = sess.run([w, b, cost], feed_dict = {x: x_batch, y:y_batch})
        train_loss_curr.append(c)
        
        if epoch%3500 is 0:
            print("curr loss:", c)
            print("epoch:", epoch/3500)
            
    train_loss.append(train_loss_curr)
    
print("done")

curr loss: 0.47323
epoch: 0.0
curr loss: 0.210846
epoch: 1.0
curr loss: 0.197415
epoch: 2.0
curr loss: 0.195998
epoch: 3.0
curr loss: 0.201153
epoch: 4.0
curr loss: 0.208757
epoch: 5.0
curr loss: 0.221294
epoch: 0.0
curr loss: 0.200927
epoch: 1.0
curr loss: 0.198509
epoch: 2.0
curr loss: 0.205992
epoch: 3.0
curr loss: 0.211912
epoch: 4.0
curr loss: 0.201376
epoch: 5.0
curr loss: 0.663009
epoch: 0.0
curr loss: 0.222919
epoch: 1.0
curr loss: 0.229209
epoch: 2.0
curr loss: 0.210053
epoch: 3.0
curr loss: 0.213197
epoch: 4.0
curr loss: 0.213646
epoch: 5.0
curr loss: 3.40812
epoch: 0.0
curr loss: 0.250492
epoch: 1.0
curr loss: 0.239719
epoch: 2.0
curr loss: 0.233635
epoch: 3.0
curr loss: 0.256865
epoch: 4.0
curr loss: 0.242933
epoch: 5.0
done


In [11]:
plotx = np.linspace(0, train_iter, train_iter)
ploty = np.array(train_loss)

plt.figure(3)
for i in range(ploty.shape[0]):
    plt.plot(plotx, ploty[i])

plt.savefig("linear_ques3.png")
plt.close()