In [5]:
import tensorflow as tf
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import time

In [6]:
class BatchSampler(object):
    '''
    A (very) simple wrapper to randomly sample batches without replacement.
    '''
    
    def __init__(self, data, targets, batch_size):
        self.num_points = data.shape[0]
        self.features = data.shape[1]
        self.data = data
        self.targets = targets
        self.batch_size = batch_size
        self.indices = np.arange(self.num_points)

    def random_batch_indices(self, m=None):
        if m is None:
            indices = np.random.choice(self.indices, self.batch_size, replace=False)
        else:
            indices = np.random.choice(self.indices, m, replace=False)
        return indices 

    def get_batch(self, m=None):
        '''
        Get a random batch without replacement from the dataset.
        If m is given the batch will be of size m. 
        Otherwise will default to the class initialized value.
        '''
        indices = self.random_batch_indices(m)
        X_batch = np.take(self.data, indices, 0)
        y_batch = self.targets[indices]
        return X_batch, y_batch  


In [7]:
def plotFig(_num, _dim, y , addInfo, title="default", xLabel="xlabel", yLabel="yLabel", plotLabel ="plotLabel" ):
    x = np.linspace(0, _dim, num=_dim)
   
    
    y = np.array(y)
    print(y.shape)
    plt.figure(_num)
    plt.title(title)
    plt.xlabel(xLabel)
    plt.ylabel(yLabel)
    for i in range(y.shape[0]):
        plt.plot(x, y[i], label = plotLabel + str(addInfo[i]))
        
    plt.legend()
    plt.savefig( title + str(_num) + ".png")
    plt.close()
    plt.clf()
    

In [8]:
def arrFlatten(arr):
    
    print(arr.shape)

    dataDim1, dum1, dum2 = arr.shape
    arr = np.reshape(arr, [ dataDim1 ,784 ])
    return arr


def loadBinData(linEqn = False):
# import binary NOTMIST data set
    with np.load("notMNIST.npz") as data :
        Data, Target = data ["images"], data["labels"]
        posClass = 2
        negClass = 9
        dataIndx = (Target==posClass) + (Target==negClass)
        Data = Data[dataIndx]/255.
        Target = Target[dataIndx].reshape(-1, 1)
        Target[Target==posClass] = 1
        Target[Target==negClass] = 0
        
        Data = arrFlatten(Data)
        
        if linEqn:
            Data = np.concatenate((np.ones((Data.shape[0], 1)),Data),axis=1)
        
        
        np.random.seed(521)
        randIndx = np.arange(len(Data))
        np.random.shuffle(randIndx)
        Data, Target = Data[randIndx], Target[randIndx]
        trainData, trainTarget = Data[:3500], Target[:3500]
        validData, validTarget = Data[3500:3600], Target[3500:3600]
        testData, testTarget = Data[3600:], Target[3600:]
        
    print("Data binary class Loaded")
    print("-------------------------------")
    return trainData, trainTarget,validData, validTarget,\
        testData, testTarget




def linearMSE(y_hat, target):
    '''
    y_hat, target will be fed
    '''
    print("y_hat",  y_hat.shape)
    print("target", target.shape)
    
    y_hat = tf.convert_to_tensor(y_hat)
    target = tf.convert_to_tensor(target)
    
    target = tf.cast(target, dtype = 'float64')
    se_mat = tf.square(tf.subtract(y_hat, target))
    #print("msemst", mse_mat)
    mse_mat = tf.reduce_mean(se_mat)

    loss = tf.reduce_mean(mse_mat)

    loss = loss/2.0 #tf.div(loss, tf.constant(2.0))
    #print(mse_mat.eval())

    return loss  

def accuracy(y_hat, target, classType = 0):
    if classType == 0:
        correctCases = tf.equal(tf.cast(tf.greater_equal(y_hat, 0.5), tf.float64), tf.floor(target))
    else:
        correctCases = tf.equal(tf.argmax(y_hat, 1), tf.argmax(target, 1))

    accuracy = tf.reduce_mean(tf.cast(correctCases, dtype="float"))
    
    return accuracy

    
def fit_regression(X,Y,  validX, validY, testX, testY):
    #TODO: implement linear regression
    # Remember to use np.linalg.solve instead of inverting!
    #X = np.concatenate((np.ones((X.shape[0],1)),X),axis=1) #add constant one feature - no bias needed
    xtx = np.dot(np.transpose(X), X)
    xty = np.dot(np.transpose(X), Y)
    w = np.linalg.solve(xtx, xty)
    #w = np.linalg.inv(xtx, xty)
    
    testYhat = np.dot(testX, w)
    validYhat = np.dot(validX, w)
    #print(testYhat)
    
    
    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)
    validErr = linearMSE(validYhat, validY)
    testErr = linearMSE(testYhat, testY)
    print("in linear normal eqn")
    print("validErr", validErr.eval(), "testErr", testErr.eval())



def linearNormalEqn(trainData, trainTarget):
    y_target = tf.cast(trainTarget, dtype='float32')
    
    onesX = tf.ones(shape=tf.stack([tf.shape(trainData)[0], 1]))
    appendOnesX = tf.concat([trainData, onesX], 1)
    w_star = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(appendOnesX),\
                                                appendOnesX)), tf.transpose(appendOnesX)), y_target)
    
    print(w_star)
    pred_y = tf.matmul(appendOnesX, w_star)
    print(pred_y)
    y_hat = tf.cast(tf.greater_equal(pred_y, 0.5), tf.float32) #float
    print("######################")
    print("linear normal equation")
    mseError = linearMSE(y_hat, trainTarget)
    print("mse error", mseError)
    
    return mseError
'''
def linearNormalEqn(trainData, trainTarget):

    X = tf.placeholder(tf.float32, shape=[None, 784], name='dataX')
    b = tf.Variable(0.0, name='biases')
    y_target = tf.placeholder(tf.float32, shape=[None, 1], name='targetY')
    
    onesX = tf.ones(shape=(1, 784))   #(shape=tf.stack([tf.shape(X)[0], 1]))
    appendOnesX = tf.concat([X, onesX], 0) #1 for offset b
    print(appendOnesX)
    w_star = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(appendOnesX),\
                                                appendOnesX)), tf.transpose(appendOnesX)), y_target)

    print(w_star)
    pred_y = tf.matmul(appendOnesX, w_star)
    print(pred_y)
    y_hat = tf.cast(tf.greater_equal(pred_y, 0.5), tf.float32) #float to bool to float 1 or 0
    print(y_hat)
    mseError = linearMSE(y_hat, trainTarget)
    
    
    #finish build graph
    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)
    errTrain = sess.run([mseError], feed_dict={X: trainData, y_target: trainTarget})

    
    print("######################")
    print("linear normal equation")
    print("mse error", mse)
    return X, y_target, errTrain


'''
def linearBuildGraph(_regLambda, _learningRate, gd = True):
    '''
    Input: _data is x in the equation, dim by 784 flattened tensor
       _target is y in the equaion
       _regLambda is the wegithed decay coeff
       _learningRate is the epsilon
    '''
    _regLambda = tf.cast(_regLambda, dtype = tf.float64)

    #declare using a placeholder, feed in _data and _target to x ,y 
    #x_dim, dum1 =_data.get_shape().as_list()
    X = tf.placeholder(tf.float64, shape=[None, 784], name='dataX')
    # W initialize to a gaussian distr, honestly anything would work
    W = tf.Variable(tf.truncated_normal(shape=[784, 1], stddev=0.1), name='weights')
    W = tf.cast(W, dtype=tf.float64)
    b = tf.Variable(0.0, name='biases')
    b = tf.cast(b, dtype=tf.float64)

    y_target = tf.placeholder(tf.float64, shape=[None, 1], name='targetY')
    
    #compute the current y_hat
    y_hat =  tf.matmul(X, W) + b
    #compute the current loss
    
    
    mseCurr = linearMSE(y_hat, y_target)
    print("mseerRor 1", mseCurr)

    #compute the decay/regularization term
    regTerm =tf.multiply(tf.constant(0.50, dtype = tf.float64), tf.multiply(_regLambda, tf.reduce_mean(tf.square(W))))
    
    print("regTerm", regTerm)
    mseCurr = tf.add(mseCurr, regTerm)
    print("mseerRor", mseCurr)
    
    
    if gd:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = _learningRate)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate = _learningRate)
    
    train = optimizer.minimize(loss=mseCurr)

    return W, b, mseCurr, y_hat, X, y_target, train
    

def runLinearGraphPart1(trainData, trainTarget ):
    
    '''
    Input: _data,
           _target,
           _numIters
    Output:
           required accuracy/epoch plots
           
    '''
    
    regLambda = 0.0
    learningRateArr = [0.005, 0.001, 0.0001]
    numIter = 20000
    numEpoch =int(np.ceil(20000/7))
    batchSize = 500
    epochTrainSize = 3500
    trainLossAll = []
    
    for learningRate in learningRateArr:
        trainLossLR = []
        tf.reset_default_graph()
        W, b, mseError, y_hat, X, y_target, train = linearBuildGraph(regLambda, learningRate)
    

        init = tf.global_variables_initializer()
        sess = tf.InteractiveSession()
        sess.run(init)
        initialW = sess.run(W)  
        initialb = sess.run(b)            
        #training model and iter through batches
        print("learningrate = ", learningRate)
        
        for i in range(numIter):
            trainBatchSampler = BatchSampler(trainData, trainTarget, batchSize)
            dataBatch, targetBatch = trainBatchSampler.get_batch()
            #dataBatch = tf.stack(dataBatch)
            #targetBatch = tf.stack(targetBatch)
            currentW, currentb, errTrain, y_predict, trainModel = sess.run([W, b, mseError, y_hat, train], feed_dict={X: dataBatch, y_target: targetBatch})
            trainLossLR.append(errTrain)
            if i%3500 == 0:
                print("current err", errTrain)
                print("epoch ", i/3500)
        trainLossAll.append(trainLossLR)
        
    print("train done")
    plotFig(1, numIter, trainLossAll, learningRateArr,  title = "loss vs number of epoches",\
            plotLabel="learning rate")
    
    
def runLinearGraphPart2(trainData, trainTarget ):
    
    '''
    Input: _data,
           _target,
           _numIters
    Output:required accuracy/epoch plots
           
           
    '''
    
    regLambda = 0.0
    learningRate = 0.005 #chosen from part1
    numIter = 20000
    numEpoch =int(np.ceil(20000/7))
    batchSizeArr = [500, 1500, 3500]
    epochTrainSize = 3500
    trainLossAll = []
    
    for batchSize in batchSizeArr:
        start_time = time.time()
        
        trainLossLR = []
        tf.reset_default_graph()
        W, b, mseError, y_hat, X, y_target, train = linearBuildGraph(regLambda, learningRate)
    

        init = tf.global_variables_initializer()
        sess = tf.InteractiveSession()
        sess.run(init)
        initialW = sess.run(W)  
        initialb = sess.run(b)
        
        print("batchSize", batchSize)
        for i in range(numIter):
            trainBatchSampler = BatchSampler(trainData, trainTarget, batchSize)
            dataBatch, targetBatch = trainBatchSampler.get_batch()
            #dataBatch = tf.stack(dataBatch)
            #targetBatch = tf.stack(targetBatch)
            currentW, currentb, errTrain, y_predict, trainModel = sess.run([W, b, mseError, y_hat, train], feed_dict={X: dataBatch, y_target: targetBatch})
            #trainLossLR.append(errTrain)
            if i%3500 == 0:
                print("current err", errTrain)
                print("epoch ", i/3500)
        
        trainLossAll.append(errTrain)
        end = time.time()
        elapsed = end - start_time
        print("time ", elapsed, "batchsize", batchSize )
    print("trainLosssAll", trainLossAll)    

    
def runLinearGraphPart3(trainData, trainTarget, validData, validTarget,testData, testTarget):
    
    '''
    Input: _data,
           _target,
           _numIters
    Output:
           Lambda results
           
    '''
    
    regLambdaArr = [0.0, 0.001, 0.1, 1.0]
    learningRate = 0.005
    numIter = 20000
    numEpoch =int(np.ceil(20000/7))
    batchSize = 500
    epochTrainSize = 3500
    errTestAll = []
    errValidAll = []
    
    for regLambda in regLambdaArr:
        errValidArr = []
        errTestArr = []
        tf.reset_default_graph()
        W, b, mseError, y_hat, X, y_target, train = linearBuildGraph(regLambda, learningRate)
    

        init = tf.global_variables_initializer()
        sess = tf.InteractiveSession()
        sess.run(init)
        initialW = sess.run(W)  
        initialb = sess.run(b)            
        #training model and iter through batches
        print("learningrate = ", learningRate)
        for i in range(numIter):
            trainBatchSampler = BatchSampler(trainData, trainTarget, batchSize)
            dataBatch, targetBatch = trainBatchSampler.get_batch()
            #dataBatch = tf.stack(dataBatch)
            #targetBatch = tf.stack(targetBatch)
            currentW, currentb, errTrain, y_predict, trainModel = sess.run([W, b, mseError, y_hat, train], feed_dict={X: dataBatch, y_target: targetBatch})
            if i%3500 == 0:
                print("current err", errTrain)
                print("epoch ", i/3500)
        
            errValid = sess.run(mseError, feed_dict={X: validData, y_target: validTarget})
            errTest = sess.run(mseError, feed_dict={X: testData, y_target: testTarget})
            errValidArr.append(errValid)
            errTestArr.append(errTest)
        
        errValidAll.append(errValidArr)
        errTestAll.append(errTestArr)
    
    errValidAll = np.array(errValidAll)
    errTestAll = np.array(errTestAll)
    #print(errValidAll)
    for i in range(errValidAll.shape[0]):
        best = np.amin(errValidAll[i])
        print("bset err valid ",best, "lambda", regLambdaArr[i])
    
    for i in range(errTestAll.shape[0]):
        best = np.amin(errTestAll[i])
        print("bset err test ",best, "lambda", regLambdaArr[i])
    


def runQ2Part3Linear(trainData, trainTarget, validData, validTarget,testData, testTarget):
    '''
    labmda = 0
    n = 0.001
    adam optimizer for linear
    '''
    regLambda = 0.0
    learningRate = 0.001
    learningRateArr = [0.001] #just for plotting 

    numIter = 5000
    numEpoch =int(np.ceil(20000/7))
    batchSize = 500
    epochTrainSize = 3500 #for training data
    trainLossAll = []

    trainLossLR = []
    tf.reset_default_graph()
    W, b, crossEntropyErrorCurr, y_hat, X, y_target, train = linearBuildGraph(regLambda, learningRate, False)
    #y_hat_mse, target_mse, mseLoss = linearMSE()
    
    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)
    initialW = sess.run(W)  
    initialb = sess.run(b)            
    #training model and iter through batches
    print("learningrate = ", learningRate)
    trainBatchSampler = BatchSampler(trainData, trainTarget, batchSize)

    for i in range(numIter):
        dataBatch, targetBatch = trainBatchSampler.get_batch()
        #dataBatch = tf.stack(dataBatch)
        #targetBatch = tf.stack(targetBatch)
        currentW, currentb, errTrain, y_predict, trainModel = sess.run([W, b, crossEntropyErrorCurr, y_hat, train], feed_dict={X: dataBatch, y_target: targetBatch})
        trainLossLR.append(errTrain)
        #mseLoss = linearMSE()
        if i%3500 == 0:
            print("current err", errTrain)
            print("epoch ", i/3500)
        
    trainLossAll.append(trainLossLR)
    
    print("train done")
    plotFig(4, numIter, trainLossAll, learningRateArr,  title = "q2-3 Adam Opt lambda = 0 linear loss vs number of epoches",\
            plotLabel="learning rate")
    
    
    
    
    
    

if __name__ == '__main__':  
        

    #trainTarget = arrFlatten(trainTarget)
    trainData, trainTarget, validData, validTarget,testData, testTarget = loadBinData(False)
    
    
    #data is (3500, 28, 28)
    #the label [1] or [0] is stored in target

    fit_regression(trainData, trainTarget, validData, validTarget,testData, testTarget)

    
    #trainData = arrFlatten(trainData)
    #validData = arrFlatten(validData)
    #testData = arrFlatten(testData)
    print("****** START Q1.1 *****")
    runLinearGraphPart1(trainData, trainTarget)
    print("****** START Q1.2 *****")
    runLinearGraphPart2(trainData, trainTarget)
    runLinearGraphPart3(trainData, trainTarget, validData, validTarget,testData, testTarget )

    
    print("*********START Q2 PART 3 LINEAR*******")
    runQ2Part3Linear(trainData, trainTarget, validData, validTarget,testData, testTarget)
    
   

(3745, 28, 28)
Data binary class Loaded
-------------------------------
y_hat (100, 1)
target (100, 1)
y_hat (145, 1)
target (145, 1)
in linear normal eqn
validErr 0.0226958099498 testErr 0.0369294998731
****** START Q1.1 *****
y_hat (?, 1)
target (?, 1)
mseerRor 1 Tensor("truediv:0", shape=(), dtype=float64)
regTerm Tensor("Mul_1:0", shape=(), dtype=float64)
mseerRor Tensor("Add:0", shape=(), dtype=float64)
learningrate =  0.005
current err 0.511327268798
epoch  0.0
current err 0.0256642873419
epoch  1.0
current err 0.018307179221
epoch  2.0
current err 0.0172149878759
epoch  3.0
current err 0.0179809822094
epoch  4.0
current err 0.0155519387488
epoch  5.0
y_hat (?, 1)
target (?, 1)
mseerRor 1 Tensor("truediv:0", shape=(), dtype=float64)
regTerm Tensor("Mul_1:0", shape=(), dtype=float64)
mseerRor Tensor("Add:0", shape=(), dtype=float64)
learningrate =  0.001
current err 2.03010084713
epoch  0.0
current err 0.0573251676646
epoch  1.0
current err 0.0356191352479
epoch  2.0
current err 0

KeyboardInterrupt: 

<matplotlib.figure.Figure at 0x11ca7dcf8>

time  28.17647886276245 batchsize 500
time  142.55758595466614 batchsize 1500
time  341.4439833164215 batchsize 3500
trainLosssAll [0.013990995, 0.015074042, 0.014701741]

bset err valid  0.0174993 lambda 0.0
bset err valid  0.0200263 lambda 0.001
bset err valid  0.019381 lambda 0.1
bset err valid  0.0181959 lambda 1.0
bset err test  0.0212588 lambda 0.0
bset err test  0.0222249 lambda 0.001
bset err test  0.0217733 lambda 0.1
bset err test  0.0242665 lambda 1.0