In [4]:
import tensorflow as tf
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import time

class BatchSampler(object):
    '''
    A (very) simple wrapper to randomly sample batches without replacement.
    '''
    
    def __init__(self, data, targets, batch_size):
        self.num_points = data.shape[0]
        self.features = data.shape[1]
        self.data = data
        self.targets = targets
        self.batch_size = batch_size
        self.indices = np.arange(self.num_points)

    def random_batch_indices(self, m=None):
        if m is None:
            indices = np.random.choice(self.indices, self.batch_size, replace=False)
        else:
            indices = np.random.choice(self.indices, m, replace=False)
        return indices 

    def get_batch(self, m=None):
        '''
        Get a random batch without replacement from the dataset.
        If m is given the batch will be of size m. 
        Otherwise will default to the class initialized value.
        '''
        indices = self.random_batch_indices(m)
        X_batch = np.take(self.data, indices, 0)
        y_batch = self.targets[indices]
        return X_batch, y_batch  

def convertTarget(targetValues):
    numClasses = np.max(targetValues) + 1
    return np.eye(numClasses)[targetValues]

def linearMSE():
    '''
    TODO: the MSE calculation
    '''
    #print("y_hat",  y_hat)
    y_hat = tf.placeholder(tf.float64, shape=[None,10 ], name='y_hat')
    target = tf.placeholder(tf.float64, shape=[None, 10], name='target')
    
    se_mat = tf.square(tf.subtract(y_hat, target))
    #print("msemst", mse_mat)
    mse_mat = tf.reduce_mean(se_mat)
    loss = tf.reduce_mean(mse_mat)
    loss = tf.div(loss, tf.constant(2.0, dtype = tf.float64))

    return y_hat, target, loss        
    
    
##notMNIST
def loadNotMnist():
    with np.load("notMNIST.npz") as data:
        Data, Target = data ["images"], data["labels"]
        np.random.seed(521)
        randIndx = np.arange(len(Data))
        np.random.shuffle(randIndx)
        Data = Data[randIndx]/255.
        Target = Target[randIndx]
        trainData, trainTarget = Data[:15000], Target[:15000]
        validData, validTarget = Data[15000:16000], Target[15000:16000]
        testData, testTarget = Data[16000:], Target[16000:]
        
        trainTarget = convertTarget(trainTarget)
        validTarget = convertTarget(validTarget)
        testTarget = convertTarget(testTarget)
    return trainData, trainTarget, validData, validTarget, testData, testTarget

def arrFlatten(arr):

    dataDim1, dum1, dum2 = arr.shape
    arr = np.reshape(arr, [ dataDim1 ,784 ])
    return arr

def plotFig(_num, _dim, y , addInfo, title="default", xLabel="xlabel", yLabel="yLabel", plotLabel ="plotLabel" ):
    x = np.linspace(0, _dim, num=_dim)
   
    
    y = np.array(y)
    print(y.shape)
    plt.figure(_num)
    plt.title(title)
    plt.xlabel(xLabel)
    plt.ylabel(yLabel)
    for i in range(y.shape[0]):
        plt.plot(x, y[i], label = plotLabel + str(addInfo[i]))
        
    plt.legend()
    plt.savefig( title + str(_num) + ".png")
    plt.close()
    plt.clf()
    

def buildGraphMINIST(_regLambda, _learningRate, gd):
    '''
    Input: _data is x in the equation, dim by 784 flattened tensor
       _target is y in the equaion
       _regLambda is the wegithed decay coeff
       _learningRate is the epsilon
    '''
    _regLambda = tf.cast(_regLambda, dtype = tf.float64)
    #declare using a placeholder, feed in _data and _target to x ,y 
    X = tf.placeholder(tf.float64, shape=[None, 784], name='dataX')
    # W initialize to a gaussian distr, honestly anything would work
    W = tf.Variable(tf.truncated_normal(shape=[784, 10], stddev=0.1), name='weights')
    W = tf.cast(W, dtype=tf.float64)
    b = tf.Variable(0.0, name='biases')
    b = tf.cast(b, dtype=tf.float64)

    y_target = tf.placeholder(tf.float64, shape=[None, 10], name='targetY')
    
    #compute the current y_hat
    wtxb =  tf.matmul(X, W) + b
    y_hat = tf.sigmoid(wtxb)
    #compute the current loss
    crossEntropyErrorCurr = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_target, logits= y_hat))

    #compute the decay/regularization term
    regTerm = tf.multiply( tf.constant(0.50, dtype = tf.float64), tf.multiply(_regLambda, tf.reduce_mean(tf.square(W))))
    print("regTerm", regTerm)
    crossEntropyErrorCurr = tf.add(crossEntropyErrorCurr, regTerm)
    print("crossEntropyErrorCurr", crossEntropyErrorCurr)
    
    if gd == True:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = _learningRate)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate = _learningRate)
    
    train = optimizer.minimize(loss=crossEntropyErrorCurr)

    return W, b, crossEntropyErrorCurr, y_hat, X, y_target, train



def runGraphMINST(trainData, trainTarget, validData, validTarget, testData, testTarget):
    regLambda = 0.01
    batchSize = 500
    learningRateArr = [0.005, 0.001, 0.0001] #we need to tune
    numIter = 5000
    numEpoch =int(np.ceil(trainData.shape[0]/7)) 
    
    crossEntropyArr = []
    mseLossArr = []
    
    for learningRate in learningRateArr:
        crossEntropyL = []
        mseLossL = []
        
        tf.reset_default_graph()
        W, b, crossEntropyErrorCurr, y_hat, X, y_target, train = buildGraphMINIST(regLambda, learningRate, True)
        y_hat_mse, target_mse, mseLoss = linearMSE()

        init = tf.global_variables_initializer()
        sess = tf.InteractiveSession()
        sess.run(init)
        initialW = sess.run(W)  
        initialb = sess.run(b)            
        #training model and iter through batches
        print("learningrate = ", learningRate)
        trainBatchSampler = BatchSampler(trainData, trainTarget, batchSize)

        for i in range(numIter):
            dataBatch, targetBatch = trainBatchSampler.get_batch()
            #dataBatch = tf.stack(dataBatch)
            #targetBatch = tf.stack(targetBatch)
            currentW, currentb, errTrain, y_predict, trainModel = sess.run([W, b, crossEntropyErrorCurr, y_hat, train], feed_dict={X: dataBatch, y_target: targetBatch})
            crossEntropyL.append(errTrain)
            mseLossVal = sess.run(mseLoss, feed_dict={y_hat_mse: y_predict, target_mse: targetBatch})
            #print("mselossVal", mseLossVal)
            mseLossL.append(mseLossVal)
            
            if i%3500 == 0:
                print("current err", errTrain)
                print("epoch ", i/3500)
        
        crossEntropyArr.append(crossEntropyL)
        mseLossArr.append(mseLossL)
        
    print("train done")
    plotFig(1, numIter, crossEntropyArr, learningRateArr,  title = "Q2-2 train crossEntropy loss vs number of epoches",\
            plotLabel="learning rate")
    plotFig(2, numIter, mseLossArr, learningRateArr, title = "Q2-2 train MSE loss vs number of epoches",\
           plotLabel = "learning rate")
    
    #test error and valid error
    validHat = sess.run(y_hat, feed_dict={X: validData, y_target: validTarget})
    testHat = sess.run(y_hat, feed_dict={X: testData, y_target: testTarget} )
    validError = linearMSE(validHat, validTarget)
    testError = lienrMSE(testHat, testTarget)

    print("valideRROR", validError, "testerror", testError)
    
     


        
    
if __name__ == '__main__':  
    
    trainData, trainTarget, validData, validTarget, testData, testTarget = loadNotMnist()
    trainData = arrFlatten(trainData)
    validData = arrFlatten(validData)
    testData = arrFlatten(testData)
    runGraphMINST(trainData, trainTarget, validData, validTarget, testData, testTarget)

regTerm Tensor("Mul_1:0", shape=(), dtype=float64)
crossEntropyErrorCurr Tensor("Add:0", shape=(), dtype=float64)
learningrate =  0.005
current err 2.33223189584
epoch  0.0
current err 1.76116270351
epoch  1.0
regTerm Tensor("Mul_1:0", shape=(), dtype=float64)
crossEntropyErrorCurr Tensor("Add:0", shape=(), dtype=float64)
learningrate =  0.001
current err 2.32930888935
epoch  0.0
current err 2.0164962958
epoch  1.0
regTerm Tensor("Mul_1:0", shape=(), dtype=float64)
crossEntropyErrorCurr Tensor("Add:0", shape=(), dtype=float64)
learningrate =  0.0001
current err 2.32324712802
epoch  0.0
current err 2.26235647752
epoch  1.0
train done
(3, 5000)
(3, 5000)


TypeError: linearMSE() takes 0 positional arguments but 2 were given

<matplotlib.figure.Figure at 0x1319cbd68>