In [68]:
import tensorflow as tf
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import time

#Const def
ERROR_MSE ="mse"
ERROR_ENTROPY = "entropy"



class BatchSampler(object):
    '''
    A (very) simple wrapper to randomly sample batches without replacement.
    '''
    
    def __init__(self, data, targets, batch_size):
        self.num_points = data.shape[0]
        self.features = data.shape[1]
        np.random.shuffle(data)
        np.random.shuffle(targets)
        self.data = data
        self.targets = targets
        self.batch_size = batch_size
        self.indices = np.arange(self.num_points)
        self.epoch = int(np.floor(data.shape[0]/self.batch_size)) 
        self.i = 0
        
        
        
    def random_batch_indices(self):
        index1 = self.i * self.batch_size
        index2 = (self.i + 1) * self.batch_size
        
        if self.i == self.epoch:
            self.i = 0
        else:
            self.i +=1

        return int(index1), int(index2) 

    def get_batch(self, m=None):
        
        index1, index2 = self.random_batch_indices()
        X_batch = self.data[index1:index2]
        y_batch = self.targets[index1:index2]
        return X_batch, y_batch  


class classification:
    #def __init__(self):

    def __init__(self, _numClass = 1, _numPixel = 784, _regLambda = 0.01, _regLambdaArr = None, _batchSize = 500, \
                 _batchSizeArr = None,\
                 _learningRate = 0.001, _learningRateArr = None, _numIter = 5000, _gd = 0, _err =0, _qn=0):
        self.regLambda = _regLambda #_regLambda
        #self.regLambda = tf.cast(self.regLambda, dtype = tf.float64)

        self.batchSize = _batchSize
        self.learningRate = _learningRate
        self.numIter = _numIter
        self.numPixel = _numPixel
        #self.numEpoch =int(np.ceil(trainData.shape[0]/7)) 
        
        #arrays, default to [one value] if arr is None
        self.regLambdaArr =(_regLambdaArr, _regLambda)
        self.batchSizeArr = (_batchSizeArr, _batchSize)
        self.learningRateArr = (_learningRateArr, _learningRate)
        
        
        self.gd =  _gd  #0 for normal GD, 1 for adam 
        self.err = _err #for type of error to minimize, 1 for default corssEntropy, 0 MSE error
        self.qn = _qn #question type, logistic = 1, linear = 0
        self.numClass = _numClass #1 for binary, other for num of classes in multiclass
        
        #optional    
        self.crossEntropyArr = []
        self.mseLossArr = []

    def paramArrInit(self, paramArr, param):
        if paramArr is None:
            param = [param]
        return paramArr
        
    
    def linearMSE(self):
        '''
        y_hat, target will be fed

        shape = [dim by 1] for binary classification
        '''
        #print("y_hat",  y_hat)
        y_hat = tf.placeholder(tf.float64, shape=[None,self.numClass], name='y_hat')
        target = tf.placeholder(tf.float64, shape=[None, self.numClass], name='target')
    
        se_mat = tf.square(tf.subtract(y_hat, target))
        #print("msemst", mse_mat)
        mse_mat = tf.reduce_mean(se_mat)
        loss = tf.reduce_mean(mse_mat)
        loss = tf.div(loss, tf.constant(2.0, dtype = tf.float64))

        return y_hat, target, loss 
    
    def linearMSENoFeed(self, y_hat, target):
        '''
        a tensor function, no feed
        '''
        print("y_hat",  y_hat.shape)
        print("target", target.shape)
    
        y_hat = tf.convert_to_tensor(y_hat)
        target = tf.convert_to_tensor(target)
        
        target = tf.cast(target, dtype = 'float64')
        se_mat = tf.square(tf.subtract(y_hat, target))
        #print("msemst", mse_mat)
        mse_mat = tf.reduce_mean(se_mat)

        loss = tf.reduce_mean(mse_mat)

        loss = loss/2.0 #tf.div(loss, tf.constant(2.0))
        #print(mse_mat.eval())

        return loss  
    
    def crossEntropyError(self, y_hat, y_target):
        crossEntropyErrorCurr = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_target, logits= y_hat))
        return crossEntropyErrorCurr
    
    def accuracy(y_hat, target, classType = 0):
        if classType == 0:
            correctCases = tf.equal(tf.cast(tf.greater_equal(y_hat, 0.5), tf.float64), tf.floor(target))
        else:
            correctCases = tf.equal(tf.argmax(y_hat, 1), tf.argmax(target, 1))

        accuracy = tf.reduce_mean(tf.cast(correctCases, dtype="float"))
    
        return accuracy
    
    def buildGraph(self):
        '''
        Input: _data is x in the equation, dim by 784 flattened tensor
               _target is y in the equaion
               _regLambda is the wegithed decay coeff
               _learningRate is the epsilon
               err is the type of error, 1 default corssEntropy
        '''

        #declare using a placeholder, feed in _data and _target to x ,y 
        X = tf.placeholder(tf.float64, shape=[None, self.numPixel], name='dataX')
        
        # W initialize to a gaussian distr, honestly anything would work
        W = tf.Variable(tf.truncated_normal(shape=[self.numPixel, self.numClass], stddev=0.1), name='weights')

        W = tf.cast(W, dtype=tf.float64)
        b = tf.Variable(0.0, name='biases')
        b = tf.cast(b, dtype=tf.float64)

        y_target = tf.placeholder(tf.float64, shape=[None, self.numClass], name='targetY')
        

        #compute the current y_hat
        if self.qn == 1: #logistic:
            wtxb =  tf.matmul(X, W) + b
            y_hat = tf.sigmoid(wtxb)
            regTerm = tf.multiply( tf.constant(0.50, dtype = tf.float64),\
                                  tf.multiply(tf.constant(self.regLambda,dtype = tf.float64), tf.reduce_mean(tf.square(W))))
            crossEntropyErrorCurr = self.crossEntropyError(y_hat, y_target)
            crossEntropyErrorCurr = tf.add(crossEntropyErrorCurr, regTerm)  
            errTerm = crossEntropyErrorCurr
            
        else:
            y_hat =  tf.matmul(X, W) + b
            mseCurr = self.linearMSENoFeed(y_hat, y_target)
            regTerm = tf.multiply( tf.constant(0.50, dtype = tf.float64), tf.multiply(tf.constant(self.regLambda,dtype = tf.float64), tf.reduce_mean(tf.square(W))))
            mseCurr = tf.add(mseCurr, regTerm)
            errTerm = mseCurr
        

        if self.gd == 0:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate = self.learningRate)
        else:
            optimizer = tf.train.AdamOptimizer(learning_rate = self.learningRate)
    
        if self.err == 1:
            train = optimizer.minimize(loss=crossEntropyErrorCurr)
        else:
            train = optimizer.minimize(loss=mseCurr)

        return W, b, errTerm, y_hat, X, y_target, train
    
    def plotFig(self, _dim, y , addInfo, title="default", xLabel="xlabel", yLabel="yLabel", plotLabel ="plotLabel", _num =1 ):
        x = np.linspace(0, _dim, num=_dim)
        y = np.array(y)
        print("$$$$$$$$$$$$$$$ in plot fig$$$$$$$$$$$$$$$$$")
        print(y.shape)
        plt.figure(_num)
        plt.title(title)
        plt.xlabel(xLabel)
        plt.ylabel(yLabel)
        for i in range(y.shape[0]):
            #print("x", x)
            #print("y", y[i])
            plt.plot(x, y[i], label = plotLabel + str(addInfo[i]))
        
        plt.legend()
        plt.savefig( title + str(_num) + ".png")
        plt.close()
        plt.clf()

              
    def runLogisticGraphPart1(self, trainData, trainTarget, validData, validTarget,testData, testTarget,
                              plotOut):
        '''
        Input: _data,
        Output:
           required accuracy/epoch plots
           
        '''
        trainLossArr= []
        mseLossArr = []
        
            
        for learningRate in self.learningRateArr:
            trainLossL = []
            mseLossL = []
            tf.reset_default_graph()
            W, b, errTerm, y_hat, X, y_target, train = self.buildGraph()
            y_hat_mse, target_mse, mseLoss = self.linearMSE()
    
            init = tf.global_variables_initializer()
            sess = tf.InteractiveSession()
            sess.run(init)
            initialW = sess.run(W)  
            initialb = sess.run(b)            
            #training model and iter through batches
            print("learningrate = ", learningRate)
            trainBatchSampler = BatchSampler(trainData, trainTarget, self.batchSize)
        
            for i in range(self.numIter):
                dataBatch, targetBatch = trainBatchSampler.get_batch()
                currentW, currentb, errTrain, y_predict, trainModel = sess.run([W, b, errTerm, y_hat, train], feed_dict={X: dataBatch, y_target: targetBatch})
                trainLossL.append(errTrain)
            
                mseLossVal = sess.run(mseLoss, feed_dict={y_hat_mse: y_predict, target_mse: targetBatch})
                mseLossL.append(mseLossVal)
            
                if i%3500 == 0:
                    print("current entropy", errTrain)
                    #print("current mse", mseLoss)
                    print("epoch ", i/500)
        
            trainLossArr.append(trainLossL)
            mseLossArr.append(mseLossL)
        
        print("train done")
        #print("mselossall", mseLossAll)
        
        
        '''
        for i in range(len(plotOut)):
            self.plotFig(self.numIter, trainLossArr, self.learningRateArr,  title = plotOut[i],\
            plotLabel="learning rate", _num = i)
        '''
        
        print("train done")
        self.plotFig(self.numIter, trainLossArr, self.learningRateArr,  title = "test1",\
            plotLabel="learning rate")
        self.plotFig(self.numIter, mseLossArr, self.learningRateArr, title = "test2",\
           plotLabel = "learning rate")
    
        #test error and valid error
        validHat = sess.run(y_hat, feed_dict={X: validData, y_target: validTarget})
        testHat = sess.run(y_hat, feed_dict={X: testData, y_target: testTarget} )
    
    
        validError = sess.run(mseLoss, feed_dict={y_hat_mse: validHat, target_mse: validTarget}) 
        testError =  sess.run(mseLoss, feed_dict={y_hat_mse: testHat, target_mse: testTarget}) 

        print("valideRROR", validError, "testerror", testError)

class loadData:
    def __init__(self):
        self.flatten = True
        self.addOnes = False
        
        self.data_path = '/Users/vikuo/Documents/GitHub/ece521/assi/A1/data/data.npy'
        self.target_path = '/Users/vikuo/Documents/GitHub/ece521/assi/A1/data/target.npy'
    
    def arrFlatten(self, arr):
        '''
        type np array
        '''
        dataDim1, dum1, dum2 = arr.shape
        dum_sq = dum1 * dum2
        arr = np.reshape(arr, [ dataDim1 ,dum_sq ])
        return arr   
    def convertTarget(self, targetValues):
        numClasses = np.max(targetValues) + 1
        return np.eye(numClasses)[targetValues]
    
    def loadBinData(self):
    # import binary NOTMIST data set
        with np.load("notMNIST.npz") as data :
            Data, Target = data ["images"], data["labels"]
            posClass = 2
            negClass = 9
            dataIndx = (Target==posClass) + (Target==negClass)
            Data = Data[dataIndx]/255.
            Target = Target[dataIndx].reshape(-1, 1)
            Target[Target==posClass] = 1
            Target[Target==negClass] = 0
            
            if self.flatten:
                Data = self.arrFlatten(Data)
        
            if self.addOnes:
                Data = np.concatenate((np.ones((Data.shape[0], 1)),Data),axis=1)
        
        
            np.random.seed(521)
            randIndx = np.arange(len(Data))
            np.random.shuffle(randIndx)
            Data, Target = Data[randIndx], Target[randIndx]
            trainData, trainTarget = Data[:3500], Target[:3500]
            validData, validTarget = Data[3500:3600], Target[3500:3600]
            testData, testTarget = Data[3600:], Target[3600:]
        
        print("Data binary class Loaded")
        print("-------------------------------")
        return trainData, trainTarget,validData, validTarget, testData, testTarget
    
    
    def loadMultiData(self):
        with np.load("notMNIST.npz") as data:
            Data, Target = data ["images"], data["labels"]
            np.random.seed(521)
            randIndx = np.arange(len(Data))
            np.random.shuffle(randIndx)
            Data = Data[randIndx]/255.
            
            if self.flatten:
                Data = self.arrFlatten(Data)
            
            Target = Target[randIndx]
            trainData, trainTarget = Data[:15000], Target[:15000]
            validData, validTarget = Data[15000:16000], Target[15000:16000]
            testData, testTarget = Data[16000:], Target[16000:]
        
            trainTarget = self.convertTarget(trainTarget)
            validTarget = self.convertTarget(validTarget)
            testTarget = self.convertTarget(testTarget)
        return trainData, trainTarget, validData, validTarget, testData, testTarget
    
    
    def loadFaceData(self):
        task = 0
        # task = 0 >> select the name ID targets for face recognition task
        # task = 1 >> select the gender ID targets for gender recognition task data = np.load(data_path)/255
        data = np.load(self.data_path)/255
        data = np.reshape(data, [-1, 32*32])
        target = np.load(self.target_path)
        np.random.seed(45689)
        rnd_idx = np.arange(np.shape(data)[0])
        np.random.shuffle(rnd_idx)
        trBatch = int(0.8*len(rnd_idx))
        validBatch = int(0.1*len(rnd_idx))
 
        trainData, validData, testData = data[rnd_idx[1:trBatch],:], \
                                   data[rnd_idx[trBatch+1:trBatch + validBatch],:],\
                                   data[rnd_idx[trBatch + validBatch+1:-1],:]
        trainTarget, validTarget, testTarget = target[rnd_idx[1:trBatch], task], \
                              target[rnd_idx[trBatch+1:trBatch + validBatch], task],\
                              target[rnd_idx[trBatch + validBatch + 1:-1], task]
        #print("train data dim", trainData.shape, "valid data dim", validData.shape,
        #     "test data dim", testData.shape, "trainTarget shape", trainTarget.shape,
        #     "validTarget SHAPE", validTarget.shape, "testTarget shape", testTarget.shape)
        trainTarget = self.convertTarget(trainTarget)
        validTarget = self.convertTarget(validTarget)
        testTarget = self.convertTarget(testTarget)
        return trainData, trainTarget, validData, validTarget,testData, testTarget 
    
    
if __name__ == '__main__':
    dataLoader = loadData()
    trainData, trainTarget, validData, validTarget,testData, testTarget = dataLoader.loadBinData()
    
    #trainData, trainTarget, validData, validTarget,testData, testTarget = dataLoader.loadMultiData()
    #trainData, trainTarget, validData, validTarget,testData, testTarget  = dataLoader.loadFaceData()
    plotTitleArr = ["test"]

    #plotTitleArr = ["q2-3 Adam Opt lambda = 0 linear loss vs number of epoches"]
    linearClassifier = classification(_numClass = 1, _numPixel = 784, _regLambda = 0.001, _batchSize = 500, \
                 _learningRate = 0.001, _learningRateArr = [0.001], _numIter = 5000, _gd =1, _err=0, _qn=0)
    linearClassifier.runLogisticGraphPart1(trainData, trainTarget, validData, validTarget,testData, testTarget, plotTitleArr)
    #plotTitleArr = ["Q2-1 logistic loss vs number of epoches", "Q2-1 MSE loss vs number of epoches"]
    #logisticClassifier = classification(_numClass = 1, _numPixel = 784, _regLambda = 0.001, _batchSize = 500, \
    #             _learningRate = 0.001, _learningRateArr = [0.001], _numIter = 5000, _gd =1, _err=1, _qn=1)
    #logisticClassifier.runLogisticGraphPart1(trainData, trainTarget, validData, validTarget,testData, testTarget, plotTitleArr)

    #multiLogistic = classification(_numClass = 10,_numPixel = 784, _regLambda = 0.01, _batchSize = 500, \
    #            _learningRate = 0.001, _learningRateArr = [0.005, 0.001, 0.0001], _numIter = 5000, _gd =1, _err=1, _qn=1)
    #multiLogistic.runLogisticGraphPart1(trainData, trainTarget, validData, validTarget,\
    #                                    testData, testTarget, plotTitleArr)
    
    #faceLogistic = classification(_numClass = 6, _numPixel = 1024, _regLambda = 0.01, _batchSize = 300, \
    #      _learningRate = 0.001, _learningRateArr = [0.005, 0.001, 0.0001], _numIter = 5, _gd =1, _err=1, _qn=1)
    #faceLogistic.runLogisticGraphPart1(trainData, trainTarget, validData, validTarget,testData, testTarget, \
    #                                  plotTitleArr)
    
    

Data binary class Loaded
-------------------------------
y_hat (?, 1)
target (?, 1)
learningrate =  [0.001]
current entropy 1.05466389359
epoch  0.0
current entropy 0.106287197357
epoch  7.0
y_hat (?, 1)
target (?, 1)
learningrate =  0.001
current entropy 0.875107337451
epoch  0.0
current entropy 0.112776184714
epoch  7.0
train done
train done
$$$$$$$$$$$$$$$ in plot fig$$$$$$$$$$$$$$$$$
(2, 5000)
$$$$$$$$$$$$$$$ in plot fig$$$$$$$$$$$$$$$$$
(2, 5000)
valideRROR 0.143998445519 testerror 0.132588608208


<matplotlib.figure.Figure at 0x123de4940>

## TODO
- [ ] CANNOT reproduce multi minist valid/test errors
- [ ] plot accuracies
- [ ] organize the work flow (each question plot different things)
- [x] change sampler to epoch