In [11]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [12]:
#use temporarily until tf.train bug is fixed 
#violet's mini batch code
class BatchSampler(object):
    '''
    A (very) simple wrapper to randomly sample batches without replacement.
    '''
    
    def __init__(self, data, targets, batch_size):
        self.num_points = data.shape[0]
        self.features = data.shape[1]
        self.data = data
        self.targets = targets
        self.batch_size = batch_size
        self.indices = np.arange(self.num_points)

    def random_batch_indices(self, m=None):
        if m is None:
            indices = np.random.choice(self.indices, self.batch_size, replace=False)
        else:
            indices = np.random.choice(self.indices, m, replace=False)
        return indices 

    def get_batch(self, m=None):
        '''
        Get a random batch without replacement from the dataset.
        If m is given the batch will be of size m. 
        Otherwise will default to the class initialized value.
        '''
        indices = self.random_batch_indices(m)
        X_batch = np.take(self.data, indices, 0)
        y_batch = self.targets[indices]
        return X_batch, y_batch

In [13]:
#loading data
with np.load("notMNIST.npz") as data :
    Data, Target = data ["images"], data["labels"]
    posClass = 2
    negClass = 9
    dataIndx = (Target==posClass) + (Target==negClass)
    Data = Data[dataIndx]/255.
    Target = Target[dataIndx].reshape(-1, 1)
    Target[Target==posClass] = 1
    Target[Target==negClass] = 0
    np.random.seed(521)
    randIndx = np.arange(len(Data))
    np.random.shuffle(randIndx)
    Data, Target = Data[randIndx], Target[randIndx]
    trainData, trainTarget = Data[:3500], Target[:3500]
    validData, validTarget = Data[3500:3600], Target[3500:3600]
    testData, testTarget = Data[3600:], Target[3600:]

In [14]:
trainData = np.reshape(trainData, [trainData.shape[0], 28*28])
validData = np.reshape(validData, [validData.shape[0], 28*28])
testData = np.reshape(testData, [testData.shape[0], 28*28])

In [23]:
#hyperparameters

learning_rates = [0.005, 0.001, 0.0001]
# learning_rates = [0.001]
mini_batch = 500
reg_coeff = 0.01
train_iter = 5000
train_loss = []

for learning_rate in learning_rates:
    
    tf.reset_default_graph()
    train_loss_curr = []
    
    x = tf.placeholder(tf.float32, shape = [None, 784])
    y = tf.placeholder(tf.float32, shape = [None, 1])

    
    w = tf.Variable(tf.random_normal([784, 1], mean = 0, stddev = 0.1))
    
    b = tf.cast(tf.Variable(0), dtype = tf.float32)
    y_hat = tf.add(tf.matmul(x, w), b)
    yhat = tf.sigmoid(y_hat)

    cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = yhat))
    
    regul = tf.multiply(reg_coeff, tf.reduce_mean(tf.square(w)))
    regul = tf.multiply(0.50, regul)
    cost = tf.add(cross_entropy, regul)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    for epoch in range(train_iter):
#         x_batch, y_batch = tf.train.batch([trainData, trainTarget], mini_batch)
        
        trainBatchSampler = BatchSampler(trainData, trainTarget, mini_batch)
        x_batch, y_batch = trainBatchSampler.get_batch()
        
        sess.run(optimizer, feed_dict = {x: x_batch, y:y_batch})
        w_curr, b_curr, c = sess.run([w, b, cost], feed_dict = {x: x_batch, y:y_batch})
        train_loss_curr.append(c)
        
        if epoch%500 is 0:
            print("epoch:", epoch/500, " -> curr loss:", c)
            
    train_loss.append(train_loss_curr)
    
print("done")

epoch: 0.0  -> curr loss: 0.767581
epoch: 1.0  -> curr loss: 0.559369
epoch: 2.0  -> curr loss: 0.523992
epoch: 3.0  -> curr loss: 0.539989
epoch: 4.0  -> curr loss: 0.514862
epoch: 5.0  -> curr loss: 0.50324
epoch: 6.0  -> curr loss: 0.508726
epoch: 7.0  -> curr loss: 0.521803
epoch: 8.0  -> curr loss: 0.520897
epoch: 9.0  -> curr loss: 0.51176
epoch: 0.0  -> curr loss: 0.666022
epoch: 1.0  -> curr loss: 0.590496
epoch: 2.0  -> curr loss: 0.55952
epoch: 3.0  -> curr loss: 0.546446
epoch: 4.0  -> curr loss: 0.549218
epoch: 5.0  -> curr loss: 0.552681
epoch: 6.0  -> curr loss: 0.544622
epoch: 7.0  -> curr loss: 0.568074
epoch: 8.0  -> curr loss: 0.532696
epoch: 9.0  -> curr loss: 0.536222
epoch: 0.0  -> curr loss: 0.756261
epoch: 1.0  -> curr loss: 0.731912
epoch: 2.0  -> curr loss: 0.713683
epoch: 3.0  -> curr loss: 0.709894
epoch: 4.0  -> curr loss: 0.698312
epoch: 5.0  -> curr loss: 0.676966
epoch: 6.0  -> curr loss: 0.664082
epoch: 7.0  -> curr loss: 0.660902
epoch: 8.0  -> curr los

In [24]:
plotx = np.linspace(0, train_iter, train_iter)
ploty = np.array(train_loss)

plt.figure(1)
for i in range(ploty.shape[0]):
    plt.plot(plotx, ploty[i])

plt.savefig("logistic_ques1.png")
plt.close()

In [25]:
#hyperparameters

learning_rates = 0.001
# learning_rates = [0.001]
mini_batch = 500
reg_coeff = 0.01
np.random.seed(2)
train_iter = 5000
train_loss = []


tf.reset_default_graph()
train_loss_curr = []
    
x = tf.placeholder(tf.float32, shape = [None, 784])
y = tf.placeholder(tf.float32, shape = [None, 1])
    
w = tf.Variable(tf.random_normal([784, 1], mean = 0, stddev = 0.1))
    
b = tf.cast(tf.Variable(0), dtype = tf.float32)
y_hat = tf.add(tf.matmul(x, w), b)
yhat = tf.sigmoid(y_hat)

cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = yhat)
cross_entropy = tf.reduce_mean(cross_entropy)
    
regul = tf.multiply(reg_coeff, tf.reduce_mean(tf.square(w)))
regul = tf.multiply(0.50, regul)
cost = tf.add(cross_entropy, regul)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
    
for epoch in range(train_iter):
#     x_batch, y_batch = tf.train.batch([trainData, trainTarget], mini_batch)
        
    trainBatchSampler = BatchSampler(trainData, trainTarget, mini_batch)
    x_batch, y_batch = trainBatchSampler.get_batch()
        
    sess.run(optimizer, feed_dict = {x: x_batch, y:y_batch})
    w_curr, b_curr, c = sess.run([w, b, cost], feed_dict = {x: x_batch, y:y_batch})
    train_loss_curr.append(c)
        
    if epoch%500 is 0:
        print("epoch:", epoch/500, " -> curr loss:", c)
            
train_loss.append(train_loss_curr)
    
print("done")

epoch: 0.0  -> curr loss: 0.694329
epoch: 1.0  -> curr loss: 0.545058
epoch: 2.0  -> curr loss: 0.526127
epoch: 3.0  -> curr loss: 0.527051
epoch: 4.0  -> curr loss: 0.513873
epoch: 5.0  -> curr loss: 0.518628
epoch: 6.0  -> curr loss: 0.509437
epoch: 7.0  -> curr loss: 0.532809
epoch: 8.0  -> curr loss: 0.528536
epoch: 9.0  -> curr loss: 0.537141
done


In [26]:
plotx = np.linspace(0, train_iter, train_iter)
ploty = np.array(train_loss)

plt.figure(1)
for i in range(ploty.shape[0]):
    plt.plot(plotx, ploty[i])

plt.savefig("logistic_ques2.png")
plt.close()