In [4]:
import pandas as pd

In [5]:
data = pd.read_csv('org_data/mnist_test.csv',header=None,delimiter=',')
small_data = data.sample(n=1100)
mnist_train = small_data.iloc[0:1000]
mnist_test = small_data.iloc[1000:]

In [7]:
mnist_test = mnist_test.reset_index()
mnist_train = mnist_train.reset_index()

In [6]:
mnist_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
5780,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4099,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6374,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2445,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9511,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
mnist_train.to_csv('data/mnist_train.csv',header=False,index=False)

In [8]:
mnist_test.to_csv('data/mnist_test.csv',header=False,index=False)

In [1]:
import numpy as np

from models._base_network import _baseNetwork

from models.two_layer_nn import TwoLayerNet
import random
from models.sgd import SGD

from utils import load_mnist_trainval, load_mnist_test, generate_batched_data
from utils import train, evaluate

In [2]:
import matplotlib.pyplot as plt

In [2]:
class SoftmaxRegression(_baseNetwork):
    def __init__(self, input_size=28 * 28, num_classes=10):
        """
        A single layer softmax regression. The network is composed by:
        a linear layer without bias => (activation) => Softmax
        :param input_size: the input dimension
        :param num_classes: the number of classes in total
        """
        super().__init__(input_size, num_classes)
        self._weight_init()

    def _weight_init(self):
        '''
        initialize weights of the single layer regression network. No bias term included.
        :return: None; self.weights is filled based on method
        - W1: The weight matrix of the linear layer of shape (num_features, hidden_size)
        '''
        np.random.seed(1024)
        self.weights['W1'] = 0.001 * np.random.randn(self.input_size, self.num_classes)
        self.gradients['W1'] = np.zeros((self.input_size, self.num_classes))
                
    def one_hot(self,y):
        class_labels = [i for i in range(10)]
        one_hot = np.eye(self.num_classes)[np.vectorize(lambda c: class_labels[c])(y).reshape(-1)]
        for i in range(len(y)):
            one_hot[i] = one_hot[i] * y[i]
        return one_hot
    
    # def softmax(self, scores):
    #     f = np.exp(scores - np.max(scores))  # shift values
    #     return f / np.sum(f)

    def forward(self, X, y, mode='train'):
        """
        Compute loss and gradients using softmax with vectorization.

        :param X: a batch of image (N, 28x28)
        :param y: labels of images in the batch (N,)
        :return:
            loss: the loss associated with the batch
            accuracy: the accuracy of the batch
        """
        loss = None
        gradient = None
        accuracy = None
        #############################################################################
        # TODO:                                                                     #
        #    1) Implement the forward process and compute the Cross-Entropy loss    #
        #    2) Compute the gradient of the loss with respect to the weights        #
        # Hint:                                                                     #
        #   Store your intermediate outputs before ReLU for backwards               #
        #############################################################################

        # Z = X * W
        Z = np.matmul(X,self.weights['W1'])
        A = self.ReLU(Z)
        p = self.softmax(A)
        print("Label Shapes",p.shape,y.shape)
        accuracy = self.compute_accuracy(p,y)
        loss = self.cross_entropy_loss(p,y)
        
        


        if mode != 'train':
            return loss, accuracy

        #############################################################################
        # TODO:                                                                     #
        #    1) Implement the backward process:                                     #
        #        1) Compute gradients of each weight by chain rule                  #
        #        2) Store the gradients in self.gradients                           #
        #############################################################################
        
        error = p
        error[range(len(y)),y] -= 1
        
        
        dA = self.ReLU_dev(A) # derivative of ReLU activation with respect to Z
        dZ = 1/len(y)* error * dA 
        dW = np.dot(X.T, dZ)
        
        self.gradients['W1'] += dW # update gradients
        
        

        return loss, accuracy


model = SoftmaxRegression()
optimizer = SGD(learning_rate=0.1, reg=1e-3)
train_data, train_label, _, _ = load_mnist_trainval()
test_data, test_label = load_mnist_test()

batched_train_data, batched_train_label = generate_batched_data(train_data, train_label,
                                                                        batch_size=10, shuffle=True)


_, train_acc = train(1, batched_train_data, batched_train_label, model, optimizer, debug=False)



Loading training data...
Training data loaded with 100 images
Loading testing data...
Testing data loaded with 20 images
Label Shapes (10, 10) (10,)
Label Shapes (10, 10) (10,)
Label Shapes (10, 10) (10,)
Label Shapes (10, 10) (10,)
Label Shapes (10, 10) (10,)
Label Shapes (10, 10) (10,)
Label Shapes (10, 10) (10,)
Label Shapes (10, 10) (10,)


In [13]:
int(100/8)

12