In [19]:
"""
Follow the instructions provided in the writeup to completely
implement the class specifications for a basic MLP, optimizer, .
You will be able to test each section individually by submitting
to autolab after implementing what is required for that section
-- do not worry if some methods required are not implemented yet.

Notes:

The __call__ method is a special reserved method in
python that defines the behaviour of an object when it is
used as a function. For example, take the Linear activation
function whose implementation has been provided.

# >>> activation = Identity()
# >>> activation(3)
# 3
# >>> activation.forward(3)
# 3
"""

# Do not import any additional 3rd party external libraries as they will not
# be available to AutoLab and are not needed (or allowed)
import numpy as np
import os


class Activation(object):

    """
    Interface for activation functions (non-linearities).

    In all implementations, the state attribute must contain the result, i.e. the output of forward (it will be tested).
    """

    # No additional work is needed for this class, as it acts like an abstract base class for the others

    def __init__(self):
        self.state = None

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        raise NotImplemented

    def derivative(self):
        raise NotImplemented


class Identity(Activation):

    """
    Identity function (already implemented).
    """

    # This class is a gimme as it is already implemented for you as an example

    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        self.state = x
        return x

    def derivative(self):
        return 1.0


class Sigmoid(Activation):

    """
    Sigmoid non-linearity
    """

    # Remember do not change the function signatures as those are needed to stay the same for AL

    def __init__(self):
        super(Sigmoid, self).__init__()

    def forward(self, x):

        # Might we need to store something before returning?
        sm = 1/(1+np.exp(-x))
        self.state = sm

        return sm

    def derivative(self):

        # Maybe something we need later in here...
        sm_d = self.state * (1-self.state)

        return sm_d


class Tanh(Activation):

    """
    Tanh non-linearity
    """

    # This one's all you!

    def __init__(self):
        super(Tanh, self).__init__()

    def forward(self, x):
        sm = (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
        self.state = sm

        return sm

    def derivative(self):
        
        return 1-self.state**2
    
    
class ReLU(Activation):

    """
    ReLU non-linearity
    """

    def __init__(self):
        super(ReLU, self).__init__()

    def forward(self, x):
        self.x = x
        a = np.copy(x)
        a[a<0]=0
        self.state = a
        return self.state

    def derivative(self):
        a = self.x
        a[a>=0]=1
        a[a<0]=0
        return a

# Ok now things get decidedly more interesting. The following Criterion class
# will be used again as the basis for a number of loss functions (which are in the
# form of classes so that they can be exchanged easily (it's how PyTorch and other
# ML libraries do it))


class Criterion(object):

    """
    Interface for loss functions.
    """

    # Nothing needs done to this class, it's used by the following Criterion classes

    def __init__(self):
        self.logits = None
        self.labels = None
        self.loss = None

    def __call__(self, x, y):
        return self.forward(x, y)

    def forward(self, x, y):
        raise NotImplemented

    def derivative(self):
        raise NotImplemented


class SoftmaxCrossEntropy(Criterion):

    """
    Softmax loss
    """

    def __init__(self):

        super(SoftmaxCrossEntropy, self).__init__()
        self.sm = None

    def forward(self, x, y):

        self.logits = x.astype(float)
        self.labels = y
        self.loss = np.zeros(len(x)).astype(float)
        
        
        #start my code
        

        for m in range(len(self.logits)):
            a = np.max(self.logits[m]) 
            e = 0
            for i in self.logits[m]:
                e += np.exp(i-a)
            a += np.log(e)
            deno = np.exp(a)
            aa = (np.exp(self.logits[m]))/deno
            self.logits[m][:] = aa
            loss = 0
            for b in range(len(aa)):
                if(self.labels[m][b]==1):
                    loss -= np.log(aa[b])
            self.loss[m] = loss
        self.state = self.logits
#         print(self.state)
        return self.loss

    def derivative(self):
#         print(self.logits)

        # self.sm might be useful here...

        return self.logits-self.labels

        
        
        
        

class BatchNorm(object):

    def __init__(self, fan_in, alpha=0.9):

        # You shouldn't need to edit anything in init

        self.alpha = alpha
        self.eps = 1e-8
        self.x = None
        self.norm = None
        self.out = None

        # The following attributes will be tested
        self.var = np.ones((1, fan_in))
        self.mean = np.zeros((1, fan_in))

        self.gamma = np.ones((1, fan_in))
        self.dgamma = np.zeros((1, fan_in))

        self.beta = np.zeros((1, fan_in))
        self.dbeta = np.zeros((1, fan_in))

        # inference parameters
        self.running_mean = np.zeros((1, fan_in))
        self.running_var = np.ones((1, fan_in))

    def __call__(self, x, eval=False):
        return self.forward(x, eval)

    def forward(self, x, eval=False):

        # if eval:
        #    # ???

        self.x = x

#         self.mean = 
        # self.var = # ???
        # self.norm = # ???
        # self.out = # ???

        # update running batch statistics
        # self.running_mean = # ???
        # self.running_var = # ???

        # ...

        raise NotImplemented

    def backward(self, delta):

        raise NotImplemented


# These are both easy one-liners, don't over-think them
def random_normal_weight_init(d0, d1):
    return  np.random.normal(size=(d0,d1))


def zeros_bias_init(d):
    return  np.random.normal(size=d)


class MLP(object):

    """
    A simple multilayer perceptron
    """

    def __init__(self, input_size, output_size, hiddens, activations, weight_init_fn, bias_init_fn, criterion, lr, momentum=0.0, num_bn_layers=0):

        # Don't change this -->
        self.train_mode = True
        self.num_bn_layers = num_bn_layers
        self.bn = num_bn_layers > 0
        self.nlayers = len(hiddens) + 1
        self.input_size = input_size
        self.output_size = output_size
        self.activations = activations
        self.criterion = criterion
        self.lr = lr
        self.momentum = momentum
        # <---------------------

        # Don't change the name of the following class attributes,
        # the autograder will check against these attributes. But you will need to change
        # the values in order to initialize them correctly

        
        
        self.W = []
        if len(hiddens) != 0 :
            self.W = None
            self.dW = None
            self.b = None
            self.db = None
        else:
            self.W = random_normal_weight_init(output_size,input_size)
            self.dW = random_normal_weight_init(output_size,input_size)
            self.b = zeros_bias_init(output_size)
            self.db = zeros_bias_init(output_size)
        # HINT: self.foo = [ bar(???) for ?? in ? ]

        # if batch norm, add batch norm parameters
        if self.bn:
            self.bn_layers = None

        # Feel free to add any other attributes useful to your implementation (input, output, ...)

    def forward(self, x):
        fwd = np.zeros((len(x),len(self.b)))
        if self.nlayers == 1 :
            count = 0
            for i in x:
                fwd[count] = (np.matmul(self.W ,i)+self.b) 
                count += 1
            
        
        return fwd

    def zero_grads(self):
        raise NotImplemented

    def step(self):
        raise NotImplemented

    def backward(self, labels):
        raise NotImplemented

    def __call__(self, x):
        return self.forward(x)

    def train(self):
        self.train_mode = True

    def eval(self):
        self.train_mode = False


def get_training_stats(mlp, dset, nepochs, batch_size):

    train, val, test = dset
    trainx, trainy = train
    valx, valy = val
    testx, testy = test

    idxs = np.arange(len(trainx))

    training_losses = []
    training_errors = []
    validation_losses = []
    validation_errors = []

    # Setup ...

    for e in range(nepochs):

        # Per epoch setup ...

        for b in range(0, len(trainx), batch_size):

            pass  # Remove this line when you start implementing this
            # Train ...

        for b in range(0, len(valx), batch_size):

            pass  # Remove this line when you start implementing this
            # Val ...

        # Accumulate data...

    # Cleanup ...

    for b in range(0, len(testx), batch_size):

        pass  # Remove this line when you start implementing this
        # Test ...

    # Return results ...

    # return (training_losses, training_errors, validation_losses, validation_errors)

    raise NotImplemented



In [20]:
t1 = np.array([[1,1,1],[2,1,3]])
t2 = np.array([[0,1,1],[1,1,1],[1,0,0],[0,1,0]])
b = np.array([1,2,3,4])

In [21]:
np.dot(t1[0],t2[0])

2

In [22]:
for i in t1:
    print(np.matmul(t2,i)+b)

[3 5 4 5]
[5 8 5 5]


In [23]:
mlp = MLP(3, 4,[], Identity(),None, None, SoftmaxCrossEntropy(), 0.008, momentum=0.9, num_bn_layers=0)

In [24]:
mlp.forward(t1)

[[-0.23894839  1.05466295  1.14479539]
 [ 1.27796734  1.0526529   0.5240967 ]
 [ 0.45230216  1.93907953 -0.07985998]
 [ 0.66221135 -0.14925807 -1.04239598]]
[-0.34483283 -1.4041975   0.45286166  0.45420925]


array([[ 1.61567712,  1.45051943,  2.76438337, -0.07523345],
       [ 3.66631951,  3.77668016,  3.05696556, -1.49781406]])

In [25]:
class MLP(object):

    """
    A simple multilayer perceptron
    """

    def __init__(self, input_size, output_size, hiddens, activations, weight_init_fn, bias_init_fn, criterion, lr, momentum=0.0, num_bn_layers=0):

        # Don't change this -->
        self.train_mode = True
        self.num_bn_layers = num_bn_layers
        self.bn = num_bn_layers > 0
        self.nlayers = len(hiddens) + 1
        self.input_size = input_size
        self.output_size = output_size
        self.activations = activations
        self.criterion = criterion
        self.lr = lr
        self.momentum = momentum
        # <---------------------

        # Don't change the name of the following class attributes,
        # the autograder will check against these attributes. But you will need to change
        # the values in order to initialize them correctly

        
        
        self.W = []
        self.W = random_normal_weight_init(output_size,input_size)
        self.dW = random_normal_weight_init(output_size,input_size)
        self.b = zeros_bias_init(output_size)
        self.db = zeros_bias_init(output_size)
#         self.W = np.zeros((output_size,input_size))
#         self.dW = np.zeros((output_size,input_size))
#         self.b = np.zeros(output_size)
#         self.db = np.zeros(output_size)
        # HINT: self.foo = [ bar(???) for ?? in ? ]

        # if batch norm, add batch norm parameters
        if self.bn:
            self.bn_layers = None

        # Feel free to add any other attributes useful to your implementation (input, output, ...)

    def forward(self, x):
        print(self.W)
        print(self.b)
        fwd = np.zeros((len(x),len(self.b)))
        if self.nlayers == 1 :
            count = 0
            for i in x:
                fwd[count] = (np.matmul(self.W ,i)+self.b) 
                count += 1  
        return fwd

    def zero_grads(self):
        self.dW = np.zeros((output_size,input_size))
        self.db = np.zeros(output_size)

    def step(self):
        raise NotImplemented

    def backward(self, labels):
        raise NotImplemented

    def __call__(self, x):
        return self.forward(x)

    def train(self):
        self.train_mode = True

    def eval(self):
        self.train_mode = False

In [26]:
t1 = np.array([[1,1,1],[2,1,3]])
t2 = np.array([[0,1,0],[1,0,0],[1,0,0],[0,1,0]])
b = np.array([1,2,3,4])

mlp = MLP(3, 4,[], Identity(),None, None, SoftmaxCrossEntropy(), 0.008, momentum=0.9, num_bn_layers=0)

mlp.forward(t1)

[[-0.44089364 -2.04987232  1.55329713]
 [-0.26374234  0.81856252 -1.70406089]
 [ 0.36002699 -0.60950001 -0.57045959]
 [ 0.9279269  -1.14135567  0.47059242]]
[-0.92877499  1.38675834 -0.84459488 -0.56107637]


array([[-1.86624381,  0.23751764, -1.6645275 , -0.30391272],
       [ 0.79945681, -3.43434648, -2.44541969,  1.565199  ]])

In [27]:
t1 = np.array([[1,1,1],[2,1,3]])
t2 = np.array([[0,0,1],[0,0,1]])


sme = SoftmaxCrossEntropy()
sme.forward(t1,t2)

array([1.09861229, 0.40760596])