In [1]:
import numpy as np
import activation_functions as AF
import wandb
from tqdm import tqdm
from keras.datasets import fashion_mnist

def get_act_by_name(name : str):
    if (name == 'linear'):
        return AF.Linear.value, AF.Linear.derivative
    elif (name == 'sigmoid'):
        return AF.Sigmoid.value, AF.Sigmoid.derivative 
    elif (name == 'tanh'):
        return AF.ReLu.value, AF.ReLu.derivative
    elif (name == 'relu'):
        return AF.TanH.value, AF.TanH.derivative
    else:
        raise Exception('NotImplemented'); exit(-1)

2023-02-12 17:44:08.659418: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-12 17:44:09.396697: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/vikram/miniconda3/envs/dlenv/lib/
2023-02-12 17:44:09.396786: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/vikram/miniconda3/envs/dlenv/lib/


In [5]:
# PATTERN MODE OF OPERATION - one example at a time
# USES ONLY SOFTMAX CURRENTLY
class ClassificationNeuralNetwork:

    def __init__(self, parameters : dict):
        self.no_of_hlayers = parameters['NUMBER_OF_HIDDEN_LAYERS']
        self.hlayer_sizes = parameters['HIDDEN_LAYER_SIZES']
        
        self.hlayer_types = parameters['HIDDEN_LAYER_ACTIVATIONS']
        self.outlayer_type = parameters['OUTPUT_LAYER_ACTIVATION']
        self.indim = parameters['INPUT_DIMENSION']
        self.outdim = parameters['NO_OF_CLASSES']
        self.eta = parameters['LEARNING_RATE']
        self.init_variables()

    # random small value init to ensure weights are in non-sat regions of tanh, sigmoid
    def init_variables(self):
        self.weights = dict()
        self.act_valuesderivs = dict()
        self.localgradients = dict()
        self.layer_outputs = dict()
        self.weight_changes = dict()
        self.hlayer_sizes[self.no_of_hlayers] = self.outdim
        self.hlayer_types[self.no_of_hlayers] = self.outlayer_type
        print(self.hlayer_sizes)
        for i in range(self.no_of_hlayers + 1):
            psize = self.hlayer_sizes[i-1] if i > 0 else self.indim
            self.weights[i] = np.random.randn(self.hlayer_sizes[i], psize + 1)
            self.act_valuesderivs[i] = np.zeros(self.hlayer_sizes[i])
            self.localgradients[i] = np.zeros(self.hlayer_sizes[i])
            self.layer_outputs[i] = np.zeros(self.hlayer_sizes[i])
            self.weight_changes[i] = np.zeros(self.hlayer_sizes[i])
            
    # weights matrix - neuron i's weights are in W[i, :] with bias as the first entry.
    def forward_one_layer(self, layeridx : int, input : np.array):
        f, df = get_act_by_name(self.hlayer_types[layeridx])
        input = np.concatenate(([1], input)) # for including bias
        act_values = np.matmul(self.weights[layeridx], input)
        output = np.array([f(a) for a in act_values])
        self.act_valuesderivs[layeridx] = np.array([df(a) for a in act_values]) # overwrite the act value derivates for this layer
        return output

    def safe_softmax(self, input : np.array):
        prob = np.copy(input)
        prob -= np.max(prob)
        prob = np.exp(prob) / (np.sum(prob))
        return prob

    # we store weight matrix, bias vector, activation function of each layer in a dict
    def forward(self, input : np.array):
        output = np.copy(input)
        self.layer_outputs[-1] = input
        for idx in range(self.no_of_hlayers + 1):
            output = self.forward_one_layer(idx, output)
            self.layer_outputs[idx] = np.copy(output)
        self.posterior_prob = self.safe_softmax(output)

    # backward prop on one **HIDDEN** layer
    def backward_one_layer(self, layer_idx):
        nl_weighted_deriv_sum = np.matmul((self.weights[layer_idx+1].T)[1:], self.localgradients[layer_idx+1])
        self.localgradients[layer_idx] = np.multiply(self.act_valuesderivs[layer_idx], nl_weighted_deriv_sum)
        prev_layer_output = np.concatenate(([1], self.layer_outputs[layer_idx - 1]))
        self.weight_changes[layer_idx] = self.eta * (np.outer(prev_layer_output, self.localgradients[layer_idx]).T)
    
    def compute_local_gradients_final_layer(self, target_label, posterior_prob):
        output = np.copy(posterior_prob)
        output = -output
        output[target_label] += 1.0
        return output

    def backward(self, target_label):
        # for output layer
        self.localgradients[self.no_of_hlayers] = self.compute_local_gradients_final_layer(target_label, self.posterior_prob)
        prev_layer_output = np.concatenate(([1], self.layer_outputs[self.no_of_hlayers - 1]))
        self.weight_changes[self.no_of_hlayers] = self.eta * (np.outer(prev_layer_output, self.localgradients[self.no_of_hlayers]).T)
        for idx in range(self.no_of_hlayers-1,-1,-1):
            self.backward_one_layer(idx)

        for idx in range(self.no_of_hlayers+1):
            self.weights[idx] += self.weight_changes[idx]
    
    def train(self, train_X, train_Y, epoches : int):
        for i in tqdm(range(epoches)):
            print(i)
            for (x, y) in zip(train_X, train_Y):
                self.forward(x)
                self.backward(y)

    def validate(self, test_X, test_Y):
        correct, sz = 0, test_Y.shape[0]
        loss = 0.0
        for i in range(sz):
            self.forward(test_X[i])
            pred = np.argmax(self.posterior_prob)
            if pred == test_Y[i]:
                correct += 1
            loss += (1.0/(i+1)) * (-np.log(self.posterior_prob[test_Y[i]]) - loss)
        print(f'accuracy = {correct/sz}; loss = {loss}')

In [3]:
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

train_count, test_count = X_train.shape[0], X_test.shape[0]
X_train = X_train.reshape((train_count, 784))
X_test = X_test.reshape((test_count, 784))

In [6]:
parameters = dict()
parameters['NUMBER_OF_HIDDEN_LAYERS'] = 2
parameters['HIDDEN_LAYER_SIZES'] = {0 : 32, 1 : 32}
parameters['HIDDEN_LAYER_ACTIVATIONS'] = {0 : 'tanh', 1 : 'tanh'}
parameters['OUTPUT_LAYER_ACTIVATION']  = 'tanh'
parameters['INPUT_DIMENSION'] = 784
parameters['LEARNING_RATE'] = 1e-5
parameters['NO_OF_CLASSES'] = 10
nn = ClassificationNeuralNetwork(parameters)
nn.train(X_train, y_train, 1000)

{0: 32, 1: 32, 2: 10}


  0%|          | 0/1000 [00:00<?, ?it/s]

0


  act_values = np.matmul(self.weights[layeridx], input)
  prob -= np.max(prob)
  0%|          | 1/1000 [00:42<11:49:40, 42.62s/it]

1


  0%|          | 1/1000 [00:56<15:42:09, 56.59s/it]


KeyboardInterrupt: 