# Class-based ANN with Tensorflow

```python
X, Y = getData()
model = ANN([2000, 1000, 500])
model.fit(X, Y, show_fig=True)
```

We will create two classes:
* Hidden layer class
    * dimension of input and output 
    * weights and biases of this layer
    * forward function: computes the forward pass of this layer
    * (possibly) backpropagation function: computes the backward pass of this layer
* Neural Network class
    * a set of hidden layers
    * fit function to train the network
        * calculate forward through the whole network
        * calculate total loss function
        * optimize the total loss function w.r.t. parameters
    * make predictions on new data

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

In [None]:
def init_weight_and_bias(M_in, M_out):
    """Create initial parameters for a layer mapping M_in inputs to M_out outputs.

    Returns:
        A ``(W, b)`` tuple of NumPy arrays: ``W`` has shape ``(M_in, M_out)``
        and holds standard-normal samples divided by ``sqrt(M_in)``; ``b`` is
        a zero vector of length ``M_out``.
    """
    fan_in_scale = np.sqrt(M_in)
    weights = np.random.randn(M_in, M_out)
    weights = weights / fan_in_scale
    biases = np.zeros(M_out)
    return weights, biases

class HiddenLayer(object):
    """A fully connected layer followed by a ReLU activation.

    Attributes:
        M_in: number of input units.
        M_out: number of output units.
        id: identifier supplied by the caller.
        W: ``tf.Variable`` of shape ``(M_in, M_out)`` holding the weights.
        b: ``tf.Variable`` of length ``M_out`` holding the biases.
        params: ``[W, b]``, exposed so the network can collect all trainable
            parameters (e.g. for regularization).
    """

    def __init__(self, M_in, M_out, an_id):
        self.M_in = M_in
        self.M_out = M_out
        self.id = an_id

        # The layer maps M_in -> M_out, so the weight matrix must be sized by
        # M_out (the original referenced an undefined name here).
        W, b = init_weight_and_bias(M_in, M_out)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))
        self.params = [self.W, self.b]

    def forward(self, X):
        """Return ``relu(X @ W + b)`` for the input tensor ``X``."""
        return tf.nn.relu(tf.matmul(X, self.W) + self.b)
        
    

class ANN(object):
    """Feed-forward classifier built from a stack of ``HiddenLayer`` objects.

    The network is assembled lazily inside ``fit`` (the input dimension and
    number of classes are taken from the training data) and trained with the
    TensorFlow 1.x graph/session API.

    NOTE(review): ``fit`` calls ``y2indicator`` and ``error_rate``, which are
    not defined in the visible part of this file; they must be in scope at
    call time. Presumably they one-hot encode labels and compute the
    misclassification rate, respectively -- confirm against their definitions.
    """

    def __init__(self, hidden_layer_sizes):
        """Store the list of hidden-layer widths, e.g. ``[2000, 1000, 500]``."""
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, reg=10e-3, batch_sz=100, epochs=100, show_fig=False):
        """Build the network and train it on ``(X, Y)`` with RMSProp.

        Args:
            X: 2-D array of samples, one row per sample.
            Y: 1-D array of class labels; the number of classes is the number
                of distinct values in ``Y``.
            learning_rate: RMSProp learning rate (``10e-7`` is ``1e-6``).
            mu: RMSProp momentum.
            decay: RMSProp decay.
            reg: coefficient applied to the summed L2 loss of all parameters.
            batch_sz: mini-batch size.
            epochs: number of passes over the training set.
            show_fig: when true, plot the recorded validation costs after
                training.

        The last 1000 samples (after shuffling) are held out for validation,
        so the data set is assumed to contain more than 1000 samples.
        """
        K = len(set(Y))

        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y, K).astype(np.float32)

        # Hold out the last 1000 samples. The slice must be open-ended:
        # ``[-1000:0]`` is always empty.
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        Yvalid_flat = np.argmax(Yvalid, axis=1)
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape

        # Initialize hidden layers; each layer's input size is the previous
        # layer's output size.
        M_in = D
        self.hidden_layers = []
        for id_seq, M_out in enumerate(self.hidden_layer_sizes):
            h = HiddenLayer(M_in, M_out, id_seq)
            self.hidden_layers.append(h)
            M_in = M_out

        # Output (logit) layer parameters.
        W, b = init_weight_and_bias(M_in, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))
        self.params = [self.W, self.b]

        # Collect parameters of every layer for regularization.
        for h in self.hidden_layers:
            self.params += h.params

        # Graph inputs: a batch of samples and their indicator-encoded targets.
        X_P = tf.placeholder(tf.float32, shape=(None, D), name='X_P')
        T_P = tf.placeholder(tf.float32, shape=(None, K), name='T_P')

        logit = self.forward(X_P)
        prediction = self.predict(X_P)

        # Regularization loss over all collected parameters (weights and biases).
        reg_loss = reg * sum([tf.nn.l2_loss(p) for p in self.params])

        # Total loss: mean cross-entropy plus regularization.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=T_P)
        ) + reg_loss

        optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu)
        train_op = optimizer.minimize(loss)

        # Train the model within a session.
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)

            for itr in range(epochs):
                t = 0
                for start_i in range(0, N, batch_sz):
                    t += 1
                    end_i = start_i + batch_sz
                    sess.run(train_op, feed_dict={X_P: X[start_i:end_i], T_P: Y[start_i:end_i]})

                    # Every 20 batches, evaluate on the validation set.
                    if t % 20 == 0:
                        c = sess.run(loss, feed_dict={X_P: Xvalid, T_P: Yvalid})
                        costs.append(c)

                        p = sess.run(prediction, feed_dict={X_P: Xvalid})
                        e = error_rate(Yvalid_flat, p)

                        print("itr:", itr, "batch:", t, "cost:", c, "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return the output-layer logits for input tensor ``X``."""
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return tf.matmul(Z, self.W) + self.b

    def predict(self, X):
        """Return a tensor holding the index of the largest logit per row of ``X``."""
        logit = self.forward(X)
        return tf.argmax(logit, 1)


In [None]:
def getData(balance_ones=True, path='fer2013.csv'):
    """Load samples and labels from a CSV file.

    The first line of the file is treated as a header and skipped. In every
    other line, the first comma-separated field is the integer label and the
    second is a space-separated list of integer pixel values (the original
    data set uses 48x48 = 2304 values per image).

    Args:
        balance_ones: when true, rows labelled ``1`` are repeated 9 times
            and moved to the end of the returned arrays, to balance that
            class against the others.
        path: location of the CSV file.

    Returns:
        ``(X, Y)`` where ``X`` is a 2-D array of pixel values divided by 255
        and ``Y`` is the 1-D array of labels.
    """
    Y = []
    X = []
    # The context manager guarantees the file handle is closed.
    with open(path) as f:
        next(f, None)  # skip the header line
        for line in f:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            row = line.split(',')
            Y.append(int(row[0]))
            X.append([int(p) for p in row[1].split()])

    X, Y = np.array(X) / 255.0, np.array(Y)

    if balance_ones:
        # balance the 1 class by repeating its samples
        is_one = (Y == 1)
        X0, Y0 = X[~is_one, :], Y[~is_one]
        X1 = np.repeat(X[is_one, :], 9, axis=0)
        X = np.vstack([X0, X1])
        Y = np.concatenate((Y0, [1] * len(X1)))

    return X, Y