**Tensor Deep Neural Network Framework**

In [18]:
import numpy as np

class Tensor(object):
    """NumPy-backed tensor with a minimal reverse-mode autograd.

    Every operation returns a new ``Tensor``. When all operands have
    ``autograd=True`` the result records its ``creators`` and a
    ``creation_op`` tag, which ``backward`` uses to route gradients.

    Attributes:
        data: ``np.ndarray`` built from the constructor argument.
        creators: list of operand tensors that produced this one, or None.
        creation_op: string tag of the producing operation, or None.
        grad: accumulated gradient as a ``Tensor``, or None before backward.
        autograd: whether this tensor takes part in gradient tracking.
        id: integer identifier used as key in the creators' ``children``.
        children: maps child id -> number of gradients still expected from it.
    """

    def __init__(self, data, creators=None, creation_op=None, autograd=False, id=None):
        """Wrap ``data`` and register this tensor as a child of its creators.

        Args:
            data: anything ``np.array`` accepts.
            creators: operand tensors that produced this tensor, if any.
            creation_op: tag of the producing operation (e.g. ``"add"``).
            autograd: enable gradient tracking for this tensor.
            id: explicit identifier; a random integer is drawn when omitted.
        """
        self.data = np.array(data)
        self.creators = creators
        self.creation_op = creation_op
        self.grad = None
        self.autograd = autograd
        if id is None:
            # NOTE: ids are drawn from NumPy's global RNG (kept for
            # compatibility), so two tensors can in principle collide.
            id = np.random.randint(0, 100000)
        self.id = id
        self.children = {}
        if creators is not None:
            # a creator used twice by the same op (e.g. x * x) expects two gradients
            for creator in creators:
                creator.children[self.id] = creator.children.get(self.id, 0) + 1

    def backward(self, grad=None, grad_origin=None):
        """Accumulate ``grad`` and, once complete, propagate to the creators.

        Args:
            grad: incoming gradient ``Tensor``; defaults to ones shaped like
                ``self.data`` when this call starts the backward pass.
            grad_origin: the child tensor the gradient comes from, or None
                when the call does not originate from another node.

        Raises:
            Exception: if ``grad_origin`` already delivered all the gradients
                this tensor expected from it.
        """
        if not self.autograd:
            return

        if grad_origin is not None:
            # if waiting to receive gradient, decrement counter
            if self.children[grad_origin.id] != 0:
                self.children[grad_origin.id] -= 1
            else:
                raise Exception("Same child cannot backpropagate more than once!")

        # if this is the beginning of the backpropagation chain
        if grad is None:
            grad = Tensor(np.ones_like(self.data))

        # accumulate gradients from all the children
        if self.grad is None:
            self.grad = grad
        else:
            self.grad = Tensor(self.grad.data + grad.data)

        # backpropagate to creators if all gradients from children have been
        # received or if the gradient did not originate from another node
        if self.creators is None:
            return
        if self.received_grads_from_all_children() or grad_origin is None:
            self._backprop_to_creators()

    def _backprop_to_creators(self):
        """Send the accumulated gradient to each creator according to creation_op.

        Gradients are computed on the raw arrays and wrapped in fresh,
        untracked tensors: using autograd-enabled Tensor ops here would
        register extra children on graph nodes that never report back, and
        sharing one grad object between creators would couple their updates.
        """
        op = self.creation_op or ""
        g = self.grad.data
        c = self.creators

        if op == "add":
            c[0].backward(Tensor(g), self)
            c[1].backward(Tensor(g), self)
        elif op == "neg":
            c[0].backward(Tensor(-1 * g), self)
        elif op == "sub":
            c[0].backward(Tensor(g), self)
            c[1].backward(Tensor(-1 * g), self)
        elif op == "mul":
            c[0].backward(Tensor(g * c[1].data), self)
            c[1].backward(Tensor(c[0].data * g), self)
        elif op == "mm":
            c[0].backward(Tensor(np.dot(g, c[1].data.transpose())), self)
            c[1].backward(Tensor(np.dot(c[0].data.transpose(), g)), self)
        elif op == "transpose":
            c[0].backward(Tensor(g.transpose()), self)
        elif op == "sigmoid":
            # sigmoid derivative expressed through the output: s * (1 - s)
            c[0].backward(Tensor(g * (self.data * (1 - self.data))), self)
        elif op == "tanh":
            # tanh derivative expressed through the output: 1 - t^2
            c[0].backward(Tensor(g * (1 - self.data * self.data)), self)
        elif op == "relu":
            # relu derivative: 1 where the input was positive, else 0
            c[0].backward(Tensor(g * (c[0].data > 0)), self)
        elif op.startswith("sum_"):
            # undo the reduction by repeating the gradient along the summed axis
            dim = int(op.split("_")[1])
            copies = c[0].data.shape[dim]
            c[0].backward(Tensor(self._expand_data(g, dim, copies)), self)
        elif op.startswith("expand_"):
            # undo the expansion by summing the gradient over the added axis
            dim = int(op.split("_")[1])
            c[0].backward(Tensor(g.sum(dim)), self)

    # check to see if this tensor has received gradients from all children,
    # which is indicated by all children counts being zero
    def received_grads_from_all_children(self):
        """Return True when no child still owes this tensor a gradient."""
        return all(count == 0 for count in self.children.values())

    @staticmethod
    def _derive(data, creators, op):
        """Build an op result; it is tracked only if every creator has autograd."""
        if all(creator.autograd for creator in creators):
            return Tensor(data, creators=creators, creation_op=op, autograd=True)
        return Tensor(data)

    @staticmethod
    def _expand_data(data, axis, copies):
        """Return ``data`` repeated ``copies`` times along a new axis at ``axis``."""
        ndim = len(data.shape)
        # the copies are created as a trailing axis, then moved to position `axis`
        trans_cmd = list(range(0, ndim))
        trans_cmd.insert(axis, ndim)
        new_shape = list(data.shape) + [copies]
        return data.repeat(copies).reshape(new_shape).transpose(trans_cmd)

    # Note: operations always return a new tensor object

    # element-wise addition
    def __add__(self, other):
        """Return a new tensor holding ``self.data + other.data``."""
        return self._derive(self.data + other.data, [self, other], "add")

    # element-wise negation
    def __neg__(self):
        """Return a new tensor holding ``-1 * self.data``."""
        return self._derive(-1 * self.data, [self], "neg")

    # element-wise subtraction
    def __sub__(self, other):
        """Return a new tensor holding ``self.data - other.data``."""
        return self._derive(self.data - other.data, [self, other], "sub")

    # element-wise multiplication
    def __mul__(self, other):
        """Return a new tensor holding ``self.data * other.data``."""
        return self._derive(self.data * other.data, [self, other], "mul")

    # sum over all elements along given axis
    def sum(self, axis):
        """Return a new tensor with ``self.data`` summed along ``axis``."""
        return self._derive(self.data.sum(axis), [self], "sum_" + str(axis))

    # expands the tensor along the given axis
    def expand(self, axis, copies):
        """Return a new tensor with ``copies`` repetitions along a new ``axis``."""
        new_data = self._expand_data(self.data, axis, copies)
        return self._derive(new_data, [self], "expand_" + str(axis))

    # transpose of matrix
    def transpose(self):
        """Return a new tensor holding ``self.data.transpose()``."""
        return self._derive(self.data.transpose(), [self], "transpose")

    # matrix multiplication
    def mm(self, other):
        """Return a new tensor holding ``np.dot(self.data, other.data)``."""
        return self._derive(np.dot(self.data, other.data), [self, other], "mm")

    def __str__(self):
        return str(self.data.__str__())

    def __repr__(self):
        return str(self.data.__repr__())

    # Non-linearity functions

    # sigmoid function
    def sigmoid(self):
        """Return a new tensor holding ``1 / (1 + exp(-x))`` element-wise."""
        return self._derive(1.0 / (1.0 + np.exp(-self.data)), [self], "sigmoid")

    # tanh function
    def tanh(self):
        """Return a new tensor holding ``tanh(x)`` element-wise."""
        return self._derive(np.tanh(self.data), [self], "tanh")

    # relu function
    def relu(self):
        """Return a new tensor with non-positive entries multiplied by zero."""
        return self._derive(self.data * (self.data > 0), [self], "relu")
    


# stochastic gradient descent optimizer    
class SGD_Optimizer(object):
    """Plain stochastic gradient descent over a list of parameters.

    Each parameter is expected to expose ``data`` (array updated in place)
    and ``grad`` (None, or an object whose ``data`` holds the gradient).
    """

    def __init__(self, parameters, alpha) -> None:
        """Store the parameters to update and the learning rate ``alpha``."""
        self.parameters = parameters
        self.alpha = alpha

    def zero(self):
        """Reset every available gradient to zero in place."""
        for p in self.parameters:
            # a parameter that has not received a gradient yet has grad None
            if p.grad is not None:
                p.grad.data *= 0

    def step(self, zero=True):
        """Apply ``data -= alpha * grad`` to every parameter that has a gradient.

        Args:
            zero: when True, zero each gradient right after it is applied.
        """
        for p in self.parameters:
            if p.grad is None:
                # nothing flowed back to this parameter; leave it untouched
                continue
            p.data -= self.alpha * p.grad.data

            if zero:
                p.grad.data *= 0

# layer base class
class Layer(object):
    """Base class for layers; owns the list of trainable parameters."""

    def __init__(self) -> None:
        # subclasses append their trainable tensors to this list
        self.parameters = list()

    def get_parameters(self):
        """Return the layer's parameter list (the list itself, not a copy)."""
        return self.parameters
    
# layer inherited classes
class Linear(Layer):
    """Fully connected layer computing ``input.mm(weight) + bias``."""

    def __init__(self, n_inputs, n_outputs) -> None:
        """Create the weight matrix and bias vector and register them.

        Args:
            n_inputs: number of input features (rows of the weight matrix).
            n_outputs: number of output features (columns of the weight matrix).
        """
        super().__init__()
        # random normal weights scaled by sqrt(2 / n_inputs); bias starts at zero
        scale = np.sqrt(2.0 / n_inputs)
        initial_weights = np.random.randn(n_inputs, n_outputs) * scale
        self.weight = Tensor(initial_weights, autograd=True)
        self.bias = Tensor(np.zeros(n_outputs), autograd=True)

        self.parameters.extend((self.weight, self.bias))

    def forward(self, input):
        """Return the affine transform of ``input``.

        The bias is expanded along axis 0 to ``len(input.data)`` copies so it
        can be added element-wise to the matrix product.
        """
        batch_size = len(input.data)
        projected = input.mm(self.weight)
        tiled_bias = self.bias.expand(0, batch_size)
        return projected + tiled_bias

# sequential layer (layer of layers)
class Sequential(Layer):
    """Container layer that applies its sub-layers one after another."""

    def __init__(self, layers=None) -> None:
        """Store the sub-layers.

        Args:
            layers: list of layers to run in order. When omitted, each
                instance gets its own new empty list (a shared ``[]`` default
                would leak layers added via ``add`` between instances).
        """
        super().__init__()
        self.layers = [] if layers is None else layers

    def add(self, layer):
        """Append ``layer`` to the end of the pipeline."""
        self.layers.append(layer)

    def forward(self, input):
        """Feed ``input`` through every layer in order and return the result."""
        for layer in self.layers:
            input = layer.forward(input)
        return input

    def get_parameters(self):
        """Return a new list with the parameters of all sub-layers, in order."""
        params = []
        for layer in self.layers:
            params += layer.get_parameters()

        return params
    
# mean squared error loss function layer
class MSELoss(Layer):
    """Squared-error loss layer without trainable parameters."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, pred, target):
        """Return the squared differences between pred and target, summed over axis 0.

        Note that the result is a sum, not divided by the number of samples.
        """
        # the difference is deliberately computed twice, exactly as two
        # separate subtraction nodes, before being multiplied together
        left_error = pred - target
        right_error = pred - target
        squared_error = left_error * right_error
        return squared_error.sum(0)




In [24]:
# Scratch check of NumPy element-wise operations on a random 3x3 matrix.
a = np.random.randn(3,3)
# multiplying by the boolean mask zeroes every non-positive entry (relu-style)
b = a*(a > 0)
# the scalar 1 is broadcast against every element
c = 1-a
print(a)
print(b)
print(c)

[[-0.48456301  0.37588444  0.74804848]
 [-1.18845766  0.24361123 -2.0972406 ]
 [-0.44863063  0.47055959 -1.39857823]]
[[-0.          0.37588444  0.74804848]
 [-0.          0.24361123 -0.        ]
 [-0.          0.47055959 -0.        ]]
[[1.48456301 0.62411556 0.25195152]
 [2.18845766 0.75638877 3.0972406 ]
 [1.44863063 0.52944041 2.39857823]]


Example: Using the tensor object and autograd to train a simple two-layer network

In [19]:
# Train a two-layer linear network on a 4-sample toy dataset with SGD.
# Seeding matters: both the weight initialisation and the Tensor ids are
# drawn from NumPy's global RNG.
np.random.seed(1)
# target also needs autograd=True: an operation is only tracked when both
# of its operands have autograd enabled
input_data = Tensor(np.array([[0,0], [0,1], [1,0], [1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True) 

# layer sizes are taken from the column counts of the data
input_neurons = input_data.data.shape[1]
hidden_neurons = 3
output_neurons = target.data.shape[1]

# initialize neural net layers
model = Sequential(layers=[Linear(input_neurons, hidden_neurons), Linear(hidden_neurons, output_neurons)])
loss_layer = MSELoss()

# initialize optimizer
optim = SGD_Optimizer(parameters=model.get_parameters(), alpha = 0.05) 

# training iterations
niters = 10
for iter in range(niters):

    # forward pass
    pred = model.forward(input_data)

    # compute loss
    loss = loss_layer.forward(pred, target)

    # backpropagation
    loss.backward()

    # optimization of weights (step() also zeroes the gradients by default)
    optim.step()

    print(f"Iteration# {iter+1}, Loss: {loss}")


Iteration# 1, Loss: [12.2648028]
Iteration# 2, Loss: [9.54239642]
Iteration# 3, Loss: [0.65868523]
Iteration# 4, Loss: [0.44037858]
Iteration# 5, Loss: [0.30768909]
Iteration# 6, Loss: [0.2183136]
Iteration# 7, Loss: [0.15605316]
Iteration# 8, Loss: [0.11190144]
Iteration# 9, Loss: [0.08028195]
Iteration# 10, Loss: [0.05752987]


In [5]:
# Demo: gradient accumulation when one tensor (b) feeds two branches of the graph.
a = Tensor([1,2,3,4,5], autograd=True)
b = Tensor([2,2,2,2,2], autograd=True)
c = Tensor([3,3,3,3,3], autograd=True)
# each (-b) creates its own negation node, so b ends up with two children
d = a + (-b)
e = (-b) + c
f = d + e

# inspect the graph bookkeeping: children maps child id -> gradients still expected
print(f"node(a), id: {a.id}, children: {a.children}, creators: {a.creators}")
print(f"node(b), id: {b.id}, children: {b.children}, creators: {b.creators}")
print(f"node(c), id: {c.id}, children: {c.children}, creators: {c.creators}")
print(f"node(d), id: {d.id}, children: {d.children}, creators: {d.creators}")
print(f"node(e), id: {e.id}, children: {e.children}, creators: {e.creators}")
print(f"node(f), id: {f.id}, children: {f.children}, creators: {f.creators}")

# start backpropagation from f with an explicit gradient of ones
D = Tensor([1,1,1,1,1])
f.backward(grad = D)

# b receives -1 from each of its two negation nodes, hence -2 in total
print(f"f grad: {f.grad}")
print(f"e grad: {e.grad}")
print(f"d grad: {d.grad}")
print(f"c grad: {c.grad}")
print(f"b grad: {b.grad}")
print(f"a grad: {a.grad}")


node(a), id: 3605, children: {75608: 1}, creators: None
node(b), id: 68988, children: {96696: 1, 48336: 1}, creators: None
node(c), id: 21635, children: {77742: 1}, creators: None
node(d), id: 75608, children: {78390: 1}, creators: [array([1, 2, 3, 4, 5]), array([-2, -2, -2, -2, -2])]
node(e), id: 77742, children: {78390: 1}, creators: [array([-2, -2, -2, -2, -2]), array([3, 3, 3, 3, 3])]
node(f), id: 78390, children: {}, creators: [array([-1,  0,  1,  2,  3]), array([1, 1, 1, 1, 1])]
f grad: [1 1 1 1 1]
e grad: [1 1 1 1 1]
d grad: [1 1 1 1 1]
c grad: [1 1 1 1 1]
b grad: [-2 -2 -2 -2 -2]
a grad: [1 1 1 1 1]
