In [1]:
! pip install numpy
! pip install scipy
! pip install matplotlib
! pip install sklearn

Collecting sklearn
  Using cached sklearn-0.0.post12.tar.gz (2.6 kB)
  Preparing metadata (setup.py) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[15 lines of output][0m
  [31m   [0m The 'sklearn' PyPI package is deprecated, use 'scikit-learn'
  [31m   [0m rather than 'sklearn' for pip commands.
  [31m   [0m 
  [31m   [0m Here is how to fix this error in the main use cases:
  [31m   [0m - use 'pip install scikit-learn' rather than 'pip install sklearn'
  [31m   [0m - replace 'sklearn' by 'scikit-learn' in your pip requirements files
  [31m   [0m   (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)
  [31m   [0m - if the 'sklearn' package is used by one of your dependencies,
  [31m   [0m   it would be great if you take some time to track which package uses
  [31m   [0m   'sklearn' instead of 'scikit-le

In [2]:
import torch
import numpy as np
import math

In [3]:
# For loss function:
def Sigmoid(x):
    return 1 / (1 + np.exp(-x))

def Sigmoid_derivative(x):
        return x * (1 - x)

In [4]:
def tokenize(sentence: str):
    """
    In theory, tokenize(sentence) would also:
    This implementation does not take that into account yet.
    """
    return sentence.split()

In [6]:
# Functions

def activation(x):
    return np.tanh(x)

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def output(x):
    """Uses softmax."""
    return softmax(x)

In [7]:
class NeuralNetwork:

    """
    A neural network with one hidden layer.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 input_weights=None, hidden_weights=None, 
                 input_bias=None, hidden_bias=None, output_bias=None):

        """
        Should store:
            - array of inputs H_{t}
            - array of weights W_{t} (going towards H_{t+1})
        """

        self.loss_value = 0

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize with random weights.
        self.input_weights = input_weights if input_weights is not None else np.random.rand(self.input_size, self.hidden_size)
        self.hidden_weights = hidden_weights if hidden_weights is not None else np.random.rand(self.hidden_size,output_size)
        
        self.input_bias = input_bias #if input_bias is not None else np.random.rand(num_points, self.input_size)
        self.hidden_bias = hidden_bias #if hidden_bias is not None else np.random.rand(num_points, self.hidden_size)
        self.output_bias = output_bias #if output_bias is not None else np.random(num_points, self.output_size)

    def show_neural_network(self) -> None:
        print(self.input_weights)
        print(self.hidden_weights)

    def activate(self, x):
        return Sigmoid(x)
    
    def deriv_act(self, x):
        return Sigmoid_derivative(x)
    
    def loss(self, y_true, y_calc):
        return np.divide(np.sum(np.square(y_true - y_calc)), 2)

    def deriv_loss(self, y_true, y_calc):
        return y_calc - y_true

    def forward(self, data) -> None:
        # Update every bias.
        self.input_bias = data
        hidden_z = np.matmul(self.input_weights, self.input_bias)
        #print("hidden preac:\n", hidden_z)
        self.hidden_bias = self.activate(hidden_z)
        output_z = np.matmul(self.hidden_weights, self.hidden_bias)
        #print("output preac:\n", output_z)
        self.output_bias = self.activate(output_z)
        return self.output_bias
        
    def backward(self, y_true, learning_rate):

        # For T in T -> 1:
        # Derive d(L, Wi), d(L, H{i-1})

        #print(np.array([self.deriv_act(i) for i in self.output_bias])) #* self.deriv_loss(y_true, self.output_bias)
        #print(self.deriv_loss(y_true, self.output_bias))
        dsim_output = np.array([self.deriv_act(i) for i in self.output_bias]) * self.deriv_loss(y_true, self.output_bias)
        #print(dsim_output)
        #print(self.hidden_bias.shape)
        dsim_hidden_weights = np.matmul(dsim_output, np.transpose(self.hidden_bias))
        dsim_hidden_bias = np.matmul(np.transpose(self.hidden_weights), dsim_output)
        #print(dsim_hidden_weights)
        #print(dsim_hidden_bias)

        dsim_hidden = np.array([self.deriv_act(i) for i in self.hidden_bias]) * dsim_hidden_bias
        dsim_input_weights = np.matmul(dsim_hidden, np.transpose(self.input_bias))
        dsim_input_bias = np.matmul(np.transpose(self.input_weights), dsim_hidden)

        #print(dsim_input_weights)
        #print(dsim_input_bias)
        
        self.hidden_weights -= learning_rate * dsim_hidden_weights
        self.input_weights -= learning_rate * dsim_input_weights

        return 0
    

    def train_neural_network(self, train_x, train_y, learning_rate = 0.01, num_epochs=1):

        """
        Trains the neural network on a dataset.
        """
        result = 0
        for epochs in range(num_epochs):
            fore = self.forward(train_x)
            result = fore
            self.backward(learning_rate=learning_rate, y_true=train_y)
            self.loss_value = self.loss(fore, train_y)
            print(self.loss_value)

        return result


In [65]:
class RecurrentNNet:

    # Should also allow a bias

    def __init__(self, input_size, hidden_size, output_size, vector_length, 
                 bias_h=np.array([[-1.2],[0.8],[-0.8],[0.7]]),
                 bias_y=np.array([[1],[-1],[0.5]])):

        # h_0 -> h_t -> h_time
        self.time = vector_length
        self.h_init = np.array([[0.1],[0.1],[0.1],[0.1]])
        #np.random.randn(hidden_size, 1) * 0.001

        # Create nodes of input_size, (standard) hidden_size, output_size
        self.weight_x = np.array([[1,2,1,2,2],
                                  [2,1,4,3,-1],
                                  [3,3,5,1,0],
                                  [4,5,6,2,5]])
        # np.random.randn(hidden_size, input_size) # Gets passed through for nnet
        self.weight_y = np.array([[1,2,1,2],
                                       [2,1,4,3],
                                       [3,3,5,1]])
        #np.random.randn(hidden_size, hidden_size)
        self.weight_hidden = np.array([[0,1,4,5],
                                  [-1,2,3,4],
                                  [1,3,2,1],
                                  [2,4,1,2]])
        #np.random.randn(output_size, hidden_size) # Gets passed through for nnet
        
        self.bh = bias_h # np.zeros((hidden_size, 1))  # hidden bias
        self.by = bias_y # np.zeros((output_size, 1))  # output bias

    def forward(self, x_data, y_data):

        """
        x_data: [x_1, ..., x_t] where x_i is a token.
        y_data: [y_1, ..., y_t] where y_i is a tuple.
        """

        #assert(len(x_data[0]) == len(y_data[0]))
        
        self.hs = []
        self.ys = []
        self.d_ksi = []
        self.d_hidden = [0] * self.time

        loss = 0

        prev = None
        for i in range(self.time):
            if prev is None:
                prev = self.h_init

            #node.show()
            #print(x_data[i])
            #node.show()

            #print("prev", prev)
            #print("prev", prev.shape)
            #a = np.matmul(self.weight_hidden, prev)
            #print("a", a.shape)
            #print(node.weight_x.shape)
            #print(x_data[i].shape)
            #print(x_data[i])
            #print(self.weight_x)
            #b = np.matmul(self.weight_x, np.array([x_data[i]]).T)#+ self.bh
            #print("b", b.shape)

            z_t = np.matmul(self.weight_hidden, prev) +  np.matmul(self.weight_x, np.array([x_data[i]]).T) + self.bh

            #print(self.bh.shape)
            #print(z_t.shape)
            h_t = activation(z_t)

            #print(i)
            #print(h_t)
            #print(h_t.shape)

            ksi_t = np.matmul(self.weight_y, h_t) + self.by
            y_t = output(ksi_t)
            #print(y_t)


            self.hs.append(h_t)
            self.ys.append(y_t)

            #print(np.array([y_data[i]]).T.shape)
            #print(y_t.shape)
            d_ksi_t = y_t - np.array([y_data[i]]).T
            self.d_ksi.append(d_ksi_t)
            #print("shape", d_hidden_t.shape)

            prev = h_t

            loss -= np.dot(y_data[i], (ksi_t - math.log(sum([math.exp(ks) for ks in ksi_t]))))
            
        print(loss)
        print(self.d_ksi)


    def backward(self, x_data, y_data):
        d_hidden_t = np.matmul(self.weight_y.T, self.d_ksi[self.time-1])
        self.d_hidden.append(d_hidden_t)
        
        d_wy, d_by, d_wh, d_bh, d_wx = np.zeros((3, 4)), np.zeros((3, 1)), np.zeros((4, 4)), np.zeros((4, 1)), np.zeros((4, 5))
        for i in range(self.time - 1, 0, -1):
            jacobian = np.ones((4, 1)) - self.hs[i] ** 2 # Change hardcoded later
            self.d_hidden[i] = np.matmul(self.weight_hidden.T, np.multiply(self.d_hidden[i+1], jacobian)) + \
                np.matmul(self.weight_y.T, self.d_ksi[i])
            #print("ht", self.d_hidden[i])
            d_wy += np.matmul(self.d_ksi[i], self.d_hidden[i].T)
            
            d_by += self.d_ksi[i]

            #print(jacobian.shape, self.d_hidden[i].shape, self.h_init.shape)
            #print("result", np.multiply(jacobian, self.d_hidden[i]))

            if i == 0:
                a = np.matmul(np.multiply(jacobian, self.d_hidden[i]), self.h_init.T)
                #print(a.shape)
                d_wh += a
            else:
                b = np.matmul(np.multiply(jacobian, self.d_hidden[i]), self.hs[i-1].T)
                #print(b.shape)
                d_wh += b
                
            d_bh += np.multiply(jacobian, self.d_hidden[i])
            #print(np.multiply(jacobian, self.d_hidden[i]).shape)
            #print(np.array([x_data[i]]).shape)
            d_wx += np.matmul(np.multiply(jacobian, self.d_hidden[i]), np.array([x_data[i]]))

        print(d_wh)

               
        

In [66]:
temp = RecurrentNNet(5, 4, 3, 3)


x_data=np.array([[0.1, 0.2, 0.4],
        [0.2, 0.1, 0.6],
        [0.3, 0.5, 0.1],
        [0.1, 0.5, 0.5],
        [0.3, 0.4, 0.7]])
y_data=np.array(
    [[1,0,0],
        [0,1,0],
        [0,0,1]])
print(x_data.shape)

temp.forward(x_data=x_data.T,
             y_data=y_data.T)
temp.backward(x_data=x_data.T,
              y_data=y_data.T)


(5, 3)
[8.80888043]
[array([[-0.99470748],
       [ 0.03331418],
       [ 0.9613933 ]]), array([[ 0.0039513 ],
       [-0.97080359],
       [ 0.96685229]]), array([[ 0.0039513 ],
       [ 0.02919641],
       [-0.03314771]])]
[[ 4.00499518e-10  4.50977775e-10  4.45080052e-10  4.52479953e-10]
 [ 7.21827487e-11  8.51899845e-11  8.36702595e-11  8.55770656e-11]
 [ 7.56794754e-09  8.55087173e-09  8.43602991e-09  8.58012248e-09]
 [-1.98077898e-14 -2.22985697e-14 -2.20075547e-14 -2.23726926e-14]]
