In [1]:
import numpy as np
import gzip, pickle
import sys
import copy
from tqdm import tqdm

In [7]:
class ANN:
    
    
    def __init__(self, train, valid, test, batch_size, epochs, learning_rate, nr_labels, neurons_per_layer: list, activation, 
                 optimizer, regularization, lambd, dropout):
        
        self.batch_size = batch_size; self.epochs = epochs; self.etha = learning_rate; self.nr_labels = nr_labels
        self.neurons_per_layer = neurons_per_layer
        
        self.X_train = train[0]; self.Y_train = self.reshape_Y(train[1])
        self.X_valid = valid[0]; self.Y_valid = self.reshape_Y(valid[1])
        self.X_test = test[0];   self.Y_test = self.reshape_Y(test[1])
        self.activation = activation; self.optimizer = optimizer.lower()
        self.regularization = regularization; self.lambd = lambd
        
        if dropout is not None:
            for key in dropout.keys():
                dropout[key] = np.random.random_integers(low=0, high=self.neurons_per_layer[key]-1,
                                                         size=(int)(dropout[key] * self.neurons_per_layer[key]))
        self.dropout = dropout
        
        self.batches = self.split_in_batches(self.X_train, self.Y_train, self.batch_size)
        
        self.a = [None for _ in range(0, len(self.neurons_per_layer))]
        self.W = [np.random.normal(0, np.sqrt(1/self.neurons_per_layer[i]), (self.neurons_per_layer[i+1], self.neurons_per_layer[i]))
                  for i in range(0, len(self.neurons_per_layer) - 1)]
        self.W.insert(0, None)
        
        if self.optimizer == 'momentum':
            self.gamma, self.history = self.auxiliar_vars('momentum')
        elif self.optimizer == 'adagrad':
            self.epsilon, self.history = self.auxiliar_vars('adagrad')
        elif self.optimizer == 'rmsprop':
            self.epsilon, self.beta, self.history = self.auxiliar_vars('rmsprop')
    
    
    def auxiliar_vars(self, optimizer):
        history = [0 for _ in range(0, len(self.W))]
        epsilon = 0.01
        if optimizer == 'momentum':
            gamma = 0.9
            return (gamma, history)
        if optimizer == 'rmsprop':
            beta = 0.99
            return (epsilon, beta, history)
        return (epsilon, history)
    
    
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    
    def sigmoid_deriv(self, a):
        return np.multiply(a, (1 - a))
    
    
    def tanh(self, z):
        return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
    
    
    def tanh_deriv(self, z):
        return 1 - self.tanh(z) ** 2
    
    
    def relu(self, z):
        return np.array([list(map(lambda elem : 0 if elem <= 0 else elem, line)) for line in z])
    
    
    def relu_deriv(self, a):
        return np.array([list(map(lambda elem: 0 if elem == 0 else 1, line)) for line in a])
    
    
    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis=0)
    
    
    def softmax_deriv(self, a):
        return np.multiply(a, 1 - a)
    
    
    def logistic_loss(self, last_layer, real_outputs):
        rez = 0 - np.multiply(real_outputs, np.log(last_layer)) + np.multiply((1 - real_outputs), np.log(1 - last_layer))
        return sum(np.sum(rez, axis=0))
    
    
    def logistic_loss_deriv(self, last_layer, real_outputs):
        return (last_layer - real_outputs) / np.multiply(last_layer, 1 - last_layer)
    
    
    def forward_propagation(self, X):
        self.a[0] = X
        for i in range(1, len(self.a) - 1):
            z = np.dot(self.W[i], self.a[i-1])
            if self.activation == 'sigmoid':
                self.a[i] = self.sigmoid(z)
            elif self.activation == 'tanh':
                self.a[i] = self.tanh(z)
            elif self.activation == 'relu':
                self.a[i] = self.relu(z)
        i += 1
        z = np.dot(self.W[i], self.a[i-1])
        self.a[i] = self.softmax(z)

        
    def back_propagation(self, real_outputs):
        W_before = copy.deepcopy(self.W)
        gradients = []
        error = (1 / self.batch_size) * (self.a[-1] - real_outputs)
        if self.regularization == 'L2':
            gradient = np.dot(error, self.a[-2].T) + self.lambd * (1 / self.batch_size) * self.W[-1]
        elif self.regularization == 'L1':
            gradient = np.dot(error, self.a[-2].T) + self.lambd * (1 / self.batch_size) * np.sign(self.W[-1])
        gradients.append(gradient)
        
        for i in range(len(self.a) - 2, 0, -1):
            if self.activation == 'sigmoid':
                error = np.multiply(np.dot(error.T, self.W[i+1]).T, self.sigmoid_deriv(self.a[i]))
            elif self.activation == 'tanh':
                error = np.multiply(np.dot(error.T, self.W[i+1]).T, self.tanh_deriv(self.a[i]))
            elif self.activation == 'relu':
                error = np.multiply(np.dot(error.T, self.W[i+1]).T, self.relu_deriv(self.a[i]))
            if self.regularization == 'L2':
                gradient = np.dot(error, self.a[i-1].T) + self.lambd * (1 / self.batch_size) * self.W[i]
            elif self.regularization == 'L1':
                gradient = np.dot(error, self.a[i-1].T) + self.lambd * (1 / self.batch_size) * np.sign(self.W[i])
            gradients.append(gradient)
        
        gradients = list(reversed(gradients))
        gradients.insert(0, None)
        
        for i in range(1, len(self.a)):
            if self.optimizer == 'momentum':
                self.history[i] = self.gamma * self.history[i] + self.etha * gradients[i]
                self.W[i] -= self.history[i]
            else:
                self.W[i] -=  (self.etha / np.sqrt(self.history[i] + self.epsilon)) * gradients[i]
                if self.optimizer == 'adagrad':
                    self.history[i] += gradients[i] ** 2
                elif self.optimizer == 'rmsprop':
                    self.history[i] = self.beta * self.history[i] + (1 - self.beta) * gradients[i] ** 2
        
        if self.dropout is not None:
            for i in range(1, len(self.a) - 1):
                if i in self.dropout.keys():
                    for neuron in self.dropout[i]:
                        self.W[i + 1][:, neuron] = copy.deepcopy(W_before[i + 1][:, neuron])
                        self.W[i][neuron, :] = copy.deepcopy(W_before[i][neuron, :])
        
        
    def fit(self):
        for epoch in range(0, self.epochs):
            for batch in self.batches:
                self.forward_propagation(batch[0])
                self.back_propagation(batch[1])
            self.accuracy(self.X_train, self.Y_train, "Train accuracy: ")
            self.accuracy(self.X_valid, self.Y_valid, "Valid accuracy: ")
    
    
    def accuracy(self, X, Y, msg):
        self.forward_propagation(X)
        real = np.argmax(Y, axis=0)
        predicted = np.argmax(self.a[-1], axis=0)
        nr = 0
        for i in range(0, real.size):
            if real[i] == predicted[i]:
                nr += 1
        print(msg + str(nr / X.shape[1]))
    
    
    def reshape_Y(self, vec):    
        new_Y = np.zeros((self.nr_labels, len(vec)))
        for i in range(0, len(vec)):
            new_Y[vec[i], i] = 1
        return new_Y
    
    
    def split_in_batches(self, X, Y, batch_size):
        batches = []
        for i in range(0, X.shape[1], batch_size):
            batches.append((X[:, i:i+batch_size], Y[:, i:i+batch_size]))
        return batches

    
if __name__ == "__main__":
    
    f = gzip.open(r'C:\Users\Tudor\Downloads\mnist.pkl.gz', 'rb')
    u = pickle._Unpickler(f)
    u.encoding = 'latin1'
    train, valid, test = u.load()
    f.close()

    var = ANN((train[0].T, train[1]), (valid[0].T, valid[1]), (test[0].T, test[1]), 
              batch_size=10, epochs=3, learning_rate=0.1, nr_labels=10, neurons_per_layer=[784,100,10], activation='relu',
              optimizer='adagrad', regularization='L2', lambd=0.01, dropout=None)
    var.fit()

Train accuracy: 0.94792
Valid accuracy: 0.9538
Train accuracy: 0.95738
Valid accuracy: 0.9605
Train accuracy: 0.96268
Valid accuracy: 0.9623


In [111]:
                 
class RNN_M2M:
    
    def __init__(self, sentences, neurons_per_hl, nr_hls, epochs, learning_rate):    
        self.sentences = sentences
        self.preprocess_train()
        self.neurons_per_hl = neurons_per_hl; self.nr_hls = nr_hls
        self.epochs = epochs; self.etha = learning_rate
        
        self.s = [None for _ in range(0, nr_hls + 1)]
        self.s[0] = np.zeros((neurons_per_hl, 1))
        self.o = [None for _ in range(0, nr_hls + 1)]
        
        self.U = np.random.normal(0, np.sqrt(1 / self.X_train[0].shape[0]), (neurons_per_hl, len(self.vocab)))
        self.W = np.random.normal(0, np.sqrt(1 / neurons_per_hl), (neurons_per_hl, neurons_per_hl))
        self.V = np.random.normal(0, np.sqrt(1 / neurons_per_hl), (len(self.vocab), neurons_per_hl))
        
        
    def preprocess_train(self):
        self.vocab = set()
        for sentence in self.sentences:
            self.vocab.update(sentence)
        self.vocab.add("")
        self.vocab = list(self.vocab)
        print(self.vocab)

        self.X_train = []
        self.Y_train = []
        for sentence in self.sentences:
            x = []
            for word in sentence:
                one_hot = [0 for _ in range(0, len(self.vocab))]
                one_hot[self.vocab.index(word)] = 1
                x.append(one_hot)
            self.X_train.append(np.array(x).T)
            one_hot = [0 for _ in range(0, len(self.vocab))]
            one_hot[self.vocab.index("")] = 1
            y = copy.deepcopy(x[1:])
            y.append(one_hot)
            self.Y_train.append(np.array(y).T)

            
    def tanh(self, z):
        return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
    
    
    def tanh_deriv(self, a):
        return 1 - np.multiply(a, a)
    
    
    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis=0)
    
        
    def forward_propagation(self, X):
        for i in range(1, X.shape[1] + 1):
            z = np.dot(self.W, self.s[i - 1]) + np.dot(self.U, X[:, i - 1].reshape((len(X[:, i - 1]), 1)))
            self.s[i] = self.tanh(z)
            z = np.dot(self.V, self.s[i])
            self.o[i] = self.softmax(z)
    
    
    def back_propagation_TT(self, X, Y):
        gradient_U = 0; gradient_W = 0; gradient_V = 0
        next_horizontal = 0
        for i in range(len(self.o) - 1, 0, -1):
            error = self.o[i] - Y[:, i - 1].reshape(len(Y[:, i - 1]), 1)
            gradient_V += np.dot(error, self.s[i].T)
            error = np.multiply(np.dot(error.T, self.V).T + next_horizontal, self.tanh_deriv(self.s[i]))
            gradient_W += np.dot(error, self.s[i - 1].T)
            gradient_U += np.dot(error, X[:, i - 1].reshape(1, len(X[:, i - 1])))
            next_horizontal = np.dot(error.T, self.W).T
        self.V -= self.etha * gradient_V
        self.W -= self.etha * gradient_W
        self.U -= self.etha * gradient_U
        
        
    def fit(self):
        for epoch in range(0, self.epochs):
            for i in range(0, len(self.X_train)):
                self.forward_propagation(self.X_train[i])
                self.back_propagation_TT(self.X_train[i], self.Y_train[i])


    def test(self, word):
        one_hot = [0 for _ in range(0, len(self.vocab))]
        one_hot[self.vocab.index(word)] = 1
        one_hot = np.array(one_hot).reshape((len(one_hot), 1))
        for i in range(1, len(self.o)):
            z = np.dot(self.W, self.s[i - 1]) + np.dot(self.U, one_hot)
            self.s[i] = self.tanh(z)
            z = np.dot(self.V, self.s[i])
            self.o[i] = self.softmax(z)
            one_hot = copy.deepcopy(self.o[i])
    
        
        for i in range(1, len(self.o)):
            print(self.vocab[np.argmax(self.o[i])])
            print(self.o[i])

            
if __name__ == "__main__":
    
    sentences = [["i", "love", "you", "now"],
                 ["i", "hate", "you", "tomorrow"],
                 ["i", "beg", "you", "today"],
                 ["i", "call", "you", "alywas"]]

    var = RNN_M2M(sentences, 100, len(sentences[0]), 5, 0.1)
    var.fit()
    var.test('i')

['tomorrow', 'i', 'beg', 'you', 'call', '', 'now', 'today', 'hate', 'alywas', 'love']
call
[[0.0194261 ]
 [0.02425846]
 [0.22431047]
 [0.01155636]
 [0.303862  ]
 [0.02321062]
 [0.02357418]
 [0.02554114]
 [0.1783664 ]
 [0.01704568]
 [0.1488486 ]]
you
[[0.00306088]
 [0.00383662]
 [0.00206356]
 [0.97426685]
 [0.00259822]
 [0.00336771]
 [0.00325459]
 [0.00109777]
 [0.00187907]
 [0.00163794]
 [0.00293678]]
alywas
[[0.09942075]
 [0.01118215]
 [0.00845937]
 [0.00320009]
 [0.0094709 ]
 [0.00353116]
 [0.08172235]
 [0.25830025]
 [0.00797018]
 [0.50558486]
 [0.01115794]]

[[1.35767337e-04]
 [1.46856917e-03]
 [1.03758827e-03]
 [4.02995365e-04]
 [1.32667765e-03]
 [9.90168893e-01]
 [9.63206430e-04]
 [1.44433448e-03]
 [1.33633194e-03]
 [5.16427063e-04]
 [1.19920968e-03]]


In [399]:
import numpy
class RNN_M2M:
    
    def __init__(self, data, seq_length, neurons_in_hl, etha):
        
        self.data = data; self.vocab = list(set(data)); self.vocab_size = len(self.vocab); self.seq_length = seq_length
        self.neurons_in_hl = neurons_in_hl; self.learning_rate = etha
        
        self.s = [None for i in range(0, self.seq_length + 1)]
        self.s[0] = np.reshape(np.array([0 for i in range(0, self.neurons_in_hl)]), (self.neurons_in_hl, 1))
        self.x = [np.array([None for i in range(0, self.vocab_size)]).reshape(self.vocab_size, 1) for j in range(0, self.seq_length)]
        self.x.insert(0, None)
        self.y = [np.array([None for i in range(0, self.vocab_size)]).reshape(self.vocab_size, 1) for j in range(0, self.seq_length)]
        self.y.insert(0, None)
        self.a = [np.array([0 for i in range(0, self.vocab_size)]).reshape(self.vocab_size, 1) for j in range(0, self.seq_length)]
        self.a.insert(0, None)
        
        self.W = np.random.normal(0, np.sqrt(1/self.neurons_in_hl), (self.s[0].shape[0], self.s[0].shape[0]))
        self.U = np.random.normal(0, np.sqrt(1/self.vocab_size), (self.s[0].shape[0], self.vocab_size))
        self.V = np.random.normal(0, np.sqrt(1/self.neurons_in_hl), (self.vocab_size, self.s[0].shape[0]))
        
        self.input = []; self.target = []
        for poz in range(0, len(self.data), self.seq_length):
            if poz + self.seq_length + 1 < len(self.data):
                self.input.append([self.vocab.index(self.data[i]) for i in range(poz, poz + self.seq_length)])
                self.target.append([self.vocab.index(self.data[i]) for i in range(poz + 1, poz + self.seq_length + 1)])
     
    
    def tanh(self, z):
        return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
    
    
    def relu(self, z: np.array):
        return np.array([list(map(lambda e: max(e, 0.01 * e), z[i])) for i in range(0, len(z))])
    
    
    def deriv_relu(self, z):
        return np.array([list(map(lambda e: 0 if e<0 else 1, elem)) for elem in z])
    
    
    def deriv_tanh(self, s):
        return 1 - np.multiply(self.tanh(s), self.tanh(s))
    
    
    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis = 0) 
    
    
    def BPTT(self, chars, target):
        for i in range(0, len(chars)):
            self.x[i + 1] = np.zeros((self.vocab_size, 1))
            self.x[i + 1][chars[i]] = 1
            self.y[i + 1] = np.zeros((self.vocab_size, 1))
            self.y[i + 1][target[i]] = 1

        for i in range(1, self.seq_length + 1):
            z = np.dot(self.W, self.s[i-1]) + np.dot(self.U, self.x[i])
            self.s[i] = self.relu(z)
            z = np.dot(self.V, self.s[i])
            self.a[i] = self.softmax(z)

        gradient_W = np.zeros(self.W.shape)
        gradient_U = np.zeros(self.U.shape)
        gradient_V = np.zeros(self.V.shape)
        gradientS_next = np.zeros(self.s[0].shape)

        for i in range(self.seq_length, 0, -1):
            error = self.a[i] - self.y[i]
            gradient_V += np.dot(error, self.s[i].T)
            new_error = np.multiply((np.dot(self.V.T, error) + gradientS_next), self.deriv_relu(self.s[i]))
            gradient_W += np.dot(new_error, self.s[i-1].T)
            gradient_U += np.dot(new_error, self.x[i].T)
            gradientS_next = np.dot(self.W.T, new_error)
            
        for dparam in [gradient_V, gradient_W, gradient_U, ]:
            np.clip(dparam, -5, 5, out=dparam)              
            
        self.W -= self.learning_rate * gradient_W
        self.V -= self.learning_rate * gradient_V
        self.U -= self.learning_rate * gradient_U

        self.s[0] = self.s[self.seq_length]

    
    def train_algo(self):
        for i in range(0, len(self.input)):
            self.BPTT(self.input[i], self.target[i])
            
    def test(self, text):
        for i in range(0, len(text)):
            self.x[i + 1] = np.zeros((self.vocab_size, 1))
            self.x[i + 1][self.vocab.index(text[i])] = 1
        for i in range(1, self.seq_length + 1):
            z = np.dot(self.W, self.s[i-1]) + np.dot(self.U, self.x[i])
            self.s[i] = self.relu(z)
            z = np.dot(self.V, self.s[i])
            self.a[i] = self.softmax(z)
        #print(self.a[self.seq_length])
        return self.vocab[int(np.argmax(self.a[-1], axis = 0))]
        
if __name__ == "__main__":
#     f = open(r"C:\Users\Tudor\Desktop\DocForRNN.txt", "r").read()
    f = "hello   hello   hello   hello   hello   hello   hello   hello   hello   hello   hello   hello   hello   hello   hello"
    d = {"h":0, "e":0, "l":0, "o": 0, " ": 0}
    for _ in range(100):
        var = RNN_M2M(data=f, seq_length=4, neurons_in_hl=10, etha=0.05)
        var.train_algo()
        d[var.test("hell")] += 1
    print(d)

{'h': 4, 'e': 3, 'l': 20, 'o': 60, ' ': 13}


In [1]:
import numpy as np
import gensim
import copy

In [61]:

class RNN_M2M:
    def __init__(self, vocab, neurons_per_hl, hls, learning_rate, epochs, X, Y):
        self.vocab = vocab; self.neurons_per_hl = neurons_per_hl; self.hls = hls
        self.etha = learning_rate; self.epochs = epochs; self.X = X; self.Y = Y
        
        self.o = [np.zeros((len(self.vocab), 1)) for _ in range(0, self.hls + 1)]
        self.o[0] = None
        
        self.s = [np.zeros((self.neurons_per_hl, 1)) for _ in range(0, self.hls + 1)]
        
        self.W = np.random.normal(0, np.sqrt(1 / (self.neurons_per_hl - 1)), (self.neurons_per_hl, self.neurons_per_hl))
        self.U = np.random.normal(0, np.sqrt(1 / (len(self.vocab) - 1)), (self.neurons_per_hl, len(self.vocab)))
        self.V = np.random.normal(0, np.sqrt(1 / (self.neurons_per_hl - 1)), (len(self.vocab), self.neurons_per_hl))
    
    def relu(self, z):
        return np.array([list(map(lambda e: max(e, 0.01 * e), z[i])) for i in range(0, len(z))])
    
    def relu_deriv(self, z):
        return np.array([list(map(lambda e: 0 if e < 0 else 1, elem)) for elem in z])
    
    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis = 0) 
        
    def forward_propagation(self, X):
        for i in range(1, len(X)):
            z1 = np.dot(self.U, X[i]) + np.dot(self.W, self.s[i - 1])
            self.s[i] = self.relu(z1)
            z2 = np.dot(self.V, self.s[i])
            self.o[i] = self.softmax(z2)
    
    def back_propagation(self, X, Y):
        gradient_W = 0; gradient_U = 0; gradient_V = 0
        gradient_next = np.zeros((self.neurons_per_hl, 1))
        for i in range(len(X) - 1, 0, -1):
            error = (self.o[i] - Y[i])
            gradient_V += np.dot(error, self.s[i].T)
            next_error = np.multiply((np.dot(self.V.T, error) + gradient_next), self.relu_deriv(self.s[i]))
            gradient_W += np.dot(next_error, self.s[i - 1].T)
            gradient_U += np.dot(next_error, X[i].T)
            gradient_next = np.dot(self.W.T, next_error) 
        
        self.V -= self.etha * gradient_V
        self.W -= self.etha * gradient_W
        self.U -= self.etha * gradient_U  
        
#         self.s[0] = copy.deepcopy(self.s[-1])

            
    def fit(self):
        for epoch in range(0, self.epochs):
            for i in range(0, len(self.X)):
                self.forward_propagation(self.X[i])
                self.back_propagation(self.X[i], self.Y[i])
        
    def predict(self):
        test = ("i", "envy", "you", "and", "hate")
        x = []
        x.insert(0, None)
        for word in test:
            one_hot_vec = np.zeros((len(self.vocab), 1))
            one_hot_vec[self.vocab.index(word)] = 1
            x.append(one_hot_vec)
        #------------------------------
        self.forward_propagation(x)
        #------------------------------
        for j in range(1, len(test) + 1):
            aux_l =[]
            for elem in self.o[j].tolist():
                aux_l.extend(elem)
            print("after", test[j - 1], "comes:", self.vocab[np.random.choice([_ for _ in range(len(self.vocab))], p=aux_l)])
        
        
        # Give the first word only, generate a sentence
        test = "i"
        one_hot_vec = np.zeros((len(self.vocab), 1))
        one_hot_vec[self.vocab.index(test)] = 1
        for j in range(1, len(self.X[0])):
            z = np.dot(self.U, one_hot_vec) + np.dot(self.W, self.s[j - 1])
            act = self.relu(z)
            z = np.dot(self.V, act)
            act = self.softmax(z)
            aux_l = []
            for elem in act:
                aux_l.extend(elem)
            print(self.vocab[np.random.choice([_ for _ in range(0, len(self.vocab))], p=aux_l)])
            one_hot_vec = np.zeros((len(self.vocab), 1))
            one_hot_vec[np.random.choice([_ for _ in range(0, len(self.vocab))], p=aux_l)] = 1

        
        
if __name__ == "__main__":
    
    
    sentences = [("i", "love", "you", "and", "think", "her"),
                 ("i", "hate", "you", "and", "miss", "her"), 
                 ("i", "lose", "you", "and", "trust", "him"),
                 ("i", "envy", "you", "and", "hate", "him"),
                 ("i", "kill", "you", "and", "miss", "them"),
                 ("i", "trust", "you", "and", "think", "her")]
                
    
    vocab = set()
    for sentence in sentences:
        vocab.update(sentence)
    vocab = list(vocab)
    vocab.append("")
    print(vocab)
          
    X = []; Y = []
    for sentence in sentences:
        #----- construct X -----#
        x = []
        x.insert(0, None)
        for word in sentence:
            one_hot_vec = np.zeros((len(vocab), 1))
            one_hot_vec[vocab.index(word)] = 1
            x.append(one_hot_vec)
        X.append(x)
        #----- construct Y now -----#
        y = []
        y.insert(0, None)
        for i in range(0, len(sentence) - 1):
            one_hot_vec = np.zeros((len(vocab), 1))
            one_hot_vec[vocab.index(sentence[i + 1])] = 1
            y.append(one_hot_vec)
        one_hot_vec = np.zeros((len(vocab), 1))
        one_hot_vec[vocab.index("")] = 1
        y.append(one_hot_vec)
        Y.append(y)
        
    model = RNN_M2M(vocab=vocab, neurons_per_hl=10, hls=len(sentences[0]), learning_rate=0.05, epochs=30, X=X, Y=Y)
    model.fit()
    model.predict()


['you', 'him', 'miss', 'them', 'lose', 'think', 'envy', 'love', 'i', 'and', 'hate', 'trust', 'kill', 'her', '']
after i comes: envy
after envy comes: you
after you comes: and
after and comes: think
after hate comes: them
hate
you
and
miss
them



In [51]:

class RNN_M2O:
    
    def __init__(self, nr_hls, neurons_per_layer, learning_rate, epochs, vocab, X, Y):
        self.nr_hls = nr_hls; self.neurons_per_layer = neurons_per_layer; self.etha = learning_rate; self.epochs = epochs;
        self.vocab = vocab; self.X = X; self.Y = Y
        
        self.s = [np.zeros((self.neurons_per_layer, 1)) for _ in range(0, self.nr_hls + 1)]
        self.o = None
        
        self.W = np.random.normal(0, np.sqrt(1 / (self.neurons_per_layer - 1)), (self.neurons_per_layer, self.neurons_per_layer))
        self.U = np.random.normal(0, np.sqrt(1 / (len(self.vocab) - 1)), (self.neurons_per_layer, len(self.vocab)))
        self.V = np.random.normal(0, np.sqrt(1 / (self.neurons_per_layer - 1)), (2, self.neurons_per_layer))
        
    def relu(self, z):
        return np.array([np.array(list(map(lambda e : max(0, e), line))) for line in z])
    
    def relu_deriv(self, z):
        return  np.array([np.array(list(map(lambda e : 1 if e >=0 else 0.01 * e, line))) for line in z])
    
    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis=0)
    
    def forward_propagation(self, X):
        for i in range(1, len(X)):
            z = np.dot(self.W, self.s[i - 1]) + np.dot(self.U, X[i])
            self.s[i] = self.relu(z)
        z = np.dot(self.V, self.s[i])
        self.o = self.softmax(z)
    
    def back_propagation(self, X, Y):
        error = self.o - Y
        self.V -= self.etha * np.dot(error, self.s[-1].T)
        
        gradient_W = 0; gradient_U = 0
        
        error = np.multiply(np.dot(self.V.T, error), self.relu_deriv(self.s[-1]))
        gradient_W += np.dot(error, self.s[-2].T)
        gradient_U += np.dot(error, X[-1].T)

        for i in range(len(X) - 2, 0, -1):
            error = np.multiply(np.dot(self.W.T, error), self.relu_deriv(self.s[i]))
            gradient_W += np.dot(error, self.s[i - 1].T)
            gradient_U += np.dot(error, X[i].T)
        
        self.W -= self.etha * gradient_W
        self.U -= self.etha * gradient_U
        
#         self.s[0] = copy.deepcopy(self.s[-1])
        
    def fit(self):
        for epoch in range(0, self.epochs):
            for i in range(0, len(self.X)):
                self.forward_propagation(self.X[i])
                self.back_propagation(self.X[i], self.Y[i])
    
    def predict(self):
        sentence = ("beautiful", "love", "nice")
        x = []
        x.insert(0, None)
        for word in sentence:
            one_hot_vec = np.zeros((len(self.vocab), 1))
            one_hot_vec[self.vocab.index(word)] = 1
            x.append(one_hot_vec)
                
        self.forward_propagation(x)
        print(self.o)
                
                
                
if __name__ == "__main__":
    
    sentences = [("beautiful", "nice", "love"),
                 ("nice", "love", "nice"),
                 ("love", "love", "love"),
                 ("ugly", "hate", "ugly"),
                 ("hate", "hate", "hate")]
    
    Y = [np.array((1, 0)).reshape((2,1)), 
         np.array((1, 0)).reshape((2,1)), 
         np.array((1, 0)).reshape((2,1)),
         np.array((0, 1)).reshape((2,1)), 
         np.array((0, 1)).reshape((2,1))]
    
    vocab = set()
    for sentence in sentences:
        vocab.update(sentence)
    vocab = list(vocab)
    print("Vocabulary is:", vocab)
          
    X = []
    for sentence in sentences:
        #----- construct X -----#
        x = []
        x.insert(0, None)
        for word in sentence:
            one_hot_vec = np.zeros((len(vocab), 1))
            one_hot_vec[vocab.index(word)] = 1
            x.append(one_hot_vec)
        X.append(x)
    
    obj = RNN_M2O(nr_hls=len(sentences[0]), neurons_per_layer=5, learning_rate=0.1, epochs=30, vocab=vocab, 
                  X=X, Y=Y)
    obj.fit()
    obj.predict()

Vocabulary is: ['beautiful', 'nice', 'love', 'ugly', 'hate']
[[0.98538404]
 [0.01461596]]
