In [40]:
!pip install emoji



In [41]:
import os, sys 
from google.colab import drive 
# Mount Google Drive under /content/mnt; the data paths used later in this
# notebook all start with '/content/mnt/My Drive/'.
drive.mount('/content/mnt')


Drive already mounted at /content/mnt; to attempt to forcibly remount, call drive.mount("/content/mnt", force_remount=True).


In [42]:
#data loader
import csv
import numpy as np
import emoji
import pandas as pd

def read_glove_vecs(glove_file):
    """Load a whitespace-separated embedding file (one `word v1 v2 ...` per line).

    Returns:
        words_to_index: dict word -> index, numbered from 1 in sorted word order
            (index 0 is never assigned).
        index_to_words: the inverse mapping, index -> word.
        word_to_vec_map: dict word -> float64 numpy vector.
    """
    word_to_vec_map = {}
    with open(glove_file, 'r', encoding="utf8") as handle:
        for raw in handle:
            fields = raw.strip().split()
            # First field is the token, the remaining fields are its vector.
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    # The dict keys are exactly the set of distinct words seen in the file.
    vocabulary = sorted(word_to_vec_map)
    words_to_index = {word: pos for pos, word in enumerate(vocabulary, start=1)}
    index_to_words = {pos: word for pos, word in enumerate(vocabulary, start=1)}
    return words_to_index, index_to_words, word_to_vec_map


def read_csv(filename = '/content/mnt/My Drive/ugrp/emojify_data.csv'):
    """Read a two-column CSV of (sentence, label).

    Column 0 is taken as the sentence text and column 1 as the label; any
    further columns are ignored.

    Returns:
        X: numpy array of the sentences.
        Y: numpy int array of the labels.
    """
    with open(filename) as csv_file:
        records = [(row[0], row[1]) for row in csv.reader(csv_file)]

    sentences = [text for text, _ in records]
    labels = [label for _, label in records]
    return np.asarray(sentences), np.asarray(labels, dtype=int)


# Maps a class label (as the string keys "0".."4") to the emoji code that
# label_to_emoji hands to emoji.emojize.
emoji_dictionary = {"0": "\u2764\uFE0F",    # heart given as raw code points: ":heart:" prints a black instead of a red heart depending on the font
                    "1": ":baseball:",
                    "2": ":smile:",
                    "3": ":disappointed:",
                    "4": ":fork_and_knife:"}

def label_to_emoji(label):
    """
    Converts a label (int or string) into the corresponding emoji (string) ready to be printed.

    The label is looked up in `emoji_dictionary` via `str(label)`, so ints,
    numpy ints and strings all work; an unknown label raises KeyError.
    """
    code = emoji_dictionary[str(label)]
    try:
        # Original call; works on emoji releases that still accept `use_aliases`.
        return emoji.emojize(code, use_aliases=True)
    except TypeError:
        # emoji >= 2.0 removed `use_aliases`; alias names are selected through
        # the `language` argument instead.
        return emoji.emojize(code, language='alias')
              
    
def print_predictions(X, pred):
    """Print a blank line, then each entry of X followed by the emoji of its predicted label."""
    print()
    n_rows = X.shape[0]
    for row in range(n_rows):
        sentence = X[row]
        predicted_label = int(pred[row])
        print(sentence, label_to_emoji(predicted_label))

# Load the GloVe vocabulary/vectors and the default CSV from the mounted Drive.
words_to_index, index_to_words, word_to_vec_map = read_glove_vecs('/content/mnt/My Drive/ugrp/glove.6B.50d.txt')
X, Y = read_csv()
# Sanity check: show the emoji for the first label (last expression -> cell output).
label_to_emoji(Y[0])

'🍴'

In [43]:
def to_one_hot(Y, num_classes=None):
    """One-hot encode a 1-D array of non-negative integer labels.

    Args:
        Y: integer label array.
        num_classes: width of the encoding. When None (the original behaviour)
            it is inferred as `Y.max() + 1`, which yields a narrower matrix if
            the highest class happens to be absent from `Y`; pass it explicitly
            to keep train/validation/test encodings the same width.

    Returns:
        int32 array of shape (Y.size, num_classes) with a single 1 per row.
    """
    Y = np.asarray(Y)
    if Y.size == 0:
        # Nothing to encode; `Y.max()` would raise on an empty array.
        return np.zeros((0, num_classes or 0), dtype='i')
    if num_classes is None:
        num_classes = int(Y.max()) + 1
    b = np.zeros((Y.size, num_classes), dtype='i')
    b[np.arange(Y.size), Y] = 1
    return b

# Sanity check: the one-hot row for sample 1 next to its raw integer label.
print(to_one_hot(Y)[1])
print(Y[1])

[0 0 0 1 0]
3


In [44]:
# Load the train / test splits (sentences plus integer labels).
X_train_seq, Y_train = read_csv('/content/mnt/My Drive/ugrp/train_emoji.csv')
X_test_seq, Y_test = read_csv('/content/mnt/My Drive/ugrp/test_emoji.csv')
# X_test, Y_test = read_csv('data/tesss.csv')
print('X_train: %i, Y_train: %i, X_test: %i, Y_test: %i' % (len(X_train_seq),len(Y_train),len(X_test_seq),len(Y_test)))
# Longest training sentence measured in words. The previous expression picked
# the sentence with the most *characters* and counted its words, which can
# under-report the true maximum word count.
Max_len = max(len(sentence.split()) for sentence in X_train_seq)
print(Max_len)

# Replace the integer labels with their one-hot encodings.
# NOTE: this rebinds Y_train / Y_test, so re-running the cell without
# re-reading the CSVs would one-hot encode the already encoded arrays.
Y_train = to_one_hot(Y_train)
Y_test = to_one_hot(Y_test)
def sent2idx(X, words_to_index, max_len):
    """Convert sentences to a zero-padded matrix of vocabulary indices.

    Each sentence is split on single spaces and lower-cased; tokens missing
    from `words_to_index` are dropped. The remaining indices are written
    left-aligned into the row, the rest of the row stays 0 (padding).

    Args:
        X: array of sentence strings, shape (m,).
        words_to_index: dict token -> integer index.
        max_len: number of columns; sentences with more known tokens are
            truncated instead of raising IndexError.

    Returns:
        int32 array of shape (m, max_len).
    """
    X_idx = np.zeros((X.shape[0], max_len), dtype='i')
    for row in range(X.shape[0]):
        tokens = (wrd.lower() for wrd in X[row].split(" "))
        # Filter on the mapping that is actually used for the lookup; the
        # previous version filtered on the global word_to_vec_map instead.
        indices = [words_to_index[tok] for tok in tokens if tok in words_to_index]
        indices = indices[:max_len]
        X_idx[row, :len(indices)] = indices
    return X_idx

# Size each index matrix by the largest *word* count in its split. The previous
# expression used the word count of the sentence with the most characters,
# which can be too small for a sentence made of many short words.
X_train = sent2idx(X_train_seq,words_to_index,max(len(s.split()) for s in X_train_seq))
X_test = sent2idx(X_test_seq,words_to_index,max(len(s.split()) for s in X_test_seq))
print('X:train: ',X_train)

import random
def split_validation(X_train, n=0.2, Y=None):
  """Split off the first `n` fraction of the rows as a validation set.

  No shuffling is done: the validation set is simply the leading rows.
  (Note for anyone adding shuffling: `random.shuffle` works in place and
  returns None, so its result must not be assigned.)

  Args:
    X_train: input array, split along axis 0.
    n: fraction of rows that go to the validation set.
    Y: labels aligned with `X_train`. When omitted, the module-level
      `Y_train` is used, which was the only behaviour before this
      parameter existed.

  Returns:
    (X_rest, X_valid, Y_rest, Y_valid)
  """
  if Y is None:
    Y = Y_train  # legacy fallback to the notebook-level labels
  num = int(X_train.shape[0] * n)
  print(num)
  return X_train[num:], X_train[:num], Y[num:], Y[:num] # train, validation

# Hold out the first 20% of the training rows for validation and print the
# shapes of the resulting input / label arrays.
X_train1, X_train_valid, Y_train1, Y_train_valid = split_validation(X_train,n=0.2)
print(X_train1.shape, X_train_valid.shape)
print(Y_train1.shape, Y_train_valid.shape)

X_train: 132, Y_train: 132, X_test: 56, Y_test: 56
10
X:train:  [[259914 352214 360915 ...      0      0      0]
 [185457  52943 293982 ...      0      0      0]
 [193716 192973 357266 ... 222138      0      0]
 ...
 [386307 192973 390470 ...      0      0      0]
 [185457 226278 394475 ...      0      0      0]
 [166369 198213      0 ...      0      0      0]]
26
(106, 10) (26, 10)
(106, 5) (26, 5)


In [45]:
class SGD:
  """Plain stochastic gradient descent: w <- w - lr * grad."""

  def __init__(self, lr = 0.1):
    self.lr = lr

  def update(self, weights, gradients):
    """Update every entry of `weights` with the matching gradient.

    Args:
      weights: list of parameters (numpy arrays are modified in place).
      gradients: list of gradients, index-aligned with `weights`.

    Returns:
      The last updated parameter (kept for backward compatibility), or
      None when `weights` is empty. Previously an empty list raised
      UnboundLocalError.
    """
    last = None
    for w in range(len(weights)):
      weights[w] -= self.lr * gradients[w]
      last = weights[w]
    return last

In [46]:
#referrence: https://github.com/WegraLee/deep-learning-from-scratch-2/blob/master/common/util.py
def make_emb(emb_dim):
    """Build the embedding matrix from the notebook-level GloVe tables.

    Reads the module-level `words_to_index` and `word_to_vec_map`. Row `idx`
    of the result holds the vector of the word mapped to `idx`; the matrix
    has one more row than there are words, and rows never assigned (such as
    row 0 when indices start at 1) stay all-zero.

    Returns:
        float64 array of shape (len(words_to_index) + 1, emb_dim).
    """
    n_rows = len(words_to_index) + 1
    table = np.zeros((n_rows, emb_dim))
    for token in words_to_index:
        table[words_to_index[token], :] = word_to_vec_map[token]
    return table


def revise_grad(grads, max_grad=0.25):
    """Clip gradients by their global L2 norm.

    Computes the L2 norm over all arrays in `grads` together; if it exceeds
    `max_grad`, every array is scaled in place so the joint norm becomes
    (approximately) `max_grad`.

    Returns:
        The same `grads` list that was passed in.
    """
    squared_total = sum(np.sum(g ** 2) for g in grads)
    scale = max_grad / (np.sqrt(squared_total) + 1e-6)  # 1e-6 guards against division by zero
    if scale < 1:  # the gradient norm is larger than max_grad
        for g in grads:
            g *= scale
    return grads

def collect_parameters(weights, grads):
    """Merge parameters that are transposes of one another (tied weights).

    Scans all pairs (i, j) with i < j. When both entries are 2-D and
    `weights[j]` equals `weights[i].T` element-wise, the transposed gradient of
    j is added to the gradient of i and entry j is removed from both lists.
    The scan restarts after every merge and stops once a full pass finds no
    such pair.

    The lists are shallow-copied first, so the caller's lists keep their
    length, but `grads[i] += ...` modifies the caller's gradient array in place.

    Returns:
        (weights, grads): the de-duplicated lists.
    """
    weights, grads = weights[:], grads[:]
    while True:
        k = False  # set to True as soon as one pair was merged in this pass
        for i in range(0, len(weights) - 1):
            for j in range(i + 1, len(weights)):
                if weights[i].ndim == 2 and weights[j].ndim == 2:
                  # The shape check runs first so the element-wise comparison is
                  # only evaluated for shape-compatible pairs.
                  if weights[j].shape==weights[i].T.shape and np.all(weights[i].T == weights[j]):
                    grads[i] += grads[j].T
                    k = True
                    weights.pop(j)
                    grads.pop(j)
                # The lists just changed length: leave both loops and rescan.
                if k: break
            if k: break
        if not k: break

    return weights, grads


In [47]:
#Reference: https://github.com/WegraLee/deep-learning-from-scratch-2/blob/master/common/time_layers.py
import numpy as np
class vanilla_RNN_unit:
    """One time step of a tanh RNN: h_out = tanh(h_in @ Wh + x @ Wx + b).

    `weights` is a list (not a tuple) so the entries can be replaced, and
    `grads` holds one buffer per weight that `backward` overwrites in place.
    """

    def __init__(self, Wx, Wh, b):
        self.Wx, self.Wh, self.b = Wx, Wh, b
        self.weights = [Wx, Wh, b]
        self.grads = [np.zeros_like(param) for param in (Wx, Wh, b)]
        # Values cached by forward() for use in backward().
        self.x = self.h_inp = self.h_oup = None

    def forward(self, x, h_inp):
        """Compute the next hidden state from input `x` and previous state `h_inp`."""
        Wx, Wh, b = self.weights
        pre_activation = np.dot(h_inp, Wh) + np.dot(x, Wx) + b
        h_next = np.tanh(pre_activation)
        self.x, self.h_inp, self.h_oup = x, h_inp, h_next
        return h_next

    def backward(self, dh_oup):
        """Back-propagate `dh_oup` (gradient w.r.t. the output state).

        Stores dWx, dWh, db in `self.grads` and returns (dx, dh_inp).
        """
        Wx, Wh, _ = self.weights
        # Derivative of tanh expressed through its cached output.
        d_pre = dh_oup * (1 - self.h_oup ** 2)
        self.grads[0][...] = np.dot(self.x.T, d_pre)
        self.grads[1][...] = np.dot(self.h_inp.T, d_pre)
        self.grads[2][...] = np.sum(d_pre, axis=0)
        return np.dot(d_pre, Wx.T), np.dot(d_pre, Wh.T)


class vanilla_RNN:
    """tanh RNN unrolled over the time axis of a (batch, time, features) input.

    Args:
        Wx, Wh, b: input weights, recurrent weights and bias shared by all steps.
        connect: when True the final hidden state of one forward() call is the
            initial state of the next one (stateful); when False every call
            starts from zeros.
        out_seq: when True forward() returns all hidden states
            (batch, time, hidden); when False only the last one (batch, hidden),
            and backward() then expects a (batch, hidden) gradient.
    """

    def __init__(self, Wx, Wh, b, connect=False, out_seq = True):
        self.weights = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)]
        self.out_seq = out_seq
        self.h, self.dh,self.hs,self.hidden, self.layers = None, None, None, None, None
        self.connect = connect

    def forward(self, batch_x):
        n_batch, n_steps = batch_x.shape[0], batch_x.shape[1]
        n_hidden = self.weights[0].shape[1]
        self.layers = []
        hs = np.empty((n_batch, n_steps, n_hidden), dtype='f')
        # A carried-over state is only usable if it matches the current batch
        # size; otherwise (first call, stateless mode, or a different batch
        # size) start from zeros instead of failing on a shape mismatch.
        carried_ok = self.h is not None and self.h.shape[0] == n_batch
        if not self.connect or not carried_ok:
            self.h = np.zeros((n_batch, n_hidden), dtype='f')
        for t in range(n_steps):
            layer = vanilla_RNN_unit(self.weights[0],self.weights[1],self.weights[2])
            self.h = layer.forward(batch_x[:, t, :], self.h)
            hs[:, t, :] = self.h
            self.layers.append(layer)
        self.hidden = hs
        if self.out_seq:
            return hs
        self.hs = hs
        return hs[:,-1,:]

    def backward(self, dhs):
        """Back-propagate through time; returns the gradient w.r.t. the input sequence."""
        n_batch, n_steps = self.hidden.shape[0], self.hidden.shape[1]
        dxs = np.empty((n_batch, n_steps, self.weights[0].shape[0]), dtype='f')
        grads = [0, 0, 0]
        # With out_seq=False the only external gradient enters at the last
        # step, so it seeds the recurrent gradient; with out_seq=True every
        # step receives its own slice of `dhs` on top of the recurrent one.
        dh = 0 if self.out_seq else dhs
        for t in reversed(range(n_steps)):
            layer = self.layers[t]
            upstream = dhs[:, t, :] + dh if self.out_seq else dh
            dx, dh = layer.backward(upstream)
            dxs[:, t, :] = dx
            # Weights are shared across steps, so their gradients add up.
            for i, grad in enumerate(layer.grads):
                grads[i] += grad
        for i, grad in enumerate(grads):
            self.grads[i][...] = grad
        self.dh = dh

        return dxs

class LSTM_unit:
    """One LSTM time step.

    The affine output (width 4*hidden) is sliced in the order
    forget | candidate | input | output, and `backward` stacks the gate
    gradients in the same order.
    """

    def __init__(self, Wx, Wh, b):
        # A list rather than a tuple so entries can be updated.
        self.weights = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)]
        # Values cached by forward() for backward().
        self.x = self.h_inp = self.c_inp = None
        self.i = self.f = self.g = self.o = self.c_out = None

    def forward(self, x, h, c):
        """Return (h_out, c_out) for input `x`, previous hidden `h` and cell `c`."""
        Wx, Wh, b = self.weights
        _, hidden = h.shape
        affine = np.dot(x, Wx) + np.dot(h, Wh) + b

        def sigmoid(z):
            return 1 / (1 + np.exp(-z))

        forget_gate = sigmoid(affine[:, :hidden])
        candidate = np.tanh(affine[:, hidden:2*hidden])
        input_gate = sigmoid(affine[:, 2*hidden:3*hidden])
        output_gate = sigmoid(affine[:, 3*hidden:])

        c_next = forget_gate * c + candidate * input_gate
        h_next = output_gate * np.tanh(c_next)

        self.x, self.h_inp, self.c_inp = x, h, c
        self.i, self.f, self.g, self.o = input_gate, forget_gate, candidate, output_gate
        self.c_out = c_next
        return h_next, c_next

    def backward(self, dh, dc):
        """Back-propagate (dh, dc); stores weight gradients and returns (dx, dh_inp, dc_inp)."""
        Wx, Wh, _ = self.weights
        i, f, g, o = self.i, self.f, self.g, self.o
        tanh_c = np.tanh(self.c_out)

        # Total gradient reaching the cell state.
        ds = dc + (dh * o) * (1 - tanh_c ** 2)
        dc_inp = ds * f

        # Gate gradients, each multiplied by the derivative of its activation.
        di = (ds * g) * (i * (1 - i))
        df = (ds * self.c_inp) * (f * (1 - f))
        do = (dh * tanh_c) * (o * (1 - o))
        dg = (ds * i) * (1 - g ** 2)

        d_affine = np.hstack((df, dg, di, do))
        self.grads[0][...] = np.dot(self.x.T, d_affine)
        self.grads[1][...] = np.dot(self.h_inp.T, d_affine)
        self.grads[2][...] = d_affine.sum(axis=0)

        dx = np.dot(d_affine, Wx.T)
        dh_inp = np.dot(d_affine, Wh.T)
        return dx, dh_inp, dc_inp


class LSTM:
    """LSTM unrolled over the time axis of a (batch, time, features) input.

    Args:
        Wx, Wh, b: input weights, recurrent weights and bias shared by all steps.
        connect: when True the final hidden/cell state of one forward() call is
            the initial state of the next one (stateful); when False every call
            starts from zeros.
        out_seq: when True forward() returns all hidden states
            (batch, time, hidden); when False only the last one (batch, hidden),
            and backward() then expects a (batch, hidden) gradient.
    """

    def __init__(self, Wx, Wh, b, connect=False, out_seq = True):
        self.weights = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)]
        self.h, self.c,self.dh,self.hidden,self.layers = None, None,None,None,None
        self.connect = connect
        self.out_seq = out_seq

    def _usable_state(self, state, n_batch):
        """True when a carried-over state exists and matches the batch size."""
        return self.connect and state is not None and state.shape[0] == n_batch

    def forward(self, batch_x):
        n_batch, n_steps = batch_x.shape[0], batch_x.shape[1]
        n_hidden = self.weights[1].shape[0]
        hs = np.empty((n_batch, n_steps, n_hidden), dtype='f')
        # Start from zeros on the first call, in stateless mode, or when the
        # batch size differs from the carried-over state (which previously
        # failed with a shape mismatch).
        if not self._usable_state(self.h, n_batch):
            self.h = np.zeros((n_batch, n_hidden), dtype='f')
        if not self._usable_state(self.c, n_batch):
            self.c = np.zeros((n_batch, n_hidden), dtype='f')
        self.layers = []
        for t in range(n_steps):
            layer = LSTM_unit(*self.weights)
            self.h, self.c = layer.forward(batch_x[:, t, :], self.h, self.c)
            hs[:, t, :] = self.h
            self.layers.append(layer)
        self.hidden = hs
        if self.out_seq:
            return hs
        return hs[:,-1,:]

    def backward(self, dhs):
        """Back-propagate through time; returns the gradient w.r.t. the input sequence."""
        n_batch, n_steps = self.hidden.shape[0], self.hidden.shape[1]
        dxs = np.empty((n_batch, n_steps, self.weights[0].shape[0]), dtype='f')
        grads = [0, 0, 0]
        # With out_seq=False the only external gradient enters at the last
        # step, so it seeds the recurrent gradient; with out_seq=True every
        # step receives its own slice of `dhs` on top of the recurrent one.
        dh = 0 if self.out_seq else dhs
        dc = 0
        for t in reversed(range(n_steps)):
            layer = self.layers[t]
            upstream = dhs[:, t, :] + dh if self.out_seq else dh
            dx, dh, dc = layer.backward(upstream, dc)
            dxs[:, t, :] = dx
            # Weights are shared across steps, so their gradients add up.
            for i, grad in enumerate(layer.grads):
                grads[i] += grad
        for i, grad in enumerate(grads):
            self.grads[i][...] = grad
        self.dh = dh
        return dxs
class Dropout:
    """Inverted dropout.

    In training mode (`self.train` truthy) each element is zeroed with
    probability `dropout_ratio` and the survivors are scaled by
    1 / (1 - dropout_ratio); in evaluation mode the input passes through
    unchanged. The layer has no parameters (`weights` / `grads` are empty).
    """

    def __init__(self, dropout_ratio=0.5):
        self.weights, self.grads = [], []
        self.d = dropout_ratio
        self.m = None       # mask of the most recent training-mode forward pass
        self.train = True

    def forward(self, batch_x):
        if self.train:
            keep = np.random.rand(*batch_x.shape) > self.d
            self.m = keep.astype(np.float32) * (1 / (1.0 - self.d))
            return batch_x * self.m
        # Drop any mask left over from training so backward() cannot apply a
        # mask that does not belong to this forward pass.
        self.m = None
        return batch_x

    def backward(self, back):
        if self.m is None:
            # Evaluation mode is the identity, and so is its gradient.
            return back
        return back * self.m

In [48]:
'''class Linear: 
  def __init__(self, w, b):
    self.w = w
    self.b = b
    self.dw = 0
    self.db = 0
    self.x = 0
    self.input_shape = None

  def forward(self, x):
    #print('Linear.forward')
    #print('Linear.forward.x: ',x.shape)
    #print('Linear.forward.w: ',self.W.shape)
    self.input_shape = x.shape
    self.x = x.reshape(x.shape[0], -1)
    #print('after: ',x.shape)
    #print('self.b: ',self.b.shape)

    return np.dot(self.x, self.w) + self.b

  def backward(self, back):
    #print('Linear.backward')
    self.dw = np.dot(self.x.T, back)
    self.db = np.sum(back, axis=0)
    return np.dot(back, self.w.T).reshape(*self.input_shape)'''

class Linear:
  """Fully connected layer: out = x @ W + b.

  Inputs with more than two dimensions are flattened to (batch, -1) in
  forward() and the input gradient is reshaped back in backward().
  """

  def __init__(self, W, b):
    self.weights = [W, b]
    self.grads = [np.zeros_like(W), np.zeros_like(b)]
    self.x = None                  # flattened input cached by forward()
    self.original_x_shape = None   # input shape before flattening
    self.dW = None
    self.db = None

  def forward(self, x):
    self.original_x_shape = x.shape
    self.x = x.reshape(x.shape[0], -1)
    W, b = self.weights
    return np.dot(self.x, W) + b

  def backward(self, dout):
    # Diagnostic only: report NaNs in the incoming gradient.
    if np.isnan(dout).any():
      print('Linear Nan here')
    W = self.weights[0]
    self.dW = np.dot(self.x.T, dout)
    self.db = np.sum(dout, axis=0)
    self.grads[0][...] = self.dW
    self.grads[1][...] = self.db
    return np.dot(dout, W.T).reshape(*self.original_x_shape)

def softmax(x):
  """Numerically stable softmax along the last axis.

  For 1-D and 2-D input this matches the previous behaviour (a vector, or each
  row of a matrix, is normalised). Higher-dimensional input is now normalised
  along its last axis as well; previously the whole array was normalised as a
  single distribution.
  """
  axis = -1 if x.ndim else None  # a 0-d array has no axis to reduce over
  shifted = x - np.max(x, axis=axis, keepdims=True)  # guards against overflow in exp
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis=axis, keepdims=True)
def cross_entropy_error(p,y):
  """Mean cross-entropy of predicted probabilities `p` against targets `y`.

  `y` may be one-hot (same number of elements as `p`) or a vector of class
  indices. A single 1-D sample is treated as a batch of one. 1e-7 is added
  inside the log to avoid log(0).
  """
  if p.ndim == 1:
      p, y = p.reshape(1, p.size), y.reshape(1, y.size)
  # One-hot targets are reduced to class indices.
  labels = y.argmax(axis=1) if y.size == p.size else y
  n_samples = p.shape[0]
  picked = p[np.arange(n_samples), labels]
  return -np.sum(np.log(picked + 1e-7)) / n_samples


class Softmax_Cross_Entropy_Error:
  """Softmax followed by mean cross-entropy loss.

  forward() caches the probabilities in `self.p`, the targets in `self.y` and
  the loss in `self.loss`; backward() returns the gradient of the loss with
  respect to the scores given to forward().
  """

  def __init__(self):
    self.loss = 0
    self.p = 0
    self.y = 0

  def forward(self, p, y):
    """`p` are raw scores (batch, classes); `y` is one-hot or class indices."""
    self.y = y
    self.p = softmax(p)
    self.loss = cross_entropy_error(self.p, self.y)
    return self.loss

  def backward(self, back = 1):
    """Gradient w.r.t. the scores, scaled by the upstream gradient `back`.

    With the default `back=1` and one-hot targets the result is unchanged.
    Previously `back` was ignored, and index targets (which forward() accepts)
    were subtracted from the probabilities as if they were one-hot.
    """
    batch_size = self.y.shape[0]
    if self.y.size == self.p.size:
      grad = self.p - self.y
    else:
      # Targets are class indices: subtract 1 at the true class of each row.
      grad = self.p.copy()
      grad[np.arange(batch_size), self.y] -= 1
    return back * grad / batch_size


class Embedding_unit:
    """Embedding lookup for one array of word indices.

    `weights[0]` is the embedding matrix (vocab_size, emb_size); `grads[0]`
    is a same-shaped buffer that backward() resets and fills.
    """

    def __init__(self, W):
        self.weights = [W]
        self.grads = [np.zeros_like(W)]
        self.wrd_idx = None  # indices used in the last forward()

    def forward(self, wrd_idx):
        """Return the embedding rows selected by `wrd_idx`."""
        self.wrd_idx = wrd_idx
        return self.weights[0][wrd_idx]

    def backward(self, dback):
        """Scatter-add `dback` into the gradient rows of the looked-up words."""
        dW = self.grads[0]
        dW[...] = 0
        # np.add.at accumulates correctly when the same index occurs more than once.
        np.add.at(dW, self.wrd_idx, dback)
        return None

class Embedding:
    """Embedding layer for a (batch, time) matrix of word indices.

    Args:
        vocab_size: number of rows of a randomly initialised matrix
            (only used when `emb_pre` is not True).
        emb_size: embedding dimension.
        emb_pre: when True the matrix is built by make_emb(emb_size);
            otherwise it is drawn as 0.01 * N(0, 1) in float32.

    The lookup and its gradient are computed for the whole batch at once.
    The previous implementation created one Embedding_unit per time step, each
    of which allocated and zeroed its own vocabulary-sized gradient buffer.
    """

    def __init__(self, vocab_size, emb_size,emb_pre = True):
        if emb_pre is True:
          self.emb_w = make_emb(emb_size)
        else:
          self.emb_w = (0.01* np.random.randn(vocab_size, emb_size)).astype('f')
        self.weights = [self.emb_w]
        self.grads = [np.zeros_like(self.emb_w)]
        self.W = self.emb_w
        self.layers = None   # retained attribute; no per-step units are created
        self.idx = None      # indices used in the last forward()

    def forward(self, batch_x):
        """Return float32 embeddings of shape (batch, time, emb_size)."""
        self.idx = batch_x
        return self.W[batch_x].astype('f')

    def backward(self, back):
        """Accumulate `back` (batch, time, emb_size) into the embedding gradient."""
        # Diagnostic only: report NaNs in the incoming gradient.
        if np.isnan(back).any():
          print('Embedding_Nan here')
        dW = self.grads[0]
        dW[...] = 0
        # np.add.at accumulates correctly when a word occurs several times in
        # the batch, which is what summing the per-step gradients did before.
        np.add.at(dW, self.idx, back)
        return None
class ADAM:
    """Adam optimizer.

    `alpha` / `bet` hold the running first / second moment per parameter; they
    are created on the first update() call and matched to parameters by list
    position. Bias correction is folded into the step size.
    """

    def __init__(self, lr=0.001, b1=0.9, b2=0.999):
        self.lr = lr
        self.b = [b1,b2]
        self.i = 0          # number of update() calls so far
        self.alpha = None
        self.bet = None

    def update(self, weights, grads):
        """Apply one Adam step to every entry of `weights` in place; returns None."""
        if self.alpha is None:
            self.alpha = [np.zeros_like(w) for w in weights]
            self.bet = [np.zeros_like(w) for w in weights]

        self.i += 1
        beta1, beta2 = self.b
        # Learning rate with both bias corrections applied.
        step = self.lr * np.sqrt(1.0 - beta2**self.i) / (1.0 - beta1**self.i)

        for k in range(len(weights)):
            g = grads[k]
            self.alpha[k] += (1 - beta1) * (g - self.alpha[k])
            self.bet[k] += (1 - beta2) * (g**2 - self.bet[k])
            weights[k] -= step * self.alpha[k] / (np.sqrt(self.bet[k]) + 1e-7)

In [49]:
class LSTM_ADAM_Model():
    """Embedding -> LSTM (sequence) -> LSTM (last state) -> Linear(5), with an ADAM optimizer.

    Args:
        lr: learning rate handed to ADAM.
        vocab_size, emb_size: forwarded to Embedding.
        hidden_size: width of both LSTM layers.
        dropout_ratio: unused in this variant (the Dropout layers are commented out).
    """

    def __init__(self,lr, vocab_size=400000, emb_size=50,hidden_size=128, dropout_ratio=0.5):
        self.Wx = (np.random.randn(emb_size, 4*hidden_size) / np.sqrt(emb_size)).astype('f')
        self.Wh = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.b = np.zeros(4*hidden_size).astype('f')
        self.Wx_ = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.Wh_ = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.b_ = np.zeros(4*hidden_size).astype('f')
        self.linear_w = np.zeros((hidden_size,5)).astype('f')
        self.linear_b = np.zeros(5).astype('f')
        self.optimizer = ADAM(lr)

        self.layers = [
            Embedding(vocab_size, emb_size,emb_pre=True),
            LSTM(self.Wx, self.Wh, self.b, connect=True,out_seq=True),
            #Dropout(dropout_ratio),
            LSTM(self.Wx_, self.Wh_, self.b_, connect=True,out_seq = False),
            #Dropout(dropout_ratio),
            Linear(self.linear_w, self.linear_b)
        ]
        self.last_layer = Softmax_Cross_Entropy_Error()

        # Flat lists of all parameters / gradients, in layer order, for the optimizer.
        self.weights, self.grads = [], []
        for layer in self.layers:
            self.weights += layer.weights
            self.grads += layer.grads

    def forward(self,batch_x, batch_y, train=True):
        """Run the network.

        Returns the loss against `batch_y` when `train` is truthy, otherwise
        the raw class scores (`batch_y` is then ignored).
        """
        # Switch every layer that has a train/eval flag (i.e. Dropout) instead
        # of relying on such layers sitting at fixed positions 2 and 4.
        for layer in self.layers:
            if hasattr(layer, 'train'):
                layer.train = bool(train)
        out = batch_x
        for layer in self.layers:
            out = layer.forward(out)
        if not train:
            return out
        return self.last_layer.forward(out, batch_y)

    def back_propagation(self, back=1):
        """Back-propagate from the loss through all layers in reverse order."""
        back = self.last_layer.backward(back)
        for layer in reversed(self.layers):
            back = layer.backward(back)
        return back

In [50]:
class LSTM_SGD_Model():
    """Embedding -> LSTM (sequence) -> LSTM (last state) -> Linear(5), with an SGD optimizer.

    Args:
        lr: learning rate handed to SGD.
        vocab_size, emb_size: forwarded to Embedding.
        hidden_size: width of both LSTM layers.
        dropout_ratio: unused in this variant (the Dropout layers are commented out).
    """

    def __init__(self,lr, vocab_size=400000, emb_size=50,hidden_size=128, dropout_ratio=0.5):
        self.Wx = (np.random.randn(emb_size, 4*hidden_size) / np.sqrt(emb_size)).astype('f')
        self.Wh = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.b = np.zeros(4*hidden_size).astype('f')
        self.Wx_ = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.Wh_ = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.b_ = np.zeros(4*hidden_size).astype('f')
        self.linear_w = np.zeros((hidden_size,5)).astype('f')
        self.linear_b = np.zeros(5).astype('f')
        self.optimizer = SGD(lr)

        self.layers = [
            Embedding(vocab_size, emb_size,emb_pre=True),
            LSTM(self.Wx, self.Wh, self.b, connect=True,out_seq=True),
            #Dropout(dropout_ratio),
            LSTM(self.Wx_, self.Wh_, self.b_, connect=True,out_seq = False),
            #Dropout(dropout_ratio),
            Linear(self.linear_w, self.linear_b)
        ]
        self.last_layer = Softmax_Cross_Entropy_Error()

        # Flat lists of all parameters / gradients, in layer order, for the optimizer.
        self.weights, self.grads = [], []
        for layer in self.layers:
            self.weights += layer.weights
            self.grads += layer.grads

    def forward(self,batch_x, batch_y, train=True):
        """Run the network.

        Returns the loss against `batch_y` when `train` is truthy, otherwise
        the raw class scores (`batch_y` is then ignored).
        """
        # Switch every layer that has a train/eval flag (i.e. Dropout) instead
        # of relying on such layers sitting at fixed positions 2 and 4.
        for layer in self.layers:
            if hasattr(layer, 'train'):
                layer.train = bool(train)
        out = batch_x
        for layer in self.layers:
            out = layer.forward(out)
        if not train:
            return out
        return self.last_layer.forward(out, batch_y)

    def back_propagation(self, back=1):
        """Back-propagate from the loss through all layers in reverse order."""
        back = self.last_layer.backward(back)
        for layer in reversed(self.layers):
            back = layer.backward(back)
        return back

In [51]:
class LSTM_SGD_Drop_Model():
    """Embedding -> LSTM (sequence) -> Dropout -> LSTM (last state) -> Dropout -> Linear(5), with SGD.

    Args:
        lr: learning rate handed to SGD.
        vocab_size, emb_size: forwarded to Embedding.
        hidden_size: width of both LSTM layers.
        dropout_ratio: drop probability of both Dropout layers.
    """

    def __init__(self,lr, vocab_size=400000, emb_size=50,hidden_size=128, dropout_ratio=0.5):
        self.Wx = (np.random.randn(emb_size, 4*hidden_size) / np.sqrt(emb_size)).astype('f')
        self.Wh = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.b = np.zeros(4*hidden_size).astype('f')
        self.Wx_ = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.Wh_ = (np.random.randn(hidden_size, 4*hidden_size) / np.sqrt(hidden_size)).astype('f')
        self.b_ = np.zeros(4*hidden_size).astype('f')
        self.linear_w = np.zeros((hidden_size,5)).astype('f')
        self.linear_b = np.zeros(5).astype('f')
        self.optimizer = SGD(lr)

        self.layers = [
            Embedding(vocab_size, emb_size,emb_pre=True),
            LSTM(self.Wx, self.Wh, self.b, connect=True,out_seq=True),
            Dropout(dropout_ratio),
            LSTM(self.Wx_, self.Wh_, self.b_, connect=True,out_seq = False),
            Dropout(dropout_ratio),
            Linear(self.linear_w, self.linear_b)
        ]
        self.last_layer = Softmax_Cross_Entropy_Error()

        # Flat lists of all parameters / gradients, in layer order, for the optimizer.
        self.weights, self.grads = [], []
        for layer in self.layers:
            self.weights += layer.weights
            self.grads += layer.grads

    def forward(self,batch_x, batch_y, train=True):
        """Run the network.

        Returns the loss against `batch_y` when `train` is truthy, otherwise
        the raw class scores (`batch_y` is then ignored).
        """
        # Switch every layer that has a train/eval flag (the Dropout layers)
        # instead of relying on them sitting at fixed positions 2 and 4.
        for layer in self.layers:
            if hasattr(layer, 'train'):
                layer.train = bool(train)
        out = batch_x
        for layer in self.layers:
            out = layer.forward(out)
        if not train:
            return out
        return self.last_layer.forward(out, batch_y)

    def back_propagation(self, back=1):
        """Back-propagate from the loss through all layers in reverse order."""
        back = self.last_layer.backward(back)
        for layer in reversed(self.layers):
            back = layer.backward(back)
        return back

In [None]:
%%time

# Training loop for LSTM_SGD_Drop_Model: full passes over X_train1 in
# `mini_batch` contiguous chunks, followed by a validation loss per epoch.
mini_batch = 2  # number of batches per epoch (not the number of rows per batch)
lr = 0.7

total_loss = 0
loss_count = 0
train_loss = []  # mean training loss per epoch
valid_loss = []  # validation loss per epoch
v_loss = 0

model = LSTM_SGD_Drop_Model(lr,emb_size = 50) # 50-dimensional embeddings
idx = (len(X_train1) - 1) // mini_batch # rows per batch; the training rows are split into `mini_batch` (2) groups
offsets = [i * idx for i in range(mini_batch)]  # start row of each batch
# The validation rows are padded with leading training rows so that the
# validation batch has `idx` rows; only the first 26 predictions are scored below.
# NOTE(review): presumably needed because the LSTM layers are created with
# connect=True and carry hidden state sized for `idx` rows - confirm.
v_x = np.concatenate((X_train_valid, X_train1[:idx-len(X_train_valid)]), axis=0)
v_y = np.concatenate((Y_train_valid, Y_train1[:idx-len(X_train_valid)]), axis=0)
print('validation_x: ',v_x.shape,'validation_y: ',v_y.shape)
for epoch in range(20): # number of epochs
    print('epoch: ',epoch)
    total_loss = 0
    for i, offset in enumerate(offsets):
      #print(i,offset, X_train1[offset:offset+idx+1].shape)
      # Rows past mini_batch * idx are never used for training.
      batch_x = X_train1[offset:offset+idx]
      batch_trg = Y_train1[offset:offset+idx]    
      #print('batch_x: ',batch_x,'batch_trg: ',batch_trg)
      #print('before_weights: ',model.weights[0][0])
      loss = model.forward(batch_x, batch_trg)
      total_loss +=loss
      print('Loss=--------------: ',loss)
      model.back_propagation()
      # Merge tied (transposed) parameters, clip by global norm, then update.
      weights,grads = collect_parameters(model.weights, model.grads)
      #print('weights: ',len(weights),'grads: ',len(grads)) #(9,9)
      #for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        #np.clip(dparam, -5, 5, out=dparam)
        #weights,grads = model.weights, model.grads
      grads=revise_grad(grads, max_grad=0.7) 
      model.optimizer.update(weights, grads)
      #print('after_weights: ',model.weights[0][0])
    # train=False returns the raw scores; the loss layer is then applied to the
    # first 26 rows only.
    # NOTE(review): 26 is hard-coded; it equals the validation row count printed
    # earlier, i.e. the non-padded part of v_x - confirm if the split changes.
    v = model.forward(v_x,v_y,train=False) # predictions (scores)
    v_loss = model.last_layer.forward(v[:26],v_y[:26])
    print('v_loss: ',v_loss)
    valid_loss.append(v_loss)
    train_loss.append(total_loss / len(offsets)) # divide by the number of batches

validation_x:  (52, 10) validation_y:  (52, 5)
epoch:  0
Loss=--------------:  1.6094372089092548
Loss=--------------:  1.6025477189284105
v_loss:  1.5653721736027644
epoch:  1
Loss=--------------:  1.585380114041842
Loss=--------------:  1.5911727318396935
v_loss:  1.5371839083158052
epoch:  2
Loss=--------------:  1.5722540341890776
Loss=--------------:  1.587131353525015
v_loss:  1.520013222327599
epoch:  3
Loss=--------------:  1.5633697509765625
Loss=--------------:  1.5813258244441106
v_loss:  1.506598399235652
epoch:  4
Loss=--------------:  1.5534654764028697
Loss=--------------:  1.577284886286809
v_loss:  1.4953016134408803
epoch:  5
Loss=--------------:  1.5529826237605169
Loss=--------------:  1.5779939798208384
v_loss:  1.4909908588115985
epoch:  6
Loss=--------------:  1.5503653012789214
Loss=--------------:  1.5740980001596303
v_loss:  1.4855851393479567
epoch:  7
Loss=--------------:  1.5425369556133564
Loss=--------------:  1.5732581798846905
v_loss:  1.479680868295523

In [None]:
import matplotlib.pyplot as plt

# Per-epoch training and validation loss collected by the training cell.
plt.plot(train_loss)
plt.plot(valid_loss)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train_loss','valid_loss'])
# Name the model that was actually trained; the fixed title said "LSTM+ADAM"
# although the training cell builds LSTM_SGD_Drop_Model.
plt.title('%s + 50d model Loss graph' % type(model).__name__)
plt.show()

In [None]:
#test data accuracy
# The model is evaluated in chunks of EVAL_BATCH rows: the first EVAL_BATCH test
# rows, then the remaining rows padded with leading test rows up to EVAL_BATCH,
# of which only the predictions for the real remaining rows are kept.
# NOTE(review): 52 presumably mirrors the training batch size (`idx`) because
# the LSTM layers carry state between calls - confirm. This layout assumes
# EVAL_BATCH < len(X_test) <= 2 * EVAL_BATCH.
EVAL_BATCH = 52
print(X_test.shape)
tail = X_test[EVAL_BATCH:]
new_split_X_test = np.concatenate((tail, X_test[:EVAL_BATCH - len(tail)]), axis=0)
print(new_split_X_test.shape)
# The second argument (batch_y) is ignored when train=False.
p = np.argmax(model.forward(X_test[:EVAL_BATCH],X_test[:EVAL_BATCH],train=False), axis = 1)
p2 = np.argmax(model.forward(new_split_X_test,new_split_X_test,train=False), axis = 1)[:len(tail)]
p = np.concatenate((p,p2), axis = 0)
# Y_test is one-hot encoded, so argmax recovers the integer labels.
y = np.argmax(Y_test, axis=1)
correct = np.sum(y == p)
print("correct: ",correct)
accuracy = correct / len(y) # divide by the number of test rows instead of a fixed 56
print(accuracy)
print(len(p))
for i in range(len(p)):
  print(X_test_seq[i],label_to_emoji(p[i]),label_to_emoji(y[i]))

In [None]:
# Model definition for the vanilla-RNN variant.
class RNN:
    """Sentence classifier: Embedding -> two stacked vanilla_RNN layers -> Linear.

    The loss layer (Softmax_Cross_Entropy_Error) is stored separately in
    ``self.last_layer`` so that inference can stop at the raw class scores.

    Args:
        vocab_size: forwarded to ``Embedding``.
        emb_size: embedding width; also the input width of the first RNN layer.
        hidden_size: width of both recurrent layers.
        emb_pre: forwarded to ``Embedding`` (presumably toggles pre-trained
            vectors -- confirm against the Embedding class).
        dropout_ratio: currently unused, because the Dropout layers are
            commented out of the layer stack below.
        num_classes: number of output classes, i.e. the width of the Linear
            layer. Defaults to 5, the value that used to be hard-coded.
    """

    def __init__(self, vocab_size, emb_size, hidden_size=128, emb_pre = True,dropout_ratio=0.5, num_classes=5):
        # Weights are drawn in a fixed order (layer 1, layer 2, classifier) so
        # that a seeded run yields the same initial parameters as before.
        # Recurrent weights are scaled by 1/sqrt(fan_in); all arrays are float32.
        Wx = (np.random.randn(emb_size, hidden_size) / np.sqrt(emb_size)).astype('f')
        Wh = (np.random.randn(hidden_size, hidden_size) / np.sqrt(hidden_size)).astype('f')
        b = np.zeros(hidden_size).astype('f')
        Wx_2 = (np.random.randn(hidden_size, hidden_size) / np.sqrt(hidden_size)).astype('f')
        Wh_2 = (np.random.randn(hidden_size, hidden_size) / np.sqrt(hidden_size)).astype('f')
        b_2 = np.zeros(hidden_size).astype('f')
        # The classifier weights are left unscaled, as in the original code.
        linear_W = (np.random.randn(hidden_size, num_classes)).astype('f')
        linear_b = np.zeros(num_classes).astype('f')

        self.layers = [Embedding(vocab_size, emb_size, emb_pre),
                       vanilla_RNN(Wx, Wh, b, out_seq=True, connect=True),
                       #Dropout(dropout_ratio),
                       vanilla_RNN(Wx_2, Wh_2, b_2, out_seq=False, connect=True),
                       #Dropout(dropout_ratio),
                       Linear(linear_W, linear_b)
                       ]
        self.last_layer = Softmax_Cross_Entropy_Error()

        # Flat lists of every layer's parameters and gradients, used by the
        # optimizer update step.
        self.weights, self.grads = [], []
        for layer in self.layers:
            self.weights += layer.weights  # list concatenation
            self.grads += layer.grads

    def forward(self, batch_x, batch_y, train=True):
        """Run the layer stack on one batch.

        Args:
            batch_x: input batch handed to the first layer.
            batch_y: targets for the loss layer; ignored when ``train`` is falsy.
            train: when truthy, return the loss; otherwise return the output
                of the final Linear layer.

        Returns:
            The loss from ``self.last_layer`` in training mode, else the
            Linear layer's output (class scores).
        """
        # A stack of exactly 4 layers has no Dropout. When the Dropout lines
        # in __init__ are enabled they land at indices 2 and 4 and need to be
        # told which mode they are running in.
        if len(self.layers) != 4:
            dropout_mode = train if train else False
            self.layers[2].train = dropout_mode
            self.layers[4].train = dropout_mode

        out = batch_x
        for layer in self.layers:
            out = layer.forward(out)

        if train:
            return self.last_layer.forward(out, batch_y)
        return out

    def back_propagation(self, back=1):
        """Back-propagate from the loss layer through every layer in reverse.

        Args:
            back: upstream gradient fed to the loss layer (defaults to 1).

        Returns:
            Whatever the first layer's ``backward`` returns.
        """
        back = self.last_layer.backward(back)
        for layer in reversed(self.layers):
            back = layer.backward(back)
        return back

In [None]:
%%time

mini_batch = 2  # number of batches the training rows are split into per epoch
lr = 0.7

total_loss = 0
loss_count = 0
train_loss = []  # mean training loss per epoch
valid_loss = []  # validation loss per epoch
v_loss = 0

model = RNN(len(word_to_vec_map), 50, 128)  # 50-dimensional embeddings
optimizer = SGD(lr)
# Rows per batch; any rows beyond mini_batch * idx are never visited.
idx = (len(X_train1) - 1) // mini_batch
print('idx: ',idx)
# Start row of each batch.
offsets = [i * idx for i in range(mini_batch)]
# Pad the validation set with leading training rows so that it has idx rows
# (presumably the model needs the same batch size as in training -- confirm).
v_x = np.concatenate((X_train_valid, X_train1[:idx-len(X_train_valid)]), axis=0)
v_y = np.concatenate((Y_train_valid, Y_train1[:idx-len(X_train_valid)]), axis=0)
print('validation_x: ',v_x.shape,'validation_y: ',v_y.shape)
for epoch in range(20):  # number of epochs
    print('epoch: ',epoch)
    total_loss = 0
    for offset in offsets:
        batch_x = X_train1[offset:offset+idx]
        batch_trg = Y_train1[offset:offset+idx]
        loss = model.forward(batch_x, batch_trg)
        total_loss += loss
        print('Loss=--------------: ',loss)
        model.back_propagation()
        weights, grads = collect_parameters(model.weights, model.grads)
        # revise_grad presumably rescales/clips the gradients to max_grad -- confirm.
        grads = revise_grad(grads, max_grad=0.7)
        optimizer.update(weights, grads)
    # Validation: forward pass in inference mode, then score only the genuine
    # validation rows; the rows appended from X_train1 are padding.
    # NOTE(review): this slice used to be a literal 26 -- confirm that
    # len(X_train_valid) == 26.
    v = model.forward(v_x, v_y, train=False)
    v_loss = model.last_layer.forward(v[:len(X_train_valid)], v_y[:len(X_train_valid)])
    print('v_loss: ',v_loss)
    valid_loss.append(v_loss)
    train_loss.append(total_loss / len(offsets))  # average over the batches

In [None]:
import matplotlib.pyplot as plt

# Loss curves for the RNN run above: one point per epoch for each list.
ax = plt.gca()
ax.plot(train_loss, label='train_loss')
ax.plot(valid_loss, label='valid_loss')
ax.set(xlabel='epoch', ylabel='loss', title='RNN+SGD+50d model Loss graph')
ax.legend()
plt.show()

In [None]:
# Test-set accuracy.
print(X_test.shape)
# The model is evaluated on 52-row batches (presumably the batch size it was
# trained with -- confirm). The rows after the first 52 are therefore padded
# with the first 48 test rows to build a second full batch.
# NOTE(review): the 52 / 48 / 4 split assumes X_test holds 56 rows -- confirm.
new_split_X_test = np.concatenate((X_test[52:], X_test[:48]), axis=0)
print(new_split_X_test.shape)
# argmax returns the index of the largest score, i.e. the predicted class.
# With train=False the second argument is not used, so the inputs are passed
# again as a placeholder.
p = np.argmax(model.forward(X_test[:52], X_test[:52], train=False), axis=1)
# Keep only the first 4 predictions: the remaining rows are padding.
p2 = np.argmax(model.forward(new_split_X_test, new_split_X_test, train=False), axis=1)[:4]
p = np.concatenate((p, p2), axis=0)
# Labels are one-hot encoded, so argmax recovers the class index of each row.
y = np.argmax(Y_test, axis=1)
correct = np.sum(y == p)
print("correct: ", correct)
# Divide by the actual number of labels instead of a hard-coded count.
accuracy = correct / len(y)
print('accuracy: ',accuracy)
print('test_set 개수:',len(p))
# Show every sentence with its predicted emoji followed by the true emoji.
for sentence, predicted, actual in zip(X_test_seq, p, y):
    print(sentence, label_to_emoji(predicted), label_to_emoji(actual))