<a href="https://colab.research.google.com/github/yanoooooo/translation_ML/blob/colab/Encoder_Decoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1680]:
# Import Chainer 
from chainer import Chain, Variable, optimizers, serializers, datasets, training, cuda
from chainer.training import extensions
import chainer.functions as F
import chainer.links as L
from chainer import Variable
import chainer

import numpy as np
import math
import pandas as pd

from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


Variables

In [0]:
is_debug = True
batch_size = 2
epoch_num = 10

Models

In [0]:
class LyricsEncoder(chainer.Chain):
  def __init__(self, vocab_size):
    super(LyricsEncoder, self).__init__()
    with self.init_scope():
      self.embed = L.EmbedID(vocab_size, 8)
      self.lstm1 = L.LSTM(None, 32)
      self.l1= L.Linear(None, 16)
      
  def reset_state():
    self.lstm1.reset_state()
      
  def forward(self, x_data):
    x = Variable(cuda.to_gpu(x_data))
#     print('x: ', x.shape)
    h = F.tanh(self.embed(x))
#     print('a: ', h.shape)
    h = self.lstm1(h)
#     print('b: ', h.shape)
    h = self.l1(h)
#     print('c: ', h.shape)
    
    return h

In [0]:
class MoraEncoder(chainer.Chain):
  def __init__(self, vocab_size):
    super(MoraEncoder, self).__init__()
    with self.init_scope():
      self.embed = L.EmbedID(vocab_size, 8)
      self.lstm1 = L.LSTM(None, 32)
      self.l1= L.Linear(None, 16)
      
  def reset_state():
    self.lstm1.reset_state()
      
  def forward(self, x_data):
    x = Variable(cuda.to_gpu(x_data))
    h = F.tanh(self.embed(x))
    h = self.lstm1(h)
    h = self.l1(h)
    
    return h

In [0]:
class LyricsDecoder(chainer.Chain):
  def __init__(self, vocab_size):
    super(LyricsDecoder, self).__init__()
    with self.init_scope():
      self.embed = L.EmbedID(vocab_size, 16)
      self.lstm1 = L.LSTM(None, 32)
      self.l1= L.Linear(None, vocab_size)
      
  def reset_state():
    self.lstm1.reset_state()
      
  def forward(self, h, t_data):
    t = Variable(cuda.to_gpu(t_data))
#     print('t: ', t.shape)
    h = self.lstm1(h)
#     print('h: ', h.shape)
    y = self.l1(h)
#     print('y: ', y.shape)
    
#     print(y)
#     print(np.argmax(y.data, axis=1))
#     print(t)
#     print(np.argmax(t.data))
    return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

In [0]:
class MoraDecoder(chainer.Chain):
  def __init__(self, vocab_size):
    super(MoraDecoder, self).__init__()
    with self.init_scope():
      self.embed = L.EmbedID(vocab_size, 8)
      self.lstm1 = L.LSTM(None, 32)
      self.l1= L.Linear(None, vocab_size)
      
  def reset_state():
    self.lstm1.reset_state()
      
  def forward(self, h, t_data):
    t = Variable(cuda.to_gpu(t_data))
#     h = F.tanh(self.embed(x))
    h = self.lstm1(h)
    y = self.l1(h)
    
    return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

Data Utils

In [0]:
def get_word_list(data):
  result = {}
  for i in range(len(data)):
    lt = data[i].split()
#     data[i] = lt
    for w in lt:
      if w not in result:
        result[w] = len(result)
  result['<eos>'] = len(result)
  return result

In [0]:
def get_split_list(data):
  result = []
  for i in range(len(data)):
    lt = data[i].split()
    if(len(lt) > 0 and not 'None' in lt):
      lt.append('<eos>')
    else:
      lt = []
    result.append(lt)
  return result

In [0]:
def get_batch(data, batch_size):
  result = []
  for index in range(0, math.ceil(len(data) / batch_size)):
    tune = []
    batch = data[index*batch_size:min([index * batch_size + batch_size, len(data)])]
    max_length = max([len(st) for st in batch])
    tune_flg = False
    for j in range(max_length):
      phrase = []
      for s in batch:
        # 曲の区切りには空の配列を挿入
        if len(s) == 0:
          tune_flg = True
          continue
        if j < len(s):
          phrase.append(s[j])
        else:
          phrase.append(-1)
      tune.append(phrase)
      # tune.append([s[j] if j < len(s) else None for s in batch])
    if tune_flg:
        result.append([])
    result.append(tune)
  return result

In [0]:
def get_zeros_vector(batch, dict):
  result = []
  # 入力文章数、単語数の0ベクトル配列を作成
  for i in range(0, len(batch)):
    xi = np.zeros((len(batch[i]), len(dict)), np.int32)
    for k, word in enumerate((batch[i])):
      if word is not None: xi[k, dict[word]] = 1
    result.append(xi)
  return np.array(result, dtype=np.int32)

In [0]:
def get_target_labels(batch, dict):
  result = []
  # 入力文章数、単語数の0ベクトル配列を作成
  for i in range(0, len(batch)):
    xi = []
    for k, word in enumerate((batch[i])):
      if word is not None: xi.append(dict[word])
    result.append(xi)
  return np.array(result, dtype=np.int32)

Trainning Section

In [0]:
def train(ja_file_path, en_file_path):
  ja_data = pd.read_csv(ja_file_path)
  en_data = pd.read_csv(en_file_path)
  
  # 学習に必要なリストの取得
  lyrics_list = {'en': get_split_list(en_data['Lyrics']), 'ja': get_split_list(ja_data['Lyrics'])}
  mora_list = {'en': get_split_list(en_data['Mora']), 'ja': get_split_list(ja_data['Mora'])}
  lyrics_vocab_list = {'en': get_word_list(en_data['Lyrics']), 'ja': get_word_list(ja_data['Lyrics'])}
  mora_vocab_list = {'en': get_word_list(en_data['Mora']), 'ja': get_word_list(ja_data['Mora'])}
  
  # モデルの作成
  lyrics_encoder = LyricsEncoder(len(lyrics_vocab_list['en']))
  mora_encoder = MoraEncoder(len(mora_vocab_list['en']))
  lyrics_decoder = LyricsDecoder(len(lyrics_vocab_list['ja']))
  mora_decoder = MoraDecoder(len(mora_vocab_list['ja']))
  
  # gpuの使用
  lyrics_encoder.to_gpu()
  mora_encoder.to_gpu()
  lyrics_decoder.to_gpu()
  mora_decoder.to_gpu()
  
  # optimizerの作成
  optimizers_list = {
      'en_lyrics': optimizers.Adam().setup(lyrics_encoder),
      'en_mora': optimizers.Adam().setup(mora_encoder),
      'ja_lyric': optimizers.Adam().setup(lyrics_decoder),
      'ja_mora': optimizers.Adam().setup(mora_decoder),
  }
  
  for epoch in range(0, epoch_num):
    print('epoch: ', epoch+1)
    l_sum_accuracy, l_sum_loss, l_sum_batch_size = 0, 0, 0
    m_sum_accuracy, m_sum_loss, m_sum_batch_size = 0, 0, 0
    # 曲が違う場合はリセットしなければならない
#     lyrics_encoder.reset_state()
#     mora_encoder.reset_state()

    # create batch
    en_lyrics_batch = get_batch(lyrics_list['en'], batch_size)
    en_mora_batch = get_batch(mora_list['en'], batch_size)
    ja_lyrics_batch = get_batch(lyrics_list['ja'], batch_size)
    ja_mora_batch = get_batch(mora_list['ja'], batch_size)
    
    for i, (x_l_batch, x_m_batch, t_l_batch, t_m_batch) in enumerate(zip(en_lyrics_batch, en_mora_batch, ja_lyrics_batch, ja_mora_batch)):
      # ------- Encoder ------
#       print(x_l_batch)
      x_l_batch = get_zeros_vector(x_l_batch, lyrics_vocab_list['en'])
      x_m_batch = get_zeros_vector(x_m_batch, mora_vocab_list['en'])
      t_l_batch = get_target_labels(t_l_batch, lyrics_vocab_list['ja'])
      t_m_batch = get_target_labels(t_m_batch, mora_vocab_list['ja'])
      
      for x_l, x_m, t_l, t_m in zip(x_l_batch, x_m_batch, t_l_batch, t_m_batch):
        lyrics_y = lyrics_encoder.forward(x_l)
        mora_y = mora_encoder.forward(x_m)
    
        y = F.tanh(lyrics_y + mora_y)
      
        l_loss, l_acc = lyrics_decoder.forward(y, t_l)
        m_loss, m_acc = mora_decoder.forward(y, t_m)
        
        
        lyrics_encoder.cleargrads()
        mora_encoder.cleargrads()
        lyrics_decoder.cleargrads()
        mora_decoder.cleargrads()
        l_loss.backward()
        l_loss.unchain_backward()
        m_loss.backward()
        m_loss.unchain_backward()
        
        for optimizer in optimizers_list.values():
          optimizer.update()
          
        l_sum_loss += float(l_loss.data) * len(x_l)
        l_sum_accuracy += float(l_acc.data) * len(x_l)
        l_sum_batch_size += len(x_l)
        m_sum_loss += float(m_loss.data) * len(x_m)
        m_sum_accuracy += float(m_acc.data) * len(x_m)
        m_sum_batch_size += len(x_m)

    # show training data loss and accuracy
    l_loss = l_sum_loss / l_sum_batch_size
    l_accuracy = l_sum_accuracy / l_sum_batch_size
    m_loss = m_sum_loss / m_sum_batch_size
    m_accuracy = m_sum_accuracy / m_sum_batch_size

    print('train lyrics mean loss={}, accuracy={}'.format(l_loss, l_accuracy))
    print('train mora mean loss={}, accuracy={}'.format(m_loss, m_accuracy))
    

Main Function

In [1692]:
if __name__=="__main__":
  if is_debug:
    ja_file_path = '/gdrive/My Drive/Research/LyricsTranslation/NeuralNetwork/translation_ML/data/ja.csv'
    en_file_path = '/gdrive/My Drive/Research/LyricsTranslation/NeuralNetwork/translation_ML/data/en.csv'
  else:
    file_path = '/gdrive/My Drive/DeepLearning/GestureRecognition/data/formatted/train_classification.csv'

  train(ja_file_path, en_file_path)
  
  print('Done!')

epoch:  1
train lyrics mean loss=2.556262819390548, accuracy=0.0
train mora mean loss=1.5360374701650519, accuracy=0.0
epoch:  2
train lyrics mean loss=2.437440972579153, accuracy=0.15789473684210525
train mora mean loss=1.349625593737552, accuracy=0.42105263157894735
epoch:  3
train lyrics mean loss=2.4011656485105815, accuracy=0.21052631578947367
train mora mean loss=1.2218687973524396, accuracy=0.47368421052631576
epoch:  4
train lyrics mean loss=2.3894752954181873, accuracy=0.15789473684210525
train mora mean loss=1.1744432794420343, accuracy=0.47368421052631576
epoch:  5
train lyrics mean loss=2.392615054783068, accuracy=0.15789473684210525
train mora mean loss=1.143401114564193, accuracy=0.47368421052631576
epoch:  6
train lyrics mean loss=2.3926997435720345, accuracy=0.15789473684210525
train mora mean loss=1.1175746729499416, accuracy=0.47368421052631576
epoch:  7
train lyrics mean loss=2.38263884343599, accuracy=0.15789473684210525
train mora mean loss=1.0883429646492004, accu