<a href="https://colab.research.google.com/github/respect5716/Deep-Learning-Paper-Implementation/blob/master/03_NLP/SeqGAN%20_%20Sequence%20Generative%20Adversarial%20Nets%20with%20Policy%20Gradient.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SeqGAN _ Sequence Generative Adversarial Nets with Policy Gradient

## 0. Paper

### Info
* Title : SeqGAN _ Sequence Generative Adversarial Nets with Policy Gradient
* Author : Lantao Yu et al.
* Publication : AAAI 2017

### Summary
* GAN과 Policy gradient를 사용하여 discrete sequence 생성 모델 학습
* 완전히 생성된 문장을 평가함으로써 더욱 사실적인 문장 생성 가능

### Differences
* Dataset : political speech -> Naver sentiment movie corpus, [link](https://github.com/e9t/nsmc/)
* Pretrain : True -> False
* Discriminator highway network : True -> False

## 1. Setting

In [0]:
# install mecab and konlpy
!git clone https://github.com/SOMJANG/Mecab-ko-for-Google-Colab.git /content/mecab
!bash mecab/install_mecab-ko_on_colab190912.sh
!pip install -q konlpy

In [0]:
# Google Drive
# Mount Drive at /content/drive; CONFIG['base_dir'] points inside this mount.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import Counter
from konlpy.tag import Mecab

import tensorflow as tf

In [0]:
# GPU Setting
# Shell command: show the GPU attached to this runtime.
!nvidia-smi

# Report the TensorFlow version and the GPU devices TensorFlow can see.
print(f'tensorflow version : {tf.__version__}')
print(f'available GPU list : {tf.config.list_physical_devices("GPU")}')

Mon Jun  1 12:47:20 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
# Hyperparameters
CONFIG = {
    # Project root on the mounted Drive; the dataset zip is read from below it.
    'base_dir' : '/content/drive/Shared drives/Yoon/Project/Doing/Deep Learning Paper Implementation',
    # Fixed token length: sequences are truncated / zero-padded to this size.
    'seq_len' : 16,
    # Embedding width and LSTM state size.
    'model_dim' : 128,
    # Adam learning rate of the generator.
    'generator_lr' : 1e-3,
    # Adam learning rate of the discriminator.
    'discriminator_lr' : 1e-3,
    # Generated sentences per real sentence in a discriminator training batch.
    'neg_ratio' : 3,
    # Sentences sampled (and trained on) by the generator per epoch.
    'g_step_size' : 2,
    # Discriminator batches built and fitted per epoch.
    'd_step_size' : 5,
    # fit() epochs run on each discriminator batch.
    'd_epoch_size' : 2,
    # Mini-batch size; also the number of rollouts sampled per reward estimate.
    'batch_size' : 64,
    # Number of outer training iterations.
    'epoch_size' : 100
}

## 2. Data

In [0]:
data_path = os.path.join(CONFIG['base_dir'], 'data/naver_sentiment_movie_corpus.zip')
!unzip $"{data_path}" -d '/content/data'

In [0]:
# Load both corpus splits and discard rows that contain missing values.
train_data = pd.read_table('/content/data/ratings_train.txt').dropna()
test_data = pd.read_table('/content/data/ratings_test.txt').dropna()

In [0]:
# konlpy Mecab tagger; its .pos() output is what the notebook uses to tokenize reviews.
mecab = Mecab()

In [0]:
# '/' is reserved as the separator inside "surface/TAG" tokens, so it is removed
# from the raw text; spaces are then encoded as '_' so they survive tagging and
# can be restored when decoding.
train_data['document'] = train_data['document'].apply(lambda x : x.replace('/', ' ').replace(' ', '_'))
# Each (surface, tag) pair returned by the tagger becomes one "surface/TAG" token.
train_data['token'] = train_data['document'].apply(lambda x : ['/'.join(i) for i in mecab.pos(x)])
# Turn the 0/1 sentiment into the conditioning token. Values that are already
# converted pass through unchanged, so re-running this cell does not replace
# the whole column with NaN (which a plain dict map() would do).
label_tokens = {0:'[NEG]', 1:'[POS]'}
train_data['label'] = train_data['label'].map(lambda v : label_tokens.get(v, v))

In [0]:
# Count every token occurrence over the whole training corpus.
token_counts = Counter(tok for doc_tokens in train_data['token'] for tok in doc_tokens)

# The five special tokens take ids 0-4; after them come the tokens seen at
# least 10 times, in first-seen order.
special_tokens = ['[PAD]', '[UNK]', '[EOS]', '[POS]', '[NEG]']
vocab = special_tokens + [tok for tok, freq in token_counts.items() if freq >= 10]

len(vocab)

11757

In [0]:
# Preview the first rows after normalization, tokenization and label mapping.
train_data.head()

Unnamed: 0,id,document,label,token
0,9976970,아_더빙.._진짜_짜증나네요_목소리,[NEG],"[아/IC, _/SY, 더/MAG, 빙/MAG, ./SF, ._/SY, 진짜/MAG..."
1,3819312,흠...포스터보고_초딩영화줄....오버연기조차_가볍지_않구나,[POS],"[흠/IC, ./SF, ../SY, 포스터/NNP, 보고/NNG, _/SY, 초딩/..."
2,10265843,너무재밓었다그래서보는것을추천한다,[NEG],"[너무/MAG, 재/XPN, 밓었다그래서보는것을추천한다/UNKNOWN]"
3,9045019,교도소_이야기구먼_..솔직히_재미는_없다..평점_조정,[NEG],"[교도소/NNG, _/SY, 이야기/NNG, 구먼/VCP+EF, _../SY, 솔직..."
4,6483659,사이몬페그의_익살스런_연기가_돋보였던_영화!스파이더맨에서_늙어보이기만_했던_커스틴_...,[POS],"[사이몬페그/NNP, 의/JKG, _/SY, 익살/NNG, 스런/XSA+ETM, _..."


## 3. Model

In [0]:
def pad_seq(seq, seq_len=None):
    """Truncate and right-pad a batch of index sequences to one fixed length.

    Args:
        seq: iterable of 1-D index sequences (lists or arrays), possibly of
            different lengths.
        seq_len: target length. Defaults to CONFIG['seq_len'] when omitted,
            which is the behaviour existing callers rely on.

    Returns:
        A 2-D numpy array of shape (len(seq), seq_len). Sequences longer than
        seq_len are cut; shorter ones are filled on the right with 0.
    """
    if seq_len is None:
        seq_len = CONFIG['seq_len']
    clipped = [row[:seq_len] for row in seq]
    return np.stack([np.pad(row, (0, seq_len - len(row)), 'constant') for row in clipped])

def generate_init_state(model_dim=None):
    """Sample a random starting point for sentence generation.

    Args:
        model_dim: size of the LSTM state vectors. Defaults to
            CONFIG['model_dim'] when omitted.

    Returns:
        (token, state_h, state_c) where token is a (1, 1) integer array holding
        the id of the sentiment token that opens the sentence, and state_h /
        state_c are (1, model_dim) arrays drawn from a standard normal.
    """
    if model_dim is None:
        model_dim = CONFIG['model_dim']

    # Ids follow the order of the special tokens at the head of the vocabulary.
    pos_id, neg_id = 3, 4  # [POS], [NEG]
    start_id = pos_id if np.random.rand() < 0.5 else neg_id
    token = np.array([[start_id]])

    state_shape = (1, model_dim)
    state_h = np.random.normal(size=state_shape)
    state_c = np.random.normal(size=state_shape)
    return token, state_h, state_c

In [0]:
class Tokenizer(object):
    """Maps between text, "surface/TAG" tokens and vocabulary indices.

    Args:
        tagger: object with a pos(text) method returning (surface, tag) pairs.
        vocab: list of tokens; a token's position in the list is its index.
            Must contain '[UNK]'.
    """
    def __init__(self, tagger, vocab):
        self.tagger = tagger
        self.vocab = vocab
        self.token2idx = {j:i for i,j in enumerate(vocab)}
        self.idx2token = {i:j for i,j in enumerate(vocab)}
        self.vocab_size = len(vocab)

    def seq_to_idx(self, seq):
        """Convert tokens to indices; out-of-vocabulary tokens map to '[UNK]'."""
        # Dict lookup instead of `in self.vocab`: membership on the list is a
        # linear scan of the whole vocabulary for every single token.
        unk_idx = self.token2idx['[UNK]']
        return [self.token2idx.get(tok, unk_idx) for tok in seq]

    def encode(self, sentence):
        """Normalize and tag a raw sentence, returning its list of indices."""
        # '/' separates surface from tag inside a token, so drop it from the
        # text first; spaces become '_' so decode() can restore them.
        sentence = sentence.replace('/', ' ').replace(' ', '_')
        tokens = ['/'.join(pair) for pair in self.tagger.pos(sentence)]
        # Must go through self: seq_to_idx is a method, not a module function.
        return self.seq_to_idx(tokens)

    def decode(self, tokens):
        """Turn indices back into text: strip the tags, join, '_' -> space."""
        surfaces = [self.idx2token[i].split('/')[0] for i in tokens]
        return ''.join(surfaces).replace('_', ' ')

In [0]:
class TrueDataset(tf.keras.utils.Sequence):
    """Batches of real sentences, all labelled 1, for the discriminator.

    Args:
        data: DataFrame with a 'label' column (sentiment token) and a 'token'
            column (list of "surface/TAG" tokens).
        tokenizer: object providing seq_to_idx(tokens).
    """
    def __init__(self, data, tokenizer):
        super().__init__()
        self.data = data
        self.data_len = len(data)
        self.tokenizer = tokenizer

        # Cursor used by next(); the row order is shuffled once up front.
        self.idx = 0
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last one may be partial)."""
        # Return a plain int rather than a numpy scalar.
        return int(np.ceil(self.data_len / CONFIG['batch_size']))

    def on_epoch_end(self):
        """Draw a fresh random row order."""
        self.indices = np.random.permutation(self.data_len)

    def __getitem__(self, idx):
        """Return (x, y) for batch `idx`: padded index matrix and all-ones labels."""
        batch_idx = self.indices[CONFIG['batch_size']*idx : CONFIG['batch_size']*(idx+1)]
        batch = self.data.iloc[batch_idx]
        # Sentence = label token + content tokens, cut so that '[EOS]' always
        # fits inside seq_len.
        body_len = CONFIG['seq_len'] - 1
        x = [([label] + list(tokens))[:body_len] + ['[EOS]']
             for label, tokens in zip(batch['label'], batch['token'])]
        x = [self.tokenizer.seq_to_idx(i) for i in x]
        x = pad_seq(x)
        # One label per row actually present: the final batch of an epoch can
        # hold fewer than batch_size rows.
        y = np.ones(len(x))
        return x, y

    def next(self):
        """Return the next batch, reshuffling and wrapping at the end of an epoch."""
        if self.idx == len(self):
            self.on_epoch_end()
            self.idx = 0
        x, y = self[self.idx]
        self.idx += 1
        return x, y

In [0]:
class Generator(object):
    """LSTM language model used as the policy that writes sentences.

    The network takes a token sequence plus an LSTM state and returns the
    softmax distribution over the next token together with the updated state.

    Args:
        tokenizer: object exposing vocab_size and token2idx.
    """
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        self.network, self.optimizer = self.build_network()

    def build_network(self):
        """Build the Embedding -> LSTM -> softmax network and its Adam optimizer."""
        input_token = tf.keras.Input(shape=(None,))
        input_state_h = tf.keras.Input(shape=(CONFIG['model_dim'],))
        input_state_c = tf.keras.Input(shape=(CONFIG['model_dim'],))
        embed_token = tf.keras.layers.Embedding(self.tokenizer.vocab_size, CONFIG['model_dim'])(input_token)
        x, state_h, state_c = tf.keras.layers.LSTM(CONFIG['model_dim'], return_state=True)(embed_token, initial_state=[input_state_h, input_state_c])
        output_token = tf.keras.layers.Dense(self.tokenizer.vocab_size, activation='softmax')(x)

        network = tf.keras.Model([input_token, input_state_h, input_state_c], [output_token, state_h, state_c])
        optimizer = tf.keras.optimizers.Adam(CONFIG['generator_lr'])
        return network, optimizer

    def write_token(self, token, state_h, state_c):
        """Sample the next token; returns (token of shape (1, 1), state_h, state_c)."""
        probs, state_h, state_c = self.network([token, state_h, state_c])
        # tf.random.categorical expects unnormalized log-probabilities. Feeding
        # it the softmax output directly would sample from softmax(probs),
        # which is close to uniform over the vocabulary. The floor avoids log(0).
        logits = tf.math.log(tf.maximum(probs, 1e-20))
        token = tf.random.categorical(logits, 1).numpy() # (1, 1)
        return token, state_h, state_c

    def write_sentence(self, token, state_h, state_c):
        """Extend `token` (shape (1, n)) until '[EOS]' or seq_len tokens.

        The first step feeds the whole prefix through the LSTM starting from
        the given state; later steps feed only the newly sampled token together
        with the carried state. Returns the sentence as a list of indices,
        prefix included.
        """
        eos_idx = self.tokenizer.token2idx['[EOS]']
        sentence = list(token[0])
        # Test the most recent token, so a prefix that already ends in '[EOS]'
        # is not extended.
        while sentence[-1] != eos_idx and len(sentence) < CONFIG['seq_len']:
            token, state_h, state_c = self.write_token(token, state_h, state_c)
            sentence.append(token[0][0])
        return sentence

    def write_sentence_init(self):
        """Write a sentence from a freshly sampled start token and state."""
        token, state_h, state_c = generate_init_state()
        sentence = self.write_sentence(token, state_h, state_c)
        return sentence

    def train(self, token, state_h, state_c, next_token, reward):
        """Apply one policy-gradient update.

        Args:
            token: prefix of shape (1, n) fed to the network.
            state_h, state_c: LSTM state the prefix starts from.
            next_token: index of the token that was chosen after the prefix.
            reward: scalar (or broadcastable array) scoring that choice.
        """
        with tf.GradientTape() as g:
            policy, _, _ = self.network([token, state_h, state_c])
            action_prob = tf.reduce_sum(policy * tf.one_hot(next_token, depth=self.tokenizer.vocab_size), axis=-1)
            # REINFORCE weights the log-probability of the sampled action by
            # its reward; weighting the raw probability scales the gradient by
            # the probability a second time.
            log_prob = tf.math.log(tf.maximum(action_prob, 1e-20))
            loss = -tf.reduce_mean(log_prob * tf.cast(reward, log_prob.dtype))

        gradients = g.gradient(loss, self.network.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.network.trainable_variables))

In [0]:
class Discriminator(object):
    """1-D convolutional binary classifier over fixed-length index sequences.

    Args:
        tokenizer: object exposing vocab_size (size of the embedding table).
    """
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        self.network = self.build_network()

    def build_network(self):
        """Build and compile the network; the output is one sigmoid unit."""
        layers = tf.keras.layers

        inputs = layers.Input((CONFIG['seq_len'],))
        hidden = layers.Embedding(self.tokenizer.vocab_size, CONFIG['model_dim'])(inputs)
        # Three conv + pooling stages, each pooling halving the sequence axis.
        for filters in (32, 64, 128):
            hidden = layers.Conv1D(filters, 3, padding='same', activation='relu')(hidden)
            hidden = layers.MaxPool1D(2)(hidden)
        hidden = layers.Conv1D(256, 3, padding='same', activation='relu')(hidden)
        hidden = layers.Flatten()(hidden)
        hidden = layers.Dense(256, activation='relu')(hidden)
        outputs = layers.Dense(1, activation='sigmoid')(hidden)

        network = tf.keras.Model(inputs, outputs)
        network.compile(
            optimizer = tf.keras.optimizers.Adam(CONFIG['discriminator_lr']),
            loss = 'binary_crossentropy',
            metrics = ['acc']
        )
        return network

    def give_reward(self, x):
        """Score a batch and return the sigmoid outputs rounded, as a numpy array."""
        scores = self.network(x)
        return tf.round(scores).numpy()

## 4. Train

In [0]:
# Tokenizer shared by every component, built on the tagger and the vocabulary.
tokenizer = Tokenizer(mecab, vocab)
# Policy network that writes sentences.
generator = Generator(tokenizer)
# Classifier whose output is used as the generator's reward.
discriminator = Discriminator(tokenizer)
# Source of real training sentences for the discriminator.
dataset = TrueDataset(train_data, tokenizer)

In [0]:
for ep in range(CONFIG['epoch_size']):
    # ---- Generator: policy-gradient updates on freshly sampled sentences ----
    for g in range(CONFIG['g_step_size']):
        token, state_h, state_c = generate_init_state()
        sentence = generator.write_sentence(token, state_h, state_c)
        last_idx = len(sentence) - 1

        for i in range(1, len(sentence)):
            subsentence = np.array([sentence[:i]])   # state the choice was made in
            next_token = sentence[i]                 # the choice being rewarded

            if i < last_idx:
                # Intermediate token: estimate its value by completing the
                # sentence many times from the prefix that INCLUDES the chosen
                # token and averaging the discriminator's verdicts. Rolling out
                # from sentence[:i] would give a reward that does not depend on
                # the choice at all.
                rollout_prefix = np.array([sentence[:i + 1]])
                generated = [generator.write_sentence(rollout_prefix, state_h, state_c) for _ in range(CONFIG['batch_size'])]
                generated = pad_seq(generated)
                reward = np.mean(discriminator.give_reward(generated))
            else:
                # Final token: the sentence is complete, so score it directly.
                # It is padded first because the discriminator only accepts
                # inputs of length seq_len.
                reward = np.mean(discriminator.give_reward(pad_seq([sentence])))

            generator.train(subsentence, state_h, state_c, next_token, reward)

    # ---- Discriminator: real batches vs. neg_ratio times as many fakes ----
    for d in range(CONFIG['d_step_size']):
        x_true, y_true = dataset.next()
        x_false = [generator.write_sentence_init() for _ in range(CONFIG['batch_size'] * CONFIG['neg_ratio'])]
        x_false = pad_seq(x_false)
        y_false = np.zeros(CONFIG['batch_size'] * CONFIG['neg_ratio'])
        x = np.append(x_true, x_false, axis=0)
        y = np.append(y_true, y_false, axis=0)
        discriminator.network.fit(x, y, batch_size=CONFIG['batch_size'], epochs=CONFIG['d_epoch_size'], verbose=0)

    # Progress: last sentence written in this epoch and the reward of its final
    # token. The reward lies in [0, 1], so it is printed as a fraction; casting
    # it to int would always show 0 unless it is exactly 1.
    sentence = tokenizer.decode(sentence)
    print(f'EP : {str(ep).zfill(3)} | Reward : {float(np.sum(reward)):.3f} | Sentence : {sentence}')

## 5. Test

In [0]:
# Same preparation as for the training split: drop '/' (reserved as the
# surface/TAG separator) and encode spaces as '_'.
test_data['document'] = test_data['document'].apply(lambda x : x.replace('/', ' ').replace(' ', '_'))
# Each (surface, tag) pair returned by the tagger becomes one "surface/TAG" token.
test_data['token'] = test_data['document'].apply(lambda x : ['/'.join(i) for i in mecab.pos(x)])
# Values that are already converted pass through unchanged, so re-running this
# cell does not replace the whole column with NaN (which a plain dict map() would do).
test_label_tokens = {0:'[NEG]', 1:'[POS]'}
test_data['label'] = test_data['label'].map(lambda v : test_label_tokens.get(v, v))
test_dataset = TrueDataset(test_data, tokenizer)

In [0]:
# Evaluation batch: one batch of real test sentences (label 1) followed by
# batch_size generated sentences (label 0).
x_true, y_true = test_dataset.next()
n_fake = CONFIG['batch_size']
x_false = pad_seq([generator.write_sentence_init() for _ in range(n_fake)])
y_false = np.zeros(n_fake)
x = np.append(x_true, x_false, axis=0)
y = np.append(y_true, y_false, axis=0)

loss, acc = discriminator.network.evaluate(x, y)

In [0]:
# Print ten freshly sampled sentences as a qualitative check.
for _ in range(10):
    sentence = tokenizer.decode(generator.write_sentence_init())
    print(sentence)