# 第9回講義 宿題

## 課題. RNNを用いてIMDbのsentiment analysisを実装せよ

### 注意

- homework関数を完成させて提出してください
    - 訓練データはtrain_X, train_y, テストデータはtest_Xで与えられます
    - train_Xとtrain_yをtrain_X, train_yとvalid_X, valid_yに分けるなどしてモデルを学習させてください
    - test_Xに対して予測ラベルpred_yを作り, homework関数の戻り値としてください
    - pred_yは1次元のlistとしてください
- pred_yのtest_yに対する精度(F値)で評価します
- 全体の実行時間がiLect上で60分を超えないようにしてください
- homework関数の外には何も書かないでください (必要なものは全てhomework関数に入れてください)
- 解答提出時には Answer Cell の内容のみを提出してください

次のセルのhomework関数を完成させて提出してください

# Answer Cell

In [None]:
def homework(train_X, train_y, test_X):
    """Train a masked simple-RNN sentiment classifier and label ``test_X``.

    The model is Embedding -> RNN (ReLU, masked) -> Dense(sigmoid), trained
    with Adam on binary cross-entropy for a fixed number of epochs.

    Relies on ``np``, ``tf`` (TensorFlow 1.x graph API) and ``pad_sequences``
    being available in the enclosing module scope, and on the global
    ``num_words`` (vocabulary size) having been set by the caller.

    Args:
        train_X: sequence of token-id sequences used for training.
        train_y: labels aligned with ``train_X`` (fed as 0/1 targets).
        test_X: sequence of token-id sequences to classify.

    Returns:
        A flat list with one 0/1 integer prediction per entry of ``test_X``,
        in the same order as ``test_X``.
    """
    global num_words  # vocabulary size, assigned by the caller before the call

    pad_id = -1  # value used to right-pad sequences; masked out inside the RNN
    rng = np.random.RandomState(1234)

    class Embedding:
        """Lookup table mapping token ids to dense vectors."""

        def __init__(self, vocab_size, emb_dim, scale=0.08):
            self.V = tf.Variable(
                rng.randn(vocab_size, emb_dim).astype('float32') * scale, name='V')

        def f_prop(self, x):
            # The padding id (-1) is not a valid row index: the lookup raises
            # on CPU and only silently yields zeros on GPU.  Clamp it to row 0;
            # those timesteps are discarded by the RNN mask anyway.
            return tf.nn.embedding_lookup(self.V, tf.maximum(x, 0))

    def orthogonal_initializer(shape, scale=1.0):
        """Return a random (semi-)orthogonal float32 matrix of ``shape``."""
        # Draw from the seeded generator so that runs are reproducible.
        a = rng.normal(0.0, 1.0, shape).astype(np.float32)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        q = u if u.shape == shape else v
        return scale * q

    class RNN:
        """Simple recurrent layer with ReLU activation and a padding mask."""

        def __init__(self, in_dim, hid_dim, m):
            self.in_dim = in_dim
            self.hid_dim = hid_dim
            # Xavier (Glorot) uniform initialisation for the input weights.
            bound = np.sqrt(6.0 / (in_dim + hid_dim))
            self.W_in = tf.Variable(
                rng.uniform(low=-bound, high=bound, size=(in_dim, hid_dim)).astype('float32'),
                name='W_in')
            # Random orthogonal initialisation for the recurrent weights.
            self.W_re = tf.Variable(orthogonal_initializer((hid_dim, hid_dim)), name='W_re')
            self.b_re = tf.Variable(tf.zeros([hid_dim], dtype=tf.float32), name='b_re')
            self.m = m  # [batch_size, sentence_length]; 1 for real tokens, 0 for padding

        def f_prop(self, x):
            def fn(h_tm1, x_and_m):
                x_t = x_and_m[0]
                m_t = x_and_m[1]
                h_t = tf.nn.relu(
                    tf.matmul(h_tm1, self.W_re) + tf.matmul(x_t, self.W_in) + self.b_re)
                # On padded steps keep the previous state instead of updating it.
                return m_t[:, np.newaxis] * h_t + (1 - m_t[:, np.newaxis]) * h_tm1

            # [batch_size, sentence_length, in_dim] -> [sentence_length, batch_size, in_dim]
            _x = tf.transpose(x, perm=[1, 0, 2])
            # [batch_size, sentence_length] -> [sentence_length, batch_size]
            _m = tf.transpose(self.m)
            # Multiplying by a zero matrix gives a [batch_size, hid_dim] zero
            # state without knowing the batch size at graph-construction time.
            h_0 = tf.matmul(x[:, 0, :], tf.zeros([self.in_dim, self.hid_dim]))

            h = tf.scan(fn=fn, elems=[_x, _m], initializer=h_0)

            return h[-1]  # state after the last timestep

    class Dense:
        """Fully connected layer followed by ``function``."""

        def __init__(self, in_dim, out_dim, function=lambda x: x):
            # Xavier (Glorot) uniform initialisation.
            bound = np.sqrt(6.0 / (in_dim + out_dim))
            self.W = tf.Variable(
                rng.uniform(low=-bound, high=bound, size=(in_dim, out_dim)).astype('float32'),
                name='W')
            self.b = tf.Variable(np.zeros([out_dim]).astype('float32'))
            self.function = function

        def f_prop(self, x):
            return self.function(tf.matmul(x, self.W) + self.b)

    # Model definition
    emb_dim = 100
    hid_dim = 50

    x = tf.placeholder(tf.int32, [None, None], name='x')
    m = tf.cast(tf.not_equal(x, pad_id), tf.float32)  # 0 on padding, 1 elsewhere
    t = tf.placeholder(tf.float32, [None, None], name='t')

    layers = [
        Embedding(num_words, emb_dim),
        RNN(emb_dim, hid_dim, m=m),
        Dense(hid_dim, 1, tf.nn.sigmoid)
    ]

    def f_props(layers, x):
        for layer in layers:
            x = layer.f_prop(x)
        return x

    y = f_props(layers, x)

    # Binary cross-entropy; the clipping keeps log() away from zero.
    cost = tf.reduce_mean(-t*tf.log(tf.clip_by_value(y, 1e-10, 1.0)) - (1. - t)*tf.log(tf.clip_by_value(1.-y, 1e-10, 1.0)))

    train = tf.train.AdamOptimizer().minimize(cost)
    test = tf.round(y)

    # Sort training samples by decreasing length so that each mini-batch
    # groups sequences of similar length and needs little padding.
    order = sorted(range(len(train_X)), key=lambda i: -len(train_X[i]))
    train_X = [train_X[i] for i in order]
    train_y = [train_y[i] for i in order]

    n_epochs = 3
    train_batch_size = 100
    test_batch_size = 1000

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for _ in range(n_epochs):
            # Step through every sample, including a final partial batch.
            for start in range(0, len(train_X), train_batch_size):
                end = start + train_batch_size
                train_X_mb = np.array(
                    pad_sequences(train_X[start:end], padding='post', value=pad_id))
                train_y_mb = np.array(train_y[start:end])[:, np.newaxis]
                sess.run(train, feed_dict={x: train_X_mb, t: train_y_mb})

        # Predict in batches; the range-based stepping also covers inputs that
        # are shorter than, or not a multiple of, the batch size.
        pred_y = []
        for start in range(0, len(test_X), test_batch_size):
            end = start + test_batch_size
            test_X_mb = np.array(
                pad_sequences(test_X[start:end], padding='post', value=pad_id))
            pred = sess.run(test, feed_dict={x: test_X_mb})
            pred_y += pred.flatten().astype('int32').tolist()

    return pred_y

- 以下のvalidate_homework関数を用いてエラーが起きないか動作確認をして下さい。
- 提出に際して、以下のscore_homework関数で実行が60分以内に終わることを確認して下さい。
- 評価は以下のscore_homework関数で行われますが、random_stateの値は変更されます。

# Checker Cell (for student)

In [None]:
import sys
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences

# Remove the listed submodules/attributes from the imported ``tf`` module
# object.  Any later access such as ``tf.contrib`` or ``tf.layers`` will then
# raise AttributeError.
# NOTE(review): presumably this restricts submissions to the low-level
# TensorFlow API -- confirm against the course rules.
del [
    tf.app,
    tf.compat,
    tf.contrib,
    tf.errors,
    tf.gfile,
    tf.graph_util,
    tf.image,
    tf.layers,
    tf.logging,
    tf.losses,
    tf.metrics,
    tf.python_io,
    tf.resource_loader,
    tf.saved_model,
    tf.sdca,
    tf.sets,
    tf.summary,
    tf.sysconfig,
    tf.test
]

# A ``None`` entry in sys.modules makes any subsequent ``import keras`` raise
# ImportError.  The names already imported above (``imdb``, ``pad_sequences``)
# remain bound and usable.
sys.modules['keras'] = None

def validate_homework():
    """Smoke-test ``homework`` on the first 100 IMDb train/test samples.

    Sets the global ``num_words`` to 10000, loads the dataset through
    ``imdb.load_data``, runs ``homework`` on the truncated splits and prints
    the macro-averaged F1 score of its predictions.
    """
    global num_words

    num_words = 10000
    train_split, test_split = imdb.load_data(
        num_words=num_words, seed=42, start_char=0, oov_char=1, index_from=2)
    all_train_X, all_train_y = train_split
    all_test_X, all_test_y = test_split

    # Only the leading 100 samples of each split are used for the quick check.
    head = slice(None, 100)
    predicted = homework(all_train_X[head], all_train_y[head], all_test_X[head])
    expected = all_test_y[head].tolist()

    print(f1_score(expected, predicted, average='macro'))

def score_homework():
    """Run ``homework`` on the full IMDb splits and print the macro F1 score.

    Sets the global ``num_words`` to 10000 before loading the data through
    ``imdb.load_data``.
    """
    global num_words
    num_words = 10000

    train_split, test_split = imdb.load_data(
        num_words=num_words, seed=42, start_char=0, oov_char=1, index_from=2)
    full_train_X, full_train_y = train_split
    full_test_X, full_test_y = test_split

    predicted = homework(full_train_X, full_train_y, full_test_X)
    expected = full_test_y.tolist()

    print(f1_score(expected, predicted, average='macro'))

In [None]:
# Run the quick 100-sample check; uncomment the second call to run the
# full-dataset scoring pass instead.
validate_homework()
#score_homework()