# 第6回講義 宿題

## 課題. Stacked Denoising Autoencoderで事前学習をした上で, MNISTを多層パーセプトロンで学習せよ

### 注意

- homework関数を完成させて提出してください
    - 訓練データはtrain_X, train_y, テストデータはtest_Xで与えられます
    - train_Xとtrain_yをtrain_X, train_yとvalid_X, valid_yに分けるなどしてモデルを学習させてください
    - test_Xに対して予測ラベルpred_yを作り, homework関数の戻り値としてください
- pred_yのtest_yに対する精度(F値)で評価します
- 全体の実行時間がiLect上で60分を超えないようにしてください
- homework関数の外には何も書かないでください (必要なものは全てhomework関数に入れてください)
- 解答提出時には Answer Cell の内容のみを提出してください

- CNNは使わないでください

**`tf` の以下のモジュールはこの回では使用できないように制限されています. 注意してください.**
```python
tf.app
tf.compat
tf.contrib
tf.errors
tf.gfile
tf.graph_util
tf.image
tf.layers
tf.logging
tf.losses
tf.metrics
tf.python_io
tf.resource_loader
tf.saved_model
tf.sdca
tf.sets
tf.summary
tf.sysconfig
tf.test
tf.train
```

次のセルを完成させて提出してください

# Answer Cell

In [None]:
def homework(train_X, train_y, test_X):
    """Classify MNIST with an MLP pre-trained as a stacked denoising autoencoder.

    Each hidden layer is first trained greedily to reconstruct its clean
    input from a randomly masked copy of it.  The whole 784-500-500-10
    network is then fine-tuned with plain SGD on the cross-entropy between
    the softmax output and the labels.

    Relies on ``np``, ``tf`` (TensorFlow 1.x graph API) and
    ``sklearn.utils.shuffle`` already being imported by the notebook.

    Args:
        train_X: Training inputs, fed to a ``[None, 784]`` float placeholder.
        train_y: One-hot training labels, fed to a ``[None, 10]`` placeholder.
        test_X: Inputs to label, laid out like ``train_X``.

    Returns:
        The arg-max class index predicted for every row of ``test_X``.
    """
    random_state = 42
    rng = np.random.RandomState(1234)

    def safe_log(p):
        """Return log(p) with p clipped away from 0 so the loss cannot become NaN/inf."""
        return tf.log(tf.clip_by_value(p, 1e-10, 1.0))

    class Autoencoder:
        """Tied-weight autoencoder: encodes with ``W`` and decodes with its transpose."""

        def __init__(self, vis_dim, hid_dim, W, function=lambda x: x, b=None):
            self.W = W
            # Visible-side bias, used only when decoding.
            self.a = tf.Variable(np.zeros(vis_dim).astype('float32'), name='a')
            # Hidden-side bias; a caller may pass its own variable so that the
            # values learned here are the ones it uses afterwards.
            if b is None:
                b = tf.Variable(np.zeros(hid_dim).astype('float32'), name='b')
            self.b = b
            self.function = function
            self.params = [self.W, self.a, self.b]

        def encode(self, x):
            u = tf.matmul(x, self.W) + self.b
            return self.function(u)

        def decode(self, x):
            u = tf.matmul(x, tf.transpose(self.W)) + self.a
            return self.function(u)

        def f_prop(self, x):
            y = self.encode(x)
            return self.decode(y)

        def reconst_error(self, x, noise):
            """Return (cross-entropy between x and its reconstruction from x * noise, reconstruction)."""
            tilde_x = x * noise
            reconst_x = self.f_prop(tilde_x)
            error = -tf.reduce_mean(tf.reduce_sum(
                x * safe_log(reconst_x) + (1. - x) * safe_log(1. - reconst_x),
                axis=1))
            return error, reconst_x

    class Dense:
        """Fully connected layer that owns an autoencoder for its own pre-training."""

        def __init__(self, in_dim, out_dim, function):
            self.W = tf.Variable(rng.uniform(low=-0.08, high=0.08, size=(in_dim, out_dim)).astype('float32'), name='W')
            self.b = tf.Variable(np.zeros([out_dim]).astype('float32'))
            self.function = function
            self.params = [self.W, self.b]

            # Share both W and b: otherwise the encoder bias trained during
            # pre-training would live in a separate variable and be discarded.
            self.ae = Autoencoder(in_dim, out_dim, self.W, self.function, b=self.b)

        def f_prop(self, x):
            u = tf.matmul(x, self.W) + self.b
            self.z = self.function(u)
            return self.z

        def pretrain(self, x, noise):
            cost, reconst_x = self.ae.reconst_error(x, noise)
            return cost, reconst_x

    def sgd(cost, params, eps=np.float32(0.1)):
        """Return one SGD ``assign_add`` op per parameter that ``cost`` depends on."""
        g_params = tf.gradients(cost, params)
        # tf.gradients yields None for parameters the cost does not reach.
        return [
            param.assign_add(-eps * g_param)
            for param, g_param in zip(params, g_params)
            if g_param is not None
        ]

    def f_props(layers, x):
        """Chain the layers' forward passes; return the output and all parameters."""
        params = []
        for layer in layers:
            x = layer.f_prop(x)
            params += layer.params
        return x, params

    # A private graph keeps repeated calls from piling nodes onto the default
    # graph, and the ``with`` block guarantees the session is closed.
    with tf.Graph().as_default(), tf.Session() as sess:
        layers = [
            Dense(784, 500, tf.nn.sigmoid),
            Dense(500, 500, tf.nn.sigmoid),
            Dense(500, 10, tf.nn.softmax),
        ]
        sess.run(tf.global_variables_initializer())

        # pre-training
        X = np.copy(train_X)
        corruption_level = 0.3
        batch_size = 100
        n_batches = X.shape[0] // batch_size
        n_epochs = 10

        for layer in layers[:-1]:
            x = tf.placeholder(tf.float32)
            noise = tf.placeholder(tf.float32)

            cost, _ = layer.pretrain(x, noise)
            # Update the autoencoder's own parameters (W, visible bias, hidden
            # bias), not just the ones the cost happens to share with the layer.
            train = tf.group(*sgd(cost, layer.ae.params))
            encode = layer.f_prop(x)

            for _ in range(n_epochs):
                X = shuffle(X, random_state=random_state)
                for i in range(n_batches):
                    start = i * batch_size
                    end = start + batch_size

                    # Binary mask: each input unit is kept with probability 1 - corruption_level.
                    _noise = rng.binomial(size=X[start:end].shape, n=1, p=1 - corruption_level)
                    sess.run(train, feed_dict={x: X[start:end], noise: _noise})
            # The next layer is pre-trained on this layer's clean encoding.
            X = sess.run(encode, feed_dict={x: X})

        # set graph and params
        x = tf.placeholder(tf.float32, [None, 784])
        t = tf.placeholder(tf.float32, [None, 10])

        y, params = f_props(layers, x)

        cost = -tf.reduce_mean(tf.reduce_sum(t * safe_log(y), 1))
        train = tf.group(*sgd(cost, params))
        valid = tf.argmax(y, 1)

        # learning
        n_epochs = 432
        batch_size = 100
        n_batches = train_X.shape[0] // batch_size

        for _ in range(n_epochs):
            train_X, train_y = shuffle(train_X, train_y, random_state=random_state)
            for i in range(n_batches):
                start = i * batch_size
                end = start + batch_size
                sess.run(train, feed_dict={x: train_X[start:end], t: train_y[start:end]})

        pred_y = sess.run(valid, feed_dict={x: test_X})

    return pred_y

- 以下のvalidate_homework関数を用いてエラーが起きないか動作確認をして下さい。
- 提出に際して、以下のscore_homework関数で60分以内に実行が終わることを確認して下さい。
- 評価は以下のscore_homework関数で行われますが、random_stateの値は変更されます。

# Checker Cell (for student)

In [None]:
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from tensorflow.examples.tutorials.mnist import input_data

import numpy as np
import tensorflow as tf

# Remove the TensorFlow submodules that are off-limits for this assignment,
# so that any use of them fails with an AttributeError.
for _restricted_name in (
    'app',
    'compat',
    'contrib',
    'errors',
    'gfile',
    'graph_util',
    'image',
    'layers',
    'logging',
    'losses',
    'metrics',
    'python_io',
    'resource_loader',
    'saved_model',
    'sdca',
    'sets',
    'summary',
    'sysconfig',
    'test',
    'train',
):
    delattr(tf, _restricted_name)
del _restricted_name

def load_mnist():
    """Load MNIST with one-hot labels and return a fixed 80/20 train/test split.

    The dataset's train and test portions are pooled before splitting.

    Returns:
        The four arrays produced by ``train_test_split`` for (images, labels):
        train images, test images, train labels, test labels.
    """
    dataset = input_data.read_data_sets('MNIST_data/', one_hot=True)
    images = np.concatenate([dataset.train.images, dataset.test.images], axis=0)
    labels = np.concatenate([dataset.train.labels, dataset.test.labels], axis=0)
    return train_test_split(images, labels, test_size=0.2, random_state=42)

def validate_homework():
    """Smoke-test ``homework`` on the first 100 samples and print the macro F1 score."""
    train_X, test_X, train_y, test_y = load_mnist()

    # A small subset keeps this check fast; it only verifies that the code runs.
    head = slice(None, 100)
    predicted = homework(train_X[head], train_y[head], test_X[head])
    expected = np.argmax(test_y[head], 1)
    print(f1_score(expected, predicted, average='macro'))

def score_homework():
    """Run ``homework`` on the full split and print the macro F1 score on the test set."""
    train_X, test_X, train_y, test_y = load_mnist()
    predicted = homework(train_X, train_y, test_X)
    expected = np.argmax(test_y, 1)
    print(f1_score(expected, predicted, average='macro'))

In [None]:
# Quick check on a 100-sample subset; comment this out and enable the line
# below to run the full scoring pass instead.
validate_homework()
#score_homework()