# Neural Architecture Search with Reinforcement Learning

## 0. Paper

### Info
* Title : Neural Architecture Search with Reinforcement Learning
* Author : Barret Zoph, Quoc V Le
* Link : https://arxiv.org/pdf/1611.01578.pdf

### Summary
* RNN 기반의 control network가 child network의 모델 구조 decoding
* child network의 validation accuracy를 reward로 사용
* REINFORCE 알고리즘으로 control network 학습

### Features
* Skip connection 제외
* Search space 축소
* child model 메모리 문제 확인 필요

## 1. Setting

In [2]:
# Libraries
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K

In [29]:
# Hyperparameters shared by the notebook cells.
CONFIG = {
    # Project directory on the mounted drive (not read by the cells shown here).
    'base_dir': '/content/drive/Shared drives/Yoon/Project/Doing/Deep Learning Paper Implementation',
    # Decay factor of the moving-average reward baseline.
    'baseline_decay': 0.9,
    # Batch size used when fitting and evaluating each child model.
    'batch_size': 128,
    # Number of epochs each child model is trained for.
    'epoch_size': 10,
    # Number of controller search iterations.
    'step_size': 30,
}

## 2. Data

In [4]:
# Load CIFAR-10 through Keras as (images, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [5]:
# Cast both image arrays to float32 and divide by 255.
x_train, x_test = (
    images.astype(np.float32) / 255 for images in (x_train, x_test)
)

In [6]:
# Shuffle the training indices, then carve off the first 5000 as a validation
# set; the remainder stays as the training set.
random_indices = np.random.permutation(len(x_train))
val_indices, train_indices = random_indices[:5000], random_indices[5000:]

# The validation arrays must be taken before x_train / y_train are rebound.
x_val = x_train[val_indices]
y_val = y_train[val_indices]
x_train = x_train[train_indices]
y_train = y_train[train_indices]

## 3. Model

In [8]:
class Model(tf.keras.Model):
    """Recurrent controller that samples child CNN architectures token by token.

    The vocabulary is ``search_list``: a start token followed by one entry per
    (key, option) pair of ``search_dict``. Given a prefix of token indices the
    network returns one unnormalised score per vocabulary entry.
    """

    def __init__(self, num_layer, search_dict):
        """Store the search space and create the controller layers.

        Args:
            num_layer: Number of convolution layers decoded per child model.
            search_dict: Mapping from hyperparameter name to its candidate
                values.
        """
        super().__init__()
        self.num_layer = num_layer
        self.search_dict = search_dict
        self.search_list = search_dict_to_list(search_dict)
        self.key_idx = search_dict_to_key_idx(search_dict)

        vocab_size = len(self.search_list)
        hidden_units = 35
        self.embedding = tf.keras.layers.Embedding(vocab_size, hidden_units)
        self.lstm1 = tf.keras.layers.LSTM(hidden_units, return_sequences=True)
        self.lstm2 = tf.keras.layers.LSTM(hidden_units)
        self.fc = tf.keras.layers.Dense(vocab_size)

    def call(self, x):
        """Return one score per vocabulary entry for the token prefix ``x``."""
        embedded = self.embedding(x)
        sequence = self.lstm1(embedded)
        summary = self.lstm2(sequence)
        return self.fc(summary)

    def _decode(self, inputs, key):
        """Sample the next token, restricted to the options of ``key``.

        Args:
            inputs: Nested list of the token indices chosen so far.
            key: Name of the hyperparameter being chosen.

        Returns:
            Tuple ``(output, value)``: the sampled vocabulary index and the
            integer parsed from the text after the last underscore of that
            vocabulary entry.
        """
        scores = self(np.array(inputs))

        # 1.0 marks entries that must not be sampled; the slice belonging to
        # ``key`` is cleared so only its options keep their original score.
        start, stop = self.key_idx[key]
        blocked = np.ones((1, len(self.search_list))).astype(np.float32)
        blocked[0][start:stop] = 0.0
        scores = scores + blocked * -1e7

        output = tf.random.categorical(scores, 1)[0][0].numpy()
        value = int(self.search_list[output].split('_')[-1])
        return output, value

    def decode(self):
        """Sample a full architecture and build the matching child model.

        Returns:
            Tuple ``(child_model, inputs)``: a compiled ``tf.keras.Sequential``
            classifier and the array of token indices (start token first) that
            produced it.
        """
        tokens = [0]  # index 0 is the start token
        child_model = tf.keras.Sequential()
        child_model.add(tf.keras.layers.Input((32, 32, 3)))

        for _ in range(self.num_layer):
            # One sampled value per search key describes a single conv layer.
            choices = {}
            for name in self.search_dict.keys():
                token, value = self._decode([tokens], name)
                tokens.append(token)
                choices[name] = value

            child_model.add(output_to_layer(**choices))
            child_model.add(tf.keras.layers.MaxPool2D())

        # Fixed classification head appended after the searched layers.
        for head_layer in (
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax'),
        ):
            child_model.add(head_layer)

        child_model.compile(
            loss='sparse_categorical_crossentropy',
            metrics=['acc'],
            optimizer=tf.keras.optimizers.Adam(),
        )
        return child_model, np.array([tokens])
        
def search_dict_to_list(search_dict):
    """Flatten the search space into the controller's token vocabulary.

    Args:
        search_dict: Mapping from hyperparameter name to its candidate values.

    Returns:
        List starting with ``'[START]'`` followed by one ``'<key>_<value>'``
        string per candidate, in dictionary order.
    """
    tokens = [
        f'{name}_{option}'
        for name, options in search_dict.items()
        for option in options
    ]
    return ['[START]', *tokens]

def output_to_layer(filters, kernel_width, kernel_height):
    """Build the convolution layer described by one set of decoded choices.

    Args:
        filters: Number of output filters.
        kernel_width: Kernel extent along the image width.
        kernel_height: Kernel extent along the image height.

    Returns:
        A ``tf.keras.layers.Conv2D`` with ``'same'`` padding and ReLU
        activation.
    """
    # Keras orders kernel_size as (height, width), so height must come first
    # for the two arguments to mean what their names say.
    return tf.keras.layers.Conv2D(
        filters,
        kernel_size=(kernel_height, kernel_width),
        padding='same',
        activation='relu',
    )

def search_dict_to_key_idx(search_dict):
    """Map each search key to the vocabulary index range of its options.

    Index 0 is reserved for the start token, so ranges begin at 1.

    Args:
        search_dict: Mapping from hyperparameter name to its candidate values.

    Returns:
        Dict mapping each key to ``[start, stop]`` with ``stop`` exclusive.
    """
    spans = {}
    start = 1
    for name, options in search_dict.items():
        stop = start + len(options)
        spans[name] = [start, stop]
        start = stop
    return spans

## 4. Train

In [30]:
@tf.function
def train_step(model, optimizer, inputs, reward):
    """Apply one REINFORCE update to the controller for a sampled architecture.

    The loss is ``-reward * sum_t log p(a_t | a_<t)``, where ``a_t`` is the
    token actually sampled at position ``t``. The log-probability is taken
    under the same key mask that ``Model._decode`` applies when sampling, so
    the gradient matches the distribution the token was drawn from.

    Args:
        model: Controller exposing ``search_dict``, ``search_list`` and
            ``key_idx`` and callable on integer token prefixes.
        optimizer: Keras optimizer used to update ``model``.
        inputs: Integer array of shape ``(1, T)``: the start token followed by
            the sampled tokens, as returned by ``Model.decode``.
        reward: Scalar reward (already baseline-adjusted by the caller).

    Returns:
        Scalar loss tensor.
    """
    keys = list(model.search_dict)
    vocab_size = len(model.search_list)
    seq_len = inputs.shape[1]
    reward = tf.cast(reward, tf.float32)

    with tf.GradientTape() as tape:
        neg_log_probs = []
        # The prefix inputs[:, :t] is what the controller saw when it sampled
        # inputs[:, t]; the full sequence has no following action, so stop at
        # seq_len - 1.
        for t in range(1, seq_len):
            logits = model(inputs[:, :t])

            # Tokens are emitted key by key in search_dict order, repeating
            # once per layer.
            start, stop = model.key_idx[keys[(t - 1) % len(keys)]]
            mask = np.ones((1, vocab_size), dtype=np.float32)
            mask[0, start:stop] = 0.0
            logits += mask * -1e7

            # Cross-entropy against the sampled token equals -log p(a_t | a_<t).
            neg_log_probs.append(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=inputs[:, t], logits=logits
                )
            )
        loss = tf.reduce_sum(neg_log_probs) * reward

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

In [31]:
# Search space: every child model has `num_layer` convolution layers, and each
# layer is described by one choice per key below.
num_layer = 5
search_dict = {
    'filters': [24, 36],
    'kernel_width': [1, 3, 5],
    'kernel_height': [1, 3, 5],
}

# Controller network and the optimizer that updates it.
model = Model(num_layer=num_layer, search_dict=search_dict)
optimizer = tf.keras.optimizers.Adam()

In [None]:
# Moving average of child accuracy, subtracted from the reward to reduce the
# variance of the policy-gradient update.
baseline = 0.0
hist = {'loss': [], 'child_acc': []}

for st in range(CONFIG['step_size']):
    # Sample an architecture, train it, and score it on the validation split.
    child_model, inputs = model.decode()
    child_model.fit(
        x_train, y_train,
        batch_size=CONFIG['batch_size'],
        epochs=CONFIG['epoch_size'],
        verbose=0,
    )
    child_loss, child_acc = child_model.evaluate(
        x_val, y_val, batch_size=CONFIG['batch_size'], verbose=0
    )

    # The reward uses the baseline from before this step's accuracy is folded
    # in. The update must rebind `baseline` itself, otherwise it stays at 0.0.
    reward = child_acc - baseline
    baseline = (
        CONFIG['baseline_decay'] * baseline
        + (1 - CONFIG['baseline_decay']) * child_acc
    )

    # Pass the reward as a tensor: a Python float argument would make the
    # tf.function retrace for every distinct value.
    loss = float(
        train_step(model, optimizer, inputs, tf.constant(reward, dtype=tf.float32))
    )
    hist['loss'].append(loss)
    hist['child_acc'].append(child_acc)
    print(f'STEP : {st:02d} | Loss : {loss:.3f} | Child ACC : {child_acc:.3f}')

    # Reset Keras global state after each child model.
    K.clear_session()


In [None]:
# Plot the controller loss and the child validation accuracy side by side.
fig, ax = plt.subplots(1, 2, figsize=(13, 7))
for axis, key, title in zip(ax, ('loss', 'child_acc'), ('Loss', 'Child ACC')):
    axis.plot(hist[key])
    axis.set_title(title)
plt.show()

## 5. Test

In [None]:
# Sample an architecture from the controller and build its compiled child model.
child_model, inputs = model.decode()

In [None]:
# Print the layer-by-layer structure of the sampled child model.
child_model.summary()