## Colaboratory用
ランタイムをGPUにする
  - ランタイム -> ランタイムの変更
  - 「ハードウェアアクセラレータ」でGPUを選択

In [0]:
# Install the required packages: CUDA 8.0 libraries via apt, then the CuPy
# wheel from the URL below, then the pinned Chainer version.

!apt -y install libcusparse8.0 libnvrtc8.0 libnvtoolsext1
# Make the unversioned libnvrtc-builtins name point at the 8.0 library
# (-snf replaces an existing link if there is one).
!ln -snf /usr/lib/x86_64-linux-gnu/libnvrtc-builtins.so.8.0 /usr/lib/x86_64-linux-gnu/libnvrtc-builtins.so
!pip install https://github.com/kmaehashi/chainer-colab/releases/download/2018-02-06/cupy_cuda80-4.0.0b3-cp36-cp36m-linux_x86_64.whl
!pip install 'chainer==4.0.0b3'

## Import
お決まりのimport

In [0]:
import sys
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

In [0]:
import random

# CIFAR-10

## データセットのダウンロード

In [0]:
# Fetch CIFAR-10 as two datasets of (image, label) pairs; ndim=3 keeps each
# image as a 3-D (color, y, x) array.
train_and_val, test = datasets.get_cifar10(ndim=3)

データの中身を見てみる

In [0]:
# Human-readable class names; a sample's integer label is used as the index
# into this list.
cifar10_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

In [0]:
import matplotlib.pyplot as plt
% matplotlib inline

sample = train_and_val[0]
image = sample[0]
label = sample[1]

plt.imshow(image.transpose((1, 2, 0)))
print('Label:', cifar10_labels[label])

## 【小課題】 DataAugmentationの実装
CIFAR-10のデータを対象に、

- Horizontal flipping
- Random cropping

を実装する

In [0]:
def horizontal_flipping(img):
    """Mirror an image left-to-right.

    Reference implementation of the exercise (the original stub returned an
    undefined name and raised NameError when called).

    Args:
        img: Array laid out as (color, y, x).

    Returns:
        The image with its x axis reversed. This is a view of ``img``, not a
        copy, so the input array itself is left untouched.
    """
    # The last axis is x (width); reversing it mirrors the image horizontally.
    flipped_img = img[:, :, ::-1]
    return flipped_img

In [0]:
# Preview: flip the first image of the dataset and display it.
img = train_and_val[0][0]
img = horizontal_flipping(img)
# imshow expects (y, x, color), so move the color axis to the end.
plt.imshow(img.transpose((1, 2, 0)))

In [0]:
def random_cropping(img, crop_size):
    """Cut a randomly positioned square window out of an image.

    Reference implementation of the exercise (the original stub returned an
    undefined name and raised NameError when called).

    Args:
        img: Array laid out as (color, y, x).
        crop_size: Side length of the square window, in pixels.

    Returns:
        A (color, crop_size, crop_size) view of ``img``.

    Raises:
        ValueError: If ``crop_size`` is not positive or exceeds the image
            height or width.
    """
    _, height, width = img.shape
    if not 0 < crop_size <= min(height, width):
        raise ValueError(
            'crop_size must be in 1..{}, got {}'.format(
                min(height, width), crop_size))

    # random.randint includes both end points, so the window can touch the
    # bottom/right border but never runs past it.
    top = random.randint(0, height - crop_size)
    left = random.randint(0, width - crop_size)
    cropped_img = img[:, top:top + crop_size, left:left + crop_size]
    return cropped_img

In [0]:
# Preview: take a random 24-pixel crop of the first image and display it.
img = train_and_val[0][0]
img = random_cropping(img, 24)
# imshow expects (y, x, color), so move the color axis to the end.
plt.imshow(img.transpose((1, 2, 0)))

# CNNでImage classification

## モデル定義

例: LeNet5
https://docs.chainer.org/en/stable/tutorial/convnet.html#lenet5

In [0]:
class LeNet5(Chain):
    """LeNet5-style network: three sigmoid convolutions and two linear layers.

    Args:
        in_channels: Number of channels of the input images.
    """

    def __init__(self, in_channels):
        super(LeNet5, self).__init__()
        with self.init_scope():
            self.conv1 = L.Convolution2D(in_channels, 6, ksize=5, stride=1)
            self.conv2 = L.Convolution2D(6, 16, ksize=5, stride=1)
            self.conv3 = L.Convolution2D(16, 120, ksize=4, stride=1)
            # Input size of fc4 is left as None so it is decided on first use.
            self.fc4 = L.Linear(None, 84)
            self.fc5 = L.Linear(84, 10)

    def __call__(self, x):
        """Run the forward pass.

        Returns the raw fc5 output while ``chainer.config.train`` is true and
        the softmax of that output otherwise.
        """
        # Both pooling steps use a 2x2 window with stride 1.
        h = F.max_pooling_2d(F.sigmoid(self.conv1(x)), ksize=2, stride=1)
        h = F.max_pooling_2d(F.sigmoid(self.conv2(h)), ksize=2, stride=1)
        h = F.sigmoid(self.conv3(h))
        h = F.sigmoid(self.fc4(h))
        scores = self.fc5(h)
        return scores if chainer.config.train else F.softmax(scores)

### 【課題】自分でCNNモデルを定義してみる
例：
```
Convolution (out_channels=32, k=5, s=1, p=0)
ReLU
BatchNormalization
MaxPooling (k=2, s=1)
Convolution (out_channels=64, k=3, s=1, p=0)
ReLU
BatchNormalization
Convolution (out_channels=64, k=3, s=1, p=0)
ReLU
BatchNormalization
MaxPooling (k=2, s=1)
FullyConnected (out_channels=512)
ReLU
FullyConnected (out_channels=10)
```

##### 参考: 比較的よく採用される構成
- ReLU + Batch Normalization
  - どちらが先か、など試行錯誤も
- FC層は少なめ。FCはゼロで最後はGlobal Average Poolingにする構成も。

In [0]:
class MyModel(Chain):
    """CNN following the example architecture listed in the exercise text.

    Reference implementation: the original class body contained only
    comments, which is a syntax error, so the notebook could not run.

    Layer sequence:
        Conv(32, k=5) -> ReLU -> BN -> MaxPool(k=2, s=1)
        Conv(64, k=3) -> ReLU -> BN
        Conv(64, k=3) -> ReLU -> BN -> MaxPool(k=2, s=1)
        FC(512) -> ReLU -> FC(n_out)

    Args:
        n_out: Number of output classes (10 for CIFAR-10).
    """

    def __init__(self, n_out=10):
        super(MyModel, self).__init__()
        with self.init_scope():
            # in_channels=None lets the first convolution infer the channel
            # count from the first batch it sees.
            self.conv1 = L.Convolution2D(None, 32, ksize=5, stride=1, pad=0)
            self.bn1 = L.BatchNormalization(32)
            self.conv2 = L.Convolution2D(32, 64, ksize=3, stride=1, pad=0)
            self.bn2 = L.BatchNormalization(64)
            self.conv3 = L.Convolution2D(64, 64, ksize=3, stride=1, pad=0)
            self.bn3 = L.BatchNormalization(64)
            # The flattened feature size depends on the input resolution, so
            # it is inferred on first use as well.
            self.fc4 = L.Linear(None, 512)
            self.fc5 = L.Linear(512, n_out)

    def __call__(self, x):
        """Return unnormalized class scores (logits) for a batch of images."""
        h = self.bn1(F.relu(self.conv1(x)))
        h = F.max_pooling_2d(h, 2, 1)
        h = self.bn2(F.relu(self.conv2(h)))
        h = self.bn3(F.relu(self.conv3(h)))
        h = F.max_pooling_2d(h, 2, 1)
        h = F.relu(self.fc4(h))
        # No softmax here: the scores are returned as-is in both train and
        # test mode.
        return self.fc5(h)

### データセットを取得

In [0]:
# Split train_and_val into a training subset and a validation subset.
from chainer.datasets import split_dataset_random

# n_train samples go to `train`, the remainder to `validation`; the seed is
# fixed so the split is the same on every run.
n_train = 40000
train, validation = split_dataset_random(train_and_val, n_train, seed=42)

### 【課題】 DataAugmentationを行うDatasetを実装する

`chainer.datasets.get_cifar10` からはCIFAR10のデータをwrapしたデータセットオブジェクトが得られます (`chainer.datasets.tuple_dataset.TupleDataset`)。

```python
train_and_val, test = datasets.get_cifar10(ndim=3)
```

この`train_and_val`や`test`は、そのままイテレータオブジェクト(`iterators.SerialIterator`など)に渡すことができます。

```python
test_iter = iterators.SerialIterator(test, batch_size=100, shuffle=False, repeat=False)
```

CIFAR10をそのまま使う場合はこれで良いのですが、
CIFAR10のデータに何らかの前処理を行ったりする場合は、独自にデータセットオブジェクト（データセットクラス）を作ることができます。

`chainer.dataset.DatasetMixin` を継承したクラスを作り、`get_example(self, i)` メソッドを実装します。

例:
```python
class MyDataset(chainer.dataset.DatasetMixin):
    def get_example(self, i):
        image = # get image data
        label = # get label data
        return image, label
```

data augmentation（とmean subtraction）を行う独自データセットクラス`PreprocessedDataset`を実装してみましょう。

In [0]:
# Note the axis order: images from the Chainer dataset are laid out differently
# from what the plotting call below expects.
# plotting: (y, x, color)
# chainer:  (color, y, x)
img, label = train.get_example(0)
print('Shape: ', img.shape)

plt.imshow(img.transpose(1, 2, 0))  # convert (color, y, x) to (y, x, color) for display

In [0]:
class PreprocessedDataset(chainer.dataset.DatasetMixin):
    """Dataset wrapper that crops, optionally augments, and mean-subtracts.

    Args:
        base: Underlying dataset yielding ``(image, label)`` pairs with images
            laid out as (color, y, x).
        mean: Per-channel mean with 3 elements (any shape reshapeable to
            (3, 1, 1)). ``None`` means no mean subtraction.
        crop_size: Side length of the square crop returned for every example.
        random: If true, crop at a random position and flip horizontally with
            probability 0.5. If false, take a deterministic center crop so
            that evaluation images have the same size as training images.
    """

    def __init__(self, base, mean=None, crop_size=30, random=True):
        if mean is None:
            mean = np.zeros((3, 1, 1))

        self.base = base
        self.mean = np.asarray(mean).astype('f').reshape((3, 1, 1))
        self.crop_size = crop_size
        self.random = random

    def __len__(self):
        return len(self.base)

    def get_example(self, i):
        """Return the preprocessed ``(image, label)`` pair at index ``i``.

        Raises:
            ValueError: If ``crop_size`` is not positive or exceeds the image
                height or width.
        """
        image, label = self.base[i]

        crop_size = self.crop_size
        _, height, width = image.shape
        if not 0 < crop_size <= min(height, width):
            raise ValueError(
                'crop_size must be in 1..{}, got {}'.format(
                    min(height, width), crop_size))

        if self.random:
            # 1. Random cropping (randint's upper bound is exclusive).
            top = np.random.randint(0, height - crop_size + 1)
            left = np.random.randint(0, width - crop_size + 1)
        else:
            # Center crop keeps the output size identical to the random case.
            top = (height - crop_size) // 2
            left = (width - crop_size) // 2
        image = image[:, top:top + crop_size, left:left + crop_size]

        # 2. Random horizontal flipping (reverse the x axis half of the time).
        if self.random and np.random.randint(2):
            image = image[:, :, ::-1]

        # Subtract the mean out-of-place: `image` is a view into the base
        # dataset's storage, so an in-place `-=` would modify the stored data
        # and subtract the mean again on every access.
        image = image - self.mean
        return image, label

## 学習

In [0]:
# Wrap both splits in PreprocessedDataset: `train` uses the default
# random=True, `validation` turns the random processing off.
# NOTE: both names are rebound to the wrappers, so re-running this cell wraps
# the already-wrapped datasets again.
train = PreprocessedDataset(train)
validation = PreprocessedDataset(validation, random=False)

In [0]:
# Training batches are shuffled; validation is a single pass (repeat=False)
# in fixed order. Both use batches of 100 examples.
train_iter = iterators.SerialIterator(train, batch_size=100, shuffle=True)
validation_iter = iterators.SerialIterator(validation, batch_size=100, repeat=False, shuffle=False)

In [0]:
# Device ID handed to the updater, the evaluator and concat_examples.
device = 0  # 0 selects GPU 0, -1 selects the CPU

In [0]:
# Wrap the network in a Classifier and attach it to the optimizer.
net = MyModel()
model = L.Classifier(net)

optimizer = optimizers.Adam()
optimizer.setup(model)

# Train for 100 epochs on `device`; trainer output goes to the 'result' directory.
updater = training.StandardUpdater(train_iter, optimizer, device=device)
trainer = training.Trainer(updater, (100, 'epoch'), out='result')

# Evaluate on the validation iterator, keep a log, and print the training and
# validation accuracy.
trainer.extend(extensions.Evaluator(validation_iter, model, device=device))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'main/accuracy', 'validation/main/accuracy']))

trainer.run()

## 評価

In [0]:
import json

# Read the JSON training log from the trainer's output directory
# (presumably written by the LogReport extension -- confirm the file name).
with open('result/log') as f:
    logs = json.load(f)

In [0]:
# Collect the training and validation loss from every log entry, in log order.
loss_train = [log['main/loss'] for log in logs]
loss_validation = [log['validation/main/loss'] for log in logs]

In [0]:
# Plot both loss curves on one figure; the x axis is the log entry index.
plt.plot(loss_train, label='loss_train')
plt.plot(loss_validation, label='loss_validation')
plt.legend()
plt.show()

In [0]:
# Apply the same preprocessing as for validation (random processing off).
# NOTE: `test` is rebound, so re-running this cell wraps it again.
test = PreprocessedDataset(test, random=False)

In [0]:
# Single pass over the test set in fixed order, 100 examples per batch.
test_iter = chainer.iterators.SerialIterator(test, batch_size=100, repeat=False, shuffle=False)

In [0]:
from chainer.dataset import concat_examples

test_accuracies = []

# Start from the beginning even if a previous run of this cell was interrupted.
test_iter.reset()

# Evaluate in test mode: train=False makes layers such as BatchNormalization
# use their inference behaviour, and no_backprop_mode avoids building a
# computational graph that is never used.
with chainer.using_config('train', False), chainer.no_backprop_mode():
    # test_iter was created with repeat=False, so iteration stops after one
    # pass over the test set.
    for test_batch in test_iter:
        x_test, t_test = concat_examples(test_batch, device)

        pred_test = net(x_test)
        accuracy = F.accuracy(pred_test, t_test)
        accuracy.to_cpu()
        test_accuracies.append(accuracy.data)

# Leave the iterator rewound, using the public API instead of writing to its
# internal attributes.
test_iter.reset()

# Mean of the per-batch accuracies.
print('Accuracy: {}'.format(np.mean(test_accuracies)))

## 保存

In [0]:
# Save the parameters of `model` (the Classifier wrapper around `net`) to the
# file 'my.model' in NPZ format.
serializers.save_npz('my.model', model)