# MNIST画像認識のtorch model ハイパーパラメータチューニング
それでは，実際にMNISTの画像認識を通してハイパーパラメータのチューニングを行いたいと思います．

### 実行環境
- Python : 3.6.8
- PyTorch : 0.4.1
- Optuna : 0.12.0

### チューニングを行うハイパーパラメータ
- 畳み込み層の数（3 ~ 7）
- 各畳み込み層のフィルタ数（16, 32, 48, ..., 128）
- 全結合層のユニット数（100, 200, 300, 400, 500）
- 活性化関数（ReLU, ELU）
- 最適化手法（Adam, MomentumSGD, rmsprop）
- 学習率（adam_lr(1e-5 ~ 1e-1), momentum_sgd_lr(1e-5 ~ 1e-1)）
- weight_decay（1e-10 ~ 1e-3）

## Load Dataset

In [8]:
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
import numpy as np


# Mini-batch size shared by the training and evaluation loaders.
BATCHSIZE = 128

# Preprocessing applied to every image: convert it to a tensor, then
# normalize the single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Training split (downloaded into ./data when missing), reshuffled by the
# loader.
train_set = MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(
    train_set,
    batch_size=BATCHSIZE,
    shuffle=True,
    num_workers=2,
)

# Evaluation split, iterated in a fixed order.
test_set = MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(
    test_set,
    batch_size=BATCHSIZE,
    shuffle=False,
    num_workers=2,
)

# The ten label values 0..9 as a tuple of NumPy uint8 scalars.
classes = tuple(np.arange(10, dtype=np.uint8))

ImportError: attempted relative import with no known parent package

## Define Model

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import optuna
# Remove the default handler from Optuna's logger.
optuna.logging.disable_default_handler()


# Model definition

# Input image height and width, and the kernel size used by every
# convolution layer.
in_height = 28
in_width = 28
kernel = 3


class Net(nn.Module):
    """CNN whose depth, layer widths and activation are chosen per trial.

    Architecture: ``num_layer`` convolution layers (kernel size ``kernel``,
    each followed by the activation), one 2x2 average pooling, a hidden
    fully connected layer with ``mid_units`` units, and a 10-way output
    layer followed by ``log_softmax``.

    Args:
        trial: Optuna trial, forwarded to ``get_activation`` to pick the
            activation function.
        num_layer: Number of convolution layers.
        mid_units: Number of units in the hidden fully connected layer.
        num_filters: Output channel count of each convolution layer;
            indexed from 0 to ``num_layer - 1``.
    """

    def __init__(self, trial, num_layer, mid_units, num_filters):
        super(Net, self).__init__()
        self.activation = get_activation(trial)

        # First convolution layer: takes the single-channel input image.
        # The layers use the shared ``kernel`` constant so that they always
        # agree with the output-size bookkeeping below.
        self.convs = nn.ModuleList(
            [nn.Conv2d(in_channels=1, out_channels=num_filters[0], kernel_size=kernel)]
        )
        self.out_height = in_height - kernel + 1
        self.out_width = in_width - kernel + 1

        # Remaining convolution layers: each one consumes the previous
        # layer's channels and shrinks the feature map by ``kernel - 1``.
        for i in range(1, num_layer):
            self.convs.append(
                nn.Conv2d(
                    in_channels=num_filters[i - 1],
                    out_channels=num_filters[i],
                    kernel_size=kernel,
                )
            )
            self.out_height = self.out_height - kernel + 1
            self.out_width = self.out_width - kernel + 1

        # Pooling layer: halves both spatial dimensions (rounding down).
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.out_height = self.out_height // 2
        self.out_width = self.out_width // 2

        # Fully connected layers on the flattened pooled feature map.
        self.out_feature = self.out_height * self.out_width * num_filters[num_layer - 1]
        self.fc1 = nn.Linear(in_features=self.out_feature, out_features=mid_units)
        self.fc2 = nn.Linear(in_features=mid_units, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        for conv in self.convs:
            x = self.activation(conv(x))
        x = self.pool(x)
        x = x.view(-1, self.out_feature)
        # Apply the activation between the two linear layers. Without it,
        # fc2(fc1(x)) is a single linear map and ``mid_units`` would have no
        # effect on what the network can represent.
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

## Train & Test

In [3]:
def train(model, device, train_loader, optimizer):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Args:
        model: Network to train; it is switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that applies the parameter updates.
    """
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()
        # The loss is NLL, applied directly to the model output.
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()
        
def test(model, device, test_loader):
    """Return the error rate (1 - accuracy) of ``model`` on ``test_loader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        device: Device each batch is moved to before the forward pass.
        test_loader: Iterable yielding ``(data, target)`` batches; its
            ``dataset`` attribute supplies the total sample count.

    Returns:
        Fraction of samples whose predicted class differs from the target.
    """
    model.eval()
    num_correct = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            scores = model(inputs)
            # max over dim 1 yields (values, indices); the indices are the
            # predicted classes.
            _, predicted = scores.max(1, keepdim=True)
            hits = predicted.eq(labels.view_as(predicted))
            num_correct += hits.sum().item()
    num_samples = len(test_loader.dataset)
    return 1 - num_correct / num_samples

## Tune Optimizer

In [4]:
import torch.optim as optim

def get_optimizer(trial, model):
    """Build an optimizer for ``model`` from hyperparameters sampled by ``trial``.

    Sampled parameters:
        - ``optimizer``: one of 'Adam', 'MomentumSGD', 'rmsprop'.
        - ``weight_decay``: log-uniform in [1e-10, 1e-3], applied to
          whichever optimizer is chosen.
        - ``adam_lr`` / ``momentum_sgd_lr``: log-uniform in [1e-5, 1e-1],
          sampled only when the matching optimizer is chosen.

    RMSprop keeps its default learning rate.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        model: Module whose parameters the optimizer will update.

    Returns:
        The configured ``torch.optim`` optimizer.
    """
    optimizer_names = ['Adam', 'MomentumSGD', 'rmsprop']
    optimizer_name = trial.suggest_categorical('optimizer', optimizer_names)

    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)

    if optimizer_name == optimizer_names[0]:
        adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
        return optim.Adam(model.parameters(), lr=adam_lr, weight_decay=weight_decay)

    if optimizer_name == optimizer_names[1]:
        momentum_sgd_lr = trial.suggest_loguniform('momentum_sgd_lr', 1e-5, 1e-1)
        return optim.SGD(
            model.parameters(),
            lr=momentum_sgd_lr,
            momentum=0.9,
            weight_decay=weight_decay,
        )

    # Pass the sampled weight_decay here too; otherwise the value recorded
    # for the trial would have no effect whenever RMSprop is selected.
    return optim.RMSprop(model.parameters(), weight_decay=weight_decay)

## Tune Activation Function

In [5]:
def get_activation(trial):
    """Return the activation function selected by ``trial``.

    Samples the categorical parameter 'activation' from 'ReLU' and 'ELU'.

    Args:
        trial: Optuna trial used to sample the choice.

    Returns:
        ``F.relu`` when 'ReLU' is chosen, otherwise ``F.elu``.
    """
    activation_names = ['ReLU', 'ELU']
    chosen = trial.suggest_categorical('activation', activation_names)

    # Any value other than 'ReLU' falls back to ELU.
    dispatch = {activation_names[0]: F.relu}
    return dispatch.get(chosen, F.elu)

## Set the Objective Function

In [6]:
# Number of training epochs run for every trial.
EPOCH = 10


def objective(trial):
    """Optuna objective: train a sampled CNN and return its error rate.

    Samples the architecture (number of convolution layers, hidden unit
    count, filters per layer), builds the model and its optimizer, trains
    for ``EPOCH`` epochs on ``train_loader`` and evaluates on
    ``test_loader``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Error rate of the trained model, as computed by ``test``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Number of convolution layers
    num_layer = trial.suggest_int('num_layer', 3, 7)

    # Number of units in the hidden fully connected layer
    mid_units = int(trial.suggest_discrete_uniform("mid_units", 100, 500, 100))

    # Number of filters in each convolution layer
    num_filters = [
        int(trial.suggest_discrete_uniform("num_filter_" + str(i), 16, 128, 16))
        for i in range(num_layer)
    ]

    model = Net(trial, num_layer, mid_units, num_filters).to(device)
    optimizer = get_optimizer(trial, model)

    for _ in range(EPOCH):
        train(model, device, train_loader, optimizer)

    # Only the final error rate is returned, so evaluate once after training
    # instead of after every epoch. This also keeps the return value defined
    # when EPOCH is 0.
    # NOTE(review): the score comes from test_loader, so hyperparameters are
    # selected against the test split; consider a separate validation split.
    return test(model, device, test_loader)

## Execution

In [7]:
# Number of trials (sampled hyperparameter configurations) to run.
TRIAL_SIZE = 100
# Create a study with Optuna's default settings and run `objective` for
# TRIAL_SIZE trials.
study = optuna.create_study()
study.optimize(objective, n_trials=TRIAL_SIZE)

  mid_units = int(trial.suggest_discrete_uniform("mid_units", 100, 500, 100))
  num_filters = [int(trial.suggest_discrete_uniform("num_filter_"+str(i), 16, 128, 16)) for i in range(num_layer)]
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
  adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
Trial 2 failed with parameters: {'num_layer': 7, 'mid_units': 400.0, 'num_filter_0': 64.0, 'num_filter_1': 32.0, 'num_filter_2': 48.0, 'num_filter_3': 48.0, 'num_filter_4': 16.0, 'num_filter_5': 64.0, 'num_filter_6': 32.0, 'activation': 'ELU', 'optimizer': 'Adam', 'weight_decay': 0.00034956728772355734, 'adam_lr': 4.4435816679583346e-05} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_16464/3192475094.py", line 18, in objective
    train(model, device, train_loader, optimize

KeyboardInterrupt: 

In [None]:
# Show the hyperparameters of the study's best trial.
study.best_params

In [None]:
# Print the objective value of the study's best trial (the value returned by
# `objective`, i.e. an error rate).
print(study.best_value)


# チューニングを行わなかった場合との比較
チューニングを行わなかった場合のハイパーパラメータの値は以下のように指定．

- 畳み込み層の数 : 3
- 各畳み込み層のフィルタ数 : 16, 32, 48
- 全結合層のユニット数 : 100
- 活性化関数 : ReLU
- 最適化手法 : Adam
- 学習率 : 0.001
- weight_decay : 0


|