# Optimizer

## スクラッチで実装

In [2]:
from torch import nn
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.nn import functional as F
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [50]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        num_in: Number of input features.
        num_hidden: Number of units in the hidden layer.
        num_out: Number of output units.
    """

    def __init__(self, num_in, num_hidden, num_out):
        # Initialise the parent nn.Module before assigning the layers.
        super().__init__()
        self.l1 = nn.Linear(num_in, num_hidden)   # input -> hidden
        self.l2 = nn.Linear(num_hidden, num_out)  # fully connected output layer

    def forward(self, x):
        """Forward pass: return the output of the last linear layer."""
        hidden = F.relu(self.l1(x))
        return self.l2(hidden)

In [51]:
# Build an MLP with 64 inputs, 30 hidden units and 10 outputs.
model = MLP(num_in=64, num_hidden=30, num_out=10)
model.parameters()  # returns a generator, not a list

<generator object Module.parameters at 0x7ffefe90cd60>

In [52]:
class Optimizer:
    """Minimal hand-written SGD optimizer: ``param <- param - lr * param.grad``.

    Args:
        parameters: Iterable of tensors to update, e.g. ``model.parameters()``.
            It is copied into a list so that a generator can be iterated on
            every call to ``step`` / ``zero_grad``.
        lr: Learning rate (step size).
    """

    def __init__(self, parameters, lr=0.03):
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        """Update every parameter in place using its current gradient.

        Parameters whose ``grad`` is ``None`` (no backward pass has reached
        them yet) are skipped instead of raising a ``TypeError``.
        """
        # no_grad: the update itself must not be recorded by autograd.
        with torch.no_grad():
            for param in self.parameters:
                if param.grad is None:
                    continue
                param -= self.lr * param.grad

    def zero_grad(self):
        """Reset the accumulated gradients of all parameters to zero in place."""
        for param in self.parameters:
            # grad stays None until the first backward(); nothing to reset then.
            if param.grad is not None:
                param.grad.zero_()
                



In [53]:
class Optimizer:
    """Plain gradient-descent optimizer written from scratch.

    Args:
        parameters: Iterable of tensors to optimise (``model.parameters()``
            can be passed directly; it is materialised into a list).
        lr: Learning rate multiplied with each gradient.
    """

    def __init__(self, parameters, lr=0.03):
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        """Apply ``param -= lr * param.grad`` in place to each parameter.

        A parameter without a gradient (``grad is None``) is left untouched
        rather than causing a ``TypeError``.
        """
        # The update is done under no_grad so autograd does not track it.
        with torch.no_grad():
            for param in self.parameters:
                if param.grad is None:
                    continue
                param -= self.lr * param.grad

    def zero_grad(self):
        """Zero the gradients in place so the next backward() starts fresh."""
        for param in self.parameters:
            if param.grad is not None:
                param.grad.zero_()

# # Usage
# opt = Optimizer(model.parameters(), lr)  # Pass model.parameters() here


In [54]:
# Fresh MLP with 64 inputs, 30 hidden units and 10 outputs.
model = MLP(num_in=64, num_hidden=30, num_out=10)
model.parameters()  # returns a generator, not a list

<generator object Module.parameters at 0x7ffefe856d60>

In [55]:
# Step size handed to the hand-written optimizer.
learning_rate = 0.01
opt = Optimizer(model.parameters(), lr=learning_rate)

In [56]:
# nnクラスとF.関数を組み合わせて作成する
# # MLP(親のクラス(継承する))
# class MLP(nn.Module):
    
#     def __init__(self, num_in, num_hidden, num_out):
#         # 親のclassの継承
#         super().__init__()
#         self.l1 = nn.Linear(num_in, num_hidden)
#         self.l2 = nn.Linear(num_hidden, num_out) # 出力層の全結合層

#     # forwardの処理(順伝播の処理)
#     def forward(self, x):
#         x = self.l2(F.relu(self.l1(x)))
#         return x

# class Optimizer():

#     def __init__(self, parameters, lr=0.03):
#         self.parameters = list(parameters)
#         self.lr = lr

#     def step(self):
#         with torch.no_grad():
#             for param in self.parameters():
#                 param -= self.lr * param.grad

#     def zero_grad(self):
#         for param in self.parameters:
#             if param.grad is not None:  # if文がないとgradが作られていないのに呼ばれてしまう
#                 param.grad_zero_()

## Training loop after refactoring (refactored after introducing Optimizer, Dataset and DataLoader)
# === Data preparation ===
dataset = datasets.load_digits()
data = dataset['data']
target = dataset['target']
images = dataset['images']
# Hold out 20% of the samples for validation; random_state fixes the split.
X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)
# Standardise with a single scalar mean/std computed over all training values;
# the same training statistics are applied to the validation set.
X_mean = X_train.mean()
X_std = X_train.std()

X_train = (X_train - X_mean) / X_std
X_val  = (X_val  - X_mean ) / X_std

# Flatten each image to a 64-element vector and convert to float32 tensors.
X_train = torch.tensor(X_train.reshape(-1, 64), dtype=torch.float32)
X_val = torch.tensor(X_val.reshape(-1, 64), dtype=torch.float32)

# Keep the labels as they are instead of one-hot encoding them
y_train = torch.tensor(y_train)
y_val = torch.tensor(y_val)

batch_size = 30
num_in = 64
num_hidden = 30
num_out = 10
# Round up so that a final, smaller batch is still counted.
num_batches = np.ceil(len(y_train)/batch_size).astype(int)


learning_rate = 0.01

# Model initialisation (left commented out: the `model` and `opt` objects
# created in the earlier cells are the ones used by the training loop below)
# model = MLP(num_in, num_hidden, num_out)
# opt = Optimizer(parameters = model.parameters(), lr=learning_rate)

# Logs, one entry appended per epoch
train_losses = []
val_losses = []
val_accuracies = []

# Mini-batch training for 100 epochs with a validation pass after each epoch.
# Uses the `model` and `opt` objects created in the earlier cells.
for epoch in range(100):
    # Reshuffle the training samples every epoch
    shuffled_indices = np.random.permutation(len(y_train))

    running_loss = 0.0

    for i in range(num_batches):

        # Build the mini-batch (the last one may hold fewer than batch_size samples)
        start = i * batch_size
        end = start + batch_size

        batch_indices = shuffled_indices[start:end]
        y = y_train[batch_indices]  # class-index labels (not one-hot)

        X = X_train[batch_indices, :]  # batch x 64
        # Forward and backward pass
        preds = model(X)
        loss = F.cross_entropy(preds, y)
        loss.backward()
        running_loss += loss.item()

        # Parameter update, delegated to the Optimizer class. This replaces the
        # manual `param -= learning_rate * param.grad` loop under torch.no_grad()
        # followed by model.zero_grad().
        opt.step()
        opt.zero_grad()

    # Validation (no gradients needed)
    with torch.no_grad():
        preds_val = model(X_val)
        val_loss = F.cross_entropy(preds_val, y_val)

        val_accuracy = torch.sum(torch.argmax(preds_val, dim=-1) == y_val) / y_val.shape[0]

    train_losses.append(running_loss/num_batches)
    val_losses.append(val_loss.item())
    # Store a plain Python float, consistent with the two loss logs above.
    val_accuracies.append(val_accuracy.item())
    print(f'epoch : {epoch}: train error : {running_loss/num_batches}, validation error : {val_loss.item()},val accuracy {val_accuracy.item()}')



epoch : 0: train error : 2.3478324760993323, validation error : 2.2816321849823,val accuracy 0.125
epoch : 1: train error : 2.2280027717351913, validation error : 2.1843042373657227,val accuracy 0.2944444417953491
epoch : 2: train error : 2.1168961028258004, validation error : 2.0786049365997314,val accuracy 0.4749999940395355
epoch : 3: train error : 1.9937495614091556, validation error : 1.9558337926864624,val accuracy 0.5166666507720947
epoch : 4: train error : 1.855493202805519, validation error : 1.8156661987304688,val accuracy 0.5972222089767456
epoch : 5: train error : 1.7014529009660084, validation error : 1.656975507736206,val accuracy 0.6222222447395325
epoch : 6: train error : 1.5375009129444759, validation error : 1.4861102104187012,val accuracy 0.6833333373069763
epoch : 7: train error : 1.371579386293888, validation error : 1.3145887851715088,val accuracy 0.7333333492279053
epoch : 8: train error : 1.212833049396674, validation error : 1.1542993783950806,val accuracy 0.80