# 使用するライブラリ

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
import tracemalloc

In [2]:
from common.optimizer import SGD,CGOptimizer
from two_layer_net import TwoLayerNet
from dataset.mnist import load_mnist

# 評価ログ用変数

In [3]:
# Empty containers for the per-optimizer loss and timing logs.
loss_list_SGD, time_list_SGD = [], []
loss_list_CG, time_list_CG = [], []

# 計算時間・メモリ計測付き学習関数

In [4]:
def train_with_measurement(
    network,
    optimizer,
    x_train,
    t_train,
    iters_num=1000,
    batch_size=100
):
    """Run mini-batch training while measuring wall time and peak traced memory.

    Every iteration draws ``batch_size`` indices with ``np.random.choice``,
    computes gradients via ``network.gradient``, applies them through
    ``optimizer.update(network.params, grads)`` and then records
    ``network.loss`` on the same batch (i.e. the loss *after* the update).
    The loss evaluation is inside the timed region.

    Args:
        network: object providing ``params``, ``gradient(x, t)`` and ``loss(x, t)``.
        optimizer: object providing ``update(params, grads)``.
        x_train: training inputs; samples are selected along axis 0.
        t_train: training targets, indexed with the same sample indices.
        iters_num: number of update iterations to run.
        batch_size: number of indices drawn per iteration.

    Returns:
        ``(loss_list, total_time, peak)`` where ``loss_list`` holds one loss per
        iteration, ``total_time`` is the elapsed ``time.perf_counter`` seconds
        and ``peak`` is the peak size in bytes reported by ``tracemalloc``.

    Raises:
        Whatever ``network`` / ``optimizer`` raise; memory tracing is stopped
        before the exception propagates.
    """
    train_size = x_train.shape[0]
    loss_list = []

    # ===== start memory measurement =====
    tracemalloc.start()
    try:
        # ===== start time measurement =====
        start_time = time.perf_counter()

        for _ in range(iters_num):
            batch_mask = np.random.choice(train_size, batch_size)
            x_batch = x_train[batch_mask]
            t_batch = t_train[batch_mask]

            grads = network.gradient(x_batch, t_batch)
            optimizer.update(network.params, grads)

            loss_list.append(network.loss(x_batch, t_batch))

        # ===== end of measurement =====
        total_time = time.perf_counter() - start_time
        _, peak = tracemalloc.get_traced_memory()
    finally:
        # Always stop tracing: a failed run must not leave tracemalloc active,
        # which would slow down and skew the measurements of the next run.
        tracemalloc.stop()

    return loss_list, total_time, peak

# 各optimizer実行

## MNISTデータ読み込み

In [5]:
# Train/test split of MNIST; the loader is asked for normalized inputs and
# one-hot encoded labels.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

## 初期パラメータ保存

In [6]:
# Reference network: its freshly initialised parameters are the common
# starting point handed to each optimizer run below.
base_network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Snapshot made of per-parameter copies, keyed like `base_network.params`.
init_params = dict(
    (name, value.copy()) for name, value in base_network.params.items()
)


## SGD

In [7]:
network_SGD = TwoLayerNet(784, 50, 10)

# Write the shared initial values *into* the network's existing arrays instead
# of rebinding `params` to a new dict. If TwoLayerNet's internals keep
# references to the arrays created in __init__ (not visible from this
# notebook — TODO confirm), a rebound dict would make the optimizer update
# arrays that the forward/backward pass never reads.
for key, init_value in init_params.items():
    np.copyto(network_SGD.params[key], init_value)

optimizer_SGD = SGD(lr=0.01)

# 10000 iterations with mini-batches of 100 samples.
loss_list_SGD, time_SGD, peak_SGD = train_with_measurement(
    network_SGD,
    optimizer_SGD,
    x_train,
    t_train,
    iters_num=10000,
    batch_size=100
)

## CG法

### MNISTをSPD行列に変換

In [8]:
def mnist_to_spd_lowrank(x, d=50, eps=1e-3):
    """
    Build a (d, d) matrix ``outer(v, v) + eps * I`` from the first d entries of x.

    ``outer(v, v)`` is a rank-one positive semi-definite matrix; adding
    ``eps`` on the diagonal makes the result symmetric positive definite
    for ``eps > 0``.

    x: (784,) in this notebook; any array-like whose first-axis slice ``[:d]``
       holds exactly d values
    d: reduced dimension
    eps: value added to every diagonal entry
    returns: (d, d) array. Floating (and complex) inputs keep their dtype;
             integer/bool inputs are promoted to float64.
    raises: ValueError if fewer than d values are available
    """
    x_small = np.asarray(x)[:d]
    if x_small.size != d:
        raise ValueError(
            f"x provides {x_small.size} values but d={d} are required"
        )

    # Integer/bool data (e.g. raw uint8 pixels) must be promoted first:
    # the products would wrap around in uint8, and the in-place float
    # addition below is rejected for integer arrays.
    if not np.issubdtype(x_small.dtype, np.inexact):
        x_small = x_small.astype(np.float64)

    A = np.outer(x_small, x_small)
    A += eps * np.eye(d)
    return A


In [9]:
def build_spd_mnist(x, d=50, eps=1e-3):
    """
    Convert every row of x into an SPD matrix and stack the results.

    x: (N, 784)
    d: reduced dimension forwarded to mnist_to_spd_lowrank
    eps: diagonal shift forwarded to mnist_to_spd_lowrank
    return: (N, d, d) float32 array whose i-th slice is
            mnist_to_spd_lowrank(x[i], d, eps)
    """
    n_samples = x.shape[0]
    # Pre-allocate the whole stack; each slice is filled in below.
    spd_stack = np.zeros((n_samples, d, d), dtype=np.float32)

    idx = 0
    while idx < n_samples:
        spd_stack[idx] = mnist_to_spd_lowrank(x[idx], d, eps)
        idx += 1

    return spd_stack


In [10]:
import pickle

# Second load of MNIST, this time with load_mnist's default options (their
# values live in dataset.mnist and are not shown in this notebook).
# The images get their own names on purpose: rebinding `x_train` / `x_test`
# here would silently replace the arrays the earlier SGD run was trained on,
# so later training cells would read data loaded with different options.
(x_train_spd_src, y_train), (x_test_spd_src, y_test) = load_mnist()

N = 60000   # number of training samples converted to SPD matrices
d = 50      # side length of each SPD matrix

# Result is an (N, d, d) float32 array: 60000 * 50 * 50 * 4 bytes ~= 600 MB.
spd_train = build_spd_mnist(x_train_spd_src[:N], d=d)

spd_dataset = {
    "spd_train": spd_train,
    "label_train": y_train[:N],
    "d": d
}

# Persist the converted dataset next to the notebook.
with open("spd_mnist.pkl", "wb") as f:
    pickle.dump(spd_dataset, f)


### CG法実行

In [13]:
network_CG = TwoLayerNet(784, 50, 10)

# Write the shared initial values *into* the network's existing arrays instead
# of rebinding `params` to a new dict. If TwoLayerNet's internals keep
# references to the arrays created in __init__ (not visible from this
# notebook — TODO confirm), a rebound dict would make the optimizer update
# arrays that the forward/backward pass never reads.
for key, init_value in init_params.items():
    np.copyto(network_CG.params[key], init_value)

# NOTE(review): the recorded run of this cell failed with
#   ValueError: matmul ... (size 39200 is different from 50)
# 39200 = 784 * 50, the element count of a 784 x 50 weight matrix, while each
# matrix in `spd_train` is d x d = 50 x 50. Presumably CGOptimizer multiplies
# an SPD matrix with a flattened parameter vector, so the two dimensions have
# to agree — TODO confirm against common.optimizer before re-running.
optimizer_CG = CGOptimizer(spd_train, cg_iters=3)

# Same schedule as the SGD run: 10000 iterations, mini-batches of 100.
loss_list_CG, time_CG, peak_CG = train_with_measurement(
    network_CG,
    optimizer_CG,
    x_train,
    t_train,
    iters_num=10000,
    batch_size=100
)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 39200 is different from 50)

# 評価指標の算出

In [None]:
def _first_iter_below(losses, threshold):
    """Return the index of the first loss strictly below ``threshold``, else None."""
    return next(
        (i for i, value in enumerate(losses) if value < threshold),
        None
    )


# Final loss = loss of the last recorded iteration. A run that logged nothing
# (e.g. its training cell raised) yields NaN instead of an IndexError.
final_loss_SGD = loss_list_SGD[-1] if len(loss_list_SGD) > 0 else float("nan")
final_loss_CG = loss_list_CG[-1] if len(loss_list_CG) > 0 else float("nan")

# Loss threshold used as the convergence criterion.
target_loss = 1.0

# Convergence speed: first iteration whose loss drops below the threshold
# (None when the threshold is never reached).
convergence_iter_SGD = _first_iter_below(loss_list_SGD, target_loss)
convergence_iter_CG = _first_iter_below(loss_list_CG, target_loss)

# 結果

In [None]:
# Summary report: one line per metric and optimizer, emitted with a single
# print call (newline-separated). Peak memory is converted from bytes to MB.
print(
    "===== Evaluation =====",
    f"Final loss (SGD)              : {final_loss_SGD:.4f}",
    f"Final loss (CG)               : {final_loss_CG:.4f}",
    f"Convergence iteration (SGD)   : {convergence_iter_SGD}",
    f"Convergence iteration (CG)    : {convergence_iter_CG}",
    f"Total time [sec] (SGD)        : {time_SGD:.2f}",
    f"Total time [sec] (CG)         : {time_CG:.2f}",
    f"Peak memory [MB] (SGD)        : {peak_SGD / 1024 / 1024:.2f}",
    f"Peak memory [MB] (CG)         : {peak_CG / 1024 / 1024:.2f}",
    sep="\n",
)
