In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load the data set.
# NOTE(review): pickle.load can execute arbitrary code; only load this file
# if it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open("notepc_data.pkl", "br") as f:
    notepc_data = pickle.load(f)

# Columns
# 0: price
# 1: num_of_cores
# 2: clock
# 3: memory
# 4: disk
# 5: weight
# 6: months

In [None]:
# Pull out the two columns actually used below: price (column 0) and
# clock (column 2), each as a float32 array.
price, clock = (
    np.array([record[col] for record in notepc_data], dtype=np.float32)
    for col in (0, 2)
)

In [None]:
# Visualize the raw data: clock on the x-axis, price on the y-axis.
plt.scatter(clock, price)
plt.xlabel("clock")
# Trailing semicolon keeps the Text(...) repr out of the cell output.
plt.ylabel("price");

In [None]:
# Helper for plotting a model on top of the data.
# x positions at which the model line is evaluated (101 points in [1.0, 3.2]).
x_val = np.linspace(1., 3.2, 101)

def plot(w, b):
    """Draw the line y = w * x + b together with the clock/price scatter.

    Args:
        w: slope of the model line.
        b: intercept of the model line.

    The line is drawn in orange over ``x_val``; the y-axis is limited to the
    range of ``price`` padded by 15000 on each side. Draws on the current
    pyplot axes and returns nothing.
    """
    model_line = w * x_val + b
    plt.plot(x_val, model_line, color="orange")
    plt.scatter(clock, price)
    plt.xlabel("clock")
    plt.ylabel("price")

    # Fixed y-range so that plots of different (w, b) are comparable.
    y_low = price.min() - 15000
    y_high = price.max() + 15000
    plt.ylim([y_low, y_high])

In [None]:
# Helper for printing intermediate progress

fmt = "iter: {:>3d}, cost: {:>11.0f}, w: {:>7.0f}, b: {:>6.0f}"
def report(i, cost, w, b):
    """Print one progress line for iteration ``i``.

    Args:
        i: iteration index (formatted as an integer).
        cost: current cost value.
        w: current slope parameter.
        b: current intercept parameter.

    ``cost``, ``w`` and ``b`` are converted to plain Python floats before
    formatting, so the output does not depend on the numeric type (int,
    NumPy scalar, ...) that the caller happens to pass.
    """
    cf = float(cost)
    wf = float(w)
    bf = float(b)
    # Format the converted values (the originals were formatted by mistake,
    # leaving the conversions above unused).
    print(fmt.format(i, cf, wf, bf))

In [None]:
# Set the initial parameter values and visualize the corresponding model.
w_cur, b_cur = -100000, 200000

plot(w=w_cur, b=b_cur)

In [None]:
# Number of records, kept as a float because it is used as a divisor
# in the cost and derivative functions.
n = 1.0 * price.size
n

In [None]:
# Cost function (1): explicit Python loop version

def calc_cost1(w, b):
    """Return the mean of 0.5 * (prediction - target)**2 over all records.

    Args:
        w: slope of the model ``y = w * x + b``.
        b: intercept of the model.

    Reads the module-level ``clock`` (inputs), ``price`` (targets) and
    ``n`` (divisor).
    """
    total = 0.
    for x, t in zip(clock, price):
        residual = (w * x + b) - t
        total += 0.5 * (residual ** 2.)

    return total / n

In [None]:
# Derivative of the cost with respect to w (1): explicit loop version
def w_deriv1(w, b):
    """Return the mean of (prediction - target) * x over all records.

    Args:
        w: slope of the model ``y = w * x + b``.
        b: intercept of the model.

    Reads the module-level ``clock``, ``price`` and ``n``.
    """
    grad = 0.
    for x, t in zip(clock, price):
        residual = (w * x + b) - t
        grad += residual * x

    return grad / n

# Derivative of the cost with respect to b (1): explicit loop version
def b_deriv1(w, b):
    """Return the mean of (prediction - target) over all records.

    Args:
        w: slope of the model ``y = w * x + b``.
        b: intercept of the model.

    Reads the module-level ``clock``, ``price`` and ``n``.
    """
    grad = 0.
    for x, t in zip(clock, price):
        grad += (w * x + b) - t

    return grad / n

In [None]:
# Cost function (2): vectorized NumPy version

def calc_cost2(w, b):
    """Return 0.5 * sum((prediction - target)**2) / n, computed on arrays.

    Args:
        w: slope of the model ``y = w * clock + b``.
        b: intercept of the model.

    Reads the module-level ``clock``, ``price`` and ``n``.
    """
    residual = (w * clock + b) - price
    half_sq_sum = 0.5 * np.sum(residual ** 2.)
    return half_sq_sum / n

In [None]:
# Derivative of the cost with respect to w (2): vectorized version
def w_deriv2(w, b):
    """Return sum((prediction - target) * clock) / n, computed on arrays.

    Args:
        w: slope of the model ``y = w * clock + b``.
        b: intercept of the model.

    Reads the module-level ``clock``, ``price`` and ``n``.
    """
    residual = (w * clock + b) - price
    return np.sum(residual * clock) / n

# Derivative of the cost with respect to b (2): vectorized version
def b_deriv2(w, b):
    """Return sum(prediction - target) / n, computed on arrays.

    Args:
        w: slope of the model ``y = w * clock + b``.
        b: intercept of the model.

    Reads the module-level ``clock``, ``price`` and ``n``.
    """
    residual = (w * clock + b) - price
    return np.sum(residual) / n

In [None]:
# Run the actual gradient descent.
w_cur, b_cur = -100000, 200000

alpha = 0.1      # step size applied to each derivative
history = []     # one (cost, w, b) tuple per iteration, recorded before the update

for i in range(1000):
    # Evaluate and record the cost at the current parameters.
    cost = calc_cost1(w_cur, b_cur)
    history.append((cost, w_cur, b_cur))

    # Progress monitoring.
    report(i, cost, w_cur, b_cur)

    # Parameter update: both derivatives are evaluated at the current
    # (w_cur, b_cur) before either parameter is overwritten.
    w_new = w_cur - alpha * w_deriv1(w_cur, b_cur)
    b_new = b_cur - alpha * b_deriv1(w_cur, b_cur)
    w_cur, b_cur = w_new, b_new

In [None]:
# Visualize the model obtained after the gradient descent loop.
plot(w=w_cur, b=b_cur)

In [None]:
# Learning curve: the cost recorded in `history` at each iteration.
plt.plot([cost for cost, _w, _b in history])
plt.xlabel("iterations")
# Trailing semicolon keeps the Text(...) repr out of the cell output.
plt.ylabel("cost");

In [None]:
# Visualizing the intermediate states of the optimization

In [None]:
# Set-up needed for the contour display below.
# A grid_n x grid_n grid over (w, b) on which the cost surface is evaluated.
grid_n = 201
w_ls = np.linspace(-150000, 150000, num=grid_n)
b_ls = np.linspace(-300000, 300000, num=grid_n)

w_grid, b_grid = np.meshgrid(w_ls, b_ls)
# calc_cost2 takes scalar (w, b); np.vectorize applies it to every grid point.
cost_v_func = np.vectorize(calc_cost2)
y_grid = cost_v_func(w_grid, b_grid)

# Contour levels: 20 values starting at 1300000000, where the gap to the
# next level starts at 50000000 and doubles each time.
levels = []
step = 50000000
v = 1300000000
for i in range(20):
    levels.append(v)
    v, step = v + step, step * 2

In [None]:
def visualize_state(h):
    """Show one recorded optimization state as a two-panel figure.

    Args:
        h: a 3-tuple as stored in ``history``, i.e. ``(cost, w, b)``.

    Left panel: contour lines of the cost surface (``w_grid``, ``b_grid``,
    ``y_grid``, ``levels``) with the position (w, b) marked by a red "x".
    Right panel: the model line for (w, b) over the data, via ``plot``.
    """
    # The first element is the cost (not an iteration number); it is not
    # needed for drawing.
    _cost, w, b = h
    plt.figure(figsize=(12, 5))

    # Left: where (w, b) sits on the cost surface.
    plt.subplot(1, 2, 1)
    plt.contour(w_grid, b_grid, y_grid, levels=levels, colors=["lightgray"])
    plt.scatter([w], [b], marker="x", color="r", s=48)
    plt.xlabel("w")
    plt.ylabel("b")

    # Right: the model this (w, b) corresponds to.
    plt.subplot(1, 2, 2)
    plot(w, b)

In [None]:
# Replay selected iterations of the descent, one figure per snapshot.
# The last index (999) is the final entry recorded by the 1000-iteration loop.
for snapshot_idx in (0, 1, 3, 10, 20, 50, 100, 200, 500, 999):
    visualize_state(history[snapshot_idx])
    # Label the figure so that each snapshot can be identified on its own.
    plt.suptitle("iteration {}".format(snapshot_idx))
    plt.show()