In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Fraction of the samples that goes into the training split (the rest is the test split).
train_ratio = 0.8
# Standard deviation of the zero-mean normal distribution used to initialise weights.
rand_std = 0.030
# Step size applied to every gradient when the parameters are updated.
learning_rate = 0.001
# Number of passes over the training split.
epoch_count = 100
# Progress is printed once every `report_period` epochs.
report_period = 10
# When True, np.random is seeded with fixed values so runs are repeatable.
random_fix = True

In [3]:
# Load the iris dataset through scikit-learn's dataset helpers.
iris_dataset = datasets.load_iris()

# Unpack the attributes of the loaded dataset object into notebook-level names.
data = iris_dataset.data
target = iris_dataset.target
target_names = iris_dataset.target_names

# Optional inspection helpers; uncomment to print shapes and a few rows.
#print("dimension: data{}, target{}, target_names{}".
#    format(data.shape, target.shape, target_names.shape))
#print(target_names)
#print(data[:5])
#print(target[-5:])

In [4]:
# Seed before shuffling so the train/test partition is repeatable.
if random_fix:
    np.random.seed(1234)

data_count = len(data)
train_count = int(data_count * train_ratio)
test_count = data_count - train_count

# One shuffled row order: the first train_count rows train, the remainder test.
indices = np.arange(data_count)
np.random.shuffle(indices)

train_data, test_data = data[indices[:train_count]], data[indices[train_count:]]
train_target, test_target = target[indices[:train_count]], target[indices[train_count:]]

In [5]:
# Network input width and output width.
input_dim = 3
output_dim = 1


def get_test_data():
    """Return the test split as an ``(inputs, targets)`` pair.

    The first three columns of ``test_data`` are the inputs; the fourth
    column is kept two-dimensional (one column) and is the regression target.
    """
    return test_data[:, :3], test_data[:, 3:4]

# Shuffled row order for the current epoch. Kept in its own private name so the
# `indices` array that produced the train/test split is never overwritten.
_train_order = None


def get_train_data(batch_size, nth):
    """Return mini-batch number ``nth`` of the training split.

    A fresh random row order is drawn whenever ``nth`` is 0 (the start of an
    epoch), or when no order has been drawn yet, so calling with a non-zero
    ``nth`` first can no longer slice with an unrelated index array.

    Args:
        batch_size: number of rows per batch.
        nth: zero-based batch number within the current epoch.

    Returns:
        ``(train_X, train_Y)``: columns 0-2 of the selected rows as inputs and
        column 3 (kept as a single-column 2-D array) as targets. Slices that
        run past the end of the split are simply shorter, possibly empty.
    """
    global _train_order

    if nth == 0 or _train_order is None:
        _train_order = np.arange(train_count)
        np.random.shuffle(_train_order)

    from_idx = nth * batch_size
    to_idx = (nth + 1) * batch_size
    batch_rows = _train_order[from_idx:to_idx]

    train_X = train_data[batch_rows, 0:3]
    train_Y = train_data[batch_rows, 3:4]

    return train_X, train_Y

In [6]:
def init_rand_normal(in_dim, out_dim):
    """Draw an ``(in_dim, out_dim)`` float32 matrix from N(0, rand_std)."""
    shape = (in_dim, out_dim)
    return np.random.normal(loc=0, scale=rand_std, size=shape).astype(np.float32)

In [7]:
# Number of units in the hidden layer.
hidden_dim = 10


def init_parameter():
    """(Re)create the network parameters as module-level globals.

    Weights are small random normal values and biases start at zero. When
    ``random_fix`` is set, the generator is seeded first so every call yields
    the same starting weights.
    """
    global w_hid, b_hid, w_out, b_out

    if random_fix:
        np.random.seed(9876)

    # Draw order is part of the reproducible result: hidden weights first,
    # then output weights.
    w_hid, b_hid = init_rand_normal(input_dim, hidden_dim), np.zeros(hidden_dim)
    w_out, b_out = init_rand_normal(hidden_dim, output_dim), np.zeros(output_dim)

In [8]:
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    floor = 0
    return np.maximum(x, floor)

def relu_derv(y):
    """Derivative of ReLU, expressed through the activation value ``y``.

    Returns 1 where ``y`` is strictly positive and 0 everywhere else, in the
    dtype of ``y``. Unlike ``np.sign``, negative entries map to 0 rather than
    -1, so the result is a valid gradient mask even if a caller passes values
    that have not been through ``relu``.
    """
    y = np.asarray(y)
    return (y > 0).astype(y.dtype)

In [9]:
def proc_forward(x):
    """Run the two-layer network on ``x`` and return its output.

    The hidden activations are stored in the module-level ``hidden`` so that
    ``proc_backward`` can reuse them for the matching backward pass.
    """
    global hidden

    hidden_affine = x @ w_hid + b_hid
    hidden = relu(hidden_affine)

    return hidden @ w_out + b_out

def proc_backward(x, grad):
    """Back-propagate ``grad`` and take one gradient-descent step.

    Args:
        x: the inputs that were given to the preceding ``proc_forward`` call.
        grad: gradient of the loss with respect to the network output.

    Reads the module-level ``hidden`` left behind by ``proc_forward`` and
    rebinds ``w_hid``, ``b_hid``, ``w_out`` and ``b_out`` to their updated
    values. Every gradient is computed before any parameter changes.
    """
    global w_hid, b_hid, w_out, b_out
    global hidden

    # Output layer: output = hidden @ w_out + b_out
    grad_w_out = hidden.T @ grad
    grad_b_out = np.sum(grad, axis=0)

    # Back through the output weights, then through the ReLU.
    grad_hidden = grad @ w_out.T
    grad_hidden_affine = relu_derv(hidden) * grad_hidden

    # Hidden layer: hidden_affine = x @ w_hid + b_hid
    grad_w_hid = x.T @ grad_hidden_affine
    grad_b_hid = np.sum(grad_hidden_affine, axis=0)

    # Plain gradient descent on all four parameter arrays.
    w_hid = w_hid - learning_rate * grad_w_hid
    b_hid = b_hid - learning_rate * grad_b_hid
    w_out = w_out - learning_rate * grad_w_out
    b_out = b_out - learning_rate * grad_b_out

In [10]:
def eval_accuracy(output, y):
    """Score predictions as ``1 - mean(|output - y| / |y|)``.

    The error is taken relative to the magnitude of the target, so negative
    targets no longer turn the error negative and push the score above 1.
    For strictly positive targets the result is the same as dividing by ``y``.
    The score can still be negative when the mean relative error exceeds 1,
    and targets equal to zero produce inf/nan.
    """
    relative_error = np.abs(output - y) / np.abs(y)
    return 1 - np.mean(relative_error)

def test(x, y):
    """Run a forward pass on ``x`` and score the result against ``y``."""
    return eval_accuracy(proc_forward(x), y)

def train_step(x, y):
    """Do one forward/backward pass on a batch and update the parameters.

    The loss is the mean squared error between the network output and ``y``.

    Returns:
        ``(loss, accuracy)``, both computed from the output produced before
        the parameter update.
    """
    prediction = proc_forward(x)
    residual = prediction - y
    loss = np.mean(np.power(residual, 2))

    # Chain rule through loss = mean(residual ** 2), one factor at a time.
    upstream = 1.0
    mean_factor = np.ones_like(y) / np.prod(y.shape)   # d mean / d each squared term
    grad_square = mean_factor * upstream
    grad_residual = (2 * residual) * grad_square        # d residual**2 / d residual
    grad_output = 1 * grad_residual                     # d (output - y) / d output

    proc_backward(x, grad_output)

    return loss, eval_accuracy(prediction, y)

In [11]:
def train_and_test(batch_size=0):
    """Train the network from freshly initialised parameters and report scores.

    Every ``report_period`` epochs this prints the mean training cost, the
    mean training accuracy and the test accuracy. The final test accuracy is
    printed once training ends.

    Args:
        batch_size: rows per gradient step. ``0`` (the default) uses the whole
            training split as a single batch. Rows left over after the last
            whole batch are skipped in that epoch.

    Raises:
        ValueError: if ``batch_size`` is negative or larger than the training
            split. Such values used to give zero batches per epoch, so nothing
            was trained and the report printed ``nan``.
    """
    if batch_size == 0:
        batch_size = train_count
    if not 1 <= batch_size <= train_count:
        raise ValueError(
            "batch_size must be between 1 and {} (or 0 for a full batch), got {}"
            .format(train_count, batch_size))

    batch_count = train_count // batch_size
    test_X, test_Y = get_test_data()

    init_parameter()

    # Seed again after initialisation so the batch shuffling is repeatable too.
    if random_fix:
        np.random.seed(1945)

    for epoch in range(epoch_count):
        costs = []
        accs = []
        for n in range(batch_count):
            train_X, train_Y = get_train_data(batch_size, n)
            cost, acc = train_step(train_X, train_Y)
            costs.append(cost)
            accs.append(acc)

        if (epoch + 1) % report_period == 0:
            acc = test(test_X, test_Y)
            print("Epoch {}: cost={:5.3f}, accuracy={:5.3f}/{:5.3f}".format(
                epoch + 1, np.mean(costs), np.mean(accs), acc))

    final_acc = test(test_X, test_Y)
    print("\nFinal Test: final accuracy = {:5.3f}".format(final_acc))

In [12]:
# Mini-batch run: 10 training rows per gradient step.
train_and_test(10)

Epoch 10: cost=0.548, accuracy=0.070/-0.261
Epoch 20: cost=0.265, accuracy=-0.221/-0.648
Epoch 30: cost=0.229, accuracy=-0.130/-0.556
Epoch 40: cost=0.196, accuracy=-0.040/-0.436
Epoch 50: cost=0.165, accuracy=0.074/-0.317
Epoch 60: cost=0.137, accuracy=0.179/-0.184
Epoch 70: cost=0.112, accuracy=0.277/-0.049
Epoch 80: cost=0.092, accuracy=0.388/0.071
Epoch 90: cost=0.077, accuracy=0.468/0.194
Epoch 100: cost=0.066, accuracy=0.555/0.294

Final Test: final accuracy = 0.294


In [13]:
# Full-batch run: the default batch_size=0 uses the whole training split as one batch.
train_and_test()

Epoch 10: cost=1.866, accuracy=0.065/0.088
Epoch 20: cost=1.779, accuracy=0.130/0.170
Epoch 30: cost=1.684, accuracy=0.201/0.259
Epoch 40: cost=1.580, accuracy=0.265/0.283
Epoch 50: cost=1.462, accuracy=0.335/0.310
Epoch 60: cost=1.329, accuracy=0.359/0.294
Epoch 70: cost=1.183, accuracy=0.341/0.237
Epoch 80: cost=1.028, accuracy=0.305/0.158
Epoch 90: cost=0.871, accuracy=0.249/0.066
Epoch 100: cost=0.723, accuracy=0.181/-0.042

Final Test: final accuracy = -0.042


In [14]:
# Per-sample run: batch size 1, so every training row triggers its own update.
train_and_test(1)

Epoch 10: cost=0.074, accuracy=0.521/0.291
Epoch 20: cost=0.049, accuracy=0.783/0.668
Epoch 30: cost=0.048, accuracy=0.795/0.704
Epoch 40: cost=0.046, accuracy=0.797/0.712
Epoch 50: cost=0.045, accuracy=0.802/0.692
Epoch 60: cost=0.044, accuracy=0.808/0.692
Epoch 70: cost=0.044, accuracy=0.799/0.702
Epoch 80: cost=0.043, accuracy=0.809/0.673
Epoch 90: cost=0.043, accuracy=0.804/0.707
Epoch 100: cost=0.042, accuracy=0.799/0.702

Final Test: final accuracy = 0.702
