In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Hyperparameters and run settings read by the cells below.
# Fraction of the samples assigned to the training split; the rest is the test split.
train_ratio = 0.8
# Standard deviation of the zero-mean normal distribution used to initialise weights.
rand_std = 0.030
# Step size applied to every gradient when weights and biases are updated.
learning_rate = 0.001
# Number of passes over the training split per train_and_test() call.
epoch_count = 100
# A progress line is printed every `report_period` epochs.
report_period = 10
# When True, NumPy's global RNG is reseeded with fixed values so runs are repeatable.
random_fix = True

In [3]:
# Load scikit-learn's bundled iris dataset and expose its pieces under short
# module-level names used by the later cells.
iris_dataset = datasets.load_iris()

data, target, target_names = (
    iris_dataset.data,
    iris_dataset.target,
    iris_dataset.target_names,
)

# Optional sanity check while exploring (left disabled):
#   print(data.shape, target.shape, target_names.shape, target_names)
#   print(data[:5], target[-5:])

In [4]:
# Shuffle the sample indices once and cut them into a training part and a
# test part according to `train_ratio`.
if random_fix:
    np.random.seed(1234)

data_count = data.shape[0]
train_count = int(data_count * train_ratio)
test_count = data_count - train_count

# A shuffled index vector keeps `data` and `target` aligned after the split.
indices = np.arange(data_count)
np.random.shuffle(indices)

# The first `train_count` shuffled rows form the training split, the rest the test split.
train_data, test_data = data[indices[:train_count]], data[indices[train_count:]]
train_target, test_target = target[indices[:train_count]], target[indices[train_count:]]

In [5]:
# Regression setup: three feature columns feed the network and one value is
# predicted (the data helpers slice columns 0:3 as inputs and 3:4 as the target).
input_dim, output_dim = 3, 1

def get_test_data():
    """Return the held-out split as an ``(inputs, targets)`` pair.

    Inputs are columns 0-2 of ``test_data``. Targets are column 3, sliced as
    ``3:4`` so the result keeps two dimensions (one column) rather than
    collapsing to a 1-D vector.
    """
    return test_data[:, 0:3], test_data[:, 3:4]

def get_train_data(batch_size, nth):
    """Return the ``nth`` mini-batch of the current epoch as ``(inputs, targets)``.

    Args:
        batch_size: number of rows per batch.
        nth: zero-based batch number within the epoch. Passing 0 starts a new
            epoch by drawing a fresh random order over the training rows.

    Returns:
        Columns 0-2 of the selected rows as inputs and column 3 (kept
        two-dimensional via the ``3:4`` slice) as targets.

    Side effects:
        Rebinds the module-level ``indices`` whenever ``nth == 0``.
    """
    global indices

    if nth == 0:
        # New epoch: reshuffle the visiting order of the training rows.
        indices = np.arange(train_count)
        np.random.shuffle(indices)

    start = nth * batch_size
    stop = (nth + 1) * batch_size
    batch_rows = indices[start:stop]

    return train_data[batch_rows, 0:3], train_data[batch_rows, 3:4]

In [6]:
def init_rand_normal(in_dim, out_dim):
    """Draw an ``in_dim x out_dim`` weight matrix from N(0, rand_std**2).

    Samples come from NumPy's global RNG and are converted to float32 before
    being returned.
    """
    shape = [in_dim, out_dim]
    return np.random.normal(0, rand_std, shape).astype('float32')

In [7]:
# Width of each hidden layer, listed in order from the input side to the output side.
hidden_dims = [8, 4, 2]

def init_parameter():
    """(Re)create the global ``weights`` and ``biases`` lists for the network.

    One weight matrix and one bias vector are built per layer: first the
    hidden layers described by ``hidden_dims``, then the output layer of width
    ``output_dim``. Weights are random normal (see ``init_rand_normal``) and
    biases start at zero. When ``random_fix`` is set, the global RNG is
    reseeded first so the initial weights are repeatable.
    """
    global weights, biases

    if random_fix:
        np.random.seed(9876)

    weights, biases = [], []
    fan_in = input_dim

    # Hidden layers first, output layer last; each layer's input width is the
    # previous layer's output width.
    for fan_out in [*hidden_dims, output_dim]:
        weights.append(init_rand_normal(fan_in, fan_out))
        biases.append(np.zeros([fan_out]))
        fan_in = fan_out

In [8]:
def relu(x):
    """Element-wise rectified linear unit: negative entries become 0."""
    # np.maximum compares element by element; np.max(x, 0) would instead
    # reduce the array along axis 0, which is not what is wanted here.
    rectified = np.maximum(x, 0)
    return rectified

def relu_derv(y):
    """Slope of ReLU expressed through its *output* ``y``.

    For the non-negative values that ``relu`` produces, the sign is 1 where
    the unit was active and 0 where it was clipped.
    """
    slope = np.sign(y)
    return slope

In [9]:
def proc_forward(x):
    """Run the network forward on ``x`` and return the output-layer values.

    Every hidden layer applies an affine map followed by ReLU; the output
    layer is affine only. The input and each hidden activation are stored in
    the global ``hiddens`` list so that ``proc_backward`` can reuse them.
    """
    global weights, biases
    global hiddens

    # hiddens[0] is the input itself; hiddens[k] is the activation of hidden layer k.
    hiddens = [x]

    for layer in range(len(hidden_dims)):
        affine = np.matmul(hiddens[-1], weights[layer]) + biases[layer]
        hiddens.append(relu(affine))

    # Output layer: no activation function.
    return np.matmul(hiddens[-1], weights[-1]) + biases[-1]

def proc_backward(x, grad):
    """Back-propagate ``grad`` and apply one gradient-descent update.

    Args:
        x: the batch input (not read here; the activations cached in the
            global ``hiddens`` by ``proc_forward`` are used instead).
        grad: gradient of the loss with respect to the network output.

    Side effects:
        Replaces every entry of the global ``weights`` and ``biases`` with
        its value minus ``learning_rate`` times the corresponding gradient.
    """
    global weights, biases
    global hiddens

    # Output layer (affine only): parameter gradients from the last hidden activation.
    out_w_grad = np.matmul(hiddens[-1].transpose(), grad)
    out_b_grad = np.sum(grad, axis=0)

    # Gradient arriving at the last hidden activation.
    upstream = np.matmul(grad, weights[-1].transpose())

    for layer in reversed(range(len(hidden_dims))):
        # Through the ReLU: zero where the unit was clipped.
        pre_act_grad = relu_derv(hiddens[layer + 1]) * upstream

        layer_w_grad = np.matmul(hiddens[layer].transpose(), pre_act_grad)
        layer_b_grad = np.sum(pre_act_grad, axis=0)

        # Propagate with the weights used in the forward pass, i.e. before
        # this layer's update below.
        upstream = np.matmul(pre_act_grad, weights[layer].transpose())

        # Rebind rather than update in place so the result dtype follows the gradient.
        weights[layer] = weights[layer] - learning_rate * layer_w_grad
        biases[layer] = biases[layer] - learning_rate * layer_b_grad

    # The output layer is updated last, after its old weights were used above.
    weights[-1] = weights[-1] - learning_rate * out_w_grad
    biases[-1] = biases[-1] - learning_rate * out_b_grad

In [10]:
def eval_accuracy(output, y):
    """Score predictions as one minus the mean absolute percentage error.

    Args:
        output: predicted values, broadcast-compatible with ``y``.
        y: target values.

    Returns:
        ``1 - mean(|output - y| / |y|)``. 1.0 is a perfect fit. The score has
        no lower bound: it turns negative as soon as the average relative
        error exceeds 100%, which happens easily when targets are close to 0.

    Note:
        A target of exactly zero makes the ratio undefined; NumPy then yields
        inf or nan (with a RuntimeWarning) instead of raising.
    """
    # Divide by |y|, not y: with a signed denominator, negative targets would
    # contribute negative "errors" and push the score above 1.
    relative_error = np.abs(output - y) / np.abs(y)
    return 1 - np.mean(relative_error)

def test(x, y):
    """Run a forward pass on ``x`` and return the accuracy score against ``y``."""
    return eval_accuracy(proc_forward(x), y)

def train_step(x, y):
    """Perform one forward/backward pass on a batch and update the parameters.

    The loss is the mean squared error between the network output and ``y``.

    Returns:
        ``(loss, accuracy)`` where both are computed from the output produced
        *before* the parameter update of this step.
    """
    output = proc_forward(x)

    residual = output - y
    loss = np.mean(np.power(residual, 2))

    # d(mean)/d(squared error): every element carries the weight 1 / N.
    mean_weight = np.ones_like(y) / np.prod(y.shape)

    # Chain rule: d(loss)/d(output) = d(square)/d(residual) * d(mean)/d(square),
    # with d(residual)/d(output) = 1.
    output_grad = (2 * residual) * mean_weight

    proc_backward(x, output_grad)

    return loss, eval_accuracy(output, y)

In [11]:
def train_and_test(batch_size=0):
    """Train the network from freshly initialised parameters and print metrics.

    Every ``report_period`` epochs a line is printed with the mean training
    cost, the mean training accuracy and the accuracy on the test split; the
    final test accuracy is printed once training ends.

    Args:
        batch_size: number of samples per gradient step. ``0`` (the default)
            means full batch, i.e. ``train_count`` samples. A value larger
            than ``train_count`` is clamped to ``train_count``. When
            ``train_count`` is not a multiple of ``batch_size``, the leftover
            samples of each epoch's shuffled order are skipped.

    Raises:
        ValueError: if ``batch_size`` is negative.
    """
    if batch_size < 0:
        raise ValueError(
            "batch_size must be non-negative, got {}".format(batch_size))

    # 0 requests full-batch training. An oversize batch would otherwise give
    # zero batches per epoch: nothing would be trained and the reported cost
    # would be the mean of an empty list (nan).
    if batch_size == 0 or batch_size > train_count:
        batch_size = train_count
    batch_count = train_count // batch_size

    test_X, test_Y = get_test_data()

    init_parameter()

    # Reseed after initialisation so the batch shuffling is repeatable too.
    if random_fix:
        np.random.seed(1945)

    for epoch in range(epoch_count):
        costs = []
        accs = []
        for n in range(batch_count):
            train_X, train_Y = get_train_data(batch_size, n)
            cost, train_acc = train_step(train_X, train_Y)
            costs.append(cost)
            accs.append(train_acc)

        if (epoch + 1) % report_period == 0:
            test_acc = test(test_X, test_Y)
            print("Epoch {}: cost={:5.3f}, accuracy={:5.3f}/{:5.3f}".
                  format(epoch + 1, np.mean(costs), np.mean(accs), test_acc))

    final_acc = test(test_X, test_Y)
    print("\nFinal Test: final accuracy = {:5.3f}".format(final_acc))

In [12]:
# Mini-batch run: 10 samples per gradient step.
train_and_test(10)

Epoch 10: cost=1.456, accuracy=0.318/0.229
Epoch 20: cost=1.111, accuracy=0.161/-0.055
Epoch 30: cost=0.897, accuracy=-0.003/-0.320
Epoch 40: cost=0.765, accuracy=-0.139/-0.533
Epoch 50: cost=0.683, accuracy=-0.243/-0.700
Epoch 60: cost=0.633, accuracy=-0.327/-0.832
Epoch 70: cost=0.601, accuracy=-0.394/-0.936
Epoch 80: cost=0.582, accuracy=-0.447/-1.017
Epoch 90: cost=0.570, accuracy=-0.493/-1.081
Epoch 100: cost=0.563, accuracy=-0.528/-1.131

Final Test: final accuracy = -1.131


In [13]:
# Full-batch run: the default batch_size=0 is replaced by train_count, so each epoch is a single update.
train_and_test()

Epoch 10: cost=1.926, accuracy=0.042/0.059
Epoch 20: cost=1.872, accuracy=0.087/0.116
Epoch 30: cost=1.820, accuracy=0.131/0.172
Epoch 40: cost=1.770, accuracy=0.175/0.227
Epoch 50: cost=1.723, accuracy=0.212/0.254
Epoch 60: cost=1.677, accuracy=0.243/0.264
Epoch 70: cost=1.632, accuracy=0.274/0.274
Epoch 80: cost=1.590, accuracy=0.303/0.283
Epoch 90: cost=1.549, accuracy=0.333/0.293
Epoch 100: cost=1.510, accuracy=0.333/0.274

Final Test: final accuracy = 0.274


In [14]:
# Per-sample run: batch size 1, i.e. one update for every training sample in each epoch.
# NOTE(review): in the recorded output the cost stalls at 0.552 while the accuracy stays
# negative - presumably the network has settled on a near-constant prediction; confirm.
train_and_test(1)

Epoch 10: cost=0.567, accuracy=-0.520/-1.127
Epoch 20: cost=0.552, accuracy=-0.662/-1.307
Epoch 30: cost=0.552, accuracy=-0.660/-1.326
Epoch 40: cost=0.552, accuracy=-0.668/-1.327
Epoch 50: cost=0.552, accuracy=-0.653/-1.330
Epoch 60: cost=0.552, accuracy=-0.653/-1.331
Epoch 70: cost=0.552, accuracy=-0.662/-1.327
Epoch 80: cost=0.552, accuracy=-0.651/-1.329
Epoch 90: cost=0.552, accuracy=-0.663/-1.322
Epoch 100: cost=0.552, accuracy=-0.657/-1.327

Final Test: final accuracy = -1.327
