In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# --- Experiment configuration -------------------------------------------
train_ratio = 0.8        # fraction of the samples assigned to the training split
rand_std = 0.03          # standard deviation of the normal weight initialiser
learning_rate = 1e-3     # step size applied to every gradient update
epoch_count = 100        # number of passes over the training split
report_period = 10       # print a progress line every this many epochs
random_fix = True        # when True, NumPy's RNG is re-seeded for reproducible runs

In [3]:
# Load the Iris dataset bundled with scikit-learn and unpack the parts used below.
iris_dataset = datasets.load_iris()
data, target, target_names = (
    iris_dataset.data,
    iris_dataset.target,
    iris_dataset.target_names,
)

# Uncomment to inspect what was loaded:
#print("dimension: data{}, target{}, target_names{}".
#    format(data.shape, target.shape, target_names.shape))
#print(target_names)
#print(data[:5])
#print(target[-5:])

In [4]:
# Shuffle the sample indices once and cut them into a train part and a test part.
if random_fix:
    np.random.seed(1234)

data_count = len(data)
train_count = int(data_count * train_ratio)
test_count = data_count - train_count

indices = np.arange(data_count)
np.random.shuffle(indices)

# The first train_count shuffled positions form the training split,
# everything after them forms the test split.
train_data, test_data = data[indices[:train_count]], data[indices[train_count:]]
train_target, test_target = target[indices[:train_count]], target[indices[train_count:]]

In [5]:
# Width of the network's input layer and of its (single-logit) output layer.
input_dim = 4
output_dim = 1

def get_test_data():
    """Return the test features together with binary labels.

    Returns:
        (test_X, test_Y): ``test_X`` is the module-level ``test_data`` array
        itself (not a copy); ``test_Y`` is a ``(test_count, 1)`` float array
        holding 1.0 where the sample's class id is 0 and 0.0 elsewhere.
    """
    labels = np.zeros([test_count, 1])
    # Mark every sample of class 0 as the positive case in one masked write.
    is_class_zero = np.equal(test_target[:test_count], 0)
    labels[is_class_zero, 0] = 1.0

    return test_data, labels

def get_train_data(batch_size, nth):
    """Return the ``nth`` mini-batch of the current epoch.

    The training order is reshuffled whenever ``nth == 0``, i.e. once at the
    start of every epoch. The shuffled order is kept in the module-level
    ``indices`` so later batches of the same epoch see the same permutation.

    Args:
        batch_size: number of samples per batch.
        nth: zero-based batch number within the epoch.

    Returns:
        (train_X, train_Y): the selected feature rows and a matching
        ``(rows, 1)`` float array with 1.0 for class id 0 and 0.0 otherwise.
        When fewer than ``batch_size`` samples remain, the batch is simply
        shorter (the labels always have as many rows as the features).
    """
    global indices

    if nth == 0:
        indices = np.arange(train_count)
        np.random.shuffle(indices)

    # Slicing clips at the end of the permutation, so a trailing partial
    # batch yields fewer rows instead of an out-of-range lookup.
    batch_idx = indices[nth * batch_size:(nth + 1) * batch_size]

    train_X = train_data[batch_idx]
    # Vectorised one-vs-rest labels, built from the same index array as the
    # features so the two can never get out of step.
    train_Y = (train_target[batch_idx] == 0).astype(np.float64).reshape(-1, 1)

    return train_X, train_Y

In [6]:
def init_rand_normal(in_dim, out_dim):
    """Draw an ``(in_dim, out_dim)`` float32 matrix from a zero-mean normal.

    The standard deviation comes from the module-level ``rand_std``. Samples
    are drawn from NumPy's global RNG and then narrowed to float32.
    """
    shape = (in_dim, out_dim)
    return np.random.normal(loc=0, scale=rand_std, size=shape).astype(np.float32)

In [7]:
# Width of each hidden layer, listed from the input side to the output side.
hidden_dims = [8, 4, 2]

def init_parameter():
    """(Re)create the global ``weights`` and ``biases`` lists.

    One weight matrix and one bias vector are built per layer, walking
    ``input_dim -> hidden_dims... -> output_dim``. Weights come from
    ``init_rand_normal``; biases start at zero. When ``random_fix`` is set the
    RNG is seeded first so every call produces the same parameters.
    """
    global weights, biases

    if random_fix:
        np.random.seed(9876)

    weights, biases = [], []

    # Consecutive pairs of this list are the (fan_in, fan_out) of each layer,
    # visited in input-to-output order so the random draws keep their sequence.
    layer_dims = [input_dim] + list(hidden_dims) + [output_dim]
    for fan_in, fan_out in zip(layer_dims[:-1], layer_dims[1:]):
        weights.append(init_rand_normal(fan_in, fan_out))
        biases.append(np.zeros([fan_out]))

In [8]:
def relu(x):
    """Element-wise rectified linear unit: negative entries become 0."""
    # np.maximum compares element by element; np.max(x, 0) would instead
    # reduce along axis 0, which is not what is wanted here.
    return np.maximum(x, 0)

def relu_derv(y):
    """Derivative of ReLU, expressed in terms of its input or output ``y``.

    Returns 1 where ``y`` is strictly positive and 0 everywhere else, in the
    same dtype as ``y``. For values produced by ``relu`` (never negative) this
    matches ``np.sign``; unlike ``np.sign`` it never yields -1, so the result
    is a valid gradient mask even if a negative value is passed in.
    """
    y = np.asarray(y)
    return np.greater(y, 0).astype(y.dtype)

In [9]:
def proc_forward(x):
    """Run the network forward on a batch and return the output logits.

    Side effect: the global ``hiddens`` list is replaced with the input
    followed by each hidden layer's activation, for later use by
    ``proc_backward``.
    """
    global weights, biases, hiddens

    hiddens = [x]

    # Each hidden layer is affine followed by ReLU, fed by the previous activation.
    for layer in range(len(hidden_dims)):
        pre_activation = hiddens[-1] @ weights[layer] + biases[layer]
        hiddens.append(relu(pre_activation))

    # The output layer is affine only; the sigmoid is folded into the loss.
    return hiddens[-1] @ weights[-1] + biases[-1]

def proc_backward(x, grad):
    """Back-propagate ``grad`` through the network and update the parameters.

    Args:
        x: unused here; the layer inputs are read from the global ``hiddens``
            recorded by the last ``proc_forward`` call.
        grad: gradient of the loss with respect to the output logits.

    Side effect: every entry of the global ``weights`` and ``biases`` is
    replaced by ``param - learning_rate * gradient``.
    """
    global weights, biases, hiddens

    # Output-layer gradients. Their update is deferred to the very end.
    out_w_grad = np.matmul(hiddens[-1].transpose(), grad)
    out_b_grad = np.sum(grad, axis=0)

    # Gradient flowing into the last hidden activation.
    upstream = np.matmul(grad, weights[-1].transpose())

    for n in reversed(range(len(hidden_dims))):
        # Undo the ReLU: gradient passes only where the activation was positive.
        pre_act_grad = relu_derv(hiddens[n + 1]) * upstream

        w_grad = np.matmul(hiddens[n].transpose(), pre_act_grad)
        b_grad = np.sum(pre_act_grad, axis=0)

        # Propagate to the layer below BEFORE touching weights[n], so the
        # pre-update weights are the ones used.
        upstream = np.matmul(pre_act_grad, weights[n].transpose())

        weights[n] = weights[n] - learning_rate * w_grad
        biases[n] = biases[n] - learning_rate * b_grad

    weights[-1] = weights[-1] - learning_rate * out_w_grad
    biases[-1] = biases[-1] - learning_rate * out_b_grad

In [10]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    The naive formula computes ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here only ``exp(-|x|)`` is ever
    evaluated, which lies in (0, 1]:

        x >= 0:  1 / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))

    Both branches are the same function; for ``x >= 0`` the arithmetic is
    identical to the naive form.
    """
    decay = np.exp(-np.abs(x))
    numerator = np.where(x >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

def sigmoid_derv(x, y):
    """Derivative of the sigmoid written in terms of its output ``y``.

    ``x`` (the sigmoid input) is accepted for signature symmetry but not used.
    """
    complement = 1 - y
    return complement * y

def sigmoid_cross_entropy(z, x):
    """Element-wise binary cross-entropy between labels ``z`` and logits ``x``.

    Uses the overflow-free form ``max(x, 0) - x*z + log(1 + exp(-|x|))``.
    The last term is evaluated with ``np.log1p`` so that it keeps its value
    when ``exp(-|x|)`` is far below machine epsilon; a plain
    ``log(1 + tiny)`` would round to exactly 0 and report a zero loss for
    confident predictions.
    """
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))

def sigmoid_cross_entropy_derv(z, x):
    """Gradient of ``sigmoid_cross_entropy(z, x)`` with respect to the logits ``x``.

    This is the predicted probability minus the label.
    """
    predicted = sigmoid(x)
    return predicted - z

In [11]:
def eval_accuracy(output, y):
    """Fraction of samples whose thresholded logit agrees with the label.

    A logit above 0 counts as a positive prediction; this is the same
    decision as ``sigmoid(output) > 0.5`` without evaluating the sigmoid.
    A label counts as positive when it equals 1.0.
    """
    predicted_positive = np.asarray(output) > 0
    labelled_positive = np.asarray(y) == 1.0
    return (predicted_positive == labelled_positive).mean()

def test(x, y):
    """Forward ``x`` through the current network and score it against ``y``.

    No parameters are updated; the return value is whatever
    ``eval_accuracy`` reports for the resulting logits.
    """
    return eval_accuracy(proc_forward(x), y)

def train_step(x, y):
    """Run one forward/backward pass on a batch and update the parameters.

    Args:
        x: batch of input features.
        y: matching batch of 0/1 labels.

    Returns:
        (loss, accuracy): the mean cross-entropy of the batch and the batch
        accuracy, both measured on the logits computed BEFORE the update.
    """
    logits = proc_forward(x)

    per_sample_loss = sigmoid_cross_entropy(y, logits)
    mean_loss = np.mean(per_sample_loss)

    # Chain rule, written out step by step:
    #   d(loss)/d(loss) = 1
    #   d(mean)/d(per_sample) = 1/N for every element
    #   d(per_sample)/d(logit) = sigmoid_cross_entropy_derv
    d_loss = 1.0
    d_per_sample = (np.ones_like(per_sample_loss) / np.prod(per_sample_loss.shape)) * d_loss
    d_logits = sigmoid_cross_entropy_derv(y, logits) * d_per_sample

    proc_backward(x, d_logits)

    return mean_loss, eval_accuracy(logits, y)

In [12]:
def train_and_test(batch_size=0):
    """Train the network from freshly initialised parameters and report accuracy.

    Every ``report_period`` epochs a line with the mean training cost, mean
    training accuracy and current test accuracy is printed; the final test
    accuracy is printed once training ends.

    Args:
        batch_size: samples per parameter update. ``0`` (the default) means
            "use the whole training split as one batch". Samples left over
            after the last complete batch are skipped in that epoch.

    Raises:
        ValueError: if ``batch_size`` is negative or larger than the training
            split. Either case would give zero batches per epoch, so nothing
            would be trained and the report would print NaN.
    """
    if batch_size == 0:
        batch_size = train_count
    if not 0 < batch_size <= train_count:
        raise ValueError(
            "batch_size must be 0 (full batch) or between 1 and {}, got {}"
            .format(train_count, batch_size))

    # Integer division: only complete batches are used.
    batch_count = train_count // batch_size
    test_X, test_Y = get_test_data()

    init_parameter()

    # Seeded after parameter init so the per-epoch shuffling is reproducible.
    if random_fix:
        np.random.seed(1945)

    for epoch in range(epoch_count):
        costs = []
        accs = []
        for n in range(batch_count):
            train_X, train_Y = get_train_data(batch_size, n)
            cost, acc = train_step(train_X, train_Y)
            costs.append(cost)
            accs.append(acc)

        if (epoch + 1) % report_period == 0:
            acc = test(test_X, test_Y)
            print("Epoch {}: cost={:15.13f}, accuracy={:5.3f}/{:5.3f}".
                  format(epoch + 1, np.mean(costs), np.mean(accs), acc))

    final_acc = test(test_X, test_Y)
    print("\nFinal Test: final accuracy = {:5.3f}".format(final_acc))

In [13]:
# Mini-batch run: 10 samples per parameter update.
train_and_test(10)

Epoch 10: cost=0.6897798219634, accuracy=0.675/0.633
Epoch 20: cost=0.6864083343055, accuracy=0.675/0.633
Epoch 30: cost=0.6832332285909, accuracy=0.675/0.633
Epoch 40: cost=0.6802443644089, accuracy=0.675/0.633
Epoch 50: cost=0.6774262947355, accuracy=0.675/0.633
Epoch 60: cost=0.6747725369672, accuracy=0.675/0.633
Epoch 70: cost=0.6722759719072, accuracy=0.675/0.633
Epoch 80: cost=0.6699219704177, accuracy=0.675/0.633
Epoch 90: cost=0.6677071555779, accuracy=0.675/0.633
Epoch 100: cost=0.6656195421616, accuracy=0.675/0.633

Final Test: final accuracy = 0.633


In [14]:
# Full-batch run: the default batch_size=0 uses the whole training split per update.
train_and_test()

Epoch 10: cost=0.6928712777588, accuracy=0.675/0.633
Epoch 20: cost=0.6925671249712, accuracy=0.675/0.633
Epoch 30: cost=0.6922644893329, accuracy=0.675/0.633
Epoch 40: cost=0.6919633632710, accuracy=0.675/0.633
Epoch 50: cost=0.6916637392479, accuracy=0.675/0.633
Epoch 60: cost=0.6913656097614, accuracy=0.675/0.633
Epoch 70: cost=0.6910689673447, accuracy=0.675/0.633
Epoch 80: cost=0.6907738045664, accuracy=0.675/0.633
Epoch 90: cost=0.6904801140299, accuracy=0.675/0.633
Epoch 100: cost=0.6901878883736, accuracy=0.675/0.633

Final Test: final accuracy = 0.633


In [15]:
# Per-sample run: batch size 1, one parameter update per training sample.
train_and_test(1)

Epoch 10: cost=0.6666757873531, accuracy=0.675/0.633
Epoch 20: cost=0.6509473533335, accuracy=0.675/0.633
Epoch 30: cost=0.6422044010868, accuracy=0.675/0.633
Epoch 40: cost=0.6372829456182, accuracy=0.675/0.633
Epoch 50: cost=0.6344951043511, accuracy=0.675/0.633
Epoch 60: cost=0.6328960461727, accuracy=0.675/0.633
Epoch 70: cost=0.6319734409037, accuracy=0.675/0.633
Epoch 80: cost=0.6314401544122, accuracy=0.675/0.633
Epoch 90: cost=0.6311330676229, accuracy=0.675/0.633
Epoch 100: cost=0.6309508666343, accuracy=0.675/0.633

Final Test: final accuracy = 0.633
