In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Hyper-parameters and run options read by the cells below.
train_ratio = 0.8          # fraction of the samples assigned to the training split
rand_std = 0.030           # standard deviation of the normal distribution used to initialise weights
learning_rate = 0.001      # step size applied to every gradient update
epoch_count = 100          # number of passes over the training split per run
report_period = 10         # print a progress line every this many epochs
random_fix = True          # when True, NumPy's global RNG is re-seeded with fixed values

In [3]:
# Load the iris dataset that ships with scikit-learn.
iris_dataset = datasets.load_iris()

# Feature matrix, integer class labels and the class-name lookup table.
data = iris_dataset.data
target = iris_dataset.target
target_names = iris_dataset.target_names

# Optional sanity checks -- uncomment to inspect shapes and a few rows.
#print("dimension: data{}, target{}, target_names{}".
#    format(data.shape, target.shape, target_names.shape))
#print(target_names)
#print(data[:5])
#print(target[-5:])

In [4]:
# Re-seed the global RNG so the train/test split is the same on every run.
if random_fix:
    np.random.seed(1234)

# Split sizes: the test split takes whatever the training split leaves over.
data_count = len(data)
train_count = int(data_count * train_ratio)
test_count = data_count - train_count

# Random ordering of all sample positions.
indices = np.arange(data_count)
np.random.shuffle(indices)

# The first `train_count` shuffled positions form the training split,
# the remaining positions form the test split.
train_data, test_data = data[indices[:train_count]], data[indices[train_count:]]
train_target, test_target = target[indices[:train_count]], target[indices[train_count:]]

In [5]:
# Width of the network input and of the network output (one column per class in the one-hot labels).
input_dim, output_dim = 4, 3

def get_test_data():
    """Return the complete test split as ``(test_X, test_Y)``.

    Returns:
        test_X: the module-level ``test_data`` array, returned as-is (not copied).
        test_Y: float array of shape ``(test_count, output_dim)`` holding the
            one-hot encoding of ``test_target``.
    """
    test_X = test_data

    # Size the label matrix from output_dim instead of repeating the literal 3,
    # so the class count is defined in exactly one place.
    test_Y = np.zeros([test_count, output_dim])

    # One-hot encode every row at once: row i gets a 1.0 in column test_target[i].
    test_Y[np.arange(test_count), test_target] = 1.0

    return test_X, test_Y

def get_train_data(batch_size, nth):
    """Return the ``nth`` mini-batch of the training split as ``(train_X, train_Y)``.

    When ``nth`` is 0 the training order is reshuffled with NumPy's global RNG,
    so one call sequence ``nth = 0, 1, 2, ...`` walks through one epoch.

    Args:
        batch_size: number of samples requested per batch.
        nth: zero-based index of the batch within the current epoch.

    Returns:
        train_X: rows of ``train_data`` selected for this batch.
        train_Y: float one-hot labels with one row per row of ``train_X`` and
            ``output_dim`` columns.
    """
    # NOTE: this rebinds the module-level name `indices`, which the split cell
    # also assigns; the name is kept so existing notebook state keeps working.
    global indices

    if nth == 0:
        indices = np.arange(train_count)
        np.random.shuffle(indices)

    from_idx = nth * batch_size
    to_idx = (nth + 1) * batch_size
    batch_idx = indices[from_idx:to_idx]

    train_X = train_data[batch_idx]

    # Size the labels from the rows actually selected, so a truncated final
    # batch still yields X and Y with matching lengths.
    rows = len(batch_idx)
    train_Y = np.zeros([rows, output_dim])
    train_Y[np.arange(rows), train_target[batch_idx]] = 1.0

    return train_X, train_Y

In [6]:
def init_rand_normal(in_dim, out_dim):
    """Draw an ``(in_dim, out_dim)`` float32 matrix from N(0, rand_std**2).

    Samples come from NumPy's global RNG, so the result depends on the
    current seed state.
    """
    shape = [in_dim, out_dim]
    return np.random.normal(0, rand_std, shape).astype('float32')

In [7]:
# Number of units in each of the three hidden layers, ordered from input to output.
hidden1_dim = 8
hidden2_dim = 4
hidden3_dim = 2

def init_parameter():
    """(Re)create every weight matrix and bias vector as module globals.

    Weights are drawn with ``init_rand_normal``; biases start at zero.
    With ``random_fix`` enabled the global RNG is seeded first, so repeated
    calls produce the same initial parameters.
    """
    global w_hid1, b_hid1, w_hid2, b_hid2, w_hid3, b_hid3, w_out, b_out

    if random_fix:
        np.random.seed(9876)

    # Weights are drawn layer by layer, input side first, so the random
    # stream is consumed in a fixed order.
    w_hid1, b_hid1 = init_rand_normal(input_dim, hidden1_dim), np.zeros([hidden1_dim])
    w_hid2, b_hid2 = init_rand_normal(hidden1_dim, hidden2_dim), np.zeros([hidden2_dim])
    w_hid3, b_hid3 = init_rand_normal(hidden2_dim, hidden3_dim), np.zeros([hidden3_dim])
    w_out, b_out = init_rand_normal(hidden3_dim, output_dim), np.zeros([output_dim])

In [8]:
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    # np.maximum compares element-wise; np.max(x, 0) would instead reduce along axis 0.
    floor = 0
    rectified = np.maximum(x, floor)
    return rectified

def relu_derv(y):
    """Derivative of ReLU: 1 where the value is positive, 0 elsewhere.

    Args:
        y: array (or scalar) of ReLU outputs; pre-activation values work too,
            because only the sign of each element matters.

    Returns:
        Array shaped like ``y`` with the dtype of ``y``, holding 0 or 1.
    """
    y = np.asarray(y)
    # An explicit `> 0` test is used instead of np.sign, which would return -1
    # for negative entries. For non-negative ReLU outputs both give the same result.
    return (y > 0).astype(y.dtype)

In [9]:
def proc_forward(x):
    """Run the network forward and return the raw output scores.

    The three hidden activations are stored in the module globals
    ``hidden1``, ``hidden2`` and ``hidden3`` so that ``proc_backward`` can
    reuse them.

    Args:
        x: input batch; it is multiplied by ``w_hid1``, so its last dimension
            must match the row count of ``w_hid1``.

    Returns:
        Output of the final affine layer (no softmax applied).
    """
    global hidden1, hidden2, hidden3

    # Each hidden layer is an affine map followed by ReLU.
    pre1 = np.matmul(x, w_hid1) + b_hid1
    hidden1 = relu(pre1)

    pre2 = np.matmul(hidden1, w_hid2) + b_hid2
    hidden2 = relu(pre2)

    pre3 = np.matmul(hidden2, w_hid3) + b_hid3
    hidden3 = relu(pre3)

    # The output layer is affine only.
    return np.matmul(hidden3, w_out) + b_out

def proc_backward(x, grad):
    """Back-propagate ``grad`` through the network and apply one SGD step.

    Uses the hidden activations cached by the most recent ``proc_forward``
    call. All gradients are computed from the current parameters before any
    parameter is replaced.

    Args:
        x: the input batch that was passed to ``proc_forward``.
        grad: gradient of the loss with respect to the network output,
            one row per sample.
    """
    global w_hid1, b_hid1, w_hid2, b_hid2, w_hid3, b_hid3, w_out, b_out

    # Error signal at each layer's affine output, walking from the output
    # layer back to the first hidden layer.
    delta3 = relu_derv(hidden3) * np.matmul(grad, w_out.T)
    delta2 = relu_derv(hidden2) * np.matmul(delta3, w_hid3.T)
    delta1 = relu_derv(hidden1) * np.matmul(delta2, w_hid2.T)

    # Weight gradient = (layer input)^T @ (error signal);
    # bias gradient = error signal summed over the batch.
    w_out_grad, b_out_grad = np.matmul(hidden3.T, grad), np.sum(grad, axis=0)
    w_hid3_grad, b_hid3_grad = np.matmul(hidden2.T, delta3), np.sum(delta3, axis=0)
    w_hid2_grad, b_hid2_grad = np.matmul(hidden1.T, delta2), np.sum(delta2, axis=0)
    w_hid1_grad, b_hid1_grad = np.matmul(x.T, delta1), np.sum(delta1, axis=0)

    # Plain gradient-descent update. New arrays are bound (no in-place `-=`)
    # so NumPy's usual dtype promotion applies to the result.
    w_out = w_out - learning_rate * w_out_grad
    b_out = b_out - learning_rate * b_out_grad

    w_hid3 = w_hid3 - learning_rate * w_hid3_grad
    b_hid3 = b_hid3 - learning_rate * b_hid3_grad

    w_hid2 = w_hid2 - learning_rate * w_hid2_grad
    b_hid2 = b_hid2 - learning_rate * b_hid2_grad

    w_hid1 = w_hid1 - learning_rate * w_hid1_grad
    b_hid1 = b_hid1 - learning_rate * b_hid1_grad

In [10]:
def softmax(x):
    """Row-wise softmax of a 2-D array.

    The maximum of each row is subtracted before exponentiating, which keeps
    ``np.exp`` from overflowing without changing the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    weights = np.exp(x - row_max)
    totals = np.sum(weights, axis=1, keepdims=True)
    return weights / totals

def softmax_derv(x, y):
    """Per-sample Jacobian of softmax, built from the softmax outputs.

    For every sample ``n`` the result holds
    ``derv[n, i, j] = y[n, i] * (delta_ij - y[n, j])``.

    Args:
        x: softmax input; only its 2-D shape ``(mb_size, nom_size)`` is used.
        y: softmax output, expected to have the same shape as ``x``.

    Returns:
        float64 array of shape ``(mb_size, nom_size, nom_size)``.
    """
    mb_size, nom_size = x.shape
    y = np.asarray(y, dtype=np.float64)

    # Off-diagonal part for all samples at once: -y_i * y_j (an outer product
    # per sample), replacing the triple Python loop over an uninitialised array.
    derv = -y[:, :, np.newaxis] * y[:, np.newaxis, :]

    # Add y_i on the diagonal of every per-sample matrix.
    diag = np.arange(nom_size)
    derv[:, diag, diag] += y

    return derv

def softmax_cross_entropy(p, q):
    """Cross entropy ``-sum_i p_i * log(q_i)`` for every row.

    Terms with ``p_i == 0`` contribute exactly 0, following the convention
    ``0 * log(0) = 0``. Without this, a probability that underflows to 0 for a
    non-target class would turn the whole row into NaN.

    Args:
        p: target distribution, one row per sample (e.g. one-hot labels).
        q: predicted probabilities, same shape as ``p``.

    Returns:
        1-D array with one cross-entropy value per row.
    """
    p = np.asarray(p)
    q = np.asarray(q)

    # Where p is zero, substitute q = 1 so that log() returns 0 instead of -inf;
    # entries with p != 0 are left untouched.
    safe_q = np.where(p != 0, q, 1.0)

    return -np.sum(p * np.log(safe_q), axis=1)

def softmax_cross_entropy_derv(p, q):
    """Derivative of the cross entropy with respect to ``q``: ``-p / q``.

    Entries with ``p == 0`` yield 0 even when ``q`` is 0 there, so an
    underflowed probability for a non-target class no longer produces
    ``0 / 0 = NaN``.

    Args:
        p: target distribution, one row per sample.
        q: predicted probabilities, same shape as ``p``.

    Returns:
        Array of the same shape holding the element-wise derivative.
    """
    p = np.asarray(p)
    q = np.asarray(q)

    # Replace the divisor by 1 wherever the numerator is zero; the quotient
    # there is 0 regardless of q.
    safe_q = np.where(p != 0, q, 1.0)

    return -p / safe_q

In [11]:
def eval_accuracy(output, y):
    """Fraction of rows whose highest-scoring column matches the label.

    Args:
        output: network output scores, one row per sample.
        y: one-hot labels with the same shape as ``output``.
    """
    # The arg-max is taken on the raw scores; softmax is not applied here.
    predicted = np.argmax(output, axis=1)
    expected = np.argmax(y, axis=1)
    hits = predicted == expected
    return np.mean(hits)

def test(x, y):
    """Run a forward pass on ``x`` and return the accuracy against labels ``y``."""
    return eval_accuracy(proc_forward(x), y)

def train_step(x, y):
    """Run one forward/backward pass on a batch and update the parameters.

    Args:
        x: input batch, one row per sample.
        y: one-hot labels for the batch.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the mean cross entropy of the
        batch and ``accuracy`` is computed from the pre-update forward pass.
    """
    output = proc_forward(x)

    probs = softmax(output)
    entropy = softmax_cross_entropy(y, probs)
    loss = np.mean(entropy)

    # The loss is the mean of the per-sample entropies, so each entropy
    # receives a gradient of 1 / (number of samples).
    loss_grad = 1.0
    ent_grad = loss_grad / np.prod(entropy.shape)

    # Chain rule: entropy -> probabilities.
    probs_derv = softmax_cross_entropy_derv(y, probs)
    probs_grad = probs_derv * ent_grad

    # Chain rule: probabilities -> raw outputs. One batched matrix-vector
    # product replaces the per-sample Python loop and yields an ndarray of
    # shape (batch, classes) rather than a list of arrays.
    output_derv = softmax_derv(output, probs)
    output_grad = np.matmul(output_derv, probs_grad[:, :, np.newaxis])[:, :, 0]

    proc_backward(x, output_grad)

    return loss, eval_accuracy(output, y)

In [12]:
def train_and_test(batch_size=0):
    """Train the network from freshly initialised parameters and report accuracy.

    Every ``report_period`` epochs a line with the mean training cost, the mean
    training accuracy and the current test accuracy is printed; the final test
    accuracy is printed once training ends.

    Args:
        batch_size: samples per parameter update. ``0`` (the default) means
            full-batch training, i.e. the whole training split per update.
            Samples beyond the last complete batch of an epoch are not used.

    Raises:
        ValueError: if ``batch_size`` is negative or larger than the training
            split. Such values would otherwise produce zero batches per epoch
            and NaN statistics.
    """
    if batch_size == 0:
        batch_size = train_count

    if not 1 <= batch_size <= train_count:
        raise ValueError(
            "batch_size must be between 1 and {} (or 0 for full batch), got {}"
            .format(train_count, batch_size))

    # Integer division: only complete batches are processed.
    batch_count = train_count // batch_size
    test_X, test_Y = get_test_data()

    init_parameter()

    # Seed after initialisation so the batch shuffling order is reproducible
    # independently of how many random numbers the initialisation consumed.
    if random_fix:
        np.random.seed(1945)

    for epoch in range(epoch_count):
        costs = []
        accs = []
        for n in range(batch_count):
            train_X, train_Y = get_train_data(batch_size, n)
            cost, acc = train_step(train_X, train_Y)
            costs.append(cost)
            accs.append(acc)

        if (epoch + 1) % report_period == 0:
            acc = test(test_X, test_Y)
            print("Epoch {}: cost={:5.3f}, accuracy={:5.3f}/{:5.3f}".
                  format(epoch + 1, np.mean(costs), np.mean(accs), acc))

    final_acc = test(test_X, test_Y)
    print("\nFinal Test: final accuracy = {:5.3f}".format(final_acc))

In [13]:
# Mini-batch run: 10 training samples per parameter update.
# NOTE(review): the recorded output below stays at cost ~1.098 (about ln 3) with
# identical accuracy in every report, so the network does not appear to learn.
# Possibly the small `rand_std` combined with three ReLU layers leaves the
# gradients near zero -- TODO confirm.
train_and_test(10)

Epoch 10: cost=1.099, accuracy=0.358/0.233
Epoch 20: cost=1.098, accuracy=0.358/0.233
Epoch 30: cost=1.098, accuracy=0.358/0.233
Epoch 40: cost=1.098, accuracy=0.358/0.233
Epoch 50: cost=1.098, accuracy=0.358/0.233
Epoch 60: cost=1.098, accuracy=0.358/0.233
Epoch 70: cost=1.098, accuracy=0.358/0.233
Epoch 80: cost=1.098, accuracy=0.358/0.233
Epoch 90: cost=1.098, accuracy=0.358/0.233
Epoch 100: cost=1.098, accuracy=0.358/0.233

Final Test: final accuracy = 0.233


In [14]:
# Full-batch run: the default batch_size=0 uses the whole training split for each update.
train_and_test()

Epoch 10: cost=1.099, accuracy=0.358/0.233
Epoch 20: cost=1.099, accuracy=0.358/0.233
Epoch 30: cost=1.099, accuracy=0.358/0.233
Epoch 40: cost=1.099, accuracy=0.358/0.233
Epoch 50: cost=1.099, accuracy=0.358/0.233
Epoch 60: cost=1.099, accuracy=0.358/0.233
Epoch 70: cost=1.099, accuracy=0.358/0.233
Epoch 80: cost=1.099, accuracy=0.358/0.233
Epoch 90: cost=1.099, accuracy=0.358/0.233
Epoch 100: cost=1.099, accuracy=0.358/0.233

Final Test: final accuracy = 0.233


In [15]:
# Per-sample run: one training sample per parameter update.
train_and_test(1)

Epoch 10: cost=1.098, accuracy=0.358/0.233
Epoch 20: cost=1.098, accuracy=0.358/0.233
Epoch 30: cost=1.098, accuracy=0.358/0.233
Epoch 40: cost=1.098, accuracy=0.358/0.233
Epoch 50: cost=1.098, accuracy=0.358/0.233
Epoch 60: cost=1.098, accuracy=0.358/0.233
Epoch 70: cost=1.098, accuracy=0.358/0.233
Epoch 80: cost=1.098, accuracy=0.358/0.233
Epoch 90: cost=1.098, accuracy=0.358/0.233
Epoch 100: cost=1.098, accuracy=0.358/0.233

Final Test: final accuracy = 0.233
