In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Fraction of the samples assigned to the training split; the rest is the test split.
train_ratio = 0.8
# Standard deviation of the zero-mean normal distribution used to initialise the weights.
rand_std = 0.030
# Step size applied to the gradients in proc_backward.
learning_rate = 0.001
# Number of passes over the training split.
epoch_count = 100
# A progress line is printed every `report_period` epochs.
report_period = 10
# When True, NumPy's global RNG is re-seeded with fixed values before the split,
# before parameter initialisation, and before training.
random_fix = True

In [3]:
# Load the Iris dataset through scikit-learn's dataset helpers.
iris_dataset = datasets.load_iris()

# Feature matrix, integer class labels (used later as one-hot column indices),
# and the class names.
data = iris_dataset.data
target = iris_dataset.target
target_names = iris_dataset.target_names

# Optional sanity checks -- uncomment to inspect shapes and a few rows.
#print("dimension: data{}, target{}, target_names{}".
#    format(data.shape, target.shape, target_names.shape))
#print(target_names)
#print(data[:5])
#print(target[-5:])

In [4]:
# Seed the global RNG so the train/test partition is reproducible.
if random_fix:
    np.random.seed(1234)

# Split sizes: the first `train_ratio` share of the shuffled rows is used for training.
data_count = len(data)
train_count = int(data_count * train_ratio)
test_count = data_count - train_count

# Random permutation of all row numbers.
indices = np.arange(data_count)
np.random.shuffle(indices)

# Leading part of the permutation -> training rows, trailing part -> test rows.
train_data, test_data = data[indices[:train_count]], data[indices[train_count:]]
train_target, test_target = target[indices[:train_count]], target[indices[train_count:]]

In [5]:
# Model dimensions: features per sample and number of classes.
input_dim = 4
output_dim = 3

def get_test_data():
    """Return the test split as features plus one-hot encoded labels.

    Reads the module-level ``test_data``, ``test_target``, ``test_count`` and
    ``output_dim``.

    Returns:
        (test_X, test_Y): ``test_X`` is ``test_data`` itself (not a copy);
        ``test_Y`` is a float array of shape ``[test_count, output_dim]`` with
        1.0 in the column named by each entry of ``test_target`` and 0.0
        elsewhere.
    """
    test_X = test_data

    # Width comes from output_dim rather than a literal 3, so the encoding
    # stays in step with the model's output size.
    test_Y = np.zeros([test_count, output_dim])
    # Vectorised one-hot: row i gets a 1.0 in column test_target[i].
    test_Y[np.arange(test_count), test_target] = 1.0

    return test_X, test_Y

# Shuffled order of the training rows for the current epoch. Deliberately a
# separate name from the notebook-level `indices` (the train/test split
# permutation) so that drawing batches never overwrites the split.
train_indices = None

def get_train_data(batch_size, nth):
    """Return the ``nth`` mini-batch of the training split.

    A fresh random order of the training rows is drawn whenever ``nth`` is 0
    (i.e. at the start of every epoch) or when no order exists yet.

    Args:
        batch_size: number of rows per batch.
        nth: zero-based batch number within the current epoch.

    Returns:
        (train_X, train_Y): the selected rows of ``train_data`` and their
        one-hot labels of shape ``[rows, output_dim]``. ``rows`` equals
        ``batch_size`` except for a final batch that runs past the end of the
        training split, which is returned short instead of raising.
    """
    global train_indices

    if nth == 0 or train_indices is None:
        train_indices = np.arange(train_count)
        np.random.shuffle(train_indices)

    # Slicing clamps at the end of the array, so a trailing partial batch is fine.
    batch_idx = train_indices[nth * batch_size:(nth + 1) * batch_size]

    train_X = train_data[batch_idx]

    # One-hot labels sized by the actual batch length and the model's output width.
    train_Y = np.zeros([len(batch_idx), output_dim])
    train_Y[np.arange(len(batch_idx)), train_target[batch_idx]] = 1.0

    return train_X, train_Y

In [6]:
def init_rand_normal(in_dim, out_dim):
    """Draw an ``[in_dim, out_dim]`` float32 matrix from N(0, rand_std**2).

    Uses NumPy's global RNG and the module-level ``rand_std``.
    """
    samples = np.random.normal(loc=0, scale=rand_std, size=[in_dim, out_dim])
    return samples.astype(np.float32)

In [7]:
def init_parameter():
    """(Re)create the global model parameters ``w`` and ``b``.

    ``w`` is an ``[input_dim, output_dim]`` matrix from ``init_rand_normal``;
    ``b`` is a zero vector of length ``output_dim``. When ``random_fix`` is
    set, the global RNG is seeded first so the initial weights are repeatable.
    """
    global w, b

    if random_fix:
        np.random.seed(9876)

    w = init_rand_normal(input_dim, output_dim)
    b = np.zeros([output_dim])

In [8]:
def proc_forward(x):
    """Apply the linear layer: ``x @ w + b`` using the global parameters.

    Args:
        x: input rows, one sample per row.

    Returns:
        The pre-softmax outputs (logits), one row per sample.
    """
    # w and b are only read here, so no `global` declaration is needed.
    return np.matmul(x, w) + b

def proc_backward(x, grad):
    """Take one gradient-descent step on the global ``w`` and ``b``.

    Args:
        x: the inputs that were fed to ``proc_forward``.
        grad: gradient of the loss with respect to the layer output, one row
            per sample (an array or a list of per-sample vectors).
    """
    global w, b

    # d(output)/d(w) is x itself, so the weight gradient is x^T . grad.
    grad_w = np.matmul(x.transpose(), grad)
    # The bias is added to every row, so its gradient is the column-wise sum.
    grad_b = np.sum(grad, axis=0)

    # Rebind (rather than update in place) so dtype promotion behaves as before.
    w = w - learning_rate * grad_w
    b = b - learning_rate * grad_b

In [9]:
def softmax(x):
    """Row-wise softmax of a 2-D array of logits.

    The row maximum is subtracted before exponentiating so that large logits
    do not overflow; this does not change the result.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

def softmax_derv(x, y):
    """Per-sample Jacobian of softmax with respect to its input.

    For each sample n the Jacobian is ``diag(y[n]) - outer(y[n], y[n])``,
    i.e. ``derv[n, i, j] = y[n, i] * (delta_ij - y[n, j])``.

    Args:
        x: the softmax inputs. Only kept for interface compatibility; the
            Jacobian depends on the outputs alone. Expected to have the same
            shape as ``y``.
        y: the softmax outputs, shape ``[batch, classes]``.

    Returns:
        float64 array of shape ``[batch, classes, classes]``.
    """
    probs = np.asarray(y, dtype=np.float64)

    # Off-diagonal and diagonal share the -y_i * y_j term (batched outer product).
    derv = -probs[:, :, np.newaxis] * probs[:, np.newaxis, :]

    # The diagonal additionally gets +y_i.
    diag = np.arange(probs.shape[1])
    derv[:, diag, diag] += probs

    return derv

def softmax_cross_entropy(p, q, eps=1.0e-10):
    """Cross entropy ``-sum(p * log(q))`` for each row.

    Args:
        p: target distribution per row (e.g. one-hot labels).
        q: predicted probabilities per row.
        eps: lower bound applied to ``q`` before taking the logarithm.

    Returns:
        1-D array with one loss value per row.
    """
    # A probability that underflows to exactly 0 would give log(0) = -inf and,
    # where p is 0, 0 * -inf = nan. Clamping keeps the loss finite and leaves
    # every q >= eps untouched.
    safe_q = np.maximum(q, eps)
    return -np.sum(p * np.log(safe_q), axis=1)

def softmax_cross_entropy_derv(p, q, eps=1.0e-10):
    """Derivative of the cross entropy ``-sum(p * log(q))`` with respect to ``q``.

    Args:
        p: target distribution per row (e.g. one-hot labels).
        q: predicted probabilities per row.
        eps: lower bound applied to ``q`` before dividing.

    Returns:
        Array shaped like ``p`` holding ``-p / q`` element-wise.
    """
    # Guard the division: q == 0 would otherwise yield -inf (p > 0) or nan (p == 0),
    # which would then poison the weight update.
    return -p / np.maximum(q, eps)

In [10]:
def eval_accuracy(output, y):
    """Fraction of rows whose highest-scoring class matches the label.

    Args:
        output: model outputs (logits), one row per sample.
        y: one-hot labels, one row per sample.
    """
    # Softmax is monotonic, so the arg-max of the raw outputs already equals
    # the arg-max of the probabilities; no need to apply softmax first.
    predicted = np.argmax(output, axis=1)
    expected = np.argmax(y, axis=1)
    hits = predicted == expected
    return np.mean(hits)

def test(x, y):
    output = proc_forward(x)
    return eval_accuracy(output, y)

def train_step(x, y):
    """Run one forward/backward pass on a mini-batch and update the parameters.

    Args:
        x: input rows of the batch.
        y: one-hot labels of the batch.

    Returns:
        (loss, accuracy): mean cross-entropy of the batch and the batch
        accuracy, both measured with the parameters as they were before the
        update.
    """
    # ---- forward: logits -> probabilities -> per-sample loss -> mean loss
    logits = proc_forward(x)
    probs = softmax(logits)
    sample_losses = softmax_cross_entropy(y, probs)
    loss = np.mean(sample_losses)

    # ---- backward: apply the chain rule in reverse order
    grad_loss = 1.0

    # The mean spreads the gradient evenly over the samples.
    grad_entropy = grad_loss / np.prod(sample_losses.shape)

    # Cross entropy with respect to the probabilities.
    grad_probs = softmax_cross_entropy_derv(y, probs) * grad_entropy

    # Softmax: one Jacobian per sample, applied to that sample's gradient.
    jacobians = softmax_derv(logits, probs)
    grad_logits = [np.matmul(jac, g) for jac, g in zip(jacobians, grad_probs)]

    proc_backward(x, grad_logits)

    return loss, eval_accuracy(logits, y)

In [11]:
def train_and_test(batch_size=0):
    """Train the model from scratch and report test accuracy.

    Args:
        batch_size: rows per parameter update; 0 means the whole training
            split in a single batch.

    Every ``report_period`` epochs a line with the mean training cost, mean
    training accuracy and current test accuracy is printed, followed by the
    final test accuracy once training ends.
    """
    if batch_size == 0:
        batch_size = train_count
    batch_count = int(train_count / batch_size)
    test_X, test_Y = get_test_data()

    init_parameter()

    # Seed after initialisation so the batch order is repeatable too.
    if random_fix:
        np.random.seed(1945)

    for epoch_no in range(1, epoch_count + 1):
        costs, accs = [], []
        for n in range(batch_count):
            batch_X, batch_Y = get_train_data(batch_size, n)
            cost, acc = train_step(batch_X, batch_Y)
            costs.append(cost)
            accs.append(acc)

        if epoch_no % report_period != 0:
            continue

        test_acc = test(test_X, test_Y)
        print("Epoch {}: cost={:5.3f}, accuracy={:5.3f}/{:5.3f}". \
              format(epoch_no, np.mean(costs), np.mean(accs), test_acc))

    final_acc = test(test_X, test_Y)
    print("\nFinal Test: final accuracy = {:5.3f}".format(final_acc))

In [12]:
# Mini-batch training: 10 samples per parameter update.
train_and_test(10)

Epoch 10: cost=1.003, accuracy=0.592/0.767
Epoch 20: cost=0.941, accuracy=0.642/0.767
Epoch 30: cost=0.889, accuracy=0.800/0.767
Epoch 40: cost=0.846, accuracy=0.692/0.767
Epoch 50: cost=0.807, accuracy=0.808/0.833
Epoch 60: cost=0.774, accuracy=0.708/0.867
Epoch 70: cost=0.747, accuracy=0.742/0.933
Epoch 80: cost=0.722, accuracy=0.825/0.933
Epoch 90: cost=0.700, accuracy=0.783/0.933
Epoch 100: cost=0.680, accuracy=0.792/0.967

Final Test: final accuracy = 0.967


In [13]:
# batch_size defaults to 0, which train_and_test replaces with train_count:
# the whole training split is one batch, i.e. one update per epoch.
train_and_test()

Epoch 10: cost=1.085, accuracy=0.383/0.400
Epoch 20: cost=1.071, accuracy=0.333/0.400
Epoch 30: cost=1.060, accuracy=0.325/0.400
Epoch 40: cost=1.050, accuracy=0.325/0.400
Epoch 50: cost=1.042, accuracy=0.333/0.400
Epoch 60: cost=1.034, accuracy=0.350/0.400
Epoch 70: cost=1.028, accuracy=0.367/0.400
Epoch 80: cost=1.021, accuracy=0.408/0.400
Epoch 90: cost=1.015, accuracy=0.483/0.467
Epoch 100: cost=1.009, accuracy=0.542/0.567

Final Test: final accuracy = 0.567


In [14]:
# One sample per parameter update.
train_and_test(1)

Epoch 10: cost=0.700, accuracy=0.758/0.967
Epoch 20: cost=0.571, accuracy=0.808/0.933
Epoch 30: cost=0.512, accuracy=0.892/0.933
Epoch 40: cost=0.475, accuracy=0.908/1.000
Epoch 50: cost=0.444, accuracy=0.942/1.000
Epoch 60: cost=0.421, accuracy=0.925/1.000
Epoch 70: cost=0.402, accuracy=0.958/1.000
Epoch 80: cost=0.381, accuracy=0.950/1.000
Epoch 90: cost=0.367, accuracy=0.942/0.967
Epoch 100: cost=0.354, accuracy=0.950/0.967

Final Test: final accuracy = 0.967
