In [1]:
import numpy as np

In [2]:
random_fix = True

In [3]:
class Dataset():
    def __init__(self, mode):
        self.mode = mode
        self.input_dim = self.output_dim = self.train_count = 0
        self.train_xs = self.test_xs = self.validate_xs = []
        self.train_ys = self.test_ys = self.validate_ys = []
        self.target_names = []
    
    def get_train_data(self, batch_size, nth):
        if nth == 0:
            self.indices = np.arange(self.train_count)
            np.random.shuffle(self.indices)
        
        from_idx = nth * batch_size
        to_idx = (nth + 1) * batch_size
    
        train_X = self.train_xs[self.indices[from_idx:to_idx]]
        train_Y = self.train_ys[self.indices[from_idx:to_idx]]
        
        return train_X, train_Y    
    
    def get_test_data(self, validate=False, count=0):
        if validate:
            xs, ys = self.validate_xs, self.validate_ys
        else:
            xs, ys = self.test_xs, self.test_ys
        
        if count == 0:
            return xs, ys
        
        if count > len(xs): count = len(xs)
        
        indices = np.arange(len(xs))
        np.random.shuffle(indices)
        
        return xs[indices[0:count]], ys[indices[0:count]]
    
    def get_target_name(self, idxs):
        return self.target_names[idxs]
    
    def demonstrate(self, x, estimate, answer):
        pass

In [4]:
class Model(object):
    def __init__(self, name, dataset):
        self.name = name
        self.dataset = dataset
    
    def train(self, epoch=10, batch_size=10):
        pass
    
    def test(self):
        pass
    
    def demonstrate(self, num):
        pass

In [5]:
from sklearn import datasets

class IrisDataset(Dataset):
    pass

def iris_init(self, mode, train_ratio=0.8):
    Dataset.__init__(self, mode)
    
    dataset = datasets.load_iris()
    
    xs, ys = iris_prepare(self, mode, dataset.data, dataset.target)
    
    data_count = len(dataset.data)
    self.train_count = int(data_count * train_ratio)

    indices = np.arange(data_count)
    np.random.shuffle(indices)

    self.train_xs = xs[indices[0:self.train_count]]
    self.train_ys = ys[indices[0:self.train_count]]
    self.test_xs = self.validate_xs = xs[indices[self.train_count:]]
    self.test_ys = self.validate_ys = ys[indices[self.train_count:]]
    
    self.target_names = dataset.target_names

def iris_prepare(self, mode, data, target):
    if mode == "regression":
        self.input_dim = 3
        self.output_dim = 1
        xs = data[:, 0:3]
        ys = data[:, 3:4]
    elif mode == "binary":
        self.input_dim = 4
        self.output_dim = 1
        xs = data
        ys = np.equal(target, 0).astype("float32").reshape(-1,1)
    elif mode == "select":
        self.input_dim = 4
        self.output_dim = 3
        xs = data
        ys = np.eye(3)[target]
        
    return xs, ys

def iris_demonstrate(self, x, estimate, answer):
    if self.mode == "regression":
        print("({}, {}, {}) => 추정 {:3.1f}, 정답: {:3.1f}".format(x[0], x[1], x[2], estimate, answer))
    elif self.mode == "binary":
        estr = "is setosa"
        astr = "(오답)"
        if not estimate: estr = "is not setosa"
        if estimate == answer: astr = "(정답)"
        print("({}, {}, {}, {}) => {} {}".format(x[0], x[1], x[2], x[3], estr, astr))
    elif self.mode == "select":
        estr = self.target_names[estimate]
        astr = "({})".format(self.target_names[answer])
        if estimate == answer: astr = "(정답)"
        print("({}, {}, {}, {}) => {} {}".format(x[0], x[1], x[2], x[3], estr, astr))
    
IrisDataset.__init__ = iris_init
IrisDataset.demonstrate = iris_demonstrate

In [6]:
id1 = IrisDataset("regression")
id2 = IrisDataset("binary", 0.88)
id3 = IrisDataset("select", 0.7)

In [7]:
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

class MnistDataset(Dataset):
    pass

def mnist_init(self):
    Dataset.__init__(self, "select")
    
    dataset = input_data.read_data_sets("MNIST_data/", one_hot=True)
        
    self.train_xs = dataset.train.images
    self.train_ys = dataset.train.labels
        
    self.validate_xs = dataset.validation.images
    self.validate_ys = dataset.validation.labels
        
    self.test_xs = dataset.test.images
    self.test_ys = dataset.test.labels

    self.input_dim = 28 * 28
    self.output_dim = 10
    
    self.train_count = len(dataset.train.images)
    self.target_names = ['0','1','2','3','4','5','6','7','8','9']

def mnist_demonstrate(self, xs, est, ans):
    rows, cols = 4, 4
    f, a = plt.subplots(rows, cols, figsize=(cols, rows))
    for row in range(cols):
        for col in range(cols):
            i = row * cols + col
            estr = self.target_names[est[i]]
            astr = self.target_names[ans[i]]
            if est[i] == ans[i]:
                caption = "{}".format(estr)
            else:
                caption = "{}=>{}".format(astr, estr)
            a[row][col].axvspan(0, 0, 0, 6.0)
            a[row][col].imshow(np.reshape(xs[i], (28,28)))
            a[row][col].text(0.5, -1.5, caption)
            a[row][col].axis('off')
    f.show()
    plt.draw()
    plt.show()
        
MnistDataset.__init__ = mnist_init
MnistDataset.demonstrate = mnist_demonstrate

In [8]:
md = MnistDataset()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [9]:
class MLPNoTFModel(Model):
    pass

def mlp_init(self, name, dataset, hidden_dims, learning_rate=0.001):
    Model.__init__(self, name, dataset)
    init_parameters(self, hidden_dims)
    self.learning_rate = learning_rate

MLPNoTFModel.__init__ = mlp_init

In [10]:
def init_rand_normal(in_dim, out_dim, rand_std=0.0300):
    init_64 = np.random.normal(0, rand_std, [in_dim, out_dim])
    init = init_64.astype('float32')
    return init

In [11]:
def init_parameters(self, hidden_dims):
    if random_fix: np.random.seed(9876)

    input_dim = self.dataset.input_dim
    output_dim = self.dataset.output_dim

    self.weights, self.biases = [], []
    
    prev_dim = input_dim

    for n in range(len(hidden_dims)):
        next_dim = hidden_dims[n]
        w = init_rand_normal(prev_dim, next_dim)
        b = np.zeros([next_dim])
        self.weights.append(w)
        self.biases.append(b)
        prev_dim = next_dim

    w = init_rand_normal(prev_dim, output_dim)
    b = np.zeros([output_dim])
    self.weights.append(w)
    self.biases.append(b)
    
    self.hidden_dims = hidden_dims

"""
def build_neuralnet(self, hidden_dims):
    hidden = self.x
    for n in range(len(hidden_dims)):
        affine = tf.matmul(hidden, self.w_hids[n]) + self.b_hids[n]
        hidden = tf.nn.relu(affine)
    self.output = tf.matmul(hidden, self.w_out) + self.b_out
"""

'\ndef build_neuralnet(self, hidden_dims):\n    hidden = self.x\n    for n in range(len(hidden_dims)):\n        affine = tf.matmul(hidden, self.w_hids[n]) + self.b_hids[n]\n        hidden = tf.nn.relu(affine)\n    self.output = tf.matmul(hidden, self.w_out) + self.b_out\n'

In [12]:
"""
def build_loss_accuracy(self):
    if self.dataset.mode == "regression":
        self.estimate = self.output[:,0]
        self.answer = self.y[:,0]
        diff = self.estimate - self.answer
        self.loss = tf.reduce_mean(tf.pow(diff, 2))
        error = tf.reduce_mean(tf.abs(diff) / self.answer)
        self.accuracy = 1 - error
    elif self.dataset.mode == "binary":
        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=self.output)
        self.loss = tf.reduce_mean(cross_entropy)
        #probs = tf.nn.sigmoid(output)
        #estimate = tf.greater(probs, 0.5)
        self.estimate = tf.greater(self.output, 0)
        self.answer = tf.equal(self.y, 1.0)
        correct_bool = tf.equal(self.estimate, self.answer)
        correct = tf.cast(correct_bool, "float")
        self.accuracy = tf.reduce_mean(correct)
    elif self.dataset.mode == "select":
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=self.output)
        self.loss = tf.reduce_mean(cross_entropy)
        #probs = tf.nn.softmax(output)
        #estimate = tf.argmax(probs, 1)
        self.estimate = tf.argmax(self.output, 1)
        self.answer = tf.argmax(self.y, 1)
        correct_bool = tf.equal(self.estimate, self.answer)
        correct = tf.cast(correct_bool, "float")
        self.accuracy = tf.reduce_mean(correct)
"""

'\ndef build_loss_accuracy(self):\n    if self.dataset.mode == "regression":\n        self.estimate = self.output[:,0]\n        self.answer = self.y[:,0]\n        diff = self.estimate - self.answer\n        self.loss = tf.reduce_mean(tf.pow(diff, 2))\n        error = tf.reduce_mean(tf.abs(diff) / self.answer)\n        self.accuracy = 1 - error\n    elif self.dataset.mode == "binary":\n        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=self.output)\n        self.loss = tf.reduce_mean(cross_entropy)\n        #probs = tf.nn.sigmoid(output)\n        #estimate = tf.greater(probs, 0.5)\n        self.estimate = tf.greater(self.output, 0)\n        self.answer = tf.equal(self.y, 1.0)\n        correct_bool = tf.equal(self.estimate, self.answer)\n        correct = tf.cast(correct_bool, "float")\n        self.accuracy = tf.reduce_mean(correct)\n    elif self.dataset.mode == "select":\n        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.y, 

In [13]:
"""
def build_optimizer(self, learning_rate):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    self.train_op = optimizer.minimize(self.loss)

def build_saver(self):
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
    self.saver = tf.train.Saver(var_list=var_list)
"""

'\ndef build_optimizer(self, learning_rate):\n    optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n    self.train_op = optimizer.minimize(self.loss)\n\ndef build_saver(self):\n    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)\n    self.saver = tf.train.Saver(var_list=var_list)\n'

In [14]:
import time

def model_train(self, epoch_count=10, batch_size=10):
    if batch_size == 0:
        batch_size = self.dataset.train_count
        
    batch_count = int(self.dataset.train_count / batch_size)
    report_period = epoch_count / 10
    
    if random_fix: np.random.seed(1945)
    
    time1 = time2 = int(time.time())
    
    print("Model {} train report:".format(self.name))
    
    dset = self.dataset

    print(self.weights)
    for epoch in range(epoch_count):
        costs = []
        accs = []
        for n in range(batch_count):
            train_X, train_Y = dset.get_train_data(batch_size, n)
            cost, acc = self.train_step(train_X, train_Y)
            print("step{}: cost:{}, acc:{}".format(n, cost, acc))
            costs.append(cost)
            accs.append(acc)
            
        if (epoch+1) % report_period == 0:
            validate_X, validate_Y = dset.get_test_data(True, 30)
            acc = self.get_accuracy(validate_X, validate_Y)
            time3 = int(time.time())
            print("    Epoch {}: cost={:5.3f}, \
accuracy={:5.3f}/{:5.3f} ({}/{} secs)". \
                  format(epoch+1, np.mean(costs), np.mean(accs), \
                         acc, time3-time2, time3-time1))
            time2 = time3

    print("")
    
    path = "params/{}.ntpt".format(self.name)
    self.save_parameters(path)

Model.train = model_train

In [15]:
def model_test(self):
    test_X, test_Y = self.dataset.get_test_data()

    path = "params/{}.ntpt".format(self.name)
    self.restore_parameters(path)
    
    time1 = int(time.time())
    acc = self.get_accuracy(test_X, test_Y)
    time2 = int(time.time())
    
    print("Model {} test report: accuracy = {:5.3f}, ({} secs)".format(self.name, acc, time2-time1))
    print("")
    
Model.test = model_test

In [16]:
def model_demonstrate(self, num=10, batch=False):
    demo_X, demo_Y = self.dataset.get_test_data(False, num)

    path = "params/{}.ntpt".format(self.name)
    self.restore_parameters(path)
    
    print("Model {} Demonstration".format(self.name))
    est, ans = self.get_estimate_answer(demo_X, demo_Y)
    if batch:
        self.dataset.demonstrate(demo_X, est, ans)
    else:
        for n in range(len(demo_X)):
            self.dataset.demonstrate(demo_X[n], est[n], ans[n])
        print("")
    
Model.demonstrate = model_demonstrate

In [17]:
def train_step(self, x, y):
    output = self.proc_forward(x)

    if self.dataset.mode == "regression":
        diff = output - y
        power = np.power(diff, 2)
        loss = np.mean(power)
        loss_grad = 1.0
        power_derv = np.ones_like(y) / np.prod(y.shape)
        power_grad = power_derv * loss_grad
        diff_derv = 2 * diff
        diff_grad = diff_derv * power_grad
        output_derv = 1
        output_grad = output_derv * diff_grad
    elif self.dataset.mode == "binary":
        entropy = sigmoid_cross_entropy(y, output)
        loss = np.mean(entropy)
        loss_grad = 1.0
        ent_derv = np.ones_like(entropy) / np.prod(entropy.shape)
        ent_grad = ent_derv * loss_grad
        output_derv = sigmoid_cross_entropy_derv(y, output)
        output_grad = output_derv * ent_grad
    elif self.dataset.mode == "select":
        probs = softmax(output)
        entropy = softmax_cross_entropy(y, probs)
        loss = np.mean(entropy)
        loss_grad = 1.0
        ent_grad = loss_grad / np.prod(entropy.shape)
        probs_derv = softmax_cross_entropy_derv(y, probs)
        probs_grad = probs_derv * ent_grad
        output_derv = softmax_derv(output, probs)
        output_grad = [np.matmul(output_derv[n], probs_grad[n]) \
                       for n in range(output.shape[0])]
    
    self.proc_backward(x, output_grad)
    
    return loss, self.eval_accuracy(output, y)

MLPNoTFModel.train_step = train_step

In [18]:
def proc_forward(self, x):
    self.hiddens = [x]

    for n in range(len(self.hidden_dims)):
        hid = relu(np.matmul(self.hiddens[-1], self.weights[n]) + self.biases[n])
        self.hiddens.append(hid)
    
    output = np.matmul(self.hiddens[-1], self.weights[-1]) + self.biases[-1]
    
    self.hiddens.append(output)
    
    return output

def proc_backward(self, x, grad):
    w_out_derv = self.hiddens[-1].transpose()
    w_out_grad = np.matmul(w_out_derv, grad)
    
    b_out_grad = np.sum(grad, axis=0)
    
    hidden_derv = self.weights[-1].transpose()
    hidden_grad = np.matmul(grad, hidden_derv)
    
    for n in range(len(self.hidden_dims))[::-1]:
        hidden_affine_derv = relu_derv(self.hiddens[n+1])
        hidden_affine_grad = hidden_affine_derv * hidden_grad
    
        w_hid_derv = self.hiddens[n].transpose()
        w_hid_grad = np.matmul(w_hid_derv, hidden_affine_grad)
    
        b_hid_grad = np.sum(hidden_affine_grad, axis=0)
        
        grad = hidden_affine_grad
    
        hidden_derv = self.weights[n].transpose()
        hidden_grad = np.matmul(grad, hidden_derv)
    
        self.weights[n] = self.weights[n] - self.learning_rate * w_hid_grad
        self.biases[n] = self.biases[n] - self.learning_rate * b_hid_grad
    
    self.weights[-1] = self.weights[-1] - self.learning_rate * w_out_grad
    self.biases[-1] = self.biases[-1] - self.learning_rate * b_out_grad
    
MLPNoTFModel.proc_forward = proc_forward
MLPNoTFModel.proc_backward = proc_backward

In [19]:
def get_accuracy(self, x, y):
    output = self.proc_forward(x)
    return self.eval_accuracy(output, y)

def eval_accuracy(self, output, y):
    if self.dataset.mode == "regression":
        diff = output - y
        answer = y[:,0]
        error = np.mean(np.abs(diff) / answer)
        accuracy = 1 - error
    elif self.dataset.mode == "binary":
        #probs = sigmoid(output)
        #estimate = np.greater(probs, 0.5)
        estimate = np.greater(output, 0)
        answer = np.equal(y, 1.0)
        correct_bool = np.equal(estimate, answer)
        #correct = np.cast(correct_bool, "float32")
        accuracy = np.mean(correct_bool)
    elif self.dataset.mode == "select":
        #probs = softmax(output)
        #estimate = np.argmax(probs, 1)
        estimate = np.argmax(output, 1)
        answer = np.argmax(y, 1)
        correct_bool = np.equal(estimate, answer)
        #correct = np.cast(correct_bool, "float32")
        accuracy = np.mean(correct_bool)
    
    return accuracy

def get_estimate_answer(self, x, y):
    output = self.proc_forward(x)

    if self.dataset.mode == "regression":
        estimate = output[:,0]
        answer = y[:,0]
    elif self.dataset.mode == "binary":
        #probs = sigmoid(output)
        #estimate = np.greater(probs, 0.5)
        estimate = np.greater(output, 0)
        answer = np.equal(y, 1.0)
    elif self.dataset.mode == "select":
        #probs = softmax(output)
        #estimate = np.argmax(probs, 1)
        estimate = np.argmax(output, 1)
        answer = np.argmax(y, 1)
    
    return estimate, answer

MLPNoTFModel.eval_accuracy = eval_accuracy
MLPNoTFModel.get_accuracy = get_accuracy
MLPNoTFModel.get_estimate_answer = get_estimate_answer

In [20]:
def save_parameters(self, path):
    pass

def restore_parameters(self, path):
    pass

MLPNoTFModel.save_parameters = save_parameters
MLPNoTFModel.restore_parameters = restore_parameters

In [21]:
def relu(x):#3.2.1.8
    return np.maximum(x, 0)

def relu_derv(y):
    return np.sign(y)

In [22]:
def sigmoid(x): #2.6.9
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derv(x, y):
    return y * (1 - y)

def sigmoid_cross_entropy(z, x):
    return np.maximum(x, 0) - x * z + np.log(1 + np.exp(-np.abs(x)))

def sigmoid_cross_entropy_derv(z, x):
    return -z + sigmoid(x)

In [23]:
def softmax(x):
    max_elem = np.max(x, axis=1)
    diff = (x.transpose() - max_elem).transpose()
    exp = np.exp(diff)
    sum_exp = np.sum(exp, axis=1)
    probs = (exp.transpose() / sum_exp).transpose()
    return probs

def softmax_derv(x, y):
    mb_size, nom_size = x.shape
    derv = np.ndarray([mb_size, nom_size, nom_size])
    for n in range(mb_size):
        for i in range(nom_size):
            for j in range(nom_size):
                derv[n, i, j] = -y[n,i] * y[n,j]
            derv[n, i, i] += y[n,i]
    return derv

def softmax_cross_entropy(p, q):
    return -np.sum(p * np.log(q), axis=1)

def softmax_cross_entropy_derv(p, q):
    return -p / q

In [24]:
m1 = MLPNoTFModel("notf-iris-regression-hid-1", id1, [10])
m1.train(epoch_count=1)
print(babo)

Model notf-iris-regression-hid-1 train report:
[array([[ 0.01173613, -0.03273037,  0.00643122,  0.00968027, -0.02775064,
         0.00091096, -0.03340559, -0.00369707, -0.00038589,  0.01969427],
       [-0.00980483, -0.00899604, -0.02689693, -0.03099372,  0.0066654 ,
        -0.00755667,  0.02855561,  0.03363259,  0.01013013,  0.01377459],
       [ 0.02621648, -0.00263455,  0.01591358,  0.02987679,  0.05952203,
         0.01030967,  0.0228562 ,  0.00307394, -0.03339037,  0.01263231]], dtype=float32), array([[ 0.03830937],
       [-0.04520908],
       [ 0.0265373 ],
       [ 0.03551965],
       [ 0.02676535],
       [-0.01836309],
       [-0.02519594],
       [ 0.02597552],
       [ 0.05982467],
       [-0.00786958]], dtype=float32)]
step0: cost:1.9321341950563824, acc:-1.2129872262835595
step1: cost:2.697365506041181, acc:-0.6337946625920798
step2: cost:1.3922001540232831, acc:-1.1062475843829351
step3: cost:2.0185345985371836, acc:-3.0120568344421654
step4: cost:1.971924433445647, acc

NameError: name 'babo' is not defined

In [None]:
m1 = MLPNoTFModel("notf-iris-regression-hid-1", id1, [10])
m1.train(epoch_count=100)

m2 = MLPNoTFModel("notf-iris-binary-hid-3", id2, [8,4,2])
m2.train(epoch_count=100)

m3 = MLPNoTFModel("notf-iris-select-hid-none", id3, [])
m3.train(epoch_count=100)

In [None]:
m1.test()
m2.test()
m3.test()

In [None]:
m1.demonstrate(5)
m2.demonstrate(10)
m3.demonstrate(8)

In [None]:
m4 = MLPNoTFModel("notf-mnist-hidden-1", md, [32], 0.01)
m4.train(epoch_count=10)

m5 = MLPNoTFModel("notf-mnist-hidden-3", md, [16,8,4], 0.01)
m5.train(epoch_count=10)

In [None]:
m4.test()
m5.test()

In [None]:
m4.demonstrate(16, True)
m5.demonstrate(16, True)