In [1]:
import numpy as np

In [2]:
random_fix = True

In [3]:
class Dataset():
    def __init__(self, mode):
        self.mode = mode
        self.input_dim = self.output_dim = self.train_count = 0
        self.train_xs = self.test_xs = self.validate_xs = []
        self.train_ys = self.test_ys = self.validate_ys = []
        self.target_names = []
    
    def get_train_data(self, batch_size, nth):
        if nth == 0:
            self.indices = np.arange(self.train_count)
            np.random.shuffle(self.indices)
        
        from_idx = nth * batch_size
        to_idx = (nth + 1) * batch_size
    
        train_X = self.train_xs[self.indices[from_idx:to_idx]]
        train_Y = self.train_ys[self.indices[from_idx:to_idx]]
        
        return train_X, train_Y    
    
    def get_test_data(self, validate=False, count=0):
        if validate:
            xs, ys = self.validate_xs, self.validate_ys
        else:
            xs, ys = self.test_xs, self.test_ys
        
        if count == 0:
            return xs, ys
        
        if count > len(xs): count = len(xs)
        
        indices = np.arange(len(xs))
        np.random.shuffle(indices)
        
        return xs[indices[0:count]], ys[indices[0:count]]
    
    def get_target_name(self, idxs):
        return self.target_names[idxs]
    
    def demonstrate(self, x, estimate, answer):
        pass

In [4]:
from sklearn import datasets

class IrisDataset(Dataset):
    pass

In [5]:
def iris_init(self, mode, train_ratio=0.8):
    Dataset.__init__(self, mode)
    
    dataset = datasets.load_iris()
    
    xs, ys = iris_prepare(self, mode, dataset.data, dataset.target)
    
    data_count = len(dataset.data)
    self.train_count = int(data_count * train_ratio)

    if random_fix: np.random.seed(1234)

    indices = np.arange(data_count)
    np.random.shuffle(indices)

    self.train_xs = xs[indices[0:self.train_count]]
    self.train_ys = ys[indices[0:self.train_count]]
    self.test_xs = self.validate_xs = xs[indices[self.train_count:]]
    self.test_ys = self.validate_ys = ys[indices[self.train_count:]]
    
    self.target_names = dataset.target_names

def iris_prepare(self, mode, data, target):
    if mode == "regression":
        self.input_dim = 3
        self.output_dim = 1
        xs = data[:, 0:3]
        ys = data[:, 3:4]
    elif mode == "binary":
        self.input_dim = 4
        self.output_dim = 1
        xs = data
        ys = np.equal(target, 0).astype("float32").reshape(-1,1)
    elif mode == "select":
        self.input_dim = 4
        self.output_dim = 3
        xs = data
        ys = np.eye(3)[target]
        
    return xs, ys

IrisDataset.__init__ = iris_init

In [6]:
def iris_demonstrate(self, x, estimate, answer):
    if self.mode == "regression":
        print("({}, {}, {}) => 추정 {:3.1f}, 정답: {:3.1f}".format(x[0], x[1], x[2], estimate, answer))
    elif self.mode == "binary":
        estr = "is setosa"
        astr = "(오답)"
        if not estimate: estr = "is not setosa"
        if estimate == answer: astr = "(정답)"
        print("({}, {}, {}, {}) => {} {}".format(x[0], x[1], x[2], x[3], estr, astr))
    elif self.mode == "select":
        estr = self.target_names[estimate]
        astr = "({})".format(self.target_names[answer])
        if estimate == answer: astr = "(정답)"
        print("({}, {}, {}, {}) => {} {}".format(x[0], x[1], x[2], x[3], estr, astr))
    
IrisDataset.demonstrate = iris_demonstrate

In [7]:
id1 = IrisDataset("regression")
id2 = IrisDataset("binary", 0.88)
id3 = IrisDataset("select", 0.7)

In [8]:
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

class MnistDataset(Dataset):
    pass

def mnist_init(self):
    Dataset.__init__(self, "select")
    
    dataset = input_data.read_data_sets("MNIST_data/", one_hot=True)
        
    self.train_xs = dataset.train.images
    self.train_ys = dataset.train.labels
        
    self.validate_xs = dataset.validation.images
    self.validate_ys = dataset.validation.labels
        
    self.test_xs = dataset.test.images
    self.test_ys = dataset.test.labels

    self.input_dim = 28 * 28
    self.output_dim = 10
    
    self.train_count = len(dataset.train.images)
    self.target_names = ['0','1','2','3','4','5','6','7','8','9']

def mnist_demonstrate(self, xs, est, ans):
    rows, cols = 4, 4
    f, a = plt.subplots(rows, cols, figsize=(cols, rows))
    for row in range(cols):
        for col in range(cols):
            i = row * cols + col
            estr = self.target_names[est[i]]
            astr = self.target_names[ans[i]]
            if est[i] == ans[i]:
                caption = "{}".format(estr)
            else:
                caption = "{}=>{}".format(astr, estr)
            a[row][col].axvspan(0, 0, 0, 6.0)
            a[row][col].imshow(np.reshape(xs[i], (28,28)))
            a[row][col].text(0.5, -1.5, caption)
            a[row][col].axis('off')
    f.show()
    plt.draw()
    plt.show()
        
MnistDataset.__init__ = mnist_init
MnistDataset.demonstrate = mnist_demonstrate

In [9]:
md = MnistDataset()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [10]:
class Model(object):
    def __init__(self, name, dataset):
        self.name = name
        self.dataset = dataset
        
    def train(self, epoch_count=10, batch_size=10):
        pass
    
    def test(self):
        pass
    
    def demonstrate(self, num=10, batch=False):
        pass

In [11]:
import tensorflow as tf

class MultiLayerPerceptronModel(Model):
    pass

In [12]:
import time

def mlp_model_train(self, epoch_count=10, batch_size=10):
    if batch_size == 0:
        batch_size = self.dataset.train_count
        
    batch_count = int(self.dataset.train_count / batch_size)
    report_period = epoch_count / 10
    
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if random_fix: np.random.seed(1945)
    
    time1 = time2 = int(time.time())
    
    print("Model {} train report:".format(self.name))
    
    dset = self.dataset
    run_targets = [self.train_op, self.loss, self.accuracy]

    for epoch in range(epoch_count):
        costs = []
        accs = []
        for n in range(batch_count):
            train_X, train_Y = dset.get_train_data(batch_size, n)
            _, cost, acc = sess.run(run_targets, \
                feed_dict={self.x:train_X, self.y:train_Y})
            costs.append(cost)
            accs.append(acc)
            
        if (epoch+1) % report_period == 0:
            validate_X, validate_Y = dset.get_test_data(True, 30)
            acc = sess.run(self.accuracy, \
                feed_dict={self.x:validate_X, self.y:validate_Y})
            time3 = int(time.time())
            print("    Epoch {}: cost={:5.3f}, \
accuracy={:5.3f}/{:5.3f} ({}/{} secs)". \
                  format(epoch+1, np.mean(costs), np.mean(accs), \
                         acc, time3-time2, time3-time1))
            time2 = time3

    print("")
    
    path = "params/{}.ckpt".format(self.name)
    self.saver.save(sess, path)
    sess.close()

MultiLayerPerceptronModel.train = mlp_model_train

In [13]:
def mlp_model_test(self):
    test_X, test_Y = self.dataset.get_test_data()

    sess = tf.Session()
    path = "params/{}.ckpt".format(self.name)
    self.saver.restore(sess, path)
    
    time1 = int(time.time())
    acc = sess.run(self.accuracy, feed_dict={self.x:test_X, self.y:test_Y})
    time2 = int(time.time())
    
    print("Model {} test report: accuracy = {:5.3f}, ({} secs)".format(self.name, acc, time2-time1))
    print("")
    
    sess.close()

MultiLayerPerceptronModel.test = mlp_model_test

In [48]:
def mlp_model_demonstrate(self, num=10, batch=False):
    demo_X, demo_Y = self.dataset.get_test_data(False, num)

    sess = tf.Session()
    path = "params/{}.ckpt".format(self.name)
    self.saver.restore(sess, path)
    
    print("Model {} Demonstration".format(self.name))
    est, ans, probs = sess.run([self.estimate, self.answer, self.probs], \
           feed_dict={self.x:demo_X, self.y:demo_Y})
    if batch:
        self.dataset.demonstrate(demo_X, demo_Y, est, ans, probs)
    else:
        for n in range(len(demo_X)):
            self.dataset.demonstrate(demo_X[n], demo_Y[n], est[n], ans[n], probs[n])
        print("")
    
    sess.close()
    
MultiLayerPerceptronModel.demonstrate = mlp_model_demonstrate

In [15]:
def mlp_init(self, name, dataset, hidden_dims, learning_rate=0.001):
    Model.__init__(self, name, dataset)
    with tf.variable_scope(self.name):
        self.build_placeholders()
        self.build_parameters(hidden_dims)
        self.build_neuralnet(hidden_dims)
        self.build_loss_accuracy()
        build_optimizer(self, learning_rate)
        build_saver(self)

MultiLayerPerceptronModel.__init__ = mlp_init

In [16]:
def build_placeholders(self):
    input_dim = self.dataset.input_dim
    output_dim = self.dataset.output_dim
    self.x = tf.placeholder("float", [None, input_dim])
    self.y = tf.placeholder("float", [None, output_dim])

MultiLayerPerceptronModel.build_placeholders = build_placeholders

In [17]:
def build_parameters(self, hidden_dims):
    if random_fix: np.random.seed(9876)

    input_dim = self.dataset.input_dim
    output_dim = self.dataset.output_dim

    self.w_hids, self.b_hids = [], []
    
    prev_dim = input_dim

    for n in range(len(hidden_dims)):
        next_dim = hidden_dims[n]
        w = tf.Variable(init_rand_normal(prev_dim, next_dim))
        b = tf.Variable(tf.zeros([next_dim]))
        self.w_hids.append(w)
        self.b_hids.append(b)
        prev_dim = next_dim

    self.w_out = tf.Variable(init_rand_normal(prev_dim, output_dim))
    self.b_out = tf.Variable(tf.zeros([output_dim]))
    
def init_rand_normal(in_dim, out_dim, rand_std=0.0300):
    if not random_fix:
        init = tf.random_normal([in_dim, out_dim], stddev=rand_std)
    else:
        init_64 = np.random.normal(0, rand_std, [in_dim, out_dim])
        init = init_64.astype('float32')

    return init
        
MultiLayerPerceptronModel.build_parameters = build_parameters

In [18]:
def build_neuralnet(self, hidden_dims):
    hidden = self.x
    for n in range(len(hidden_dims)):
        affine = tf.matmul(hidden, self.w_hids[n]) + self.b_hids[n]
        hidden = tf.nn.relu(affine)
    self.output = tf.matmul(hidden, self.w_out) + self.b_out
        
MultiLayerPerceptronModel.build_neuralnet = build_neuralnet

In [19]:
def build_loss_accuracy(self):
    if self.dataset.mode == "regression":
        self.estimate = self.output[:,0]
        self.answer = self.y[:,0]
        diff = self.estimate - self.answer
        self.probs = tf.constant(0)
        self.loss = tf.reduce_mean(tf.pow(diff, 2))
        error = tf.reduce_mean(tf.abs(diff) / self.answer)
        self.accuracy = 1 - error
    elif self.dataset.mode == "binary":
        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=self.output)
        self.loss = tf.reduce_mean(cross_entropy)
        self.probs = tf.nn.sigmoid(output)
        self.estimate = tf.greater(self.output, 0)
        self.answer = tf.equal(self.y, 1.0)
        correct_bool = tf.equal(self.estimate, self.answer)
        correct = tf.cast(correct_bool, "float")
        self.accuracy = tf.reduce_mean(correct)
    elif self.dataset.mode == "select":
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=self.output)
        self.loss = tf.reduce_mean(cross_entropy)
        self.probs = tf.nn.softmax(output)
        self.estimate = tf.argmax(self.output, 1)
        self.answer = tf.argmax(self.y, 1)
        correct_bool = tf.equal(self.estimate, self.answer)
        correct = tf.cast(correct_bool, "float")
        self.accuracy = tf.reduce_mean(correct)
        
MultiLayerPerceptronModel.build_loss_accuracy = build_loss_accuracy

In [20]:
def build_optimizer(self, learning_rate):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    self.train_op = optimizer.minimize(self.loss)

def build_saver(self):
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
    self.saver = tf.train.Saver(var_list=var_list)

In [21]:
def relu(x):
    return np.maximum(x, 0)

def relu_derv(y):
    return np.sign(y)

In [22]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derv(x, y):
    return y * (1 - y)

def sigmoid_cross_entropy(z, x):
    return np.maximum(x, 0) - x * z + np.log(1 + np.exp(-np.abs(x)))

def sigmoid_cross_entropy_derv(z, x):
    return -z + sigmoid(x)

In [23]:
def softmax(x):
    max_elem = np.max(x, axis=1)
    diff = (x.transpose() - max_elem).transpose()
    exp = np.exp(diff)
    sum_exp = np.sum(exp, axis=1)
    probs = (exp.transpose() / sum_exp).transpose()
    return probs

def softmax_derv(x, y):
    mb_size, nom_size = x.shape
    derv = np.ndarray([mb_size, nom_size, nom_size])
    for n in range(mb_size):
        for i in range(nom_size):
            for j in range(nom_size):
                derv[n, i, j] = -y[n,i] * y[n,j]
            derv[n, i, i] += y[n,i]
    return derv

def softmax_cross_entropy(p, q):
    return -np.sum(p * np.log(q), axis=1)

def softmax_cross_entropy_derv(p, q):
    return -p / q

In [24]:
class NoTFMLPModel(Model):
    pass

In [25]:
def notf_init(self,name,dataset,hidden_dims,learning_rate=0.001):
    Model.__init__(self, name, dataset)
    init_parameters(self, hidden_dims)
    self.learning_rate = learning_rate
    self.hidden_dims = hidden_dims
    
NoTFMLPModel.__init__ = notf_init

In [26]:
import time

def notf_model_train(self, epoch_count=10, batch_size=10):
    if batch_size == 0:
        batch_size = self.dataset.train_count
        
    batch_count = int(self.dataset.train_count / batch_size)
    report_period = epoch_count / 10
    
    if random_fix: np.random.seed(1945)
    
    time1 = time2 = int(time.time())
    
    print("Model {} train report:".format(self.name))
    
    dset = self.dataset

    for epoch in range(epoch_count):
        costs = []
        accs = []
        for n in range(batch_count):
            train_X, train_Y = dset.get_train_data(batch_size, n)
            cost, acc = self.train_step(train_X, train_Y)
            costs.append(cost)
            accs.append(acc)
            
        if (epoch+1) % report_period == 0:
            validate_X, validate_Y = dset.get_test_data(True, 30)
            acc = self.get_accuracy(validate_X, validate_Y)
            time3 = int(time.time())
            print("    Epoch {}: cost={:5.3f}, \
accuracy={:5.3f}/{:5.3f} ({}/{} secs)". \
                  format(epoch+1, np.mean(costs), np.mean(accs), \
                         acc, time3-time2, time3-time1))
            time2 = time3

    print("")
    
    path = "params/{}.ntpt".format(self.name)
    self.save_parameters(path)

NoTFMLPModel.train = notf_model_train

In [27]:
def notf_model_test(self):
    test_X, test_Y = self.dataset.get_test_data()

    path = "params/{}.ntpt".format(self.name)
    self.restore_parameters(path)
    
    time1 = int(time.time())
    acc = self.get_accuracy(test_X, test_Y)
    time2 = int(time.time())
    
    print("Model {} test report: accuracy = {:5.3f}, ({} secs)". \
          format(self.name, acc, time2-time1))
    print("")
    
NoTFMLPModel.test = notf_model_test

In [28]:
def notf_model_demonstrate(self, num=10, batch=False):
    demo_X, demo_Y = self.dataset.get_test_data(False, num)

    path = "params/{}.ntpt".format(self.name)
    self.restore_parameters(path)
    
    print("Model {} Demonstration".format(self.name))
    est, ans = self.get_estimate_answer(demo_X, demo_Y)
    if batch:
        self.dataset.demonstrate(demo_X, est, ans)
    else:
        for n in range(len(demo_X)):
            self.dataset.demonstrate(demo_X[n], est[n], ans[n])
        print("")
    
NoTFMLPModel.demonstrate = notf_model_demonstrate

In [29]:
def train_step(self, x, y):
    output = self.proc_forward(x)

    loss_grad = 1.0
    
    if self.dataset.mode == "regression":
        diff = output - y
        power = np.power(diff, 2)
        loss = np.mean(power)
        power_derv = np.ones_like(y) / np.prod(y.shape)
        power_grad = power_derv * loss_grad
        diff_derv = 2 * diff
        diff_grad = diff_derv * power_grad
        output_derv = 1
        output_grad = output_derv * diff_grad
    elif self.dataset.mode == "binary":
        entropy = sigmoid_cross_entropy(y, output)
        loss = np.mean(entropy)
        ent_derv = np.ones_like(entropy) / np.prod(entropy.shape)
        ent_grad = ent_derv * loss_grad
        output_derv = sigmoid_cross_entropy_derv(y, output)
        output_grad = output_derv * ent_grad
    elif self.dataset.mode == "select":
        probs = softmax(output)
        entropy = softmax_cross_entropy(y, probs)
        loss = np.mean(entropy)
        ent_grad = loss_grad / np.prod(entropy.shape)
        probs_derv = softmax_cross_entropy_derv(y, probs)
        probs_grad = probs_derv * ent_grad
        output_derv = softmax_derv(output, probs)
        output_grad = [np.matmul(output_derv[n], probs_grad[n]) \
                       for n in range(output.shape[0])]
    
    self.proc_backward(x, output_grad)
    
    return loss, self.eval_accuracy(output, y)

NoTFMLPModel.train_step = train_step

In [30]:
def init_parameters(self, hidden_dims):
    if random_fix: np.random.seed(9876)

    input_dim = self.dataset.input_dim
    output_dim = self.dataset.output_dim

    self.weights, self.biases = [], []
    
    prev_dim = input_dim

    for n in range(len(hidden_dims)):
        next_dim = hidden_dims[n]
        w = init_rand_normal(prev_dim, next_dim)
        b = np.zeros([next_dim])
        self.weights.append(w)
        self.biases.append(b)
        prev_dim = next_dim

    w = notf_init_rand_normal(prev_dim, output_dim)
    b = np.zeros([output_dim])
    self.weights.append(w)
    self.biases.append(b)
    
def notf_init_rand_normal(in_dim, out_dim, rand_std=0.0300):
    init_64 = np.random.normal(0, rand_std, [in_dim, out_dim])
    init = init_64.astype('float32')
    return init

In [31]:
def proc_forward(self, x):
    self.hiddens = [x]

    for n in range(len(self.hidden_dims)):
        w, b = self.weights[n], self.biases[n]
        hid = relu(np.matmul(self.hiddens[-1], w) + b)
        self.hiddens.append(hid)
    
    w, b = self.weights[-1], self.biases[-1]
    output = np.matmul(self.hiddens[-1], w) + b
    
    return output

NoTFMLPModel.proc_forward = proc_forward

In [32]:
def proc_backward(self, x, grad):
    w_out_derv = self.hiddens[-1].transpose()
    w_out_grad = np.matmul(w_out_derv, grad)
    
    b_out_grad = np.sum(grad, axis=0)
    
    hidden_derv = self.weights[-1].transpose()
    hidden_grad = np.matmul(grad, hidden_derv)
    
    for n in range(len(self.hidden_dims))[::-1]:
        hidden_affine_derv = relu_derv(self.hiddens[n+1])
        hidden_affine_grad = hidden_affine_derv * hidden_grad
    
        w_hid_derv = self.hiddens[n].transpose()
        w_hid_grad = np.matmul(w_hid_derv, hidden_affine_grad)
    
        b_hid_grad = np.sum(hidden_affine_grad, axis=0)
        
        hidden_derv = self.weights[n].transpose()
        hidden_grad = np.matmul(hidden_affine_grad, hidden_derv)
    
        self.weights[n] = self.weights[n] - self.learning_rate * w_hid_grad
        self.biases[n] = self.biases[n] - self.learning_rate * b_hid_grad
    
    self.weights[-1] = self.weights[-1] - self.learning_rate * w_out_grad
    self.biases[-1] = self.biases[-1] - self.learning_rate * b_out_grad
    
NoTFMLPModel.proc_backward = proc_backward

In [33]:
def get_accuracy(self, x, y):
    output = self.proc_forward(x)
    return self.eval_accuracy(output, y)

def eval_accuracy(self, output, y):
    if self.dataset.mode == "regression":
        diff = output - y
        answer = y[:,0]
        error = np.mean(np.abs(diff) / answer)
        accuracy = 1 - error
        probs = 0
    elif self.dataset.mode == "binary":
        #probs = sigmoid(output)
        #estimate = np.greater(probs, 0.5)
        estimate = np.greater(output, 0)
        answer = np.equal(y, 1.0)
        correct_bool = np.equal(estimate, answer)
        #correct = np.cast(correct_bool, "float32")
        accuracy = np.mean(correct_bool)
    elif self.dataset.mode == "select":
        #probs = softmax(output)
        #estimate = np.argmax(probs, 1)
        estimate = np.argmax(output, 1)
        answer = np.argmax(y, 1)
        correct_bool = np.equal(estimate, answer)
        #correct = np.cast(correct_bool, "float32")
        accuracy = np.mean(correct_bool)
    
    return accuracy

NoTFMLPModel.eval_accuracy = eval_accuracy
NoTFMLPModel.get_accuracy = get_accuracy

In [34]:
def get_estimate_answer(self, x, y):
    output = self.proc_forward(x)

    if self.dataset.mode == "regression":
        estimate = output[:,0]
        answer = y[:,0]
    elif self.dataset.mode == "binary":
        #probs = sigmoid(output)
        #estimate = np.greater(probs, 0.5)
        estimate = np.greater(output, 0)
        answer = np.equal(y, 1.0)
    elif self.dataset.mode == "select":
        #probs = softmax(output)
        #estimate = np.argmax(probs, 1)
        estimate = np.argmax(output, 1)
        answer = np.argmax(y, 1)
    
    return estimate, answer

NoTFMLPModel.get_estimate_answer = get_estimate_answer

In [35]:
def save_parameters(self, path):
    np.savez(path, self.weights, self.biases)

def restore_parameters(self, path):
    fc = np.load(path)
    self.weights, self.biases = fc['arr_0'], fc['arr_1']

NoTFMLPModel.save_parameters = save_parameters
NoTFMLPModel.restore_parameters = restore_parameters

In [113]:
class SudokuDataset(Dataset):
    pass

def unpack_map(line):
    quiz = np.zeros([81], dtype=np.int8)
    solution = np.zeros([81], dtype=np.int8)
    for n in range(81):
        quiz[n] = int(line[n])
        solution[n] = int(line[n+82])
    return quiz, solution

def fill_hints(quiz, solution, hint_cnt):
    for n in range(hint_cnt):
        for k in range(1000):
            pos = np.random.randint(81)
            if quiz[pos] == 0:
                quiz[pos] = solution[pos]
                break
    return quiz
    
def sudoku_init(self, train_ratio=0.80, valid_ratio=0.05):
    Dataset.__init__(self, "sudoku")
    
    quizzes, solutions = [], []

    max_count = 50000
    
    for line in open("./data/sudoku.csv"):
        if line[0] == 'q': continue
        quiz, solution = unpack_map(line)
        
        #quiz = solution
        #for n in range(5):
        #    pos = np.random.randint(81)
        #    quiz[pos] = 0
        #quizzes.append(solution)
        #solutions.append(solution)
        
        open_cnt = int((81-np.count_nonzero(quiz)) / 10)
        for n in range(5):
            quizzes.append(quiz)
            solutions.append(solution)
            quiz = fill_hints(quiz, solution, open_cnt)
        if len(quizzes) >= max_count: break

    ones = np.ones([81]).astype(int)
    solution_idxes = solutions - ones
    xs = np.asarray(quizzes)
    ys = solution_idxes.reshape(-1, 81)
    #ys = np.eye(9)[solution_idxes].reshape(-1, 81*9)

    self.input_dim = 81
    self.output_dim = 81
    #self.output_dim = 81*9

    data_count = len(xs)
    train_count = int(data_count * train_ratio)
    valid_count = int(data_count * valid_ratio)
    print('data_count', data_count)
    print('train_count', train_count)
    print('valid_count', valid_count)

    test_start_idx = train_count + valid_count

    indices = np.arange(data_count)
    np.random.shuffle(indices)

    self.train_xs = xs[indices[0:train_count]]
    self.train_ys = ys[indices[0:train_count]]
    self.validate_xs = xs[indices[train_count:test_start_idx]]
    self.validate_ys = ys[indices[train_count:test_start_idx]]
    self.test_xs = xs[indices[test_start_idx:]]
    self.test_ys = ys[indices[test_start_idx:]]

    self.train_count = train_count
    
def sudoku_demonstrate(self, x, y, estimate, answer, probs):
    print("sudoku demonstrate dummy")
    
SudokuDataset.__init__ = sudoku_init
SudokuDataset.demonstrate = sudoku_demonstrate

In [114]:
sd = SudokuDataset()

data_count 50000
train_count 40000
valid_count 2500


In [115]:
class SudokuCnnModel(MultiLayerPerceptronModel):
    pass

def sudoku_init(self, name, dataset, hidden_dims, learning_rate=0.001):
    MultiLayerPerceptronModel.__init__(self, name, dataset, hidden_dims, learning_rate)

def sudoku_build_loss_accuracy(self):
    #self.temp_labels = tf.reshape(self.y, [-1, 81, 9])
    self.temp_labels = tf.cast(tf.reshape(self.y, [-1, 81]), "int64")
    self.temp_logits = tf.reshape(self.output, [-1, 81, 9])
    self.probs = tf.nn.softmax(self.temp_logits)
    #self.temp_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.temp_labels, logits=self.probs)
    self.temp_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.temp_labels, logits=self.temp_logits)
    self.loss = tf.reduce_mean(self.temp_cross_entropy)
    self.temp_test = tf.argmax(self.temp_logits, 2)
    self.estimate = tf.argmax(self.temp_logits, 2)
    #self.answer = tf.argmax(self.temp_labels, 2)
    self.answer = self.temp_labels
    self.temp_correct = tf.cast(tf.equal(self.estimate, self.answer), "float")
    self.accuracy = tf.reduce_mean(self.temp_correct)
    self.max_probs = tf.reduce_max(self.probs, 2)
    self.masked_max_probs = self.max_probs * tf.cast(1 - tf.sign(self.x), "float")
    self.max_pos = tf.argmax(self.masked_max_probs, 1)
    self.max_est = tf.argmax(self.probs, 2)
    self.index_mask = tf.one_hot(self.max_pos, 81)
    self.est_val = tf.reduce_sum(tf.cast(self.max_est, "float") * self.index_mask,1)
    self.ans_val = tf.reduce_sum(tf.cast(self.answer, "float") * self.index_mask,1)
    self.max_correct = tf.cast(tf.equal(self.est_val, self.ans_val), "float")
    self.max_acc = tf.reduce_mean(self.max_correct)

    #foo = tf.constant([[1,2,3], [4,5,6]])
    #indexes = tf.constant([1,2]) #[1, 2])
    #self.est_val = self.max_est[:, indexes]
    #self.est_val = foo[:, indexes]
    #self.est_val = self.max_est # self.max_est[:, self.max_pos]
    
    #foo = tf.constant([[1,2,3], [4,5,6]])
    #foo[:, 1] # [2, 5]
    #indexes = tf.constant([1, 2])
    #foo[:, indexes] # [2, 6]


    #self.est_val = self.max_est[self.max_pos]
    #self.onehot = tf.cast(tf.one_hot(self.max_pos, 81), "float")
    #self.max_est_val = tf.matmul(self.max_est, self.onehot) #tf.gather(self.max_est, self.max_pos)
    #self.max_probs = tf.argmax(self.output, 2)
    #self.est_masked = tf.multiply(self.est_max, tf.cast(1 - tf.sign(self.x), "int64"))
    #self.est_max2 = tf.argmax(self.est_masked, 1)
    #self.est_pos = tf.cast(tf.divide(self.est_max2, 10), "int32")
    #self.est_val = self.est_masked % 10
    #self.ans_val = self.est_val #tf.gather(self.answer, self.est_pos)
    #self.max_correct = tf.cast(tf.equal(self.est_val, self.ans_val), "float")
    #self.max_acc = tf.reduce_mean(self.max_correct)

def sudoku_train(self, epoch_count=2, batch_size=10):
    if batch_size == 0:
        batch_size = self.dataset.train_count
        
    batch_count = int(self.dataset.train_count / batch_size)
    report_period = epoch_count / 10
    
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if random_fix: np.random.seed(1945)
    
    time1 = time2 = int(time.time())
    
    print("Model {} train report:".format(self.name))
    
    for epoch in range(epoch_count):
        costs = []
        accs = []
        max_accs = []
        for n in range(batch_count):
            train_X, train_Y = self.dataset.get_train_data(batch_size, n)
            _, labels, output, logits, entropy, cost, probs, estimate, answer, correct, \
                max_probs, masked_max_probs, max_pos, max_est, index_mask, est_val, ans_val, max_correct, max_acc, \
                acc = \
                sess.run([self.train_op, self.temp_labels, self.output, self.temp_logits, 
                          self.temp_cross_entropy, self.loss, self.probs, self.estimate, 
                          self.answer, self.temp_correct, 
                          self.max_probs, self.masked_max_probs, self.max_pos, self.max_est, self.index_mask, self.est_val, self.ans_val,
                          self.max_correct, self.max_acc, 
                          self.accuracy], \
                                    feed_dict={self.x:train_X, self.y:train_Y})
            #_, cost, acc = sess.run([self.train_op, self.loss, self.accuracy], \
            #                        feed_dict={self.x:train_X, self.y:train_Y})
            costs.append(cost)
            accs.append(acc)
            max_accs.append(max_acc)
        
        if (epoch+1) % report_period == 0:
            validate_X, validate_Y = self.dataset.get_test_data(True, 30)
            acc = sess.run(self.accuracy, feed_dict={self.x:validate_X, self.y:validate_Y})
            time3 = int(time.time())
            print("    Epoch {}: cost={:5.3f}, accuracy={:5.3f}, {:5.3f}/{:5.3f}, {:5.3f} ({}/{} secs)". \
                  format(epoch+1, np.mean(costs), acc, np.mean(accs), max_acc, np.mean(max_accs), time3-time2, time3-time1))
            time2 = time3

    """
    print("probs", np.shape(probs))
    print(probs[0])
    print("max_probs", np.shape(max_probs))
    print(max_probs[0])
    print("train_X", np.shape(train_X))
    print(train_X[0])
    print("masked_max_probs", np.shape(masked_max_probs))
    print(masked_max_probs[0])
    print("max_pos", max_pos)
    print("max_est", max_est)
    print("index_mask", index_mask)
    print("answer", answer)
    """
    print("est_val", est_val)
    print("ans_val", ans_val)
    print("max_acc", max_acc)
    
    #print("train_X", np.shape(train_X))
    #print(train_X[0])
    #print("train_Y", np.shape(train_Y))
    #print(train_Y[0])
    #print("labels", np.shape(labels))
    #print(labels[0])
    #print("output", np.shape(output))
    #print(output[0])
    #print("logits", np.shape(logits))
    #print(logits[0])
    #print("probs", np.shape(probs))
    #print(np.max(probs[0], axis=1))
    #print("est_masked", np.shape(est_masked))
    #print(est_masked[0])
    #print("est_max", np.shape(est_max))
    #print(est_max)
    #print("est_pos", np.shape(est_pos))
    #print(est_pos)
    #print("est_val", np.shape(est_val))
    #print(est_val)
    #print("ans_val", np.shape(ans_val))
    #print(ans_val)
    #print("max_correct", np.shape(max_correct))
    #print(max_correct)
    #print("max_acc", max_acc)
    """
    print("train_X", np.shape(train_X))
    print("x", np.shape(x))
    print(x[0])
    print("y", np.shape(y))
    print(y[0])
    print("labels", np.shape(labels))
    print(labels[0])
    print("output", np.shape(output))
    print(output[0])
    print("logits", np.shape(logits))
    print(logits[0])
    print("probs", np.shape(probs))
    print(probs[0])
    print("entropy", np.shape(entropy))
    print(entropy[0])
    print("temp_test", np.shape(temp_test))
    print(temp_test[0])
    print("estimate", np.shape(estimate))
    print(estimate[0])
    print("answer", np.shape(answer))
    print(answer[0])
    print("correct", np.shape(correct))
    print(correct[0])
    print("acc", np.shape(acc))
    print(acc)

    print("")
    """
    
    path = "params/{}.ckpt".format(self.name)
    self.saver.save(sess, path)
    sess.close()

SudokuCnnModel.__init__ = sudoku_init
SudokuCnnModel.train = sudoku_train
SudokuCnnModel.build_loss_accuracy = sudoku_build_loss_accuracy

In [116]:
def sudoku_build_placeholders(self):
    MultiLayerPerceptronModel.build_placeholders(self)
    #self.cx = tf.placeholder("float", [None, 27,9,1])
    
    """
    row, col, box = np.zeros([4,7]), np.zeros([4,7]), np.zeros([4,7])
    
    for m in range(4):
        for n in range(7):
            nn = n % 4
            row[m][n] = m * 4 + nn
            col[m][n] = nn * 4 + m
            box[m][n] = (m//2*4+m%2) * 2 + (nn//2*4+nn%2)
    
    #print('box', box)
    f2 = np.reshape([row, col, box], [-1]).astype(int)
                
    self.f2 = tf.constant(f2)
    """
    
    row, col, box = np.zeros([9,17]), np.zeros([9,17]), np.zeros([9,17])
    
    for m in range(9):
        for n in range(17):
            nn = n % 9
            row[m][n] = m * 9 + nn
            col[m][n] = nn * 9 + m
            box[m][n] = (m//3*9+m%3) * 3 + (nn//3*9+nn%3)
    
    #print('box', box)
    f3 = np.reshape([row, col, box], [-1]).astype(int)
                
    self.f3 = tf.constant(f3)

    rev_map = np.zeros([9,9,3])
    
    for r in range(9):
        for c in range(9):
            rev_map[r,c,0] = r * 9 + c
            rev_map[r,c,1] = 81 + c * 9 + r
            rev_map[r,c,2] = 162 + ((r//3)*3+c//3) * 9 + (r%3)*3+c%3

    #print("rev_map", rev_map);
                    
    frev3 = np.reshape(rev_map, [-1]).astype(int)
                
    self.ff3 = tf.constant(frev3)

    """
    #cube2, cube3 = np.zeros([2,4,4]), np.zeros([2,9,9])
    cube3 = np.zeros([2,4,4])

    for k in range(2):
        for m in range(4):
            for n in range(4):
                cube2[k][m][n] = (k+1)*100+(m+1)*10+(n+1)
    
    for k in range(2):
        for m in range(9):
            for n in range(9):
                cube3[k][m][n] = (k+1)*100+(m+1)*10+(n+1)
                
    self.x2 = tf.constant(np.reshape(cube2, [2,16]).astype(np.float32))
    self.x3 = tf.constant(np.reshape(cube3, [2,81]).astype(np.float32))

    #self.f2 = tf.constant([[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15]])
    
    self.g2 = tf.reshape(tf.gather(self.x2, self.f2, axis=1), [-1, 12, 4+3, 1])
    """

def sudoku_build_parameters(self, hidden_dim):
    MultiLayerPerceptronModel.build_parameters(self, [hidden_dim])
    #self.kernel_xw = tf.Variable(tf.ones([1, 9, 1, hidden_dim], tf.float32)) #tf.random_normal([1,9,1,hidden_dim], stddev=0.03))
    #self.kernel_xb = tf.Variable(tf.zeros([hidden_dim]))
    #w = np.zeros([1, 9, 1, hidden_dim], dtype="float32")
    #w[0,0,0,0] = 1
    #w[0,:,0,1] = 1
    self.kernel_xw = tf.Variable(tf.random_normal([1,9,1,hidden_dim], stddev=0.03))
    self.kernel_xb = tf.Variable(tf.zeros([hidden_dim]))

    #w2 = np.zeros([1, 1, 3*hidden_dim, 9], dtype="float32")
    #w2[0,0,0,0] = 1
    #w2[0,:,0,1] = 1
    self.kernel_xw2 = tf.Variable(tf.random_normal([1, 1, 3*hidden_dim, 9], stddev=0.03))
    self.kernel_xb2 = tf.Variable(tf.zeros([9]))

def sudoku_build_neuralnet(self, hidden_dim):
    MultiLayerPerceptronModel.build_neuralnet(self, [hidden_dim])
    
    self.g3 = tf.reshape(tf.gather(self.x, self.f3, axis=1), [-1, 27, 9+8, 1])

    #conv_linear = tf.nn.conv2d(self.g3, self.kernel_xw, strides=[1, 1, 9, 1], padding='SAME', data_format='NHWC')
    conv_linear = tf.nn.conv2d(self.g3, self.kernel_xw, strides=[1, 1, 1, 1], padding='VALID', data_format='NHWC')
    conv_with_b = tf.nn.bias_add(conv_linear, self.kernel_xb, data_format='NHWC')
    conv_out = tf.reshape(tf.nn.relu(conv_with_b), [-1,243,hidden_dim])
    self.conv2_in = tf.reshape(tf.gather(conv_out, self.ff3, axis=1), [-1, 81, 1, 3*hidden_dim])
    
    conv_linear2 = tf.nn.conv2d(self.conv2_in, self.kernel_xw2, strides=[1, 1, 1, 1], padding='VALID', data_format='NHWC')
    conv_with_b2 = tf.nn.bias_add(conv_linear2, self.kernel_xb2, data_format='NHWC')
    conv_out2 = tf.nn.relu(conv_with_b2)

    self.conv_out = tf.reshape(tf.nn.relu(conv_out2), [-1, 81, 9])
    self.output = self.conv_out

SudokuCnnModel.build_placeholders = sudoku_build_placeholders
SudokuCnnModel.build_parameters = sudoku_build_parameters
SudokuCnnModel.build_neuralnet = sudoku_build_neuralnet

In [117]:
def sudoku_dump(self, batch_size=100):
    sess = tf.Session()
    path = "params/{}.ckpt".format(self.name)
    self.saver.restore(sess, path)
    
    train_X, train_Y = self.dataset.get_train_data(batch_size, 0)
    
    loss, est_val, ans_val, max_acc, acc, conv2_in, conv_out = \
        sess.run([self.loss, self.est_val, self.ans_val, self.max_acc, self.accuracy, self.conv2_in, self.conv_out], \
                 feed_dict={self.x:train_X, self.y:train_Y})

    print("est_val", est_val)
    print("ans_val", ans_val)
    print("max_acc", max_acc)
    
    print('loss', loss)

    print('done')
    
    sess.close()

SudokuCnnModel.dump = sudoku_dump

In [111]:
sm1 = SudokuCnnModel("sudoku-cnn-1", sd, 2)
sm1.dump()

INFO:tensorflow:Restoring parameters from params/sudoku-cnn-1.ckpt


NotFoundError: Key sudoku-cnn-1_21/Variable_3 not found in checkpoint
	 [[Node: sudoku-cnn-1_21/save/RestoreV2_115 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_sudoku-cnn-1_21/save/Const_0_0, sudoku-cnn-1_21/save/RestoreV2_115/tensor_names, sudoku-cnn-1_21/save/RestoreV2_115/shape_and_slices)]]
	 [[Node: sudoku-cnn-1_21/save/RestoreV2_142/_211 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_534_sudoku-cnn-1_21/save/RestoreV2_142", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'sudoku-cnn-1_21/save/RestoreV2_115', defined at:
  File "/usr/lib/python3.4/runpy.py", line 170, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.4/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.4/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python3.4/dist-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python3.4/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-111-8eea6e4cb332>", line 1, in <module>
    sm1 = SudokuCnnModel("sudoku-cnn-1", sd, 2)
  File "<ipython-input-108-53353353135a>", line 5, in sudoku_init
    MultiLayerPerceptronModel.__init__(self, name, dataset, hidden_dims, learning_rate)
  File "<ipython-input-15-ee230eede2ff>", line 9, in mlp_init
    build_saver(self)
  File "<ipython-input-20-3c5344e83047>", line 7, in build_saver
    self.saver = tf.train.Saver(var_list=var_list)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/saver.py", line 1218, in __init__
    self.build()
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/saver.py", line 1227, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/saver.py", line 1263, in _build
    build_save=build_save, build_restore=build_restore)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/saver.py", line 751, in _build_internal
    restore_sequentially, reshape)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/saver.py", line 427, in _AddRestoreOps
    tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/training/saver.py", line 267, in restore_op
    [spec.tensor.dtype])[0])
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 1021, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

NotFoundError (see above for traceback): Key sudoku-cnn-1_21/Variable_3 not found in checkpoint
	 [[Node: sudoku-cnn-1_21/save/RestoreV2_115 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_sudoku-cnn-1_21/save/Const_0_0, sudoku-cnn-1_21/save/RestoreV2_115/tensor_names, sudoku-cnn-1_21/save/RestoreV2_115/shape_and_slices)]]
	 [[Node: sudoku-cnn-1_21/save/RestoreV2_142/_211 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_534_sudoku-cnn-1_21/save/RestoreV2_142", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]


In [119]:
sm1 = SudokuCnnModel("sudoku-cnn-1", sd, 5)
sm1.train(epoch_count=10, batch_size=100)

Model sudoku-cnn-1 train report:
    Epoch 1: cost=2.196, accuracy=0.118, 0.113/0.140, 0.106 (2/2 secs)
    Epoch 2: cost=2.194, accuracy=0.138, 0.123/0.070, 0.110 (3/5 secs)
    Epoch 3: cost=2.191, accuracy=0.169, 0.151/0.170, 0.141 (2/7 secs)
    Epoch 4: cost=2.187, accuracy=0.195, 0.178/0.140, 0.175 (2/9 secs)
    Epoch 5: cost=2.183, accuracy=0.177, 0.184/0.180, 0.196 (3/12 secs)
    Epoch 6: cost=2.176, accuracy=0.189, 0.183/0.190, 0.212 (2/14 secs)
    Epoch 7: cost=2.162, accuracy=0.189, 0.185/0.170, 0.225 (2/16 secs)
    Epoch 8: cost=2.143, accuracy=0.188, 0.187/0.220, 0.239 (3/19 secs)
    Epoch 9: cost=2.121, accuracy=0.180, 0.186/0.230, 0.250 (2/21 secs)
    Epoch 10: cost=2.094, accuracy=0.177, 0.180/0.250, 0.258 (2/23 secs)
est_val [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0

In [112]:
sm1 = SudokuCnnModel("sudoku-cnn-1", sd, 5)
sm1.train(epoch_count=10, batch_size=100)

Model sudoku-cnn-1 train report:


ValueError: Cannot feed value of shape (100, 81) for Tensor 'sudoku-cnn-1_22/Placeholder_1:0', which has shape '(?, 729)'

In [68]:
sm2 = SudokuCnnModel("sudoku-cnn-2", sd, 64, learning_rate=0.01)
sm2.train(epoch_count=100, batch_size=10)

Model sudoku-cnn-2 train report:
    Epoch 10: cost=0.963, accuracy=0.660, 0.653/0.300, 0.315 (183/183 secs)
    Epoch 20: cost=0.877, accuracy=0.655, 0.657/0.100, 0.335 (175/358 secs)
    Epoch 30: cost=0.863, accuracy=0.656, 0.660/0.100, 0.350 (177/535 secs)
    Epoch 40: cost=0.854, accuracy=0.658, 0.663/0.300, 0.362 (177/712 secs)
    Epoch 50: cost=0.842, accuracy=0.668, 0.669/0.300, 0.402 (177/889 secs)
    Epoch 60: cost=0.832, accuracy=0.671, 0.673/0.600, 0.440 (176/1065 secs)
    Epoch 70: cost=0.822, accuracy=0.676, 0.677/0.400, 0.466 (175/1240 secs)
    Epoch 80: cost=0.816, accuracy=0.679, 0.678/0.500, 0.473 (174/1414 secs)
    Epoch 90: cost=0.812, accuracy=0.681, 0.680/0.400, 0.501 (174/1588 secs)
    Epoch 100: cost=0.809, accuracy=0.682, 0.682/0.500, 0.532 (172/1760 secs)
est_val [ 0.  1.  1.  0.  1.  3.  1.  1.  1.  1.]
ans_val [ 1.  1.  1.  0.  6.  2.  1.  0.  1.  2.]
max_acc 0.5


In [75]:
sm2.dump()

INFO:tensorflow:Restoring parameters from params/sudoku-cnn-2.ckpt
est_val [ 7.  0.  1.  1.  1.  1.  1.  0.  0.  1.  0.  1.  0.  0.  1.  1.  1.  1.
  2.  1.  4.  1.  0.  0.  1.  1.  0.  1.  1.  1.  0.  1.  1.  1.  7.  1.
  1.  7.  7.  1.  4.  1.  0.  3.  0.  1.  1.  4.  0.  1.  1.  1.  1.  0.
  7.  1.  1.  4.  3.  4.  1.  3.  7.  1.  1.  1.  0.  1.  1.  0.  1.  4.
  1.  0.  1.  1.  1.  1.  1.  1.  4.  0.  1.  3.  1.  0.  1.  3.  7.  4.
  4.  0.  1.  0.  0.  0.  1.  1.  1.  4.]
ans_val [ 8.  0.  0.  1.  1.  1.  1.  1.  2.  1.  1.  0.  1.  1.  0.  1.  7.  1.
  0.  1.  4.  0.  1.  1.  2.  1.  0.  2.  1.  1.  1.  7.  1.  1.  6.  2.
  1.  8.  8.  1.  5.  1.  2.  3.  6.  0.  1.  4.  1.  2.  3.  2.  2.  0.
  7.  1.  3.  3.  4.  4.  1.  3.  7.  1.  1.  0.  1.  3.  0.  1.  1.  5.
  2.  1.  1.  0.  4.  0.  0.  2.  5.  0.  1.  2.  1.  1.  0.  3.  6.  4.
  5.  0.  1.  1.  0.  2.  1.  1.  2.  4.]
max_acc 0.44
loss 0.806386
done


In [45]:
#print(sm1)
sm2.test()
#m2.test()

INFO:tensorflow:Restoring parameters from params/sudoku-cnn-2.ckpt
Model sudoku-cnn-2 test report: accuracy = 0.441, (0 secs)



In [46]:
def sudoku_demonstrate(self, x, y, estimate, answer, probs):
    unknown_cnt = 0
    known_mismatch_cnt = 0
    
    for n in range(81):
        if x[n] != 0:
            est = np.argmax(probs[n])
            if est != x[n]: known_mismatch_cnt += 1
            probs[n,:] = 0
        else: unknown_cnt += 1
            
    print('unknown_cnt', unknown_cnt)
    print('known_mismatch_cnt', known_mismatch_cnt)
    idx = np.argmax(probs)
    print("shape(probs)", np.shape(probs))
    print("idx(argmax)", idx)
    """
    pos = int(idx / 10)
    val = idx % 10
    ans = np.argmax(y[pos*10:(pos+1)*10])
    #print(idx, pos, val)
    print(x[pos], val, ans, probs[pos][val])
    print(probs[pos])
    """
    
SudokuDataset.demonstrate = sudoku_demonstrate

In [49]:
sm2.demonstrate(1)

INFO:tensorflow:Restoring parameters from params/sudoku-cnn-2.ckpt
Model sudoku-cnn-2 Demonstration
unknown_cnt 27
known_mismatch_cnt 45
shape(probs) (81, 9)
idx(argmax) 378

