Import the dependencies and set up some auxiliary functions for scoring and timing.

In [1]:
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import numpy as np
import timeit
from collections import OrderedDict
from pprint import pformat
import tensorflow as tf


def compute_score(acc, min_thres, max_thres):
    """Map an accuracy onto a 0-100 score by linear interpolation.

    Args:
        acc: Measured accuracy.
        min_thres: Accuracy at or below which the score is 0.0.
        max_thres: Accuracy at or above which the score is 100.0.

    Returns:
        A float in [0.0, 100.0]. Accuracies strictly between the two
        thresholds are scaled linearly between them.
    """
    # The lower bound is checked first, so it wins if both conditions hold.
    if acc <= min_thres:
        return 0.0
    if acc >= max_thres:
        return 100.0

    span = max_thres - min_thres
    return float(acc - min_thres) / span * 100


def run(algorithm, dataset_name, x_train, y_train, x_valid, y_valid, x_test, y_test):
    """Time ``algorithm`` on one dataset and measure its test accuracy.

    NumPy's global RNG is seeded with 0 right before the algorithm is called
    and re-seeded without a fixed value afterwards, even when the algorithm
    raises, so a failed run does not leave the rest of the session on a
    fixed seed.

    Args:
        algorithm: Callable invoked as
            ``algorithm(dataset_name, x_train, y_train, x_valid, y_valid, x_test)``
            that returns one predicted label per test sample.
        dataset_name: Name forwarded to ``algorithm``.
        x_train, y_train: Training samples and labels, forwarded unchanged.
        x_valid, y_valid: Validation samples and labels, forwarded unchanged.
        x_test: Test samples, forwarded unchanged.
        y_test: True test labels; flattened before comparison.

    Returns:
        Tuple ``(correct_predict, accuracy, run_time)``: the number of
        matching labels, the fraction of matching labels, and the elapsed
        wall-clock seconds (which include the RNG re-seed).

    Raises:
        ValueError: If the number of predictions differs from the number of
            test labels.
    """
    start = timeit.default_timer()
    np.random.seed(0)
    try:
        predicted_y_test = algorithm(dataset_name, x_train, y_train, x_valid, y_valid, x_test)
    finally:
        # Always undo the fixed seed, including on failure.
        np.random.seed()
    stop = timeit.default_timer()
    run_time = stop - start

    y_test = np.asarray(y_test).flatten()
    predicted_y_test = np.asarray(predicted_y_test).flatten()

    # Without this check a single prediction would be broadcast against every
    # label and produce a meaningless accuracy instead of an error.
    if predicted_y_test.shape != y_test.shape:
        raise ValueError(
            "algorithm returned %d predictions for %d test labels"
            % (predicted_y_test.size, y_test.size))

    correct_predict = (y_test == predicted_y_test).astype(np.int32).sum()
    accuracy = float(correct_predict) / len(y_test)

    return (correct_predict, accuracy, run_time)

Convenience class for iterating through the training set in randomly shuffled mini-batches, plus a `one_hot` helper that converts a NumPy array of integer labels into its one-hot encoded form.

In [2]:
class DatasetIterator:
    """Serve shuffled mini-batches of paired samples ``x`` and labels ``y``.

    A random permutation of the sample indices is drawn on construction and
    again every time the batch pointer reaches or passes the end of the data.
    The last batch before a reshuffle may hold fewer than ``batch_size``
    samples.
    """

    def __init__(self, x, y, batch_size):
        """Store the data and draw the first permutation.

        ``x`` and ``y`` must have the same length and support indexing with
        an array of indices.
        """
        assert len(x) == len(y)
        self.x, self.y = x, y
        self.b_sz = batch_size  # samples requested per batch
        self.d_sz = len(x)      # total number of samples
        self.b_pt = 0           # position of the next batch within idx
        self.idx = None
        self.randomize()

    def randomize(self):
        """Rewind the batch pointer and draw a fresh index permutation."""
        self.b_pt = 0
        self.idx = np.random.permutation(self.d_sz)

    def next_batch(self):
        """Return the next ``(x, y)`` batch and advance the pointer."""
        lo = self.b_pt
        hi = lo + self.b_sz
        picked = self.idx[lo:hi]
        batch = (self.x[picked], self.y[picked])

        self.b_pt = hi
        # Start a new shuffled pass once the current one is exhausted.
        if hi >= self.d_sz:
            self.randomize()

        return batch


def one_hot(a, num_classes):
    """One-hot encode integer class labels.

    Args:
        a: Integer label(s) in ``[0, num_classes)``; a scalar, a sequence, or
            an array of any shape (it is flattened before encoding).
        num_classes: Length of each one-hot vector.

    Returns:
        A float array of shape ``(a.size, num_classes)``. The batch axis is
        kept even for a single-sample batch, so the rank does not depend on
        how many labels are passed. A 0-d (scalar) input is the only case
        that returns a 1-D vector of length ``num_classes``.
    """
    labels = np.asarray(a)
    # Row i of the identity matrix is the one-hot vector for class i.
    encoded = np.eye(num_classes)[labels.reshape(-1)]
    # Only a true scalar collapses to a vector; a blanket squeeze would also
    # drop the batch axis whenever a batch happens to contain one sample.
    return encoded[0] if labels.ndim == 0 else encoded

TODO: Implement Logistic Regression here

In [3]:
def logistic_regression(dataset_name, x_train, y_train, x_valid, y_valid, x_test):
    """Placeholder classifier that returns uniformly random test labels.

    The real logistic regression (and its per-dataset hyper-parameter
    tuning) is still to be implemented; the dataset branches below are empty
    hooks for it.

    Args:
        dataset_name: Dataset identifier; the branches below check for
            "MNIST" and "CIFAR10".
        x_train, y_train: Training samples and integer labels. Only
            ``y_train`` is used, to derive the number of classes.
        x_valid, y_valid: Validation split; currently unused.
        x_test: Test samples; only its length is used.

    Returns:
        A 1-D integer array with ``len(x_test)`` labels drawn uniformly from
        ``[0, max(y_train)]``.
    """
    if dataset_name == "MNIST":
        pass
    elif dataset_name == "CIFAR10":
        pass
    # This is just a random function that return random label
    # TODO: implement logistic regression hyper-parameter tuning here
    # np.max reduces over every axis, so column-vector or wider 2-D labels
    # give a plain scalar class count instead of an array (or an error).
    num_classes = int(np.max(y_train)) + 1
    return np.random.randint(num_classes, size=len(x_test))

Main loop. Run time and total score will be shown below.

In [4]:
def run_on_dataset(dataset_name):
    """Load one dataset, run ``logistic_regression`` on it and score it.

    The last 10000 training samples are split off as the validation set;
    the rest are used for training.

    Args:
        dataset_name: Either "MNIST" or "CIFAR10".

    Returns:
        Tuple ``(result, score)`` where ``result`` is an ``OrderedDict`` with
        the keys ``correct_predict``, ``accuracy``, ``score`` and
        ``run_time``, and ``score`` is the 0-100 score for this dataset.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset.
    """
    if dataset_name == "MNIST":
        min_thres = 0.82
        max_thres = 0.92
        mnist = read_data_sets('data', one_hot=False)
        # Use the public accessors consistently for both splits.
        x_train, y_train = (mnist.train.images, mnist.train.labels)
        x_test, y_test = (mnist.test.images, mnist.test.labels)
    elif dataset_name == "CIFAR10":
        min_thres = 0.28
        max_thres = 0.38
        cifar10 = tf.keras.datasets.cifar10
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    else:
        # Fail here with a clear message rather than with an unbound-variable
        # error further down.
        raise ValueError("Unsupported dataset_name: %r" % (dataset_name,))

    # Hold out the tail of the training data for validation.
    x_valid, y_valid = x_train[-10000:], y_train[-10000:]
    x_train, y_train = x_train[:-10000], y_train[:-10000]

    correct_predict, accuracy, run_time = run(logistic_regression, dataset_name,
                                              x_train, y_train, x_valid, y_valid, x_test, y_test)
    score = compute_score(accuracy, min_thres, max_thres)
    result = OrderedDict(correct_predict=correct_predict,
                         accuracy=accuracy, score=score,
                         run_time=run_time)
    return result, score


def main():
    """Run every dataset, combine the scores and report the results.

    Each dataset's score is multiplied by its weight and the products are
    summed into ``total_score``. The pretty-printed results are written to
    ``result.txt`` in the current working directory and also printed.
    """
    # Each weight sits next to its dataset name so the two cannot get out of
    # step when a dataset is added or removed.
    score_weights = OrderedDict([("MNIST", 0.5), ("CIFAR10", 0.5)])

    result_all = OrderedDict()
    total_score = 0.0
    for dataset_name, weight in score_weights.items():
        result_all[dataset_name], this_score = run_on_dataset(dataset_name)
        total_score += this_score * weight
    result_all['total_score'] = float(total_score)

    report = pformat(result_all, indent=4)
    with open('result.txt', 'w') as f:
        # The report is one string, so write() is the right call;
        # writelines() would iterate over it character by character.
        f.write(report)
    print("\nResult:\n", report)


# Run the benchmark on all datasets when this cell is executed.
main()

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.

Result:
 OrderedDict([   (   'MNIST',
                    OrderedDict([   ('correct_predict', 1009),
                                    ('accuracy', 0.1009),
                                    ('score', 0.0),
                                    ('run_time', 0.004746139980852604)])),
                (   'CIFAR10',
                    OrderedDict([   ('correct_predict', 1026),
     