In [1]:
import scipy.io
import keras
import numpy as np
import random
import matplotlib.pyplot as plt
from skimage import color, transform, img_as_ubyte

train_mat = scipy.io.loadmat('/Users/yutakobayashi/Desktop/FA2020/NDD/train_32x32.mat')
test_mat = scipy.io.loadmat('/Users/yutakobayashi/Desktop/FA2020/NDD/test_32x32.mat')
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

In [2]:
train_x = train_mat.get('X')
test_x = test_mat.get('X')
data_x_svhn = np.concatenate([train_x, test_x], axis=3)
train_y = train_mat.get('y')
test_y = test_mat.get('y')
data_y_svhn = np.concatenate([train_y, test_y]).reshape(-1)
data_x_svhn = data_x_svhn.transpose((3,0,1,2))

In [3]:
data_x_mnist = np.concatenate([X_train, X_test])
data_x_mnist = data_x_mnist.reshape((data_x_mnist.shape[0], data_x_mnist.shape[1] * data_x_mnist.shape[2]))
data_y_mnist = np.concatenate([y_train, y_test]).reshape(-1)

In [4]:
resized_svhn_x = []
for i in range(10000):
    a = color.rgb2gray(data_x_svhn[i])
    resized_svhn_x.append(img_as_ubyte(transform.resize(a, (28,28))))

In [5]:
x_svhn_processed = np.stack(resized_svhn_x)
#print(x_svhn_processed.shape)
x_svhn_processed = x_svhn_processed.reshape((x_svhn_processed.shape[0], x_svhn_processed.shape[1] * x_svhn_processed.shape[2]))

In [6]:
data_y_svhn = data_y_svhn[:10000]

In [7]:
from proglearn.progressive_learner import ProgressiveLearner
from proglearn.deciders import SimpleArgmaxAverage
from proglearn.transformers import TreeClassificationTransformer, NeuralClassificationTransformer
from proglearn.voters import TreeClassificationVoter, KNNClassificationVoter

from joblib import Parallel, delayed
import time
from itertools import product
import pandas as pd

In [8]:
def cross_val_data(data_x_mnist, data_y_mnist, data_x_svhn, data_y_svhn, total_cls=10):
    x = data_x_mnist.copy()
    y = data_y_mnist.copy()
    x2 = data_x_svhn.copy()
    y2 = data_y_svhn.copy()
    idx = [np.where(y == u)[0] for u in np.unique(y)]
    idx2 = [np.where(y2 == u)[0] for u in np.unique(y2)]

    for i in range(total_cls):
        indx = idx[i]#np.roll(idx[i],(cv-1)*100)
        indx2 = idx2[i]
        random.shuffle(indx)
        random.shuffle(indx2)

        if i==0:
            train_x1 = x[indx[0:500],:]
            train_x2 = x2[indx2[0:500],:]
            train_y1 = y[indx[0:500]]
            train_y2 = y2[indx2[0:500]]

            test_x = x[indx[500:505],:]
            test_y = y[indx[500:505]]
        else:
            train_x1 = np.concatenate((train_x1, x[indx[0:500],:]), axis=0)
            train_x2 = np.concatenate((train_x2, x2[indx2[0:500],:]), axis=0)
            train_y1 = np.concatenate((train_y1, y[indx[0:500]]), axis=0)
            train_y2 = np.concatenate((train_y2, y2[indx2[0:500]]), axis=0)

            test_x = np.concatenate((test_x, x[indx[500:505],:]), axis=0)
            test_y = np.concatenate((test_y, y[indx[500:505]]), axis=0)


    return train_x1, train_y1, train_x2, train_y2, test_x, test_y

In [9]:
def LF_experiment(train_x1, train_y1, train_x2, train_y2, test_x, test_y, ntrees, acorn=None):
  
    default_transformer_class = TreeClassificationTransformer
    default_transformer_kwargs = {"kwargs" : {"max_depth" : 30}}

    default_voter_class = TreeClassificationVoter
    default_voter_kwargs = {}

    default_decider_class = SimpleArgmaxAverage
    progressive_learner = ProgressiveLearner(default_transformer_class = default_transformer_class,
                                         default_transformer_kwargs = default_transformer_kwargs,
                                         default_voter_class = default_voter_class,
                                         default_voter_kwargs = default_voter_kwargs,
                                         default_decider_class = default_decider_class)

    errors = np.zeros(2)
    
    if acorn is not None:
        np.random.seed(acorn)

    progressive_learner.add_task(
        X = train_x1,
        y = train_y1,
        decider_kwargs = {"classes" : np.unique(train_y1)},
        voter_kwargs = {"classes" : np.unique(train_y1)}
    )

    progressive_learner.add_transformer(
        X = train_x2,
        y = train_y2,
        #decider_kwargs = {"classes" : np.unique(train_y2)},
        #voter_kwargs = {"classes" : np.unique(train_y2)},
        backward_task_ids = [0]
    )

    llf_single_task=progressive_learner.predict(test_x, task_id=0, transformer_ids=[0])
    llf_task1=progressive_learner.predict(test_x, task_id=0)
    
    errors[0] = errors[0]+(1 - np.mean(llf_single_task == test_y))
    errors[1] = errors[1]+(1 - np.mean(llf_task1 == test_y))

    print("Errors: {}".format(errors))
    return errors

In [10]:
def run_parallel_exp(data_x, data_y, data_x2, data_y2, n_trees):
    train_x1, train_y1, train_x2, train_y2, test_x, test_y = cross_val_data(data_x, data_y, data_x2, data_y2)
    errors = LF_experiment(train_x1, train_y1, train_x2, train_y2, test_x, test_y, n_trees, acorn=12345)
    
    return errors

In [11]:
reps = range(1)
n_trees=20 # Number of trees in UF

for i in reps:
    errors = run_parallel_exp(x_svhn_processed, data_y_svhn, data_x_mnist, data_y_mnist, n_trees)

(50, 784)
(50, 784)
(50, 784)
Errors: [0.8 0.8]
