In [None]:
from __future__ import division, print_function
import sys
if '..' not in sys.path:
    sys.path.insert(0, '..')

import theano
import theano.tensor as T
import lasagne

import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from sklearn.metrics import confusion_matrix

In [None]:
%matplotlib inline

# Introduction

The objective here is to unit test every function and building block of the EMANN method.


# Load datasets

- The datasets are loaded/built.
- The batch size is defined.
- The first half of the data name (the source part) is defined.

- [1. Loading of datasets](#Load-datasets)
- [2. Transformation of datasets](#Transform-datasets)
- [3. Clusters init](#Clusters-Init)
- [4. Optimal transport init](#Optimal-Transport-Init)
- [5. Align](#Align)
- [6. Neural Network](#Neural-Network)
- [7. Compiler](#Compiler)
- [8. NN class](#Neural-Network-class)

## Datasets Imports 

In [None]:
from datasets.toys import make_clouds, make_circles, make_X, make_moons
from datasets.utils import make_dataset


# Transform datasets

- The transformed datasets are built.
- The last part of the data name (the target part) is defined.


- [1. Loading of datasets](#Load-datasets)
- [2. Transformation of datasets](#Transform-datasets)
- [3. Clusters init](#Clusters-Init)
- [4. Optimal transport init](#Optimal-Transport-Init)
- [5. Align](#Align)
- [6. Neural Network](#Neural-Network)
- [7. Compiler](#Compiler)
- [8. NN class](#Neural-Network-class)

## Transformation Imports

In [None]:
from datasets.utils import make_domain_dataset, make_corrector_dataset
import datasets.transform as transform

# Clusters Init
Here we initialize the clusters on the *source* and on the *target* domains.

- [1. Loading of datasets](#Load-datasets)
- [2. Transformation of datasets](#Transform-datasets)
- [3. Clusters init](#Clusters-Init)
- [4. Optimal transport init](#Optimal-Transport-Init)
- [5. Align](#Align)
- [6. Neural Network](#Neural-Network)
- [7. Compiler](#Compiler)
- [8. NN class](#Neural-Network-class)

In [None]:
X_src, y_src = make_clouds(n_samples=50, n_classes=6)
X_tgt, y_tgt = make_moons(n_samples=500)
data_name = "Clouds-to-moons"

In [None]:
from sklearn.cluster import KMeans

k_src = 10
k_tgt = 11
k_means_src = KMeans(n_clusters=k_src).fit(X_src)
k_means_tgt = KMeans(n_clusters=k_tgt).fit(X_tgt)


In [None]:
def mass(k_means):
    """
    Compute the relative mass (fraction of the samples) of every cluster.

    Params
    ------
        k_means: (sklearn.cluster.KMeans instance) a fitted estimator, or any
            object exposing an integer `labels_` array and, optionally,
            an `n_clusters` attribute.
    Return
    ------
        w: (numpy.array [n_clusters]) the mass of each cluster: w[i] is the
            fraction of the samples assigned to cluster i, so w sums to 1.
            A cluster without any sample gets a mass of 0.
    """
    labels = np.asarray(k_means.labels_)
    # np.unique(..., return_counts=True) only reports the labels that occur,
    # so an empty cluster would shorten the result and shift every following
    # entry. np.bincount keeps one slot per cluster index instead.
    n_clusters = getattr(k_means, 'n_clusters', None)
    if n_clusters:
        counts = np.bincount(labels, minlength=n_clusters)
    else:
        counts = np.bincount(labels)
    return counts / np.sum(counts)
    

In [None]:
# Relative mass of every source cluster (fractions of the source samples).
w_src = mass(k_means_src)
# w_src = np.abs(np.sin(-w_src-np.arange(w_src.shape[0])))
# w_src /= np.sum(w_src)

# The empirical target masses are overwritten by an arbitrary positive profile
# (|sin| of the mass shifted by 0.6 * cluster index), then renormalised so they
# sum to 1 again.
# NOTE(review): presumably done to get target weights that differ from the
# empirical ones when testing the transport -- confirm.
w_tgt = mass(k_means_tgt)
w_tgt = np.abs(np.sin(-w_tgt-0.6*np.arange(w_tgt.shape[0])))
# w_tgt = np.exp(-w_tgt-np.arange(w_tgt.shape[0]))
w_tgt /= np.sum(w_tgt)
# Random cost in [0, 1) for every (source cluster, target cluster) pair; it is
# not derived from the cluster centers and changes at every run (no seed set).
cost_mat = np.random.uniform(0,1, size=(w_src.shape[0], w_tgt.shape[0]))


In [None]:
import visual
visual.mat(cost_mat)


# Optimal Transport Init

- [1. Loading of datasets](#Load-datasets)
- [2. Transformation of datasets](#Transform-datasets)
- [3. Clusters init](#Clusters-Init)
- [4. Optimal transport init](#Optimal-Transport-Init)
- [5. Align](#Align)
- [6. Neural Network](#Neural-Network)
- [7. Compiler](#Compiler)
- [8. NN class](#Neural-Network-class)

In [None]:
from opt_transport import opt_transp_sup, computeTransportSinkhorn, computeTransportSinkhornLabelsLpL1

In [None]:
# transp = opt_transp_sup(k_means_src.cluster_centers_, k_means_tgt.cluster_centers_)
transp = computeTransportSinkhorn(w_src, w_tgt, cost_mat, reg=10)

In [None]:
visual.mat(transp)

# Align

- [1. Loading of datasets](#Load-datasets)
- [2. Transformation of datasets](#Transform-datasets)
- [3. Clusters init](#Clusters-Init)
- [4. Optimal transport init](#Optimal-Transport-Init)
- [5. Align](#Align)
- [6. Neural Network](#Neural-Network)
- [7. Compiler](#Compiler)
- [8. NN class](#Neural-Network-class)

In [None]:
transp.shape, X_src.shape, k_means_src.labels_.shape

In [None]:
from align_learn.preprocess import align

Sanity check: the cluster distribution of the aligned data should be the same as the target distribution.

In [None]:
align_idx, cluster_T = align(transp, k_means_src.labels_, k_means_tgt.labels_)
uniq = np.unique(cluster_T)
# Count the aligned samples per target cluster. np.bincount keeps a zero for a
# cluster that received no sample, so the 'mapping' curve stays index-aligned
# with w_tgt (np.unique counts would silently skip such a cluster).
# assumes cluster_T is a 1-D array of non-negative integer cluster indices -- TODO confirm
count = np.bincount(cluster_T, minlength=w_tgt.shape[0])
# plt.plot(np.sum(transp,0), label='transp.sum(0)')
plt.plot(w_tgt, label='w_tgt')
# plt.plot(np.sum(transp,1), label='transp.sum(1)')
plt.plot(w_src, label='w_src')
plt.plot(count/cluster_T.shape[0], label='mapping')
plt.xlabel('cluster index')
plt.ylabel('mass')
plt.legend()
plt.show()

In [None]:
X_S, y_S = X_src, y_src
X_T, y_T = X_tgt[align_idx], y_tgt[align_idx]

In [None]:
visual.target_2D(X_T, y_T)
visual.target_2D(X_tgt, y_tgt)
plt.show()

In [None]:
X_T.shape, X_S.shape

Build the probabilities to be predicted.

In [None]:
def train_dataset(X_S, X_T, k_means_src, k_means_tgt, transp, align_idx, cluster_T):
    """
    Stack the source and aligned target samples with the targets to be predicted.

    Every target row is the concatenation of a block over the source clusters
    followed by a block over the target clusters.

    Params
    ------
        X_S: (numpy.array [n_src, ...]) the source samples
        X_T: (numpy.array [n_tgt, ...]) the aligned target samples
        k_means_src: clustering of the source samples (`labels_` and `n_clusters` are read)
        k_means_tgt: clustering of the target samples (only `n_clusters` is read)
        transp: (numpy.array [k_src, k_tgt]) the transport matrix between clusters
        align_idx: not used by this function
        cluster_T: (integer array [n_tgt]) target cluster index of each row of X_T
    Return
    ------
        X: (numpy.array) X_S stacked on top of X_T
        Y: (numpy.array [n_src + n_tgt, k_src + k_tgt]) the targets to be predicted
    """
    def one_hot(n_rows, n_cols, hot_cols):
        # One row per sample with a single 1. in the column of its cluster.
        encoded = np.zeros((n_rows, n_cols))
        encoded[np.arange(n_rows), hot_cols] = 1.
        return encoded

    src_clusters = k_means_src.labels_

    # Source samples: one-hot of their own source cluster, followed by the
    # transport row of that cluster (taken as is, not renormalised).
    targets_src = np.hstack([
        one_hot(X_S.shape[0], k_means_src.n_clusters, src_clusters),
        transp[src_clusters],
    ])

    # Aligned target samples: transport column of their target cluster,
    # followed by the one-hot of that target cluster.
    onehot_tgt = one_hot(X_T.shape[0], k_means_tgt.n_clusters, cluster_T)
    targets_tgt = np.hstack([transp[:, cluster_T].T, onehot_tgt])

    stacked_targets = np.vstack([targets_src, targets_tgt])
    stacked_inputs = np.vstack([X_S, X_T])
    return stacked_inputs, stacked_targets

In [None]:
X, Y = train_dataset(X_S, X_T, k_means_src, k_means_tgt, transp, align_idx, cluster_T)

In [None]:
print(Y[0])
print(Y[350])

In [None]:
X.shape, Y.shape

# Neural Network


- [1. Loading of datasets](#Load-datasets)
- [2. Transformation of datasets](#Transform-datasets)
- [3. Clusters init](#Clusters-Init)
- [4. Optimal transport init](#Optimal-Transport-Init)
- [5. Align](#Align)
- [6. Neural Network](#Neural-Network)
- [7. Compiler](#Compiler)
- [8. NN class](#Neural-Network-class)

In [None]:
from nn.rgl import ReverseGradientLayer
from nn.compilers import crossentropy_sgd_mom, squared_error_sgd_mom, adversarial
from nn.training import Trainner, training


In [None]:
from logs import log_fname, new_logger, empty_logger

hp_lambda = 0.
batchsize = 20

# Learning rates and momentums
label_rate = 0.1
label_mom = 0.9

domain_rate = 0.1
domain_mom = 0.9

# Get a logger
logger = new_logger()


In [None]:
# Shape of the training inputs X; axis 0 indexes the samples.
_shape = np.shape(X)
n_dim = len(_shape)
# Number of values per sample (product of every axis but the first).
n_features = np.prod(_shape[1:])

# Same shape with the sample axis replaced by the mini-batch size.
shape = (batchsize,) + _shape[1:]
# Symbolic matrix that will hold the targets.
target_var = T.matrix('targets')

# Logs
logger.info('Building the input and output variables for : {}'.format(data_name))
logger.info('Input data expected shape : {}'.format(shape))


In [None]:
# Build the layers
input_layer = lasagne.layers.InputLayer(shape=shape)

dense_1 = lasagne.layers.DenseLayer(
                input_layer,
                num_units=25,
                nonlinearity=lasagne.nonlinearities.rectify,
                # W=lasagne.init.Uniform(range=0.01, std=None, mean=0.0),
                )


In [None]:
def proba_chain(last_layer, k_src, k_tgt):
    """
    Plug two softmax heads on top of `last_layer` and concatenate them.

    Params
    ------
        last_layer: the layer feeding both heads
        k_src: number of units of the first (source clusters) softmax head
        k_tgt: number of units of the second (target clusters) softmax head
    Return
    ------
        concat_layer: a ConcatLayer joining both heads along axis 1, i.e.
            k_src + k_tgt outputs per sample, the source head first.
    """
    # The source head is built first, then the target head, each one being its
    # own dense softmax layer on the shared `last_layer`.
    heads = [
        lasagne.layers.DenseLayer(
            last_layer,
            num_units=n_units,
            nonlinearity=lasagne.nonlinearities.softmax,
        )
        for n_units in (k_src, k_tgt)
    ]
    return lasagne.layers.ConcatLayer(heads, axis=1)


# Compiler
Append the last part and compile

- [1. Loading of datasets](#Load-datasets)
- [2. Transformation of datasets](#Transform-datasets)
- [3. Clusters init](#Clusters-Init)
- [4. Optimal transport init](#Optimal-Transport-Init)
- [5. Align](#Align)
- [6. Neural Network](#Neural-Network)
- [7. Compiler](#Compiler)
- [8. NN class](#Neural-Network-class)

In [None]:
def compiler(output_layer, lr=1, mom=.9, target_var=None,
             regularization=None, reg_param=0.1):
    """
    Stochastic Gradient Descent compiler with optional momentum.

    info: it uses the categorical_crossentropy. Should be given to a softmax layer.

    Params
    ------
        output_layer: the output layer from which the loss and updates will be computed
        lr: (default=1) learning rate.
        mom: (default=0.9) momentum.
        target_var: (default=None) the symbolic variable holding the targets.
            When None, a new `T.ivector('target')` is created for this call.
            Pass another variable (e.g. a `T.matrix`) to train against a
            different kind of targets.
        regularization: (default=None) the regularization, can be 'l1' or 'l2' or None.
            Any other value leaves the loss without a regularization term.
        reg_param: (default=0.1) the regularization hyper parameter:
                        loss = loss + reg_param * regularization

    Return
    ------
        A dictionary with :
            'train': function(inputs, targets) doing one update step; returns [loss],
                the regularization term included
            'train_description': ('loss',)
            'predict': function(inputs); returns [prediction], the deterministic
                output of the network
            'predict_description': ('prediction',)
            'valid': function(inputs, targets); returns [loss] computed on a
                deterministic forward pass, without the regularization term
            'valid_description': ('loss',)
            'output': function(inputs); returns [prediction], same expression as 'predict'
            'output_description': ('prediction',)

    Example:
    --------
    >>> funs = compiler(output_layer, lr=0.01, mom=0.1)
    >>> loss, = funs['train'](X, y)

    """
    if target_var is None:
        # Created per call: a default built in the signature would be evaluated
        # once, at definition time, and shared by every compiled network.
        target_var = T.ivector('target')

    input_var = lasagne.layers.get_all_layers(output_layer)[0].input_var

    def mean_crossentropy(prediction):
        # Scalar objective: mean categorical cross-entropy over the mini-batch.
        return T.mean(lasagne.objectives.categorical_crossentropy(prediction, target_var))

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    pred = lasagne.layers.get_output(output_layer)
    loss = mean_crossentropy(pred)
    # Add a regularization term to the loss if needed
    if regularization == 'l1':
        reg = lasagne.regularization.regularize_network_params(output_layer, lasagne.regularization.l1)
        loss += reg_param*reg
    elif regularization == 'l2':
        reg = lasagne.regularization.regularize_network_params(output_layer, lasagne.regularization.l2)
        loss += reg_param*reg
    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent and add a momentum to it.
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = lasagne.updates.sgd(loss, params, learning_rate=lr)
    updates = lasagne.updates.apply_momentum(updates, params, momentum=mom)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_function = theano.function([input_var, target_var], [loss,],
        updates=updates, allow_input_downcast=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout and noise layers. No regularization term is added.
    pred = lasagne.layers.get_output(output_layer, deterministic=True)
    loss = mean_crossentropy(pred)
    # Compile a second function computing the validation loss:
    valid_function = theano.function([input_var, target_var], [loss,], allow_input_downcast=True)
    # Compile a function computing the deterministic prediction of the network:
    predict_function = theano.function([input_var], [pred], allow_input_downcast=True)
    # Compile an output function (same expression as the prediction)
    output_function = theano.function([input_var], [pred], allow_input_downcast=True)

    return {
            'train': train_function,
            'train_description': ('loss',),
            'predict': predict_function,
            'predict_description': ('prediction',),
            'valid': valid_function,
            'valid_description': ('loss',),
            'output': output_function,
            'output_description': ('prediction',),
           }


In [None]:
predict_layer = proba_chain(dense_1, k_src, k_tgt)
funs = compiler(predict_layer, lr=label_rate, mom=label_mom, target_var=T.matrix('targets'))
proba_nn = Trainner(funs)

# Training

In [None]:
def do_n_epoch(trainers, datas, n_epoch=1, epoch_counter=0, final_stats=None):
    """
    Train for `n_epoch` more epochs and append the new statistics to the old ones.

    Params
    ------
        trainers: the trainers, forwarded to `training`
        datas: the datasets, forwarded to `training`
        n_epoch: (default=1) number of additional epochs to run
        epoch_counter: (default=0) number of epochs already done. Only used in
            the log message: the updated total is NOT returned, so the caller
            has to keep its own counter up to date.
        final_stats: (default=None) dictionary of the statistics accumulated so
            far. It is left untouched; None is treated as an empty dictionary.

    Return
    ------
        A new dictionary holding, for every key, the previous values followed
        by the values of this session. Keys found only in `final_stats` are
        kept as they are and keys found only in the new statistics are added.
    """
    epoch_counter += n_epoch
    # Now do the trainning part !
    logger.info('Trainning the neural network for {} additional epochs ({} total)'.format(n_epoch, epoch_counter))
    stats = training(trainers, datas, num_epochs=n_epoch, logger=None)
    # Start from a copy of the history so that a statistic missing from this
    # session is not dropped, and so that the caller's dictionary is not mutated.
    merged = dict(final_stats) if final_stats else {}
    for key, values in stats.items():
        merged[key] = merged[key] + values if key in merged else values
    return merged

In [None]:
trainers = [proba_nn,]
datas = [make_dataset(X, Y, batchsize=batchsize),]

In [None]:
epoch_counter = 0
final_stats = {}

In [None]:
final_stats = do_n_epoch(trainers, datas, n_epoch=10, epoch_counter=epoch_counter, final_stats=final_stats)

# Visualization of the learning procedure

In [None]:
# ================
# Learning curve
# ================
fig, ax = visual.learning_curve(final_stats, regex='loss')
#     SAVE
# fig.tight_layout()
# fig.savefig(fig_title+'-Learning_curve.png',bbox_inches='tight')
# plt.show() instead of fig.show(): with the inline (non-GUI) backend,
# Figure.show() cannot display anything and only emits a warning.
plt.show()
# visual.learning_curve(final_stats, regex='domain.* acc');


# Neural Network class

- [1. Loading of datasets](#Load-datasets)
- [2. Transformation of datasets](#Transform-datasets)
- [3. Clusters init](#Clusters-Init)
- [4. Optimal transport init](#Optimal-Transport-Init)
- [5. Align](#Align)
- [6. Neural Network](#Neural-Network)
- [7. Compiler](#Compiler)
- [8. NN class](#Neural-Network-class)

In [None]:
from nn.helper import CNN

In [None]:
nn = CNN()
nn['out'] = 'layer'
def foo(string, **kwargs):
    """
    Dummy compiler used to check how CNN.compile forwards its arguments.

    Params
    ------
        string: (str) the first positional argument received
        kwargs: the keyword arguments received; only their names are reported
    Return
    ------
        A dictionary {'bar': bar} where bar() prints
        'compile(<string>, <kwarg name>, ...)'.
    """
    def bar():
        # list(...) is needed on Python 3, where dict.keys() returns a view
        # that cannot be concatenated to a list (TypeError).
        print('compile('+', '.join([string]+list(kwargs.keys()))+')')
    return {'bar': bar}
nn.compile('out', foo, kwargs1=8, kwargs2='bla')
nn['out'].bar()
print('OK')
nn['out']['bar']()
print('OK')
nn.parts.out.bar()
print('OK')

## Training session

In [None]:
n_classes = 3
n_samples = 1000
test_dataset = make_dataset(*make_clouds(n_samples=n_samples, n_classes=n_classes), batchsize=60)
# test_dataset = make_dataset(*make_moons(n_samples=n_samples), batchsize=60)
test_dataset.keys()

In [None]:
# Get general information :
# =========================
X = test_dataset.X_train
_shape = np.shape(X)
n_dim = len(_shape)
# Number of values per sample (product of every axis but the first).
n_features = np.prod(_shape[1:])

# The batch axis is left unspecified (None): `batchsize` is 20 here while
# test_dataset was built with batchsize=60, so hard-wiring it into the input
# shape would describe batches that are never fed to the network.
shape = (None,) + _shape[1:]
target_var = T.ivector('targets')

# Logs
# NOTE(review): data_name still holds the name set for the earlier
# clusters/transport experiment -- confirm this is the name wanted in the log.
logger.info('Building the input and output variables for : {}'.format(data_name))
logger.info('Input data expected shape : {}'.format(shape))

# Build the layers :
# ==================
# Build the layers
input_layer = lasagne.layers.InputLayer(shape=shape)

dense_1 = lasagne.layers.DenseLayer(
                input_layer,
                num_units=30,
                nonlinearity=lasagne.nonlinearities.tanh,
                # W=lasagne.init.Uniform(range=0.01, std=None, mean=0.0),
                )
softmax_layer = lasagne.layers.DenseLayer(
                dense_1,
                num_units=n_classes,
                nonlinearity=lasagne.nonlinearities.softmax,
                # W=lasagne.init.Uniform(range=0.01, std=None, mean=0.0),
                )

# Instanciate the NN :
# ====================

nn = CNN(name='Moons test')
nn.add_output('main', softmax_layer)

# Compile :
# =========
nn.compile('main', compiler)




In [None]:
# Train the nn :
# ==============
nn.train([test_dataset, ], ['main', ]);


In [None]:
# ================
# Learning curve
# ================
fig, ax = visual.learning_curve(nn.global_stats, regex='loss')
#     SAVE
# fig.tight_layout()
# fig.savefig(fig_title+'-Learning_curve.png',bbox_inches='tight')
# plt.show() instead of fig.show(): with the inline (non-GUI) backend,
# Figure.show() cannot display anything and only emits a warning.
plt.show()
# visual.learning_curve(final_stats, regex='domain.* acc');


In [None]:
for c in range(len(np.unique(test_dataset.y_test))):
    visual.bound(test_dataset.X_test, test_dataset.y_test, nn['main'].output, class_idx=c);
plt.show()