In [None]:
#!/usr/bin/env python
# coding: utf-8
from __future__ import division, print_function
import sys
sys.path.append('..')

import theano
import theano.tensor as T
import lasagne

import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from logs import log_fname, new_logger

from sklearn.metrics import confusion_matrix

In [None]:
%matplotlib inline

# Load datasets

Here the datasets are loaded/built.

## Circles

In [None]:
from datasets.toys import load_circles
# Circles toy dataset: rebinds the notebook-level `source_data`, `_data_name`
# and `batchsize` used by the cells further down.
_data_name = 'Circles'
n_samples, n_classes = 80, 10  # samples per class, number of classes
batchsize = 80
source_data = load_circles(
    n_samples=n_samples,
    n_classes=n_classes,
    batchsize=batchsize
)

## Clouds

In [None]:
from datasets.toys import load_clouds
# Clouds toy dataset: rebinds the notebook-level `source_data`, `_data_name`
# and `batchsize` used by the cells further down.
_data_name = 'Clouds'
n_samples, n_classes = 30, 3  # samples per class, number of classes
batchsize = 80
source_data = load_clouds(
    n_samples=n_samples,
    n_classes=n_classes,
    batchsize=batchsize
)

## Moons

In [None]:
from datasets.toys import load_moons
# Moons toy dataset: rebinds the notebook-level `source_data`, `_data_name`
# and `batchsize` used by the cells further down.
_data_name = 'Moons'
n_samples, batchsize = 800, 80
source_data = load_moons(
    n_samples=n_samples,
    batchsize=batchsize
)

## MNIST

In [None]:
from datasets.mnist import load_mnist
# MNIST dataset: rebinds the notebook-level `source_data`, `_data_name`
# and `batchsize` used by the cells further down.
_data_name, batchsize = 'MNIST', 500
source_data = load_mnist(
    batchsize=batchsize
)

# Transform datasets

Here the transformed datasets are built.

## Data rotated

In [None]:
from datasets.utils import make_domain_dataset, make_corrector_dataset
from datasets.transform import rotate_dataset

# Name of the transformed dataset: source dataset name plus a suffix.
data_name = '{}_Rotated'.format(_data_name)
# NOTE(review): `angle` is assigned here but is not passed to `rotate_dataset`
# below — confirm whether the call was meant to receive it.
angle = 80

# The target domain is the source data passed through `rotate_dataset`.
target_data = rotate_dataset(
    source_data
)
# Datasets consumed later by the domain and corrector trainers.
domain_data = make_domain_dataset(
    [source_data, target_data]
)
corrector_data = make_corrector_dataset(
    source_data,
    target_data
)

### Data . Random Matrix

In [None]:
from datasets.utils import make_domain_dataset, make_corrector_dataset
from datasets.transform import random_mat_dataset

# Name of the transformed dataset: source dataset name plus a suffix.
data_name = '{}_RMat'.format(_data_name)

# The target domain is the source data passed through `random_mat_dataset`
# (normalisation switched off).
target_data = random_mat_dataset(
    source_data,
    normalize=False
)
# Datasets consumed later by the domain and corrector trainers.
domain_data = make_domain_dataset(
    [source_data, target_data]
)
corrector_data = make_corrector_dataset(
    source_data,
    target_data
)

### Data . Diag Dominant matrix

In [None]:
from datasets.utils import make_domain_dataset, make_corrector_dataset
from datasets.transform import diag_dataset

# Name of the transformed dataset: source dataset name plus a suffix.
data_name = '{}_Diag'.format(_data_name)

# The target domain is the source data passed through `diag_dataset`
# (normalisation switched on).
target_data = diag_dataset(
    source_data,
    normalize=True
)
# Datasets consumed later by the domain and corrector trainers.
domain_data = make_domain_dataset(
    [source_data, target_data]
)
corrector_data = make_corrector_dataset(
    source_data,
    target_data
)

### Data Mirror

In [None]:
from datasets.utils import make_domain_dataset, make_corrector_dataset
from datasets.transform import mirror_dataset

# Name of the transformed dataset: source dataset name plus a suffix.
data_name = '{}_Mirror'.format(_data_name)

# The target domain is the source data passed through `mirror_dataset`.
target_data = mirror_dataset(
    source_data
)
# Datasets consumed later by the domain and corrector trainers.
domain_data = make_domain_dataset(
    [source_data, target_data]
)
corrector_data = make_corrector_dataset(
    source_data,
    target_data
)

## Epoch Preprocessing

The preprocessing function that will run at the beginning of each epoch.

# Build the Neural Network

^^[Back to the Loading of datasets](#Load-datasets)


In [None]:
from nn.rgl import ReverseGradientLayer
from nn.block import adversarial
from nn.compilers import crossentropy_sgd_mom, squared_error_sgd_mom
from nn.training import Trainner, training


## Network building
Start with the variables

## Parameters

In [None]:
# Passed as `hp_lambda` to `adversarial(...)`; the domain trainer is only
# compiled (and trained) when this value is non-zero.
hp_lambda = 0.0

# (learning rate, momentum) for the corrector.
# NOTE(review): the visible compile cell passes `mom=0` rather than
# `label_mom` — confirm whether `label_mom` is meant to be used.
label_rate, label_mom = 1, 0.9

# (learning rate, momentum) handed to the adversarial/domain block.
domain_rate, domain_mom = 1, 0.9

# Get a logger
logger = new_logger()


In [None]:
# Prepare Theano variables for inputs and targets.
# The variable rank and the expected batch shape depend on the dataset family,
# which is recognised from the prefix of `data_name`.
if data_name.startswith('MNIST'):
    # Batches of 28x28 samples -> rank-3 tensors.
    input_var = T.tensor3('inputs')
    src_var = T.tensor3('src')
    target_var = T.tensor3('targets')
    shape = (batchsize, 28, 28)
elif data_name.startswith(('Moon', 'Clouds', 'Circles')):
    # Toy datasets: batches of 2-feature samples -> matrices.
    input_var = T.matrix('inputs')
    src_var = T.matrix('src')
    target_var = T.matrix('targets')
    shape = (batchsize, 2)
else:
    # Fail here with a clear message instead of a NameError on `shape`
    # (or on `input_var` in a later cell).
    raise ValueError(
        'Unknown dataset |{}| : cannot build the input variables'.format(data_name))

# Logs
logger.info('Building the input and output variables for |{}|'.format(data_name))
logger.info('Input data expected shape : {}'.format(shape))


## Architecture

^^[Back to the Loading of datasets](#Load-datasets)

^[Back to the init of the NN](#Build-the-Neural-Network)

In [None]:
# Build the layers.
# `input_layer` feeds the corrector network; `src_layer` is only consumed by
# the adversarial block in the compile cell.
input_layer = lasagne.layers.InputLayer(input_var=input_var, shape=shape)
src_layer = lasagne.layers.InputLayer(input_var=src_var, shape=shape)

# One dense layer with no nonlinearity and as many units as there are input
# features, so that its output can be reshaped back to the input shape.
# (A variant of this layer using a tanh nonlinearity was tried previously.)
feature = lasagne.layers.DenseLayer(
    input_layer,
    nonlinearity=None,
    num_units=np.prod(shape[1:]),  # Should have same number as the input dimension
)

# The reshaped dense output is the network output.
output_layer = reshaper = lasagne.layers.ReshapeLayer(feature, (-1,) + shape[1:])

# Logs
logger.info('Building the neural network architecture for |{}|'.format(data_name))
logger.info('Input data expected shape : {}'.format(shape))


## Compiling 

^^[Back to the Loading of datasets](#Load-datasets)

^[Back to the init of the NN](#Build-the-Neural-Network)

In [None]:
# Logs
logger.info('Compiling the neural network for |{}|'.format(data_name))
logger.info('Input data expected shape : {}'.format(shape))

# Compilation
# The corrector is trained with a squared-error objective against `target_var`.
# NOTE(review): momentum is hard-coded to 0 here although `label_mom` is defined
# in the parameters cell — confirm which one is intended.
corrector_trainner = Trainner(squared_error_sgd_mom(output_layer, lr=label_rate, mom=0, target_var=target_var), 
                             'corrector',)

# The adversarial (domain) trainer only exists when hp_lambda is non-zero;
# `domain_trainner` is left undefined otherwise.
if hp_lambda != 0.0:
    print('hp_lambda != 0 : Compliling the adversarial part of the networks')
    domain_trainner = Trainner(adversarial([src_layer, output_layer], hp_lambda=hp_lambda,
                                          lr=domain_rate, mom=domain_mom),
                               'domain')


## Add preprocessing (for alignment)

^^[Back to the Loading of datasets](#Load-datasets)

^[Back to the init of the NN](#Build-the-Neural-Network)

In [None]:
from align_learn.preprocess import classwise_shuffle, exhaustive_clostest, cluster_preprocess, build_clusters

# Choose preprocessing :
# Exactly one assignment should be active; the branches below derive the model
# name and the extra `corrector_data` entries from the selected function.
#corrector_trainner.preprocess = classwise_shuffle
#corrector_trainner.preprocess = exhaustive_clostest
corrector_trainner.preprocess = cluster_preprocess

model_name = ''
if corrector_trainner.preprocess is classwise_shuffle:
    model_name = 'Classwise_Corrector'
    # This preprocessing is given the source training labels.
    corrector_data['labels'] = source_data['y_train']
elif corrector_trainner.preprocess is exhaustive_clostest:
    model_name = 'K-closest_Corrector'
    # This preprocessing is given the source training labels.
    corrector_data['labels'] = source_data['y_train']
elif corrector_trainner.preprocess is cluster_preprocess:
    model_name = 'Cluster_Corrector'
    n_clusters = 6
    # NOTE(review): the meaning of 'k' = -1 is defined by `cluster_preprocess` — confirm.
    corrector_data['k'] = -1
    # Both cluster builds below use the labels of the source training set.
    y = source_data['y_train']
    # NOTE(review): `classes` is not used anywhere in the visible code.
    classes = np.unique(y)

    # Build the clusters for target data
    centers_array, clusters_label, centers_labels = build_clusters(corrector_data['X_train'],
                                                                   y, n_clusters=n_clusters)
    corrector_data['X_train_centers'] = centers_array
    corrector_data['X_train_clusters'] = clusters_label
    corrector_data['centers_labels'] = centers_labels
    
    # Build the clusters for source data
    # Only the centers and cluster assignments of this second call are stored;
    # its `centers_labels` result is not saved into `corrector_data`.
    centers_array, clusters_label, centers_labels = build_clusters(corrector_data['y_train'],
                                                                   y, n_clusters=n_clusters)
    corrector_data['y_train_centers'] = centers_array
    corrector_data['y_train_clusters'] = clusters_label

else:
    # Any other preprocessing function.
    model_name = 'Pairwise_Corrector'


# Train the neural network

^^[Back to the Loading of datasets](#Load-datasets)

^[Back to the init of the NN](#Build-the-Neural-Network)

Reset the counter and the stats

In [None]:
# Reset the training bookkeeping used by `do_n_epoch`.
# `Logger.warn` is a deprecated alias of `Logger.warning` (removed in
# Python 3.13); assumes `new_logger()` returns a standard `logging.Logger`.
logger.warning('Reset the epoch counter and saved statistics')
epoch_counter = 0  # total number of epochs requested so far
final_stats = {}   # statistics accumulated over successive training runs


## Training loop 

In [None]:
def do_n_epoch(n_epoch):
    """
    Train for `n_epoch` additional epochs and accumulate the statistics.

    Uses the notebook-level trainers and datasets. The domain trainer and the
    domain dataset are included only when `hp_lambda` is non-zero.

    Params
    ------
        n_epoch: number of additional epochs to run.

    Side effects
    ------------
        - `epoch_counter` is increased by `n_epoch` (before training starts).
        - `final_stats` is rebound to the merge of its previous content with
          the statistics returned by `training`: values of keys present in both
          are joined with `+`, and keys missing from the new statistics are
          kept instead of being dropped.
    """
    # Only the names that are re-bound need a `global` declaration.
    global epoch_counter, final_stats
    epoch_counter += n_epoch
    logger.info('Trainning the neural network for {} additional epochs ({} total)'.format(n_epoch, epoch_counter))

    if hp_lambda != 0.0:
        trainers = [corrector_trainner, domain_trainner]
        datasets = [corrector_data, domain_data]
    else:
        trainers = [corrector_trainner]
        datasets = [corrector_data]
    stats = training(trainers, datasets, num_epochs=n_epoch, logger=logger)

    # Start from the existing statistics so that earlier keys survive.
    merged = dict(final_stats)
    for key, values in stats.items():
        merged[key] = merged[key] + values if key in merged else values
    final_stats = merged

# Plot results

^^[Back to the Loading of datasets](#Load-datasets)

^[Back to the init of the NN](#Build-the-Neural-Network)

## Learning curve


## 2D Data plot

In [None]:
import visual

def my_2D_plot(source_data, target_data, corrector_data, trainer):
    """
    Plot the source, target and corrected test points side by side.

    The left axes show source vs corrected data, the right axes show source vs
    target data. Corrected points are reshaped to `(-1, 2)`, so only
    2-feature data can be plotted.

    Params
    ------
        source_data: mapping providing 'X_test' and 'y_test'.
        target_data: mapping providing 'X_test' and 'y_test'.
        corrector_data: mapping providing 'X_test'. When it also holds both
            'preprocess' and 'X_train_centers', the cluster centers and their
            mapping are drawn too (this additionally reads 'y_train_centers').
        trainer: object whose `output(X)` method returns the corrected data.
    """
    def _correct(points):
        # Run the given trainer (not a notebook global) and flatten to 2D points.
        return np.array(trainer.output(points)).reshape((-1, 2))

    # Compute the correction on test data
    corrected_test = _correct(corrector_data['X_test'])

    # Init figure and axes
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 6))

    # Plot data test points (source + corrected) on left fig
    visual.source_2D(source_data['X_test'], source_data['y_test'], ax=ax1)
    visual.corrected_2D(corrected_test, source_data['y_test'], ax=ax1)

    # Plot data test points (source + target) on right fig
    visual.source_2D(source_data['X_test'], source_data['y_test'], ax=ax2)
    visual.target_2D(target_data['X_test'], target_data['y_test'], ax=ax2)

    # Plot cluster centers and cluster mapping.
    # The centers are only corrected inside this guard, so datasets without
    # cluster information no longer raise a KeyError.
    if 'preprocess' in corrector_data and 'X_train_centers' in corrector_data:
        idx = corrector_data['preprocess']
        corrected_centers = _correct(corrector_data['X_train_centers'])
        # Source centers re-ordered by the stored 'preprocess' index.
        source_centers = corrector_data['y_train_centers'][idx]
        visual.centers_source(source_centers, ax=ax1)
        visual.centers_corrected(corrected_centers, ax=ax1)
        visual.mapping(corrected_centers, source_centers, ax=ax1)
        visual.centers_source(source_centers, ax=ax2)
        visual.centers_target(corrector_data['X_train_centers'], ax=ax2)

    # Legends
    ax1.set_title('Source data vs Corrected data')
    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles, labels, bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)

    ax2.set_title('Source data vs Target data')
    handles, labels = ax2.get_legend_handles_labels()
    ax2.legend(handles, labels, bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)

    fig.show()


In [None]:
# Show which model name / dataset name combination is currently configured.
print(model_name, data_name)

# Play !

^^[Back to the Loading of datasets](#Load-datasets)

^[Back to the init of the NN](#Build-the-Neural-Network)

In [None]:
# Train for one more epoch, then refresh every diagnostic plot.
do_n_epoch(1)
my_2D_plot(source_data, target_data, corrector_data, corrector_trainner)
# Learning curves restricted to the statistics selected by the 'loss' regex.
visual.learning_curve(final_stats, regex='loss');
# Display the current weight matrix of the dense `feature` layer.
fig, ax = visual.mat(feature.W.get_value())
_ = fig.suptitle('Weights');


In [None]:
# Distinct values stored under 'X_train_clusters'.
# This key is only set when `cluster_preprocess` was selected in the
# preprocessing cell; with any other preprocessing this raises a KeyError.
print(np.unique(corrector_data['X_train_clusters']))