# Project Pipeline
Execute the cells step by step to obtain a prediction and score for your configuration.
To make the widgets work, you might need to run
```
jupyter nbextension enable --py --sys-prefix widgetsnbextension
```
on your system.

## 1. Imports

In [1]:
import numpy as np
import time

from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
import sklearn.preprocessing as skprep

import data_utils
import widget_ui as ui

## 2. Prepare Data Sets
### 2.1 Load Data
Load training and test data. Furthermore, create a sample set for quickly calculating transformations such as PCA.

In [2]:
# Load the full training and test data from their CSV files.
all_data = data_utils.load_from('data/all_train.csv', 'data/all_test.csv')
# Load the sample set used for quickly calculating transformations such as PCA.
sample_data = data_utils.load_sample_data('data/all_sample.csv')

### 2.2 Subsample Data
Subsample the training and test sets individually at the specified rate to reduce the amount of data used in the following steps.

In [3]:
# Fresh DataSet handed to the subsampling UI together with the full data.
# NOTE(review): presumably the UI fills `data` with the subsampled rows —
# confirm against widget_ui.subsampling_ui.
data = data_utils.DataSet()
ui.subsampling_ui(all_data, data)

## 3. Preprocessing: PCA
Compute PCA on the sample data set.

In [4]:
# `init_pca` is called with no arguments further down and `.transform()` is
# applied to its result, so it acts as a provider of the PCA transformer.
init_pca = ui.pca_ui(sample_data)

## 4. Configure Classifiers
### 4.1 Multilayer Perceptron

In [5]:
# Candidate for `init_classifier`; the training cell calls the selected
# candidate with no arguments to obtain the classifier instance.
init_mlp = ui.mlp_ui()

### 4.2 Naive Bayes

In [6]:
# Candidate for `init_classifier`; the training cell calls the selected
# candidate with no arguments to obtain the classifier instance.
init_nb = ui.nb_ui()

## 5. Training

In [7]:
# Initial values; both are overwritten by the slider callbacks of this cell.
# NOTE(review): window_reps is stored but not read anywhere in the visible cells.
window_size, window_reps = 500, 10
# Selected classifier factory and the classifier instance created from it.
init_classifier = classifier = None

##########################################

# Unchecked: train/predict on the full data; checked: use sample_data instead.
use_sample_data = Checkbox(description='Use fast sample data', value=False)
display(use_sample_data)

# Checked: features are passed through the PCA transform before use.
pca_checkbox = Checkbox(description='Apply PCA', value=False)
display(pca_checkbox)

def set_window_size(s):
    """Store ``s`` in the module-level ``window_size``.

    ``window_size`` is the number of rows collected before a window is
    handed to training or prediction.
    """
    global window_size
    window_size = s

# Slider for the rows-per-window setting: 100 to 1000 in steps of 100.
window_size_slider = IntSlider(
    description='window size:',
    value=500,
    min=100,
    max=1000,
    step=100,
    layout=Layout(width='80%'),
    continuous_update=False,
)
# Bind the slider to its setter.
i = interact(set_window_size, s=window_size_slider)

def set_window_reps(r):
    """Store ``r`` in the module-level ``window_reps``.

    NOTE(review): ``window_reps`` is not read anywhere in the visible cells.
    """
    global window_reps
    window_reps = r

# Slider for the window-repetition setting: 1 to 100 in steps of 1.
window_reps_slider = IntSlider(
    description='window reps:',
    value=10,
    min=1,
    max=100,
    step=1,
    layout=Layout(width='80%'),
    continuous_update=False,
)
# Bind the slider to its setter.
i = interact(set_window_reps, r=window_reps_slider)

def set_classifier(c):
    """Point the module-level ``init_classifier`` at the chosen factory.

    ``'Multilayer Perceptron'`` selects ``init_mlp``; every other value
    selects ``init_nb``.
    """
    global init_classifier
    # Conditional expression: only the selected name is looked up.
    init_classifier = init_mlp if c == 'Multilayer Perceptron' else init_nb

# Radio buttons offering the two classifier choices handled by set_classifier.
classifier_rb = RadioButtons(
    description='Classifier:',
    options=['Multilayer Perceptron', 'Naive Bayes'],
)
interact(set_classifier, c=classifier_rb)

def train_on_window(window):
    """Update the module-level ``classifier`` with one window of rows.

    Column 0 of ``window`` is used as the label and columns 1..28 as the
    features. If the PCA checkbox is ticked, the features are first passed
    through ``init_pca().transform``.

    Windows arrive one after another, so the update has to be incremental.
    Calling ``fit`` for every window restarts training each time, leaving a
    model that only reflects the last window. Estimators that offer
    ``partial_fit`` are therefore updated with it. ``fit`` is kept for
    estimators without ``partial_fit`` and for estimators with a truthy
    ``warm_start`` attribute, whose ``fit`` continues from the previous state.

    Args:
        window: 2-D array whose rows are ``[label, feature_1, ..., feature_28]``.
    """
    labels = window[:, 0]
    features = window[:, 1:29]

    # Optionally apply PCA
    if pca_checkbox.value:
        features = init_pca().transform(features)

    # Train classifier
    incremental_fit = getattr(classifier, 'partial_fit', None)
    if incremental_fit is None or getattr(classifier, 'warm_start', False):
        classifier.fit(features, labels)
    elif hasattr(classifier, 'classes_'):
        # Classes were registered by an earlier window.
        incremental_fit(features, labels)
    else:
        # The first incremental call must announce the set of classes.
        # NOTE(review): assumes the first window contains every class.
        incremental_fit(features, labels, classes=np.unique(labels))

def perform_training(*args):
    """Click handler: create a fresh classifier and train it window by window.

    A new classifier is obtained from ``init_classifier()`` and stored in the
    module-level ``classifier``. Rows are then read from the training data
    (or from ``sample_data`` when the "fast sample" checkbox is ticked),
    grouped into windows of ``window_size`` rows and passed to
    ``train_on_window``. A trailing, shorter window is trained on as well.
    The classifier description and the elapsed time are printed.

    Args:
        *args: Ignored; accepts whatever the button passes to its handler.
    """
    # Initialize classifier
    global classifier
    classifier = init_classifier()

    print('Training {}'.format(classifier))
    start_time = time.time()

    # NOTE(review): train_data, ntrain and nsample are not defined in the
    # visible cells — confirm they exist in the notebook namespace.
    if not use_sample_data.value:
        iterator = train_data.iterrows()
        progress_max = ntrain
    else:
        iterator = sample_data.iterrows()
        progress_max = nsample

    progress = IntProgress(
        min=0,
        max=progress_max,
        step=1,
        description='Training:',
        bar_style='danger'
    )
    display(progress)

    rows_per_window = window_size
    # Collect rows in a list and convert once per window; growing an array
    # with np.append copies the whole buffer for every single row.
    pending = []
    for idx, (_, row) in enumerate(iterator):
        pending.append(row)
        if len(pending) >= rows_per_window:
            train_on_window(np.asarray(pending, dtype=float))
            pending = []
            progress.value = idx + 1  # idx is zero-based
    if pending:
        train_on_window(np.asarray(pending, dtype=float))
    # Always finish the bar, also when the rows divide evenly into windows.
    progress.value = progress_max

    print('Time taken: {}'.format(time.time() - start_time))

# Button that launches perform_training when clicked.
start_training = Button(button_style='danger', description='Start training')
display(start_training)
start_training.on_click(perform_training)

## 6. Prediction

In [8]:
def predict_on_window(window):
    """Return the module-level ``classifier``'s predictions for one window.

    Columns 1..28 of ``window`` are used as features; column 0 is not read.
    If the PCA checkbox is ticked, the features are first passed through
    ``init_pca().transform``.
    """
    inputs = window[:, 1:29]
    if pca_checkbox.value:
        # Same optional projection as applied during training.
        inputs = init_pca().transform(inputs)
    return classifier.predict(inputs)

def predict(*args):
    """Click handler: predict window by window and print the confusion matrix.

    Rows are read from the test data (or from ``sample_data`` when the
    "fast sample" checkbox is ticked) and grouped into windows of
    ``window_size`` rows. Each window is scored with ``predict_on_window``
    against the labels in column 0. The per-window confusion matrices are
    summed and the total is printed.

    Args:
        *args: Ignored; accepts whatever the button passes to its handler.
    """
    print('Predict with {}'.format(classifier))

    # NOTE(review): test_data, ntest and nsample are not defined in the
    # visible cells — confirm they exist in the notebook namespace.
    if not use_sample_data.value:
        iterator = test_data.iterrows()
        progress_max = ntest
    else:
        iterator = sample_data.iterrows()
        progress_max = nsample

    # Pin the label order so every window yields a matrix of the same shape.
    # Without it, a window holding a single class produces a 1x1 matrix that
    # broadcasts into every cell of the accumulator.
    class_labels = getattr(classifier, 'classes_', None)
    n_classes = 2 if class_labels is None else len(class_labels)
    conf_mat = np.zeros((n_classes, n_classes))

    progress = IntProgress(
        min=0,
        max=progress_max,
        step=1,
        description='Predicting:',
        bar_style='info'
    )
    display(progress)

    def score(rows):
        # Confusion matrix of one window: column 0 vs. the predictions.
        window = np.asarray(rows, dtype=float)
        prediction = predict_on_window(window)
        return confusion_matrix(window[:, 0], prediction, labels=class_labels)

    rows_per_window = window_size
    # Collect rows in a list and convert once per window; growing an array
    # with np.append copies the whole buffer for every single row.
    pending = []
    for idx, (_, row) in enumerate(iterator):
        pending.append(row)
        if len(pending) >= rows_per_window:
            conf_mat += score(pending)
            pending = []
            progress.value = idx + 1  # idx is zero-based
    if pending:
        conf_mat += score(pending)
    # Always finish the bar, also when the rows divide evenly into windows.
    progress.value = progress_max

    print(conf_mat)

# Button that launches predict when clicked.
start_prediction = Button(button_style='info', description='Start prediction')
display(start_prediction)
start_prediction.on_click(predict)