# This notebook is a library
### It contains the following functions:
1. **generate_data**: generates data for a learning process by means of simulation.

# Imports

In [1]:
import os
import time
import pickle
import random
import numpy as np
from datetime import datetime

import tensorflow as tf

import keras
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras import backend as K
from keras import Sequential
from keras.layers import Dense, Activation, MaxPooling1D, Dropout, BatchNormalization, Conv1D, Flatten
from keras.optimizers import Adam, SGD
from keras.regularizers import l1, l2, l1_l2
from keras.metrics import mean_squared_error

from sklearn.model_selection import train_test_split


# Generate Data

In [3]:
def generate_data(N, M, nextConfig, sample):
    """
    Simulate a labelled data set of histograms.

    Every ITEM is a pair (histogram, configuration). A configuration is the
    parameter setting of the sampled distribution (e.g. alpha, gamma,
    alpha+LOC); for now it is a single floating point value. The histogram
    of an ITEM is built from M sample points drawn with that configuration.

    Arguments:
        - N: number of ITEMS to generate (rows)
        - M: number of sample points drawn per ITEM
        - nextConfig: callable without arguments that returns a new
            configuration on every call.
            Example: nextConfig()
                lambda = random(1,10)
                return lambda
        - sample(config, size): returns `size` sample points drawn from the
            distribution with the given configuration. It is called with
            `size` passed as a keyword argument.
            Example sample(alpha, size=256)
                ys = yulesimon(alpha, size)
                return ys

    Returns:
        - a matrix of histograms (one row per ITEM) and
          an array of the corresponding configs (one config per row).

    Notes:
        - sample points are stored in an integer matrix.
        - all rows share the same binning: the number of bins equals the
          largest sampled value and the histogram range is (1, largest value),
          so values outside that range are not counted.
    """

    samples = np.zeros((N, M), dtype=int)
    configs = np.zeros((N,), dtype=float)

    # one row per ITEM: pick a configuration, then draw M points with it
    for row in range(N):
        current = nextConfig()
        samples[row, :] = sample(current, size=M)
        configs[row] = current

    # the largest value seen anywhere fixes the common binning of all rows
    largest = np.max(samples)

    def _row_histogram(points):
        counts, _edges = np.histogram(points, bins=largest, range=(1, largest))
        return counts

    histograms = np.apply_along_axis(_row_histogram, 1, samples)

    return histograms, configs
        

# DNN Model

In [None]:
def create_dnn_model(n_features, 
                 layers, 
                 activation='relu', 
                 init='he_uniform', 
                 batch_normalization=False, 
                 dropout=0, 
                 optimizer='adam', 
                 k_reg=False, 
                 k_reg_lr=0.001, 
                 a_reg=False, 
                 a_reg_lr=0.001):
    """
    Build and compile a fully connected regression network.

    Every entry of `layers` produces the same stack:
    Dense -> [BatchNormalization] -> Activation -> [Dropout].
    A single-unit output layer with a linear activation follows.
    The model is compiled with an MSE loss and an MSE metric.

    Arguments:
        - n_features: input dimension of the first Dense layer.
        - layers: sequence with the number of units of each Dense layer;
            the first entry is the input layer, the rest are hidden layers.
        - activation: activation applied after every non-output Dense layer.
        - init: kernel initializer used for every Dense layer.
        - batch_normalization: if True, a BatchNormalization layer follows
            every Dense layer (including the output layer) and the Dense
            layers are created without a bias.
        - dropout: dropout rate; a Dropout layer is added only if it is > 0.
        - optimizer: optimizer passed to model.compile.
        - k_reg / k_reg_lr: enable an L2 kernel regularizer with this factor.
        - a_reg / a_reg_lr: enable an L2 activity regularizer with this factor.

    Returns:
        - the compiled Sequential model.
    """

    def dense(units, **extra):
        # all Dense layers of the network share the same configuration
        return Dense(units=units
                     , kernel_initializer=init
                     , kernel_regularizer=l2(k_reg_lr) if k_reg else None
                     , activity_regularizer=l2(a_reg_lr) if a_reg else None
                     , use_bias=(not batch_normalization)
                     , **extra)

    def add_post_dense_layers(model):
        # [BatchNormalization] -> Activation -> [Dropout]
        if batch_normalization:
            model.add(BatchNormalization())

        model.add(Activation(activation))

        if dropout > 0:
            model.add(Dropout(dropout))

    model = Sequential()

    # ============
    # input-layer
    # ============
    model.add(dense(layers[0], input_dim=n_features))
    add_post_dense_layers(model)

    # ==============
    # hidden-layers
    # ==============
    # The normalization / activation / dropout stack is added inside the loop,
    # once per hidden layer. Adding it only once after the loop would leave
    # consecutive hidden Dense layers without a non-linearity between them.
    for units in layers[1:]:
        model.add(dense(units))
        add_post_dense_layers(model)

    # =============
    # output-layer
    # =============
    # NOTE(review): with batch_normalization=True the single regression output
    # is also batch-normalized and has no bias; kept as in the original
    # design - confirm this is intended.
    model.add(dense(1))

    if batch_normalization:
        model.add(BatchNormalization())

    model.add(Activation('linear'))

    model.compile(loss='mse', metrics=['mse'], optimizer=optimizer)

    return model

# CNN Model

In [1]:
def create_cnn_model(n_features, filters=32):
    """
    Build and compile a small 1D convolutional regression network.

    Architecture: Conv1D(filters, kernel size 3, relu) -> MaxPooling1D(2)
    -> Flatten -> Dense(64, relu) -> Dense(1). Compiled with an MSE loss
    and the adam optimizer.

    Arguments:
        - n_features: length of each input row; the model expects inputs
            of shape (n_features, 1).
        - filters: number of filters of the convolutional layer.

    Returns:
        - the compiled Sequential model.
    """

    model = Sequential()

    # use the caller supplied number of filters (the default keeps 32)
    model.add(Conv1D(filters, 3, activation="relu", input_shape=(n_features, 1)))
    
    model.add(MaxPooling1D(pool_size=2))

    model.add(Flatten())

    model.add(Dense(64, activation="relu"))

    model.add(Dense(1))
    
    model.compile(loss="mse", optimizer="adam")

    return model

# Training

In [None]:
def train_model(model, X_train, y_train, batch_size=32, epochs=200,
                model_dir='models', model_prefix='DNN'):
    """
    Fit a compiled model with early stopping, learning-rate reduction and
    checkpointing, and return the best checkpointed model.

    25% of the given training data is held out as a validation set.
    The best model (lowest val_loss) is written to
    '<model_dir>/<model_prefix>_<timestamp>.h5' and the training history is
    pickled next to it with a '.history' extension.

    Arguments:
        - model: compiled Keras model to train.
        - X_train, y_train: training inputs and targets.
        - batch_size: batch size passed to model.fit.
        - epochs: maximum number of epochs (early stopping may end sooner).
        - model_dir: directory for the checkpoint and history files;
            created if it does not exist.
        - model_prefix: file name prefix of the checkpoint and history files.

    Returns:
        - the model re-loaded from the best checkpoint and
          the history dictionary of the fit.
    """

    # split train/val
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25)

    # early-stopping
    es_patience = 50
    es = EarlyStopping(monitor='val_loss', 
                        patience=es_patience, 
                        mode='min', 
                        restore_best_weights=True, 
                        verbose=0)
    
    # reduce learning-rate on plateau
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=10)
    
    # model checkpoint
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(model_dir, exist_ok=True)
    date_str = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
    base_path           = '{}/{}_{}'.format(model_dir, model_prefix, date_str)
    model_path          = '{}.h5'.format(base_path)
    history_path        = '{}.history'.format(base_path)
    cp = ModelCheckpoint(filepath=model_path, monitor='val_loss', mode='min', save_best_only=True, verbose=0)

    # fit model
    # (the batches are not re-shuffled between epochs)
    history = model.fit(X_train, 
                        y_train, 
                        validation_data=(X_val, y_val), 
                        epochs=epochs, 
                        batch_size=batch_size, 
                        shuffle=False, 
                        callbacks=[es, reduce_lr, cp], 
                        verbose=0)
    
    # save history with model
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)
    
    # load the best model saved by the checkpoint callback
    model = keras.models.load_model(model_path)
    return model, history.history

# Learning Curves

In [None]:
def plot_learning_curves(history, train_key='loss', val_key='val_loss'):
    """
    Draw the training and validation curves of a fit on a new figure.

    Arguments:
        - history: mapping from metric name to its per-epoch values.
        - train_key: key of the training curve in `history`.
        - val_key: key of the validation curve in `history`.

    The figure is not shown here; the caller decides when to call plt.show().

    NOTE(review): `plt` is not imported in the imports cell of this notebook;
    presumably it is provided by the notebook that loads this library - confirm.
    """
    plt.figure(figsize=(2, 2))

    # training curve first, validation curve second (matches the legend order)
    for curve_key in (train_key, val_key):
        plt.plot(history[curve_key])

    plt.title('learning curves')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train', 'validation'], loc='upper right')

# Create and train DNN model

In [None]:
def dnn_trial(X_train, y_train, X_test, y_test):
    """
    Create a DNN, train it and evaluate it on the test set.

    The network has two Dense layers of 256 units, relu activations,
    batch normalization, no dropout, L2 kernel and activity regularization
    (factor 1e-5 each) and an Adam optimizer with learning rate 1e-2.

    Arguments:
        - X_train, y_train: training inputs and targets.
        - X_test, y_test: test inputs and targets.

    Returns:
        - the trained model,
          the training history,
          the flattened predictions for X_test and
          the square root of the MSE between y_test and the predictions.
    """
    n_inputs = X_train.shape[1]

    # build
    untrained = create_dnn_model(
        n_inputs,
        layers=[256, 256],
        activation='relu',
        init=keras.initializers.HeUniform(),
        batch_normalization=True,
        dropout=0.0,
        optimizer=Adam(learning_rate=1e-2),
        k_reg=True,
        k_reg_lr=1e-5,
        a_reg=True,
        a_reg_lr=1e-5,
    )

    # train
    trained, fit_history = train_model(untrained, X_train, y_train)

    # predict
    predictions = trained.predict(X_test).flatten()

    # root of the mean squared error
    rmse = np.sqrt(mean_squared_error(y_test, predictions))

    return trained, fit_history, predictions, rmse

# Plot histogram, log scale and cumsum of a single row

In [None]:
def plot_data(H, configs=None):
    """
    Plot one randomly chosen row of H in three panels:
        - histogram
        - log scale
        - cumsum (reversed cumulative sum of the log-scale values)

    Arguments:
        - H: matrix of histograms (one histogram per row).
        - configs: optional array with one config per row of H. If given,
            the config of the plotted row is shown as alpha in the title of
            the histogram panel; otherwise the title is just 'Histogram'.

    NOTE(review): `plt` is not imported in the imports cell of this notebook;
    presumably it is provided by the notebook that loads this library - confirm.
    """
    # plot random row
    row_index = random.randint(0, H.shape[0] - 1)

    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
    fig.suptitle(f'plot for row-index: {row_index}')

    def draw(ax, xs, ys, title, xlabel, ylabel):
        # line on top (zorder=2) of the small blue sample markers (zorder=1)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.plot(xs, ys, zorder=2)
        ax.scatter(xs, ys, zorder=1, s=2, color="blue")

    # plot histogram
    f_x1 = H[row_index, :]
    x1 = np.arange(len(f_x1))
    if configs is not None:
        # label the panel with the config of the row that is actually plotted
        histogram_title = 'Histogram (alpha={:.2f})'.format(configs[row_index])
    else:
        histogram_title = 'Histogram'
    draw(axes[0], x1, f_x1,
         histogram_title,
         'x1 = sample value',
         'f(x1) = number of samples')

    # plot log scale
    # (shift values by one so as not to take log of zero;
    #  only the plotted row is transformed)
    x2 = np.log10(x1 + 1)
    f_x2 = np.log10(f_x1 + 1)
    draw(axes[1], x2, f_x2,
         'Log scale',
         'x2 = log ( sample value )',
         'f(x2) = log ( number of samples )')

    # plot cumsum
    # (accumulated from the last bin towards the first)
    x3 = x2
    f_x3 = np.cumsum(f_x2[::-1])[::-1]
    draw(axes[2], x3, f_x3,
         'Cumsum',
         'x3 = x2 = log ( sample value )',
         'f(x3) = cumsum ( f_x2 )')