# Imports

In [1]:
import os
import time
import pickle
import random
import numpy as np
from datetime import datetime

%matplotlib inline
import matplotlib.pyplot as plt

from IPython.display import Math, Latex

import pandas as pd

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow import keras;
from tensorflow.keras.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation, MaxPooling1D, Dropout, BatchNormalization, Conv1D, Flatten
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split

# Module-level seed state shared by the helpers below (None means unseeded).
RANDOM_STATE = None
RS = None

def reset_random_state(random_state=None):
    """Set (or clear) the module-level seed state and seed the RNG libraries.

    Arguments:
        - random_state: integer seed, or None to clear the module-level state.

    Side effects:
        - Rebinds the module globals RANDOM_STATE (the seed) and RS
          (a numpy RandomState built from that seed).
        - When a seed is given, also seeds TensorFlow, numpy's global
          generator and Python's `random`, and exports PYTHONHASHSEED.
        - When random_state is None only the two globals are cleared;
          the library generators are left as they are.
    """
    # Without this declaration the assignments below would only create
    # function locals and the module-level values would never change.
    global RANDOM_STATE, RS

    if random_state is None:
        RANDOM_STATE = None
        RS = None
        return

    RANDOM_STATE = random_state
    RS = np.random.RandomState(RANDOM_STATE)
    # NOTE(review): hash randomization is fixed at interpreter start-up, so
    # this presumably only influences child processes - confirm if relied on.
    os.environ['PYTHONHASHSEED'] = str(RANDOM_STATE)
    tf.random.set_seed(RANDOM_STATE)
    np.random.seed(RANDOM_STATE)
    random.seed(RANDOM_STATE)

reset_random_state(17)

# Generate Data

In [4]:
def log_scale(H, C=0):
    """Return log10(H + 1 + C), element-wise.

    The values are shifted by (1.0 + C) before taking the logarithm so that
    zero entries (e.g. empty histogram bins) do not produce log(0).

    Arguments:
        - H: array-like of any dimensionality (a histogram row or a matrix
          of histogram rows).
        - C: extra shift added on top of the 1.0 offset.

    Returns:
        - float array with the same shape as H.
    """
    # log10 is element-wise, so one vectorised call replaces the per-row
    # Python-level loop and gives the same values.
    return np.log10(np.asarray(H) + 1.0 + C)

def generate_data(N, M, nextConfig, sample, 
                  density=False, 
                  dense_histogram=True, 
                  apply_log_scale=False):
    """
    Generate simulated training data: one histogram per sampled configuration.

    The data consists of ITEMS. Each ITEM is a pair: a histogram and a
    configuration (for now, the configuration is a scalar floating point value).

    A histogram is built from M sample points. Each sample point is drawn
    from a distribution with some specific configuration, i.e. a parameter
    setting for the distribution (e.g. alpha, gamma, alpha+LOC).

    Arguments:
        - N: number of ITEMS to generate (rows)
        - M: number of sample points that make a histogram
        - nextConfig: a callable that returns a new configuration on each
          call, or a plain number that is used for every row.
            Example: nextConfig()
                lambda = random(1,10)
                return lambda
        - sample(config, size): returns `size` sample points drawn from the
          distribution with the given config. The points are stored in an
          integer matrix.
            Example sample(alpha, size=256)
                ys = yulesimon(alpha, size)
                return ys
        - density: forwarded to np.histogram (normalise each row when True).
        - dense_histogram: when True use one unit-width bin per integer value
          0..max(samples); when False use M unit-width bins over [0, M].
        - apply_log_scale: when True the histogram matrix is passed through
          log_scale() before being returned.

    Returns:
        - samples: (N, M) integer matrix of the raw sample points
        - histogram: matrix with one histogram row per item
        - config_array: (N,) float array, the config used for each row
    """

    samples = np.zeros((N, M), dtype=int)
    config_array = np.zeros((N,), dtype=float)

    # repeat N times: pick a config, draw M sample points with it
    for i in range(N):
        # nextConfig may be a generator function or a fixed numeric config
        config = nextConfig() if callable(nextConfig) else nextConfig
        samples[i, :] = sample(config, size=M)
        config_array[i] = config

    if dense_histogram:
        # One bin per integer value 0..max needs max + 1 bins. Using max bins
        # over [0, max] would merge the two largest values into the last
        # (closed) bin and would fail outright when every sample is 0.
        nbins = int(np.max(samples)) + 1
    else:
        nbins = M

    def row_histogram(row):
        # np.histogram ignores values that fall outside `range`
        return np.histogram(row, bins=nbins, range=(0, nbins), density=density)[0]

    # create a histogram for each row
    histogram = np.apply_along_axis(row_histogram, 1, samples)

    if apply_log_scale:
        histogram = log_scale(histogram)

    return samples, histogram, config_array
        

## Generate data (poisson)

In [None]:
from scipy import stats
from scipy.stats import poisson

def sample_poisson(mu, size):
    """Draw `size` Poisson(mu) sample points.

    Uses the shared module-level generator RS when one has been set, so that
    consecutive calls advance a single reproducible stream. Passing the bare
    integer seed instead would re-seed on every call and make each call
    start from the same generator state. Falls back to RANDOM_STATE when RS
    is not set.
    """
    rng = RS if RS is not None else RANDOM_STATE
    return poisson.rvs(mu=mu, loc=0, size=size, random_state=rng)


def next_lambda(min_lambda=0.1, max_lambda=10):
    """Draw one Poisson rate uniformly from [min_lambda, max_lambda).

    The draw comes from the module-level generator RS when it is set,
    otherwise from numpy's global generator.
    """
    source = np.random if RS is None else RS
    drawn = source.uniform(low=min_lambda, high=max_lambda, size=1)
    return drawn[0]

def generate_data_poisson(N, M, density=True):
    """Simulate Poisson histograms and split them into train and test sets.

    Arguments:
        - N: number of histogram rows to generate
        - M: number of sample points per histogram
        - density: forwarded to generate_data

    Returns:
        - H_train, y_train, H_test, y_test: histogram rows and their lambda
          values, with 25% of the rows held out as the test set.
    """
    # the raw sample matrix is not needed here, only histograms and lambdas
    _, histograms, rates = generate_data(N=N,
                                         M=M,
                                         nextConfig=next_lambda,
                                         sample=sample_poisson,
                                         density=density)

    # train_test_split keeps the histogram rows and lambdas aligned
    parts = train_test_split(histograms, rates, test_size=0.25, random_state=RANDOM_STATE)
    H_train, H_test, y_train, y_test = parts

    return H_train, y_train, H_test, y_test

# DNN Model

In [None]:
DEFAULT_ACTIVATION = 'relu'

def create_dnn_model(n_features, 
                 layers, 
                 activation=DEFAULT_ACTIVATION, 
                 init='he_uniform', 
                 batch_normalization=False, 
                 dropout=0, 
                 optimizer='adam', 
                 k_reg=False, 
                 k_reg_lr=0.001, 
                 a_reg=False, 
                 a_reg_lr=0.001):
    """Build and compile a fully connected regression network.

    Every entry of `layers` becomes one stage:
        Dense -> [BatchNormalization] -> Activation -> [Dropout]
    followed by a single-unit output stage with a linear activation.

    Arguments:
        - n_features: number of input features
        - layers: non-empty sequence with the unit count of each stage
        - activation: activation used after every stage except the output
        - init: kernel initializer for all Dense layers
        - batch_normalization: insert BatchNormalization after each Dense
          layer (the Dense bias is then disabled)
        - dropout: dropout rate; a Dropout layer is added only when > 0
        - optimizer: optimizer passed to model.compile
        - k_reg / k_reg_lr: enable an L2 kernel regularizer and its factor
        - a_reg / a_reg_lr: enable an L2 activity regularizer and its factor

    Returns:
        - the compiled Sequential model (loss and metric: mse)

    Raises:
        - ValueError: if `layers` is empty
    """
    if not layers:
        raise ValueError('layers must contain at least one entry')

    def dense(units, **extra):
        # all Dense layers share initializer, regularizers and bias policy
        return Dense(units=units,
                     kernel_initializer=init,
                     kernel_regularizer=l2(k_reg_lr) if k_reg else None,
                     activity_regularizer=l2(a_reg_lr) if a_reg else None,
                     use_bias=(not batch_normalization),
                     **extra)

    model = Sequential()

    # ==========================
    # input-layer, hidden-layers
    # ==========================
    for index, units in enumerate(layers):
        # only the first layer declares the input dimension
        extra = {'input_dim': n_features} if index == 0 else {}
        model.add(dense(units, **extra))

        # The normalization / activation / dropout belong to EACH stage.
        # Applying them once after the loop would stack the hidden Dense
        # layers without any non-linearity in between.
        if batch_normalization:
            model.add(BatchNormalization())

        model.add(Activation(activation))

        if dropout > 0:
            model.add(Dropout(dropout))

    # =============
    # output-layer
    # =============
    # NOTE(review): the output keeps the same initializer, regularizers and
    # optional BatchNormalization as the other layers - confirm this is
    # intended for a regression output.
    model.add(dense(1))

    if batch_normalization:
        model.add(BatchNormalization())

    model.add(Activation('linear'))

    model.compile(loss='mse', metrics=['mse'], optimizer=optimizer)

    return model

# CNN Model

In [1]:
def create_cnn_model(n_features, filters=32):
    """Build and compile a small 1D convolutional regression network.

    Architecture: Conv1D(filters, kernel size 3) -> MaxPooling1D(2) ->
    Flatten -> Dense(64) -> Dense(1).

    Arguments:
        - n_features: length of each input row; the model expects inputs of
          shape (n_features, 1)
        - filters: number of convolution filters

    Returns:
        - the compiled Sequential model (loss: mse, optimizer: adam)
    """
    model = Sequential()

    # honour the `filters` argument instead of a hard-coded 32
    model.add(Conv1D(filters, 3, activation=DEFAULT_ACTIVATION, input_shape=(n_features, 1)))

    model.add(MaxPooling1D(pool_size=2))

    model.add(Flatten())

    model.add(Dense(64, activation=DEFAULT_ACTIVATION))

    model.add(Dense(1))

    model.compile(loss="mse", optimizer="adam")

    return model

# Training

In [None]:
def train_model(model, X_train, y_train, batch_size=32):
    """Fit `model` with early stopping, LR reduction and checkpointing.

    25% of the given training data is held out as a validation set. The
    model is trained for at most 200 epochs without shuffling; the best
    model (lowest val_loss) is checkpointed to models/DNN_<timestamp>.h5 and
    the training history is pickled next to it.

    Arguments:
        - model: compiled Keras model
        - X_train, y_train: training inputs and targets
        - batch_size: mini-batch size passed to model.fit

    Returns:
        - model: the model re-loaded from the checkpoint file
        - history: dict of per-epoch metrics (history.history)
    """

    # split train/val
    X_train, X_val, y_train, y_val = train_test_split(X_train, 
                                                      y_train, 
                                                      test_size=0.25, 
                                                      random_state=RANDOM_STATE)

    # early-stopping
    es_patience = 50
    es = EarlyStopping(monitor='val_loss', 
                        patience=es_patience, 
                        mode='min', 
                        restore_best_weights=True, 
                        verbose=0)
    
    # reduce learning-rate on plateau
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=10)
    
    # model checkpoint
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs('models', exist_ok=True)
    date_str = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')   
    base_path           = f'models/DNN_{date_str}'
    model_path          = f'{base_path}.h5'
    history_path        = f'{base_path}.history'
    
    cp = ModelCheckpoint(filepath=model_path, monitor='val_loss', mode='min', save_best_only=True, verbose=0)

    # fit model
    history = model.fit(X_train, 
                        y_train, 
                        validation_data=(X_val, y_val), 
                        epochs=200, 
                        batch_size=batch_size, 
                        shuffle=False, 
                        callbacks=[es, reduce_lr, cp], 
                        verbose=0)
    
    # save history with model
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)
    
    # return the best model as saved by the checkpoint callback
    model = keras.models.load_model(model_path)
    return model, history.history

# Learning Curves

In [None]:
def plot_learning_curves(history, train_key='loss', val_key='val_loss'):
    """Plot the training and validation curves of a training history.

    Arguments:
        - history: mapping from metric name to its per-epoch values
        - train_key: key of the training curve
        - val_key: key of the validation curve

    A new 2x2 figure is created; plt.show() is not called here.
    """
    plt.figure(figsize=(2,2))

    # training curve first, validation second, to match the legend order
    for curve_key in (train_key, val_key):
        plt.plot(history[curve_key])

    plt.title('learning curves')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train', 'validation'], loc='upper right')

# Create and train DNN model

In [None]:
def dnn_trial(X_train, y_train, X_test, y_test):
    """Create, train and evaluate one DNN model.

    Arguments:
        - X_train, y_train: training inputs and targets
        - X_test, y_test: held-out inputs and targets used for evaluation

    Returns:
        - dnn_model: the trained model returned by train_model
        - history: the training history returned by train_model
        - y_pred: flattened predictions for X_test
        - sqrt_mse: square root of the mean squared error on the test set
    """
    # hyper-parameters of this trial
    settings = dict(layers=[256,256],
                    activation=DEFAULT_ACTIVATION,
                    init=keras.initializers.HeUniform(),
                    batch_normalization=True,
                    dropout=0.0,
                    optimizer=Adam(learning_rate=1e-2),
                    k_reg=True,
                    k_reg_lr=1e-5,
                    a_reg=True,
                    a_reg_lr=1e-5)

    n_features = X_train.shape[1]
    untrained = create_dnn_model(n_features, **settings)

    # train
    dnn_model, history = train_model(untrained, X_train, y_train)

    # predict (flattened to a 1D array of predictions)
    y_pred = dnn_model.predict(X_test).flatten()

    # root of the mean squared error between targets and predictions
    sqrt_mse = np.sqrt(mean_squared_error(y_test, y_pred))

    return dnn_model, history, y_pred, sqrt_mse

# Confidence intervals

In [1]:
# TODO