# Imports

In [1]:
import os
import time
import pickle
import math
import random
import numpy as np
np.set_printoptions(precision=4)
from datetime import datetime

%matplotlib inline
import matplotlib.pyplot as plt

from IPython.display import Math, Latex

import pandas as pd

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow import keras;
from tensorflow.keras.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation, MaxPooling1D, Dropout, BatchNormalization, Conv1D, Flatten
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split

# Reproducibility settings.
# SEED is the value meant to be passed to reset_random_state() to fix all RNGs.
# RANDOM_STATE and RS start out as None, i.e. runs are unseeded by default;
# RANDOM_STATE is also read by train_model() for its train/validation split.
SEED = 17
RANDOM_STATE = None
RS = None

def reset_random_state(random_state=None):
    """Seed (or un-seed) the random number sources used in this file.

    Updates the module-level ``RANDOM_STATE`` and ``RS`` variables and, when a
    seed is given, seeds TensorFlow, NumPy's global generator and Python's
    ``random`` module with it.

    Arguments:
        - random_state: integer seed, or None to clear the module-level
          state (``RANDOM_STATE`` and ``RS`` are both set to None and no
          library generator is re-seeded).
    """
    # Without this declaration the assignments below would only create
    # function-local names and the module-level values would never change.
    global RANDOM_STATE, RS

    if random_state is None:
        RANDOM_STATE = None
        RS = None
        return

    RANDOM_STATE = random_state
    RS = np.random.RandomState(RANDOM_STATE)
    # NOTE: PYTHONHASHSEED is read at interpreter start-up; setting it here
    # affects child processes, not the hashing of the running interpreter.
    os.environ['PYTHONHASHSEED'] = str(RANDOM_STATE)
    tf.random.set_seed(RANDOM_STATE)
    np.random.seed(RANDOM_STATE)
    random.seed(RANDOM_STATE)

# reset_random_state(SEED)

# Generate Data

In [4]:
def log_scale(H, C=0):
    """Return the element-wise log10 of H after shifting it by (1.0 + C).

    The shift avoids taking log(0) for empty histogram bins.

    Arguments:
        - H: histogram values (a single 1D histogram or a 2D matrix with one
          histogram per row); anything np.asarray accepts.
        - C: extra non-negative offset added on top of the 1.0 shift.

    Returns:
        An array with the same shape as H holding log10(H + 1.0 + C).
    """
    # The transform is purely element-wise, so one vectorised call replaces
    # the per-row Python loop and also works for empty inputs.
    return np.log10(np.asarray(H) + 1.0 + C)

def generate_data(N, M, nextConfig, sample,
                  nbins=-1,  # negative: derive from the drawn samples
                  density=False,
                  apply_log_scale=False):
    """
    Simulate N (sample, histogram, configuration) items for a learning task.

    For every item a configuration is obtained, M observations are drawn from
    the distribution with that configuration, and the observations are binned
    into a histogram over the range (0, nbins).

    Arguments:
        - N: number of items to generate (rows).
        - M: number of observations drawn per item.
        - nextConfig: either a callable taking no arguments that returns a new
          configuration on each call, or a fixed configuration value that is
          reused for every item.
          Example:
              def yulesimon_nextConfig():
                  alpha = random.uniform(2.0, 3.0)
                  loc = random.uniform(0.0, 10.0)
                  return alpha, loc
        - sample: callable invoked as sample(config, size=M); must return M
          observations. They are stored in an integer matrix.
          Example:
              def yulesimon_sample(config, size):
                  return yulesimon.rvs(config[0], loc=config[1], size=size)
        - nbins: number of histogram bins. When negative, the largest observed
          value across all samples is used.
        - density: forwarded to np.histogram.
        - apply_log_scale: when True the histograms are passed through
          log_scale() before being returned.

    Returns:
        A tuple (samples, histogram_matrix, configurations):
        - samples: integer matrix [N, M] with the raw observations.
        - histogram_matrix: matrix [N, nbins], one histogram per row.
        - configurations: object array of length N with the configuration
          used for each row.
    """
    samples = np.zeros((N, M), dtype=int)
    configurations = np.zeros((N,), dtype=object)
    config_is_generator = callable(nextConfig)

    for row in range(N):
        # obtain the configuration for this item (fresh draw or fixed value)
        config = nextConfig() if config_is_generator else nextConfig

        # draw the observations, then record which configuration produced them
        samples[row, :] = sample(config, size=M)
        configurations[row] = config

    if nbins < 0:
        nbins = np.max(samples)

    def row_histogram(observations):
        # np.histogram returns (counts, bin_edges); only the counts are kept
        counts, _ = np.histogram(observations, bins=nbins, range=(0, nbins), density=density)
        return counts

    histogram_matrix = np.apply_along_axis(row_histogram, 1, samples)

    if apply_log_scale:
        histogram_matrix = log_scale(histogram_matrix)

    return samples, histogram_matrix, configurations
        

# DNN Model

In [4]:
# Multi-output regression involves predicting two or more numerical variables.
def create_dnn_model(n_features, 
                     layers, 
                     n_outputs=1, 
                     activation='relu', 
                     init='he_uniform', 
                     batch_normalization=False, 
                     dropout=0, 
                     k_reg=False, 
                     k_reg_lr=0.001, 
                     a_reg=False, 
                     a_reg_lr=0.001, 
                     loss='mae', 
                     optimizer='adam'):
    """
    Build and compile a fully-connected Keras regression model.

    Each entry of `layers` produces: Dense -> [BatchNormalization] ->
    Activation -> [Dropout]. A final Dense(n_outputs) layer with no explicit
    activation is appended as the output.

    Arguments:
        - n_features: number of input features (input dimension of the first
          Dense layer).
        - layers: non-empty sequence with the number of units of each Dense
          layer, in order.
        - n_outputs: number of regression targets.
        - activation: activation applied after every Dense layer in `layers`.
        - init: kernel initializer of every Dense layer in `layers`.
        - batch_normalization: insert BatchNormalization before each
          activation; the Dense bias is disabled in that case.
        - dropout: dropout rate; a Dropout layer is added after each
          activation only when the rate is > 0.
        - k_reg / k_reg_lr: enable an l2 kernel regularizer / its factor.
        - a_reg / a_reg_lr: enable an l2 activity regularizer / its factor.
        - loss: loss passed to compile(); also reported as the only metric.
        - optimizer: optimizer passed to compile().

    Returns:
        The compiled Sequential model.

    Raises:
        ValueError: if `layers` is empty.
    """
    if len(layers) == 0:
        raise ValueError('layers must contain at least one layer size')

    model = Sequential()

    for index, units in enumerate(layers):
        dense_kwargs = dict(
            units=units,
            kernel_initializer=init,
            kernel_regularizer=l2(k_reg_lr) if k_reg else None,
            activity_regularizer=l2(a_reg_lr) if a_reg else None,
            # BatchNormalization adds its own offset, so a Dense bias
            # in front of it would be redundant.
            use_bias=(not batch_normalization),
        )
        if index == 0:
            # only the first layer declares the input dimension
            dense_kwargs['input_dim'] = n_features
        model.add(Dense(**dense_kwargs))

        if batch_normalization:
            model.add(BatchNormalization())

        model.add(Activation(activation))

        # Same rule for every layer: skip the no-op Dropout(0).
        if dropout > 0:
            model.add(Dropout(dropout))

    # output layer
    model.add(Dense(n_outputs))

    model.compile(loss=loss, metrics=[loss], optimizer=optimizer)

    return model

# Train model

In [None]:
def train_model(model, X_train, y_train, batch_size=32,
                epochs=200, val_size=0.25, es_patience=50, models_dir='models'):
    """
    Fit `model` with early stopping, LR reduction and best-model checkpointing.

    The given data is split into a training and a validation part (the split
    uses the module-level RANDOM_STATE). The best model according to
    'val_loss' is written to '<models_dir>/DNN_<timestamp>.h5' and the
    training history is pickled next to it as '<...>.history'.

    Arguments:
        - model: compiled Keras model to train.
        - X_train, y_train: features and targets; split internally into
          train/validation.
        - batch_size: batch size passed to fit().
        - epochs: maximum number of training epochs.
        - val_size: fraction of the data held out for validation.
        - es_patience: early-stopping patience, in epochs.
        - models_dir: directory for the checkpoint and history files; created
          if it does not exist.

    Returns:
        A tuple (model, history):
        - model: the checkpointed best model, re-loaded from disk (a new
          object, not the instance that was passed in).
        - history: the `History.history` dict returned by fit().
    """
    # split train/val
    X_train, X_val, y_train, y_val = train_test_split(X_train, 
                                                      y_train, 
                                                      test_size=val_size, 
                                                      random_state=RANDOM_STATE)

    # early-stopping
    es = EarlyStopping(monitor='val_loss', 
                        patience=es_patience, 
                        mode='min', 
                        restore_best_weights=True, 
                        verbose=0)
    
    # reduce learning-rate on plateau
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=10)
    
    # model checkpoint; exist_ok avoids the check-then-create race
    os.makedirs(models_dir, exist_ok=True)
    date_str = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
    base_path = f'{models_dir}/DNN_{date_str}'
    model_path = f'{base_path}.h5'
    history_path = f'{base_path}.history'
    
    cp = ModelCheckpoint(filepath=model_path, monitor='val_loss', mode='min', save_best_only=True, verbose=0)

    # fit model
    history = model.fit(X_train, 
                        y_train, 
                        validation_data=(X_val, y_val), 
                        epochs=epochs, 
                        batch_size=batch_size, 
                        shuffle=False, 
                        callbacks=[es, reduce_lr, cp], 
                        verbose=0)
    
    # save history with model
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)
    
    # load best weights from last checkpoint
    model = keras.models.load_model(model_path)
    return model, history.history

# Plot learning curves

In [None]:
def plot_learning_curves(history, train_key='loss', val_key='val_loss'):
    """
    Plot the training and validation curves stored in a history mapping.

    Arguments:
        - history: mapping from metric name to its per-epoch values.
        - train_key: key of the training curve.
        - val_key: key of the validation curve.
    """
    _, axes = plt.subplots(figsize=(2, 2))

    # draw training first, then validation, to match the legend order below
    for curve_key in (train_key, val_key):
        axes.plot(history[curve_key])

    axes.set_title('learning curves')
    axes.set_ylabel('loss')
    axes.set_xlabel('epoch')
    axes.legend(['train', 'validation'], loc='upper right')
    plt.show()

# Predict

In [3]:
def predict(dnn_model, X_test, y_test):
    """
    Predict on X_test and report the root-mean-squared error against y_test.

    Arguments:
        - dnn_model: object exposing predict(X) (e.g. a trained Keras model).
        - X_test: test features, passed to dnn_model.predict unchanged.
        - y_test: numeric ground-truth targets, either 1D (single output) or
          2D with one column per output.

    Returns:
        A tuple (y_pred, sqrt_mse):
        - y_pred: predictions; flattened to 1D when y_test is 1D, otherwise
          reshaped to y_test's shape.
        - sqrt_mse: scalar RMSE over all elements.

    Raises:
        ValueError: if the predictions cannot be aligned with y_test.
    """
    y_true = np.asarray(y_test, dtype=float)
    y_pred = np.asarray(dnn_model.predict(X_test))

    # Align the prediction shape with the targets. Flattening unconditionally
    # breaks multi-output models and lets a (N, 1) target broadcast against
    # (N,) predictions into an (N, N) error matrix.
    if y_true.ndim <= 1:
        y_pred = y_pred.flatten()
    else:
        y_pred = y_pred.reshape(y_true.shape)

    if y_pred.shape != y_true.shape:
        raise ValueError(
            f'prediction shape {y_pred.shape} does not match target shape {y_true.shape}')

    # mse over all elements, then its square root
    mse = np.mean((y_true - y_pred) ** 2)
    sqrt_mse = np.sqrt(mse)

    return y_pred, sqrt_mse
