# Imports

In [None]:
import os
import time
import pickle
import random

from datetime import datetime

import math
import numpy as np
import pandas as pd

from scipy.stats import yulesimon

import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
%matplotlib inline

import seaborn as sns
sns.set_style('white')

from sklearn.model_selection import train_test_split

import tensorflow as tf

import keras
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras import backend as K
from keras import Sequential
from keras.layers import Dense, Activation, MaxPooling1D, Dropout, BatchNormalization, Conv1D, Flatten
from keras.optimizers import Adam, SGD
from keras.regularizers import l1, l2, l1_l2
from keras.metrics import mean_squared_error

# Fix seed for reproducibility

In [None]:
def fix_random(seed):
    '''
    Seed the random number generators used by this notebook.

    params:
        seed: integer seed applied to Python's `random`, NumPy's global
              generator and TensorFlow's global generator
    '''
    # The variable name was previously misspelled ('PYTONHASHSEED'), so it was
    # never picked up. NOTE: Python reads PYTHONHASHSEED at interpreter
    # start-up, so setting it here is inherited by child processes but does
    # not change hashing in the already-running kernel.
    os.environ['PYTHONHASHSEED'] = '0'
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

# Generate data
### &nbsp;&nbsp; *num_alphas*: number of alphas generated between (min_alpha) and (max_alpha) inclusive
### &nbsp;&nbsp; *samples_per_alpha*: number of samples (rows) for each alpha 
### &nbsp;&nbsp; *N* : number of random variates (number of samples drawn from yulesimon distribution)
### &nbsp;&nbsp; *M* : maximum value of random variates (length of input vectors == number of features)

In [None]:
def generate_data(N, 
                  num_alphas=100, 
                  samples_per_alpha=100, 
                  min_alpha=2.01, 
                  max_alpha=3.00, 
                  loc=0, 
                  random_alpha=True, 
                  max_M_only=False, 
                  hstack_zeros=False, 
                  random_state=0):
    '''
    Build a regression data set of log-scaled Yule-Simon histograms.

    For every alpha, `samples_per_alpha` independent samples of `N` variates
    are drawn from `yulesimon(alpha, loc)`. Each sample is turned into a
    histogram of value counts and log-scaled with log10(count + 1).

    params:
        N: number of RV samples (columns) per row
        num_alphas: number of alphas generated between (min_alpha) and (max_alpha) inclusive
        samples_per_alpha: number of samples (rows) for each alpha
        min_alpha, max_alpha: range the alphas are taken from
        loc: location shift forwarded to `yulesimon.rvs`
        random_alpha: draw alphas uniformly at random (uses the global NumPy
                      generator) instead of an evenly spaced grid
        max_M_only: only return the largest variate observed (no features)
        hstack_zeros: append 10 all-zero feature columns to the histograms
        random_state: seed (int), generator instance, or None, used for the
                      Yule-Simon draws

    returns:
        max_M when `max_M_only` is set, otherwise the tuple (X, y, M) where
        X is the (rows, M) feature matrix, y holds the alpha of each row and
        M is the number of feature columns.
    '''
    # Passing an int seed straight to `rvs` re-seeds a fresh generator on every
    # call, which made all rows of one alpha identical. Build the generator
    # once so consecutive draws advance a single reproducible stream.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    if random_alpha:
        alphas = np.random.uniform(low=min_alpha, high=max_alpha, size=num_alphas)
    else:
        alphas = np.linspace(min_alpha, max_alpha, num=num_alphas)

    if max_M_only:
        # only return max_M
        max_M = 0
        for alpha in alphas:
            for _ in range(samples_per_alpha):
                sample = yulesimon.rvs(alpha, loc=loc, size=N, random_state=rng)
                max_M = max(max_M, np.max(sample))
        return max_M

    # column 0 carries the alpha (target), columns 1..N the raw variates
    data = np.empty((num_alphas * samples_per_alpha, N + 1), float)
    row = 0
    for alpha in alphas:
        # generate samples (rows) for current alpha
        for _ in range(samples_per_alpha):
            data[row, 0] = alpha
            data[row, 1:] = yulesimon.rvs(alpha, loc=loc, size=N, random_state=rng)
            row += 1

    # shuffle rows (targets travel with their variates)
    np.random.shuffle(data)

    # separate X from y
    y = data[:, 0]
    X = data[:, 1:].astype(int)

    # One shared set of unit-width bin edges for all rows. With an integer
    # bin count np.histogram spans each row's own min..max, so the same column
    # would describe different values in different rows.
    lowest = int(np.floor(loc)) + 1      # smallest value the distribution can produce
    highest = int(np.max(X))
    edges = np.arange(lowest, highest + 2)
    nbins = len(edges) - 1               # == np.max(X) for the default loc=0
    H = np.apply_along_axis(lambda a: np.histogram(a, bins=edges)[0], 1, X)

    # log scale (H) rows
    logH = np.log10(H + 1)

    if hstack_zeros:
        NUM_ZERO_COLUMNS_TO_HSTACK = 10
        zeros = np.zeros((logH.shape[0], NUM_ZERO_COLUMNS_TO_HSTACK), dtype=logH.dtype)
        return np.hstack((logH, zeros)), y, nbins + NUM_ZERO_COLUMNS_TO_HSTACK # (nbins == M)

    return logH, y, nbins # (nbins == M)

In [None]:
def max_every_n_elements(a, n):
    '''
    Row-wise maximum over consecutive, non-overlapping groups of n columns.

    When the column count is not a multiple of n, the rows are right-padded
    with zeros first, so the last group's maximum is never below zero.

    params:
        a: 2-D array (left unmodified)
        n: group width in columns

    returns:
        array with one column per group of n input columns
    '''
    n_rows = a.shape[0]
    leftover = a.shape[1] % n

    if leftover > 0:
        # fill the incomplete trailing group with zeros
        filler = np.zeros_like(a, shape=[n_rows, n - leftover])
        a = np.hstack((a, filler))

    n_groups = int(a.shape[1] / n)
    grouped = np.reshape(a, (n_rows, n_groups, n))
    return np.max(grouped, axis=2)

In [None]:
def data_hstack_win_max(X, window_sizes=None):
    '''
    Append windowed-maximum features to X.

    For every window size the row-wise maximum over consecutive groups of
    that many columns (see `max_every_n_elements`) is stacked to the right
    of the original columns.

    params:
        X: 2-D feature array (left unmodified)
        window_sizes: iterable of window widths; defaults to the powers of
                      two 2, 4, 8, ... not exceeding half the column count

    returns:
        new array: X followed by one column block per window size
    '''
    # `is None` instead of `None == ...`: the comparison is element-wise for
    # NumPy arrays and made the `if` raise on array-valued window sizes.
    if window_sizes is None:
        # generate window sizes: 2, 4, 8, ..., half_len = X.shape[1] // 2
        half_len = X.shape[1] // 2
        window_sizes = []
        size = 2
        while size <= half_len:
            window_sizes.append(size)
            size *= 2

    # original columns first, then one block per window; concatenate once
    blocks = [X]
    for window_size in window_sizes:
        blocks.append(max_every_n_elements(X, window_size))

    return np.hstack(blocks)

In [None]:
def data_hstack_win_sum(X, window_sizes=None):
    '''
    Append windowed-sum features to X.

    For every window size the row-wise sum over consecutive groups of that
    many columns is stacked to the right of the original columns. Trailing
    columns that do not fill a complete window are left out of the sums.

    params:
        X: 2-D feature array (left unmodified)
        window_sizes: iterable of window widths; defaults to the powers of
                      two 2, 4, 8, ... not exceeding half the column count

    returns:
        new array: X followed by one column block per window size
    '''
    n_cols = X.shape[1]

    # `is None` instead of `None == ...`: the comparison is element-wise for
    # NumPy arrays and made the `if` raise on array-valued window sizes.
    if window_sizes is None:
        # generate window sizes: 2, 4, 8, ..., half_len = n_cols // 2
        half_len = n_cols // 2
        window_sizes = []
        size = 2
        while size <= half_len:
            window_sizes.append(size)
            size *= 2

    # original columns first, then one block per window; concatenate once
    blocks = [X]
    for window_size in window_sizes:
        # number of leading columns that form complete windows
        usable = n_cols - n_cols % window_size
        starts = np.arange(0, usable, window_size)
        blocks.append(np.add.reduceat(X[:, :usable], starts, axis=1))

    return np.hstack(blocks)

# Create CNN Model

In [None]:
def create_cnn_model(n_features, filters=32):
    '''
    Build and compile a single-convolution 1-D CNN regressor.

    Architecture: Conv1D(filters, kernel 3, relu) -> MaxPooling1D(2) ->
    Flatten -> Dense(64, relu) -> Dense(1), compiled with MSE loss and the
    Adam optimizer.

    params:
        n_features: length of each input vector; inputs are shaped
                    (n_features, 1)
        filters: number of convolution filters (was previously ignored in
                 favour of a hard-coded 32, which is still the default)

    returns:
        the compiled keras model
    '''
    model = Sequential()

    model.add(Conv1D(filters, 3, activation="relu", input_shape=(n_features, 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(64, activation="relu"))
    model.add(Dense(1))

    model.compile(loss="mse", optimizer="adam")

    return model

# Create CNN Model (multi-layer)

In [None]:
def create_cnn_model_multi_layer(n_features, filters=32):
    '''
    Build and compile a three-convolution 1-D CNN regressor.

    Architecture: Conv1D(kernel 7) -> Conv1D(kernel 5) -> MaxPooling1D(2) ->
    Conv1D(kernel 3) -> MaxPooling1D(2) -> Flatten -> Dense(64, relu) ->
    Dense(1). All convolutions use stride 1, relu and `filters` filters.
    Compiled with MSE loss, the Adam optimizer and an MSE metric.

    params:
        n_features: length of each input vector; inputs are shaped
                    (n_features, 1)
        filters: number of filters per convolution (was previously
                 overwritten by a hard-coded 32, which is still the default)

    returns:
        the compiled keras model
    '''
    model = Sequential()

    # conv layer 1 -- the only layer that needs an explicit input shape.
    # The hand-computed (float, and partly incorrect) `input_shape` values that
    # used to be passed to the later convolutions were dropped: a Sequential
    # model derives each layer's input shape from the previous layer's output.
    model.add(Conv1D(filters=filters, kernel_size=7, strides=1, activation="relu", input_shape=(n_features, 1)))

    # conv layer 2
    model.add(Conv1D(filters=filters, kernel_size=5, strides=1, activation="relu"))

    # pooling layer 1
    model.add(MaxPooling1D(pool_size=2))

    # conv layer 3
    model.add(Conv1D(filters=filters, kernel_size=3, strides=1, activation="relu"))

    # pooling layer 2
    model.add(MaxPooling1D(pool_size=2))

    # flatten
    model.add(Flatten())

    # dense layer 1
    model.add(Dense(64, activation="relu"))

    # output layer
    model.add(Dense(1))

    model.compile(loss="mse", optimizer="adam", metrics=["mse"])

    return model

# Create DNN Model

In [None]:
def create_dnn_model(n_features, 
                 layers, 
                 activation='relu', 
                 init='he_uniform', 
                 batch_normalization=False, 
                 dropout=0, 
                 optimizer='adam', 
                 k_reg=False, 
                 k_reg_lr=0.001, 
                 a_reg=False, 
                 a_reg_lr=0.001):
    '''
    Build and compile a fully connected regressor.

    Every entry of `layers` becomes one block of
    Dense -> [BatchNormalization] -> Activation -> [Dropout]; the output is
    Dense(1) -> [BatchNormalization] -> linear activation. The model is
    compiled with MSE loss and an MSE metric.

    params:
        n_features: input dimension
        layers: non-empty sequence with the unit count of each hidden block
        activation: activation used after every hidden block
        init: kernel initializer for all Dense layers
        batch_normalization: insert BatchNormalization after each Dense layer
                             (the Dense layers then drop their bias)
        dropout: dropout rate after each hidden block (0 disables it)
        optimizer: optimizer passed to `compile`
        k_reg, k_reg_lr: enable L2 kernel regularization and its factor
        a_reg, a_reg_lr: enable L2 activity regularization and its factor

    returns:
        the compiled keras model

    raises:
        ValueError: if `layers` is empty
    '''
    if len(layers) == 0:
        raise ValueError('layers must contain at least one hidden layer size')

    def dense(units, **extra):
        # all Dense layers share the same initializer/regularizer settings;
        # fresh regularizer objects are created for every layer
        return Dense(units=units,
                     kernel_initializer=init,
                     kernel_regularizer=l2(k_reg_lr) if k_reg else None,
                     activity_regularizer=l2(a_reg_lr) if a_reg else None,
                     use_bias=(not batch_normalization),
                     **extra)

    model = Sequential()

    # ==========================
    # input-layer, hidden-layers
    # ==========================
    # The normalization/activation/dropout steps used to sit outside the
    # hidden-layer loop, so with more than two layers the extra Dense layers
    # were stacked without any non-linearity in between (and with a single
    # layer the block was applied twice). Each Dense layer now gets its own.
    for index, units in enumerate(layers):
        if index == 0:
            model.add(dense(units, input_dim=n_features))
        else:
            model.add(dense(units))

        if batch_normalization:
            model.add(BatchNormalization())

        model.add(Activation(activation))

        if dropout > 0:
            model.add(Dropout(dropout))

    # =============
    # output-layer
    # =============
    model.add(dense(1))

    if batch_normalization:
        model.add(BatchNormalization())

    model.add(Activation('linear'))

    model.compile(loss='mse', metrics=['mse'], optimizer=optimizer)

    return model

# Training

In [None]:
def train(model, X_train, y_train, filters=32, batch_size=32, random_state=0):
    '''
    Fit `model` with early stopping, LR scheduling and checkpointing.

    A quarter of the given data is held out for validation. Training runs for
    at most 200 epochs; the best model (lowest val_loss) is checkpointed to
    'models/yulesimon_<timestamp>.h5' and the training history is pickled
    next to it as '.history'.

    params:
        model: compiled keras model
        X_train, y_train: training data (split further into train/validation)
        filters: unused; kept so existing callers keep working
        batch_size: mini-batch size for `fit`
        random_state: seed for the train/validation split

    returns:
        (model, history): the best checkpointed model and the history dict
    '''
    # split train/val
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, 
        test_size=0.25, random_state=random_state)

    # early-stopping
    es_patience = 50
    es = EarlyStopping(monitor='val_loss', 
                        patience=es_patience, 
                        mode='min', 
                        restore_best_weights=True, 
                        verbose=0)

    # model checkpoint (exist_ok avoids the check-then-create race)
    os.makedirs('models', exist_ok=True)
    date_str = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
    base_path       = 'models/yulesimon_{}'.format(date_str)
    model_path      = '{}.h5'.format(base_path)
    history_path    = '{}.history'.format(base_path)

    cp = ModelCheckpoint(filepath=model_path, monitor='val_loss', mode='min', save_best_only=True, verbose=0)

    # reduce learning-rate on plateau
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=10)

    # fit model
    history = model.fit(X_train, 
                        y_train, 
                        validation_data=(X_val, y_val), 
                        epochs=200, 
                        batch_size=batch_size, 
                        shuffle=False, 
                        callbacks=[es, reduce_lr, cp], 
                        verbose=0)

    # save history with model
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Load the best weights from the checkpoint. The checkpoint is only written
    # when val_loss improves; if that never happened (e.g. val_loss was NaN
    # throughout) there is no file, so keep the in-memory model instead of
    # failing after a complete training run.
    if os.path.exists(model_path):
        model = keras.models.load_model(model_path)
    return model, history.history

In [None]:
def plot_learning_curves(history, train_key='loss', val_key='val_loss'):
    '''
    Plot training vs. validation curves and print their minima.

    params:
        history: dict mapping metric names to per-epoch value sequences
                 (e.g. the history dict returned by `train`)
        train_key: key of the training curve
        val_key: key of the validation curve
    '''
    train_curve = history[train_key]
    val_curve = history[val_key]

    plt.figure(figsize=(2,2))
    plt.plot(train_curve)
    plt.plot(val_curve)
    plt.title('learning curves')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper right')
    plt.show()

    # report the minima of the curves that were actually plotted (the summary
    # used to read the hard-coded 'loss'/'val_loss' entries regardless of keys)
    print('{}:'.format(train_key), np.min(train_curve))
    print('{}:'.format(val_key), np.min(val_curve))

# Trials

In [None]:
def _build_model(nn, n_features, seed):
    '''Create a fresh, untrained network for the architecture named by `nn`.'''
    if nn.startswith('CNNM'): # CNN-multi-layers
        return create_cnn_model_multi_layer(n_features, filters=32)
    if nn.startswith('CNN'):
        return create_cnn_model(n_features, filters=32)
    if nn.startswith('DNN'):
        return create_dnn_model(n_features, 
                        layers=[256,256], 
                        activation='relu', 
                        init=keras.initializers.HeUniform(seed=seed), 
                        batch_normalization=True, 
                        dropout=0.0, 
                        # `lr` is a deprecated alias of `learning_rate`
                        optimizer=Adam(learning_rate=1e-2), 
                        k_reg=True, 
                        k_reg_lr=1e-5, 
                        a_reg=True, 
                        a_reg_lr=1e-5)
    raise RuntimeError("nn: '{}' not supported".format(nn))


def trial(nn='CNN', N_range=(5, 6), random_states=(0, 17), hstack_zeros=False):
    '''
    Train and evaluate one architecture over several sample sizes.

    For every exponent i in `N_range` a data set with N = 2**i variates per
    row is generated (fixed data seed), split into train/test, and a fresh
    model is trained once per entry of `random_states`. Test errors are
    averaged over the random states.

    params:
        nn: architecture name; 'CNNM...' selects the multi-layer CNN,
            'CNN...' the single-layer CNN, 'DNN...' the dense network
            ('DNN_WIN' additionally appends windowed-max features)
        N_range: iterable of exponents i; N = 2**i
        random_states: non-empty sequence of seeds, one training run each
        hstack_zeros: forwarded to `generate_data`

    returns:
        dict with keys 'nn', 'a_N', 'a_sqrt_mse', 'a_avg_abs_errors' and
        'a_std_abs_errors'; the arrays hold one entry per N

    raises:
        RuntimeError: if `nn` names no supported architecture
        ValueError: if `random_states` is empty
    '''
    # fail fast, before any expensive data generation
    if not nn.startswith(('CNN', 'DNN')):
        raise RuntimeError("nn: '{}' not supported".format(nn))
    if len(random_states) == 0:
        raise ValueError('random_states must contain at least one seed')

    print('Processing...')
    print()

    DATA_RANDOM_STATE = 0

    Ns = []
    sqrt_mses = []
    mean_abs_errors = []
    std_abs_errors = []

    # change N (N = 2**i for every exponent i in N_range)
    for i in N_range:

        N = 2**i
        Ns.append(N)

        # data generation (fix random state)
        fix_random(seed=DATA_RANDOM_STATE)
        X, y, M = generate_data(N=N, 
                                num_alphas=100, 
                                samples_per_alpha=100, 
                                random_alpha=True, 
                                max_M_only=False, 
                                hstack_zeros=hstack_zeros, 
                                random_state=DATA_RANDOM_STATE)

        # reshape X for Conv1D
        if nn.startswith('CNN'):
            X = X.reshape(X.shape[0], X.shape[1], 1)
        elif 'DNN_WIN' == nn:
            X = data_hstack_win_max(X, window_sizes=None)

        # split train/test
        print('input.shape:', X.shape)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=DATA_RANDOM_STATE)

        # average sqrt_mse among different random states (fixed N)
        avg_sqrt_mse = 0

        # array of absolute errors
        avg_abs_errors = np.zeros_like(y_test)

        # change random state
        for rs in random_states:

            # fix random
            fix_random(seed=rs)

            # Build a new model for every random state. The model used to be
            # created once per N and reassigned by train(), so every run after
            # the first continued from already-trained weights and the runs
            # being averaged were not independent.
            model = _build_model(nn, X.shape[1], DATA_RANDOM_STATE)

            # training
            model, history = train(model, X_train, y_train, batch_size=32, random_state=rs)

            # predict
            y_pred = model.predict(X_test).flatten()
            errors = y_test - y_pred

            # sqrt_mse, computed with NumPy so the result is a plain float
            sqrt_mse = np.sqrt(np.mean(np.square(errors)))

            # avg_sqrt_mse (accumulator)
            avg_sqrt_mse += sqrt_mse

            # absolute errors
            avg_abs_errors += np.abs(errors)

        # avg errors across random_states
        avg_abs_errors = avg_abs_errors / len(random_states)

        # avg and std of the errors across samples (y_test)
        mean_abs_errors.append(np.average(avg_abs_errors))
        std_abs_errors.append(np.std(avg_abs_errors))

        # avg_sqrt_mse
        avg_sqrt_mse = avg_sqrt_mse / len(random_states)
        sqrt_mses.append(avg_sqrt_mse)
        print('N: {} \t avg_sqrt_mse = {:.6f}'.format(N, avg_sqrt_mse))
        print()

    return {
        'nn': nn, 
        'a_N': np.array(Ns, dtype=float), 
        'a_sqrt_mse': np.array(sqrt_mses, dtype=float), 
        'a_avg_abs_errors': np.array(mean_abs_errors, dtype=float), 
        'a_std_abs_errors': np.array(std_abs_errors, dtype=float)
    }