In [2]:
# !pip install numpy
# !pip install scipy
# !pip install pandas
# !pip install matplotlib
# !pip install scikit-learn
# !pip install keras
# !pip install tensorflow

## Read Data
TODO

In [6]:
import os
import numpy as np
import pandas as pd

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

def read_csvs(csv_files, shuffle_samples=True, verbose=True):
    '''Read one or more csv files into a single DataFrame.

    Args:
        csv_files: iterable of paths (or anything `pd.read_csv` accepts).
        shuffle_samples: when True, the rows of the combined frame are
            shuffled with `sklearn.utils.shuffle` before returning.
        verbose: when True, print each path as it is loaded.

    Returns:
        A DataFrame holding the rows of every file, in file order unless
        shuffled. The frames are concatenated with a fresh 0..n-1 index so
        row labels stay unique across files. An empty `csv_files` yields an
        empty DataFrame.
    '''
    frames = []
    for path in csv_files:
        if verbose:
            print('loading:', path)
        frames.append(pd.read_csv(path))

    # pd.concat raises on an empty list, so handle "no files" explicitly.
    if not frames:
        return pd.DataFrame()

    # DataFrame.append returned a *new* frame (and was removed in pandas 2.0),
    # so the rows have to be combined with a single concat instead.
    df = pd.concat(frames, ignore_index=True)

    if shuffle_samples:
        df = shuffle(df)

    return df

## Create Model
TODO

In [18]:
import keras
from keras import backend as K
from keras import Sequential
from keras.layers import Dense, Activation, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.regularizers import l1, l2
from keras.metrics import mean_squared_error

def create_model(n_features,
                 layers,
                 activation='relu',
                 init='he_uniform',
                 batch_normalization=False,
                 dropout=0,
                 optimizer='adam',
                 optimizer_lr=0.01,
                 k_reg=False,
                 k_reg_lr=0.001,
                 a_reg=False,
                 a_reg_lr=0.001,
                 metrics=['mse']):
    '''Build and compile a fully-connected network with one linear output unit.

    Args:
        n_features: number of input features (passed as `input_dim`).
        layers: non-empty sequence of unit counts; `layers[0]` is the first
            Dense layer, the remaining entries are the hidden layers.
        activation: activation used by the first layer and every hidden layer.
        init: kernel initializer used by every Dense layer.
        batch_normalization: add a BatchNormalization layer after each
            Dense layer (except the output layer).
        dropout: dropout rate; a Dropout layer is added after each
            non-output layer when this is > 0.
        optimizer: `'adam'` builds an `Adam` optimizer with `optimizer_lr`;
            any other value is handed to `compile` unchanged, in which case
            `optimizer_lr` is not applied.
        optimizer_lr: learning rate for the Adam optimizer.
        k_reg, k_reg_lr: enable / strength of L2 kernel regularization on
            the hidden layers.
        a_reg, a_reg_lr: enable / strength of L2 activity regularization on
            the hidden layers.
        metrics: metrics passed to `compile`.

    Returns:
        The compiled `Sequential` model.
    '''
    model = Sequential()

    # ============
    # input-layer
    # ============
    # NOTE: no kernel/activity regularizer is applied to this layer; only the
    # hidden layers below are regularized.
    model.add(Dense(units=layers[0],
                    input_dim=n_features,
                    kernel_initializer=init,
                    activation=activation))

    if batch_normalization:
        model.add(BatchNormalization())

    if dropout > 0:
        # Dropout requires the rate; calling it with no argument raises.
        model.add(Dropout(dropout))

    # ==============
    # hidden-layers
    # ==============
    for units in layers[1:]:
        model.add(Dense(units=units,
                        kernel_initializer=init,
                        kernel_regularizer=l2(k_reg_lr) if k_reg else None,
                        activity_regularizer=l2(a_reg_lr) if a_reg else None))

        # These belong to *each* hidden layer: without an activation between
        # them, consecutive Dense layers collapse into one linear map.
        if batch_normalization:
            model.add(BatchNormalization())

        model.add(Activation(activation))

        if dropout > 0:
            model.add(Dropout(dropout))

    # =============
    # output-layer
    # =============
    # Single linear unit, no regularization.
    model.add(Dense(units=1, kernel_initializer=init))

    # ========
    # compile
    # ========
    if isinstance(optimizer, str) and optimizer.lower() == 'adam':
        opt = Adam(learning_rate=optimizer_lr)
    else:
        opt = optimizer

    # NOTE(review): the loss is assumed to be MSE (single linear output, 'mse'
    # default metric) -- confirm this matches the intended objective.
    model.compile(loss='mean_squared_error',
                  optimizer=opt,
                  metrics=list(metrics) if metrics else None)
    return model


## Evaluate Model
TODO

In [None]:
def evaluate(model, X, y):
    '''Return `model.predict(X)`; `y` is accepted but not used.'''
    return model.predict(X)

## Run
TODO

In [32]:
import time
import datetime
import pickle
import random
import tensorflow as tf
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# fix random seed for reproducibility
def fix_random(seed):
    '''Seed the Python, NumPy and TensorFlow random number generators.

    Args:
        seed: integer seed passed to `random.seed`, `np.random.seed` and
            `tf.random.set_seed`.
    '''
    # The variable is PYTHONHASHSEED; a misspelled key is silently ignored.
    # It is read at interpreter start-up, so setting it here only affects
    # processes spawned afterwards, not the already-running kernel.
    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
# Seed the Python, NumPy and TensorFlow RNGs with 0 when this cell runs.
fix_random(seed=0)

In [24]:
%%time

# Passed as `batch_size` to model.fit in run().
BATCH_SIZE = 1024
# Prefix of the checkpoint and history file names that run() writes under models/.
MODEL_PREFIX = 'yulesimon'

def run(X_train, y_train):
    '''Train a model on the given data and return it with its fit history.

    Holds out 10% of the data for validation, trains with early stopping,
    learning-rate reduction on plateau and best-only checkpointing, then
    reloads the checkpointed model.

    Args:
        X_train: 2-D feature array; `X_train.shape[1]` is used as the number
            of input features.
        y_train: targets aligned with `X_train`.

    Returns:
        (model, history): the model reloaded from the checkpoint file, and
        the `History` object returned by `model.fit`.

    Side effects:
        Creates `models/` if needed and writes
        `models/<MODEL_PREFIX>_<timestamp>.h5` (checkpoint) and
        `models/<MODEL_PREFIX>_<timestamp>.history` (pickled metrics dict).
    '''
    learning_rate = 0.0001
    metrics = ['mse']

    model = create_model(X_train.shape[1],
                         layers=[64, 64, 64],
                         activation='relu',
                         init='he_uniform',
                         batch_normalization=False,
                         dropout=0.15,
                         optimizer='adam',
                         optimizer_lr=learning_rate,
                         k_reg=False,
                         k_reg_lr=1e-5,
                         a_reg=False,
                         a_reg_lr=1e-6,
                         metrics=metrics)

    # fit() requires a compiled model; compile here in case create_model
    # returned the network uncompiled.
    # NOTE(review): MSE loss is assumed (matches the 'mse' metric) -- confirm.
    if getattr(model, 'optimizer', None) is None:
        model.compile(loss='mean_squared_error',
                      optimizer=Adam(learning_rate=learning_rate),
                      metrics=metrics)

    # split train/validation
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1)

    # early-stopping
    es_patience = 100
    es = EarlyStopping(monitor='val_loss',
                       patience=es_patience,
                       mode='min',
                       restore_best_weights=True,
                       verbose=1)

    # model checkpoint
    # The output directory must exist before the checkpoint/history are written.
    os.makedirs('models', exist_ok=True)
    # `datetime` is imported as a module, so the class is datetime.datetime.
    date_str = datetime.datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
    model_path = 'models/{}_{}.h5'.format(MODEL_PREFIX, date_str)
    print('model path:', model_path)

    cp = ModelCheckpoint(filepath=model_path, monitor='val_loss', mode='min', save_best_only=True, verbose=1)

    # reduce learning-rate on plateau
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.9, patience=min(30, int(es_patience/2)))

    # fit model
    history = model.fit(X_train,
                        y_train,
                        validation_data=(X_val, y_val),
                        epochs=10000,
                        batch_size=BATCH_SIZE,
                        shuffle=False,
                        callbacks=[es, reduce_lr, cp],
                        verbose=1)

    # Persist only the metrics dict: the History callback keeps a reference
    # to the model, which makes the object itself fragile to pickle.
    history_unique_name = 'models/{}_{}.history'.format(MODEL_PREFIX, date_str)
    with open(history_unique_name, 'wb') as f:
        pickle.dump(history.history, f)

    # load best weights from last checkpoint
    model = keras.models.load_model(model_path)

    return model, history

# Training is disabled by this guard; change the condition to run it.
# X_train / y_train are not defined in the cells visible here and must be
# created (e.g. from read_csvs output) before enabling the call.
if False:
    run(X_train, y_train)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 5.48 µs
