In [1]:
import sys
import os
# Append the library path to PYTHONPATH, so library can be imported.
sys.path.append(os.path.dirname(os.getcwd()))
import shutil
import datetime

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

from library import plot, bs
from library import network as nw
from library import common as cm

In [2]:
# Execute the helper scripts inside this kernel's namespace (IPython `%run`).
# NOTE(review): presumably setup.py provides the configuration globals used below
# (DATA_DIR, CONFIG, FREQ, NUM_TEST, ...) and Load_Clean_aux.py provides
# `df_train` and `mc_sets` -- confirm against those scripts.
%run setup.py
%run Load_Clean_aux.py normal

# Seed NumPy's global random generator; `seed` is also written to the
# parameter log in the last cell.
# NOTE(review): only NumPy is seeded here; whether the network library needs
# its own seed cannot be seen from this notebook.
seed = 666
np.random.seed(seed)

Loading Normal data sets!

Load and clean the training and validation data.
Original data size is 756804
We remove in-the-money samples. 387229 samples (51.17%) are removed. We have 48.83% of original data left, yielding a size of 369575.
We shrink moneyness range. 0 samples (0.00%) are removed. We have 48.83% of original data left, yielding a size of 369575.
We remove samples when S1 is not available. 1404 samples (0.38%) are removed. We have 48.65% of original data left, yielding a size of 368171.


Clean and load all Monte Carlo test data.

Load Monte Carlo set 1
We remove in-the-money samples. 107339 samples (52.08%) are removed. We have 47.92% of original data left, yielding a size of 98752.
We shrink moneyness range. 0 samples (0.00%) are removed. We have 47.92% of original data left, yielding a size of 98752.
We remove samples when S1 is not available. 1965 samples (1.99%) are removed. We have 46.96% of original data left, yielding a size of 96787.


Load Monte Carlo set 2
We re

In [3]:
# Network hyper-parameters: passed to nw.train_net_core and echoed into the
# parameter log written by the last cell.
hypers = dict(
    nodes_per_layer=(30, 30),
    reg_alpha=1e-4,
    lr=1e-4,
    epochs=5,  # original note reads "1000" -- presumably the full-training value; confirm
    outact='linear',
)
# Base feature columns; 'fake_vix' is appended to these for the VIX=True runs.
ori_fea_save = ['delta_bs', '1_over_sqrt_tau', 'vega_n']

In [4]:
"""
Note: the permuted and non-permuted runs share the same directory structure.
"""
def create_sub_res_dirs(sub_res):
    """Create the result sub-folders below ``sub_res`` and return their paths.

    Parameters
    ----------
    sub_res : str
        Base result directory. Each sub-folder name is concatenated to it
        verbatim, so it has to end with a path separator already.

    Returns
    -------
    dict
        Maps 'ckp', 'history', 'pnl' and 'plot' to ``sub_res + '<name>/'``.

    Side effects
    ------------
    Creates the four folders (already existing ones are left untouched) and
    copies ``setup.py`` from the current working directory into ``sub_res``.
    """
    folder_names = ('ckp', 'history', 'pnl', 'plot')
    sub_res_dirs = {name: sub_res + name + '/' for name in folder_names}
    for folder in sub_res_dirs.values():
        os.makedirs(folder, exist_ok=True)
    # Keep a snapshot of the configuration script next to the results.
    shutil.copy('setup.py', sub_res)
    return sub_res_dirs

### No permutations

In [5]:
# Non-permuted experiment; PERMUTE is embedded in the result and checkpoint paths.
PERMUTE = False
# The loaded `df_train` holds training rows (period0 == 0) and validation rows
# (period0 == 1). Keep pristine copies of both so each run can start from them.
df_train_save = df_train[df_train['period0'].eq(0)].copy()
df_val_save = df_train[df_train['period0'].eq(1)].copy()

In [6]:
# The loaded `df_train` set contains both the training and the validation rows;
# the split into `df_train_save` / `df_val_save` was taken in the previous cell.

for VIX in [False, True]:

    # df_train and df_val are modified further down (by feature standardization),
    # so each pass -- in particular VIX=True -- restarts from the saved copies.
    # The previous LR does not have such issue
    df_train = df_train_save.copy()
    df_val = df_val_save.copy()

    # ----- Step 1: Manage paths -----
    # For the non-permuted data the network is trained only once, so the
    # checkpoint and the history come from that single training. Each pnl file
    # belongs to a different Monte Carlo set, but all come from the same network.

    # Build a fresh list either way so the shared base list is never aliased.
    ori_fea = (ori_fea_save + ['fake_vix']) if VIX else list(ori_fea_save)
    res_dir = f'{DATA_DIR}Result/CONFIG={CONFIG}/FREQ={FREQ}_HALFMONEY={HALF_MONEY}_MINM={MIN_M}_MAXM={MAX_M}_Permute={PERMUTE}_VIX={VIX}/'
    sub_res = res_dir + 'Network/Delta_Vega/'
    sub_res_dirs = create_sub_res_dirs(sub_res)

    # ----- Step 2: Choose features and standardize -----
    # Before data sets are fed to a network, all their features need to be
    # standardized to have zero mean and unit standard deviation.
    # NOTE(review): presumably nw.standardize_feature writes the scaled values
    # to '<feature>_t' columns, which is what `use_fea` selects -- confirm.
    use_fea = [x + '_t' for x in ori_fea] + ['cp_int']

    # The scaler is fitted on the training rows only and reused for validation and test.
    scaler = StandardScaler().fit(X=df_train[ori_fea])
    df_train, df_val = nw.standardize_feature([df_train, df_val], scaler, ori_fea)

    # ----- Step 3: Build a network and train it -----
    sub_res_paths = {
        'ckp': sub_res_dirs['ckp'] + 'bestcp.h5',
        'history': sub_res_dirs['history'] + 'history.csv',
        'plot': sub_res_dirs['plot'] + 'losscurve.png'
    }
    if TRAIN_BY_YOURSELF is True:
        history = nw.train_net_core(df_train, df_val, use_fea, hypers, sub_res_paths)
        nw.plot_history(history, sub_res_paths['plot'], df_train, df_val)
    else:
        # use the provided checkpoints
        sub_res_paths['ckp'] = f'{CKP_DIR}Permute={PERMUTE}_VIX={VIX}/bestcp.h5'

    # ----- Step 4: Evaluate the one network on every Monte Carlo test set -----
    for i in range(NUM_TEST):
        # Work on a copy, mirroring the train/validation handling above, so the
        # frames in `mc_sets` stay pristine for the next VIX pass even if
        # standardization modifies its input in place.
        df_test = mc_sets[i].copy()

        [df_test] = nw.standardize_feature([df_test], scaler, ori_fea)
        delta = nw.test_net_core(df_test, use_fea, sub_res_paths)

        cm.store_pnl(
            df_test, delta,
            pnl_path=sub_res_dirs['pnl'] + f'pnl{i}.csv'
        )

### Permutations

In [5]:
# Switch to the permuted experiment; PERMUTE is embedded in the result and
# checkpoint paths built in the cells below.
PERMUTE = True
# Re-run the loader script inside this kernel.
# NOTE(review): presumably this restores `df_train` (training + validation rows)
# and `mc_sets` to their freshly loaded state after the non-permuted section
# rebound `df_train` -- confirm against Load_Clean_aux.py.
%run Load_Clean_aux.py normal

Loading Normal data sets!

Load and clean the training and validation data.
Original data size is 756804
We remove in-the-money samples. 387229 samples (51.17%) are removed. We have 48.83% of original data left, yielding a size of 369575.
We shrink moneyness range. 0 samples (0.00%) are removed. We have 48.83% of original data left, yielding a size of 369575.
We remove samples when S1 is not available. 1404 samples (0.38%) are removed. We have 48.65% of original data left, yielding a size of 368171.


Clean and load all Monte Carlo test data.

Load Monte Carlo set 1
We remove in-the-money samples. 107339 samples (52.08%) are removed. We have 47.92% of original data left, yielding a size of 98752.
We shrink moneyness range. 0 samples (0.00%) are removed. We have 47.92% of original data left, yielding a size of 98752.
We remove samples when S1 is not available. 1965 samples (1.99%) are removed. We have 46.96% of original data left, yielding a size of 96787.


Load Monte Carlo set 2
We re

In [6]:
"""
Prepare permutations.
"""

# One permuted (train, validation, test) triple is collected per Monte Carlo set.
train_permutes, val_permutes, test_permutes = [], [], []

for i in range(NUM_TEST):
    # Union of the train(+validation) frame and the i-th Monte Carlo test set.
    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
    # and removed in pandas 2.0; the arguments keep the same meaning.
    df_permute = pd.concat([df_train, mc_sets[i]], ignore_index=True, sort=False)
    # Seeded with the set index, so every set gets its own permutation.
    # NOTE(review): the meaning of the positional 0 is defined in
    # cm.permute_core and is not visible here.
    df_permute = cm.permute_core(df_permute, 0, random_seed=i)

    # Split the permuted union by `period0`: 0 -> train, 1 -> validation, 2 -> test.
    df_train_permuted = df_permute.loc[df_permute['period0'] == 0]
    df_val_permuted = df_permute.loc[df_permute['period0'] == 1]
    df_test_permuted = df_permute.loc[df_permute['period0'] == 2]

    # Store copies so the kept frames do not depend on `df_permute`.
    train_permutes.append(df_train_permuted.copy())
    val_permutes.append(df_val_permuted.copy())
    test_permutes.append(df_test_permuted.copy())

# The raw Monte Carlo frames are no longer referenced below; drop the name.
# Note that re-running this cell afterwards needs the loader cell to be run again.
del mc_sets

In [7]:
for VIX in [False, True]:
    # For the permuted data the network is trained once per permutation.
    # Each pnl file belongs to one permutation and comes from the network
    # trained on that permutation.

    # Build a fresh list either way so the shared base list is never aliased.
    ori_fea = (ori_fea_save + ['fake_vix']) if VIX else list(ori_fea_save)
    # The input columns depend only on VIX, so they are computed once per pass
    # instead of once per permutation.
    # NOTE(review): presumably nw.standardize_feature writes the scaled values
    # to '<feature>_t' columns, which is what `use_fea` selects -- confirm.
    use_fea = [x + '_t' for x in ori_fea] + ['cp_int']
    res_dir = f'{DATA_DIR}Result/CONFIG={CONFIG}/FREQ={FREQ}_HALFMONEY={HALF_MONEY}_MINM={MIN_M}_MAXM={MAX_M}_Permute={PERMUTE}_VIX={VIX}/'
    sub_res = res_dir + 'Network/Delta_Vega/'
    sub_res_dirs = create_sub_res_dirs(sub_res)

    for i in range(NUM_TEST):
        # ----- Step 2: Choose features and standardize -----
        # Unlike the non-permuted version, the scaler is fitted separately for
        # each permutation (on that permutation's training rows).
        # NOTE(review): the stored permuted frames are passed by reference and
        # reused in the VIX=True pass; this is only safe if
        # nw.standardize_feature leaves the `ori_fea` columns intact -- confirm.
        scaler = StandardScaler().fit(X=train_permutes[i][ori_fea])
        train_temp, val_temp = nw.standardize_feature([train_permutes[i], val_permutes[i]], scaler, ori_fea)

        # ----- Step 3: Build a network and train it -----
        sub_res_paths = {
            'ckp': sub_res_dirs['ckp'] + f'bestcp{i}.h5',
            'history': sub_res_dirs['history'] + f'history{i}.csv',
            'plot': sub_res_dirs['plot'] + f'losscurve{i}.png'
        }
        if TRAIN_BY_YOURSELF is True:
            history = nw.train_net_core(train_temp, val_temp, use_fea, hypers, sub_res_paths)
            nw.plot_history(history, sub_res_paths['plot'], train_temp, val_temp)
        else:
            # use the provided checkpoints
            sub_res_paths['ckp'] = f'{CKP_DIR}Permute={PERMUTE}_VIX={VIX}/bestcp{i}.h5'

        # Test the network on the single permuted test set that belongs to it.
        [test_temp] = nw.standardize_feature([test_permutes[i]], scaler, ori_fea)
        delta = nw.test_net_core(test_temp, use_fea, sub_res_paths)
        cm.store_pnl(
            test_temp, delta,
            pnl_path=sub_res_dirs['pnl'] + f'pnl{i}.csv'
        )

In [None]:

# Write a short, human-readable record of the network setup next to the results.
# NOTE(review): `sub_res` and `use_fea` are whatever the most recently executed
# loop left behind, so the file is written only into that run's folder and lists
# that run's features -- confirm this is intended.
with open(f'{sub_res}additional_paras.txt', 'w+') as file:
    file.write('The following is network setup.\n')
    file.write(f'Date and time = {datetime.datetime.now()}\n')
    # One "<label> = <value>" line per logged setting, in this order.
    file.writelines(
        f'{label} = {value}\n'
        for label, value in {
            'Random seed': seed,
            'Features used': use_fea,
            'Learning rate': hypers['lr'],
            'L2 regularization alpha': hypers['reg_alpha'],
            'Nodes per layer': hypers['nodes_per_layer'],
            'Number of training epochs': hypers['epochs'],
        }.items()
    )