In [None]:
import sys
import os
# Append the library path to PYTHONPATH, so library can be imported.
sys.path.append(os.path.dirname(os.getcwd()))
import shutil
import datetime

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

from library import plot, bs
from library import network as nw
from library import common as cm

In [None]:
# Execute the project configuration script inside this notebook's namespace.
# NOTE(review): FEATURE_SET, VIX, PERMUTE, NUM_TEST and res_dir are used further
# down but never assigned in this notebook -- presumably they are defined by
# setup.py (or by Load_Clean_aux.py below); confirm.
%run setup.py
# Enable automatic reloading of imported modules so edits to the local
# `library` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Run the auxiliary load/clean script with the argument 'normal'.
# NOTE(review): `df_train` and `mc_sets` are used below without being assigned
# in this notebook -- presumably this script creates them; confirm.
%run Load_Clean_aux.py normal

# Seed NumPy's global random number generator. The same value is written to
# additional_paras.txt by the last cell.
# NOTE(review): only NumPy is seeded here; whether the network library relies
# on another random source is not visible from this notebook.
seed = 666
np.random.seed(seed)

In [None]:
# Choose the raw (un-standardized) input features and the result directory
# that belong to the configured feature set.
# FEATURE_SET, VIX and res_dir are not assigned in this notebook; they are
# expected to exist already when this cell runs.
if FEATURE_SET == 'normal_feature':
    ori_fea = ['M0', 'tau0_implvol0']
    sub_res = res_dir + 'Network/Normal_Feature/'
elif FEATURE_SET == 'delta_vega':
    ori_fea = ['delta_bs', '1_over_sqrt_tau', 'vega_n']
    sub_res = res_dir + 'Network/Delta_Vega/'
else:
    # Fail loudly: without this branch an unknown value would either raise a
    # confusing NameError below or silently reuse `ori_fea` / `sub_res` left
    # over from an earlier run in the same kernel.
    raise ValueError(
        f"Unsupported FEATURE_SET {FEATURE_SET!r}; "
        "expected 'normal_feature' or 'delta_vega'."
    )

# Optionally add the 'fake_vix' column as an extra input feature.
if VIX:
    ori_fea += ['fake_vix']

# Make sure the result directory exists before anything is written into it.
os.makedirs(sub_res, exist_ok=True)

In [None]:
# Show the resolved result directory as this cell's output.
sub_res

In [None]:
# Hyper-parameters handed to `nw.train_net_core`; a subset of them is also
# written to additional_paras.txt by the last cell.
hypers = dict(
    nodes_per_layer=(30, 30),
    reg_alpha=1e-3,
    lr=1e-4,
    epochs=500,  # the original author noted 1000 as an alternative value
    outact='linear',
)

In [None]:
"""
Note here, we use the same directory structure for the permuted and non-permuted data. 
For the non-permuted data:
    The network is trained only once, so the checkpoint (ckp) and the history come from that single training run.
    Each pnl file corresponds to a different Monte Carlo test set, but all of them come from the same network.

For the permuted data:
    The network is trained once per permutation.
    Each pnl file corresponds to one permutation and comes from the network trained on that permutation.
"""

# One sub-folder of `sub_res` per kind of artifact produced by this notebook.
sub_res_dirs = dict(
    ckp=sub_res + 'ckp/',
    history=sub_res + 'history/',
    pnl=sub_res + 'pnl/',
    plot=sub_res + 'plot/',
)
# Create every sub-folder up front; folders that already exist are left alone.
for out_dir in sub_res_dirs.values():
    os.makedirs(out_dir, exist_ok=True)
# Store a copy of setup.py next to the results.
shutil.copy('setup.py', sub_res)

### No permutations

In [None]:
if not PERMUTE:
    # Step 1: split.
    # The loaded `df_train` holds both the training rows (period0 == 0) and
    # the validation rows (period0 == 1).
    # `.copy()` gives each subset its own data (the permuted branch does the
    # same) so that later processing cannot alias the loaded frame or trigger
    # chained-assignment warnings.
    # NOTE: `df_train` is rebound to the training-only subset, so re-running
    # this cell without reloading the data yields an empty validation set.
    df_val = df_train.loc[df_train['period0'] == 1].copy()
    df_train = df_train.loc[df_train['period0'] == 0].copy()

    # Step 2: choose features and standardize.
    # Before data sets are fed to a network, all their features need to be
    # standardized to have zero mean and unit standard deviation.
    # The scaler is fitted on the training rows only and then reused for the
    # validation and test sets.
    # NOTE(review): the '_t' suffixed columns are presumably the standardized
    # versions produced by `nw.standardize_feature` -- confirm in the library.
    use_fea = [x + '_t' for x in ori_fea] + ['cp_int']

    scaler = StandardScaler().fit(X=df_train[ori_fea])
    df_train, df_val = nw.standardize_feature([df_train, df_val], scaler, ori_fea)

    # Step 3: build a network and train it (once; see the note at `sub_res_dirs`).
    sub_res_paths = {
        'ckp': sub_res_dirs['ckp'] + 'bestcp.h5',
        'history': sub_res_dirs['history'] + 'history.csv',
        'plot': sub_res_dirs['plot'] + 'losscurve.png'
    }
    history = nw.train_net_core(df_train, df_val, use_fea, hypers, sub_res_paths)
    nw.plot_history(history, sub_res_paths['plot'], df_train, df_val)

    # Step 4: evaluate the single trained network on every Monte Carlo test
    # set and store one pnl file per set.
    for i in range(NUM_TEST):
        df_test = mc_sets[i]

        # Standardize with the scaler fitted on the training rows.
        [df_test] = nw.standardize_feature([df_test], scaler, ori_fea)
        delta = nw.test_net_core(df_test, use_fea, sub_res_paths)

        cm.store_pnl(
            df_test, delta,
            pnl_path=sub_res_dirs['pnl'] + f'pnl{i}.csv'
        )

### Permutations

In [None]:
"""
Prepare permutations.
"""
if PERMUTE:
    # One permuted train / validation / test triple per Monte Carlo test set.
    train_permutes, val_permutes, test_permutes = [], [], []
    for i in range(NUM_TEST):
        # The union of the loaded training data and the i-th test set.
        # `pd.concat` replaces `DataFrame.append`, which was removed in
        # pandas 2.0; the keyword arguments keep the same behavior.
        df_permute = pd.concat([df_train, mc_sets[i]], ignore_index=True, sort=False)
        # Permute the combined frame; the loop index doubles as the random
        # seed so every permutation is different but reproducible.
        # NOTE(review): the meaning of the positional argument `0` is defined
        # by `cm.permute_core` and is not visible here.
        df_permute = cm.permute_core(df_permute, 0, random_seed=i)

        # Split again by the `period0` label: 0 -> train, 1 -> validation,
        # 2 -> test.
        df_train_permuted = df_permute.loc[df_permute['period0'] == 0]
        df_val_permuted = df_permute.loc[df_permute['period0'] == 1]
        df_test_permuted = df_permute.loc[df_permute['period0'] == 2]

        # Store independent copies so later processing does not touch
        # `df_permute`.
        train_permutes.append(df_train_permuted.copy())
        val_permutes.append(df_val_permuted.copy())
        test_permutes.append(df_test_permuted.copy())
    # The original test sets are no longer needed; drop the reference.
    # NOTE: this makes the cell fail on a second run unless `mc_sets` is
    # loaded again.
    del mc_sets

In [None]:
if PERMUTE:
    use_fea = [name + '_t' for name in ori_fea] + ['cp_int']
    for i in range(NUM_TEST):
        # Step 2: choose features and standardize.
        # Unlike the non-permuted version, a separate scaler is fitted for
        # each permutation (on that permutation's training rows).
        scaler = StandardScaler().fit(X=train_permutes[i][ori_fea])
        standardized = nw.standardize_feature(
            [train_permutes[i], val_permutes[i]], scaler, ori_fea
        )
        train_permutes[i], val_permutes[i] = standardized

        # Step 3: build a network and train it; every permutation gets its
        # own checkpoint, history file and loss-curve plot.
        sub_res_paths = dict(
            ckp=sub_res_dirs['ckp'] + f'bestcp{i}.h5',
            history=sub_res_dirs['history'] + f'history{i}.csv',
            plot=sub_res_dirs['plot'] + f'losscurve{i}.png',
        )
        history = nw.train_net_core(
            train_permutes[i], val_permutes[i], use_fea, hypers, sub_res_paths
        )
        nw.plot_history(
            history, sub_res_paths['plot'], train_permutes[i], val_permutes[i]
        )

        # Test the network on the single permuted test set that belongs to
        # this permutation, then store its pnl.
        (test_permutes[i],) = nw.standardize_feature(
            [test_permutes[i]], scaler, ori_fea
        )
        delta = nw.test_net_core(test_permutes[i], use_fea, sub_res_paths)
        cm.store_pnl(
            test_permutes[i],
            delta,
            pnl_path=sub_res_dirs['pnl'] + f'pnl{i}.csv',
        )

In [None]:

# Write a short, human-readable summary of the run configuration into the
# result directory.
with open(f'{sub_res}additional_paras.txt', 'w+') as file:
    file.write('The following is network setup.\n')
    file.write(f'Date and time = {datetime.datetime.now()}\n')
    # Label -> value pairs, written one per line in this order.
    run_settings = {
        'Random seed': seed,
        'Features used': use_fea,
        'Learning rate': hypers['lr'],
        'L2 regularization alpha': hypers['reg_alpha'],
        'Nodes per layer': hypers['nodes_per_layer'],
        'Number of training epochs': hypers['epochs'],
    }
    file.writelines(
        f'{label} = {setting}\n' for label, setting in run_settings.items()
    )