# Training

## Setup

In [2]:
# TODO: replace this with the path to your project
%cd /Users/vivekkumar/Documents/MSC_dissertation_project/2_Training

/Users/vivekkumar/Documents/MSC_dissertation_project/2_Training


In [3]:
# confirm it works by running this cell and checking the output matched the path in the above cell
!pwd

/Users/vivekkumar/Documents/MSC_dissertation_project/2_Training


In [8]:
# install the stable baselines 3 library
%pip install stable_baselines3 -q


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [11]:
import gymnasium

# utils imports
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler

# environment imports
import numpy as np
import pandas as pd
import random
import os
import json
import sys
import pickle
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

# training imports
import torch
from stable_baselines3 import PPO  # project 4
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement

# set seed variable
seed = 42

## Utils

In [13]:
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler

# set seeds
seed = 42
random.seed(seed)
np.random.seed(seed)

# calculate profit
def calculate_profit(position_size, trade_direction, entry_price, exit_price):

    # fixed transaction cost
    t_cost = 0.0001 * position_size

    price_change = (exit_price - entry_price) / entry_price

    if trade_direction == 1:
        profit = price_change * position_size
    elif trade_direction == -1:
        profit = -price_change * position_size
    else:
        return 0
    return profit - t_cost

# get datasets
def get_data(pair, window):

    train_df = pd.read_parquet(f'/Users/vivekkumar/Documents/MSC_dissertation_project/New_trainTest/data_1/{pair}/Window_{window}/train.parquet.gzip')
    val_df = pd.read_parquet(f'/Users/vivekkumar/Documents/MSC_dissertation_project/New_trainTest/data_1/{pair}/Window_{window}/validation.parquet.gzip')
    test_df = pd.read_parquet(f'/Users/vivekkumar/Documents/MSC_dissertation_project/New_trainTest/data_1/{pair}/Window_{window}/test.parquet.gzip')

    train_df = train_df.groupby((train_df['Direction_0.00015'].shift() != train_df['Direction_0.00015']).cumsum()).first()
    val_df = train_df.groupby((val_df['Direction_0.00015'].shift() != val_df['Direction_0.00015']).cumsum()).first()
    # test_df = test_df.groupby((train_df['Direction_0.00015'].shift() != test_df['Direction_0.00015']).cumsum()).first()

    train_prices = train_df['DCC_0.00015'].values
    val_prices = val_df['DCC_0.00015'].values
    test_prices = test_df['DCC_0.00015'].values

    train_df = train_df.filter(regex='Direction|DCC')
    val_df = val_df.filter(regex='Direction|DCC')
    # test_df = test_df.filter(regex='Direction|DCC')

    # train_df, val_df, test_df = normalize_dataframes(train_df, val_df, test_df)
    # train_df, val_df, test_df = manual_normalise(train_df, val_df, test_df)

    train_data = train_df.values
    val_data = val_df.values
    test_data = test_df.values

    data_dict = {
        'train_data': train_data,
        'train_prices': train_prices,
        'val_data': val_data,
        'val_prices': val_prices,
        'test_data': test_data,
        'test_prices': test_prices
    }

    print("------- Data -------")
    print(f"Train Shape: {train_data.shape}")
    print(f"Validation Shape: {val_data.shape}")
    print(f"Test Shape: {test_data.shape}")

    return data_dict

# percentage change
def pct_change(old_value, new_value):
    change = new_value - old_value
    percentage_change = (change / old_value)
    return percentage_change

# shift by n, new start values are replace by np.nan and end values are discarded
def shift(array, shift):
    return np.concatenate(([np.nan] * shift, array[:-shift]))

# rolling window generator, left over events are discarded
def rolling_window(df, window_size, shift):
    for i in range(0, len(df) - window_size + 1, shift):
        yield df.iloc[i:i+window_size]

# take train, validation and test sets and normalise based on training data
def normalize_dataframes(train_df, val_df, test_df):
    # create a MinMaxScaler object
    scaler = MinMaxScaler()

    # fit the scaler on the train DataFrame
    scaler.fit(train_df)

    # normalize each DataFrame using the fitted scaler
    train_normalized = pd.DataFrame(scaler.transform(train_df), columns=train_df.columns)
    val_normalized = pd.DataFrame(scaler.transform(val_df), columns=val_df.columns)
    test_normalized = pd.DataFrame(scaler.transform(test_df), columns=test_df.columns)

    return train_normalized, val_normalized, test_normalized

def manual_normalise(train_df, val_df, test_df):

    DC_start_end = train_df[['Start', 'DCC']]
    train_transformed = (train_df - train_df.min()) / (train_df.max() - train_df.min())
    train_transformed[['Start', 'DCC']] = DC_start_end

    DC_start_end = val_df[['Start', 'DCC']]
    val_transformed = (val_df - train_df.min()) / (train_df.max() - train_df.min())
    val_transformed[['Start', 'DCC']] = DC_start_end

    DC_start_end = test_df[['Start', 'DCC']]
    test_transformed = (test_df - train_df.min()) / (train_df.max() - train_df.min())
    test_transformed[['Start', 'DCC']] = DC_start_end

    return train_transformed, val_transformed, test_transformed


# trend class
class Trend(object):
    def __init__(self, direction, DC_start, DCC, OS_end, DC_start_index, DCC_index, OS_end_index):
        self.direction, self.DC_start, self.DCC, self.OS_end = direction, DC_start, DCC, OS_end
        self.DC_start_index, self.DCC_index, self.OS_end_index = DC_start_index, DCC_index, OS_end_index

        self.data_dict = {
                'Direction': self.direction,
                'Start': round(self.DC_start, 6),
                'DCC': round(self.DCC, 6),
                'End': round(self.OS_end, 6),
                'Start Index': round(self.DC_start_index, 6),
                'DCC Index': round(self.DCC_index, 6),
                'End Index': round(self.OS_end_index, 6),
            }

    def __str__(self):
        return str(self.data_dict)

## Environment

In [14]:
with open('/Users/vivekkumar/Documents/MSC_dissertation_project/New_trainTest/params.json', 'r') as f:
    params = json.load(f)

class DirectionalChangeEnv(gymnasium.Env):
    def __init__(self, env_config):

        # get data from config
        data = env_config['data']
        prices = env_config['prices']

        self.full_data = data
        self.full_prices = prices

        # init state params
        self.context_length = int(params['training']['context_length'])
        self.lag = int(params['training']['lag'])
        try:
            self.start_index = random.randint(0, len(self.full_data) - (self.context_length + 1))
        except:
            self.start_index = 0
        self.end_index = self.start_index + self.context_length

        # init data
        self.data = self.full_data[self.start_index: self.end_index]
        self.prices = self.full_prices[self.start_index: self.end_index]

        # init state
        self.i = self.lag - 1
        self.state = self.data[0:self.i + 1]
        self.price = self.prices[self.i]

        # set action and observation space
        self.action_space = gymnasium.spaces.Discrete(2)  # buy, sell
        self.observation_space = gymnasium.spaces.Box(0, 2, shape=self.state.shape)  # DC start, DC end for last 5 timesteps

        # init simulation params
        self.n_prices = len(self.data) - self.lag
        self.balance = 100
        self.entry_price = None
        self.position_size = None
        self.in_position = 0  # -1 for short, 0 for no, 1 for long
        self.trading_log = []

    def step(self, action):

        self.i += 1
        self.n_prices -= 1
        self.state = self.data[self.i - (self.lag-1) : self.i + 1]
        self.mid_price = self.price

        # reward function
        if self.in_position == 0:
            if action == 0:  #  buy
                self.entry_price = self.mid_price  # long at ask price
                self.position_size = self.balance
                self.in_position = 1
                reward = 0
            elif action == 1:  # sell
                self.entry_price = self.mid_price  # short at bid price
                self.position_size = self.balance
                self.in_position = -1
                reward = 0

        elif self.in_position == -1:
            if action == 0:  #  buy
                profit = calculate_profit(self.position_size, self.in_position,
                                          self.entry_price, self.mid_price)  # exit short position at ask price
                self.trading_log.append({'Trade Index': self.i,
                                         'Position Size': self.position_size,
                                         'Trade Type': 'Short',
                                         'Entry Price': self.entry_price,
                                         'Exit Price': self.mid_price,
                                         'Profit': profit})
                reward = profit
                self.balance += profit
                self.in_position = 0
                self.position_size = None
                self.entry_price = None
            elif action == 1:  # sell
                reward = 0

        elif self.in_position == 1:
            if action == 0:  #  buy
                reward = 0
            elif action == 1:  # sell
                profit = calculate_profit(self.position_size, self.in_position,
                                          self.entry_price, self.mid_price)  # exit long position as bid price
                self.trading_log.append({'Trade Index': self.i,
                                         'Position Size': self.position_size,
                                         'Trade Type': 'Long',
                                         'Entry Price': self.entry_price,
                                         'Exit Price': self.mid_price,
                                         'Profit': profit})
                reward = profit
                self.balance += profit
                self.in_position = 0
                self.position_size = None
                self.entry_price = None

        if self.n_prices <= 0 or self.balance <= 0:
            done = True
            if self.in_position != 0:

                if self.in_position == -1:
                    trade_type = 'Short'
                    exit_price = self.mid_price
                elif self.in_position == 1:
                    trade_type = 'Long'
                    exit_price = self.mid_price

                profit = calculate_profit(self.position_size, self.in_position,
                                            self.entry_price, exit_price)
                reward = profit

                self.trading_log.append({'Trade Index': self.i,
                                         'Position Size': self.position_size,
                                         'Trade Type': trade_type,
                                         'Entry Price': self.entry_price,
                                         'Exit Price': exit_price,
                                         'Profit': profit})
                self.balance += profit
                self.in_position = 0
        else:
            done = False

        info = {'balance': self.balance,
                'trading_log': self.trading_log}

        return self.state, reward, done, False, info

    def reset(self, seed=seed, options=None):

        # generate new training set
        try:
            self.start_index = random.randint(0, len(self.full_data) - (self.context_length + 1))
        except:
            self.start_index = 0
        self.end_index = self.start_index + self.context_length
        self.data = self.full_data[self.start_index: self.end_index]
        self.prices = self.full_prices[self.start_index: self.end_index]

        # reset episode variables
        self.i = self.lag - 1
        self.state = self.data[0:self.i + 1]
        self.price = self.prices[self.i]
        self.n_prices = len(self.data) - self.lag
        self.balance = 100
        self.entry_price = None
        self.position_size = None
        self.in_position = 0  # -1 for short, 0 for no, 1 for long
        self.trading_log = []

        return self.state, {}

## Training

In [23]:
class ModelTrainer(object):
    """class to run training of model"""
    def __init__(self):

        # get data
        data_dict = get_data(pair, window)
        train_data = data_dict['train_data']
        train_prices = data_dict['train_prices']
        val_data = data_dict['val_data']
        val_prices = data_dict['val_prices']

        # configure training parameters
        env_config = {
                'data': train_data,
                'prices': train_prices
            }

        # configure algorithm
        log_path = f"./Logs/{pair}/Window_{window}"
        env = DirectionalChangeEnv(env_config)
        self.model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

    def save_checkpoint(self):
        # save model
        model_folder = f'./Models/{pair}/'
        self.model.save(model_folder + f'PPO_Window_{window}')
        print('model saved')

    def train(self):
        self.model.learn(total_timesteps=200000)

## Run Experiment

In [27]:
# set seeds - this is to ensure reproducibility
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# set script variables
pair = str('AUDUSD')  # run for all pairs: AUDUSD, EURGBP, EURUSD and USDCAD
window = int(0)  # run for all windows

# create experiment
trainer = ModelTrainer()

# train the model
trainer.train()

# save model
trainer.save_checkpoint()

------- Data -------
Train Shape: (103532, 6)
Validation Shape: (1076, 6)
Test Shape: (13039366, 9)
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


ImportError: Trying to log data to tensorboard but tensorboard is not installed.