### PPO AGENT:

#### Load the dataset and preprocess the dataframe in the required format.

Technical indicators created for the environment include:

- RSI
- MACD
- Stoch_k
- OBV
- Upper_BB
- ATR_1
- ATR_2
- ATR_5
- ATR_10
- ATR_20

In [None]:
# Fetch the prebuilt TA-Lib C library (conda-forge libta-lib 0.4.0, linux-64) and
# unpack the archive's lib/ directory into the system library path.
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2'
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
# Fetch the matching Python bindings (ta-lib 0.4.19, py310 build) and unpack only
# the talib package into dist-packages. Both paths hard-code Python 3.10.
url = 'https://anaconda.org/conda-forge/ta-lib/0.4.19/download/linux-64/ta-lib-0.4.19-py310hde88566_4.tar.bz2'
!curl -L $url | tar xj -C /usr/local/lib/python3.10/dist-packages/ lib/python3.10/site-packages/talib --strip-components=3

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  4015    0  4015    0     0  12157      0 --:--:-- --:--:-- --:--:-- 12129
100  517k  100  517k    0     0   590k      0 --:--:-- --:--:-- --:--:--  590k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  4015    0  4015    0     0  18288      0 --:--:-- --:--:-- --:--:-- 18250
100  392k  100  392k    0     0   508k      0 --:--:-- --:--:-- --:--:--  508k


In [None]:
# Install Stable-Baselines3 with its "extra" optional dependencies; PPO is imported from it below.
pip install stable-baselines3[extra]



In [None]:
# Colab only: mount Google Drive so the input CSV under /content/gdrive is readable.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import talib as ta

class TechnicalIndicators:
    """Attach TA-Lib and pandas-derived indicator columns to a price frame.

    The wrapped DataFrame is modified in place (no copy is taken). It must
    provide ``Close``, ``High``, ``Low`` and ``Volume`` columns.
    """

    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        """Add RSI(14), MACD(12, 26, 9) and the stochastic oscillator columns."""
        frame = self.data
        close = frame['Close']

        frame['RSI'] = ta.RSI(close, timeperiod=14)

        macd_line, macd_signal, macd_hist = ta.MACD(
            close, fastperiod=12, slowperiod=26, signalperiod=9
        )
        frame['MACD'] = macd_line
        frame['MACD_signal'] = macd_signal
        frame['MACD_hist'] = macd_hist

        stoch_k, stoch_d = ta.STOCH(
            frame['High'], frame['Low'], frame['Close'],
            fastk_period=14, slowk_period=3, slowd_period=3,
        )
        frame['Stoch_k'] = stoch_k
        frame['Stoch_d'] = stoch_d

    def add_volume_indicators(self):
        """Add the On-Balance Volume column."""
        frame = self.data
        frame['OBV'] = ta.OBV(frame['Close'], frame['Volume'])

    def add_volatility_indicators(self):
        """Add 20-period Bollinger Bands and ATR over periods 1, 2, 5, 10 and 20."""
        frame = self.data
        upper_band, middle_band, lower_band = ta.BBANDS(frame['Close'], timeperiod=20)
        frame['Upper_BB'] = upper_band
        frame['Middle_BB'] = middle_band
        frame['Lower_BB'] = lower_band

        # Same column order as writing ATR_1 ... ATR_20 out one by one.
        for period in (1, 2, 5, 10, 20):
            frame[f'ATR_{period}'] = ta.ATR(
                frame['High'], frame['Low'], frame['Close'], timeperiod=period
            )

    def add_trend_indicators(self):
        """Add ADX(14), +DI(14), -DI(14) and CCI(5)."""
        frame = self.data
        high, low, close = frame['High'], frame['Low'], frame['Close']
        frame['ADX'] = ta.ADX(high, low, close, timeperiod=14)
        frame['+DI'] = ta.PLUS_DI(high, low, close, timeperiod=14)
        frame['-DI'] = ta.MINUS_DI(high, low, close, timeperiod=14)
        frame['CCI'] = ta.CCI(high, low, close, timeperiod=5)

    def add_other_indicators(self):
        """Add log returns (DLR), expanding-mean close (TWAP) and cumulative VWAP."""
        frame = self.data
        close = frame['Close']
        volume = frame['Volume']

        frame['DLR'] = np.log(close / close.shift(1))
        frame['TWAP'] = close.expanding().mean()

        # VWAP weights the High/Low midpoint (not the trade price) by volume.
        weighted_mid = volume * (frame['High'] + frame['Low']) / 2
        frame['VWAP'] = weighted_mid.cumsum() / volume.cumsum()

    def add_all_indicators(self):
        """Run every indicator group in a fixed order and return the frame."""
        indicator_groups = (
            self.add_momentum_indicators,
            self.add_volume_indicators,
            self.add_volatility_indicators,
            self.add_trend_indicators,
            self.add_other_indicators,
        )
        for add_group in indicator_groups:
            add_group()
        return self.data

In [None]:
# Load the TBBO (trades with best bid/offer) CSV from the mounted Drive.
data = pd.read_csv('/content/gdrive/My Drive/task/xnas-itch-20230703.tbbo.csv')

# Preprocessing to create necessary columns
# Scale the raw price fields down by 1e9 (presumably fixed-point integer prices;
# confirm against the data vendor's schema).
data['price']=data['price']/1e9
data['bid_px_00']=data['bid_px_00']/1e9
data['ask_px_00']=data['ask_px_00']/1e9

# Build OHLCV-style columns for TA-Lib from tick data:
#   Close  = trade price, Volume = trade size,
#   High/Low = max/min of the best bid and ask on that row (not trade extremes),
#   Open   = previous row's Close (the first row falls back to its own Close).
data['Close'] = data['price']
data['Volume'] = data['size']
data['High'] = data[['bid_px_00', 'ask_px_00']].max(axis=1)
data['Low'] = data[['bid_px_00', 'ask_px_00']].min(axis=1)
data['Open'] = data['Close'].shift(1).fillna(data['Close'])


# Indicators are appended in place, so df_with_indicators is the same object as data.
ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()
# Drop the first 35 rows (presumably the indicator warm-up rows that still
# contain NaN; confirm the count if indicator periods change).
market_features_df = df_with_indicators[35:]

Checking the dataset:

In [None]:
# Show all columns in pandas (disables column truncation in DataFrame output)
pd.set_option('display.max_columns', None)

# Preview the first 35 rows of the feature frame.
market_features_df.head(35)

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP
35,1688371214386057385,1688371214385893078,1,2,32,T,N,0,194.05,50,130,164307,326232,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,54.544543,0.006271,-0.00313,0.009401,52.52525,61.952862,-266.0,194.065621,194.017,193.968379,0.3,0.175078,0.098615,0.075141,0.072403,97.257397,30.435801,0.196362,166.666667,0.0,194.02,194.021894
36,1688371214386063777,1688371214385899379,1,2,32,T,N,0,194.05,50,130,164398,326233,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,54.544543,0.007108,-0.001082,0.00819,38.38384,52.525253,-266.0,194.06899,194.02,193.97101,0.3,0.237539,0.138892,0.097627,0.083783,97.361721,22.989295,0.14832,83.333333,0.0,194.020811,194.025188
37,1688371215804852019,1688371215804687301,1,2,32,T,B,0,194.21,10,130,164718,328131,194.0,194.21,3101,29,4,1,AAPL,194.21,10,194.21,194.0,194.05,85.890753,0.020446,0.003223,0.017223,40.40404,43.771044,-256.0,194.125889,194.0305,193.935111,0.21,0.22377,0.153114,0.108864,0.090094,97.458593,19.409454,0.125224,79.268293,0.000824,194.025789,194.025596
38,1688371219671476629,1688371219671312224,1,2,32,T,N,0,194.14,10,130,164405,331406,194.0,194.16,3101,400,4,1,AAPL,194.14,10,194.16,194.0,194.21,64.827662,0.025079,0.007594,0.017484,49.49495,42.760943,-266.0,194.142928,194.0375,193.932072,0.21,0.216885,0.164491,0.118978,0.096089,97.548546,16.622008,0.10724,-3.205128,-0.00036,194.028718,194.025873
39,1688371223368835585,1688371223368671235,1,2,32,T,B,0,194.13,10,130,164350,334235,194.0,194.13,3101,400,4,1,AAPL,194.13,10,194.13,194.0,194.14,62.470772,0.027625,0.011601,0.016025,57.57576,49.158249,-276.0,194.155247,194.044,193.932753,0.14,0.178442,0.159593,0.12108,0.098285,97.632074,15.068361,0.097216,-113.095238,-5.2e-05,194.03125,194.026071
40,1688371229849940201,1688371229849775570,1,2,32,T,B,0,194.13,100,130,164631,339055,194.01,194.13,101,390,2,1,AAPL,194.13,100,194.13,194.01,194.13,62.470772,0.029305,0.015141,0.014164,49.49495,52.188552,-276.0,194.165821,194.0505,193.935179,0.12,0.149221,0.151674,0.120972,0.09937,97.709636,13.871534,0.089495,-64.102564,0.0,194.033659,194.028188
41,1688371230451172473,1688371230451005195,1,2,32,T,N,0,194.02,10,0,167278,339547,194.01,194.16,101,400,2,1,AAPL,194.02,10,194.16,194.01,194.13,42.676806,0.021513,0.016416,0.005097,34.54545,47.205387,-286.0,194.16539,194.0515,193.93761,0.15,0.149611,0.151339,0.123875,0.101902,97.79382,14.463498,0.08085,-113.425926,-0.000567,194.033333,194.02846
42,1688371230451172473,1688371230451005195,1,2,32,T,A,0,194.01,1,0,167278,339548,194.01,194.16,101,400,2,1,AAPL,194.01,1,194.16,194.01,194.02,41.392784,0.014365,0.016006,-0.001641,19.49495,34.511785,-287.0,194.165066,194.052,193.938934,0.15,0.149805,0.151071,0.126487,0.104307,97.871992,13.100725,0.073232,-90.909091,-5.2e-05,194.032791,194.028488
43,1688371230451172473,1688371230451005195,1,2,32,T,A,0,194.01,100,130,167278,339549,194.01,194.16,100,400,1,1,AAPL,194.01,100,194.16,194.01,194.01,41.392784,0.008601,0.014525,-0.005924,4.444444,19.494949,-287.0,194.164727,194.0525,193.940273,0.15,0.149903,0.150857,0.128839,0.106591,97.94458,11.893862,0.066486,-61.22449,0.0,194.032273,194.031073
44,1688371230451995982,1688371230451829005,1,2,32,T,A,0,194.0,3075,0,166977,339553,194.0,194.16,3119,400,5,1,AAPL,194.0,3075,194.16,194.0,194.01,39.997126,0.003189,0.012257,-0.009069,2.222222,8.720539,-3362.0,194.164727,194.0525,193.940273,0.16,0.154951,0.152686,0.131955,0.109262,97.267563,10.755675,0.65822,-81.081081,-5.2e-05,194.031556,194.05967


In [None]:
df_with_indicators.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59271 entries, 0 to 59270
Data columns (total 47 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ts_recv        59271 non-null  int64  
 1   ts_event       59271 non-null  int64  
 2   rtype          59271 non-null  int64  
 3   publisher_id   59271 non-null  int64  
 4   instrument_id  59271 non-null  int64  
 5   action         59271 non-null  object 
 6   side           59271 non-null  object 
 7   depth          59271 non-null  int64  
 8   price          59271 non-null  float64
 9   size           59271 non-null  int64  
 10  flags          59271 non-null  int64  
 11  ts_in_delta    59271 non-null  int64  
 12  sequence       59271 non-null  int64  
 13  bid_px_00      59271 non-null  float64
 14  ask_px_00      59271 non-null  float64
 15  bid_sz_00      59271 non-null  int64  
 16  ask_sz_00      59271 non-null  int64  
 17  bid_ct_00      59271 non-null  int64  
 18  ask_ct

#### Create the Trading Environment class for the PPO Agent

In [None]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class TradingEnvironment(gym.Env):
    """Gym environment that replays tick data row by row for a trading agent.

    Each row of ``data`` is one time step. The agent chooses Hold (0), Buy (1)
    or Sell (2). Buys and sells are sized at a random 0.1%-0.5% of the current
    cash balance and filled at the row's ``Close``. Every non-hold action is
    charged ``-(slippage + time_penalty + transaction_cost)`` as its reward,
    whether or not any shares actually changed hands.

    The episode ends (``done=True``) when the cursor reaches the last row; the
    last row is therefore observed but never acted on. Calling ``step`` again
    without ``reset`` keeps the cursor on the last row.

    Args:
        data: DataFrame containing ``state_columns`` plus ``price``,
            ``ask_px_00``, ``ts_in_delta`` and ``ts_event``. The observation
            matrix and mean volume are cached at construction time, so later
            changes to ``data`` are not reflected in observations.
        daily_trading_limit: Stored on the instance; not enforced by this class.

    Raises:
        ValueError: If ``data`` has no rows.
    """

    metadata = {'render.modes': ['human']}

    INITIAL_BALANCE = 10_000_000.0  # $10 million
    VOLATILITY = 0.3                # volatility term of the transaction-cost model
    TIME_PENALTY_SCALE = 100        # multiplier on ts_in_delta / 1e9 in the reward

    def __init__(self, data, daily_trading_limit):
        super(TradingEnvironment, self).__init__()
        if len(data) == 0:
            raise ValueError("TradingEnvironment requires at least one row of data")

        self.data = data
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

        # Cache per-episode invariants once instead of rebuilding them on every
        # step: the observation matrix (cast to the dtype declared by
        # observation_space) and the mean per-row volume.
        self._observations = self.data[self.state_columns].to_numpy(dtype=np.float32)
        self._mean_volume = self.data['Volume'].mean()

        # Account state is also initialised here so render() works before reset().
        self._reset_account()

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

    def _reset_account(self):
        """Restore cash, position, counters and the trade log to their initial values."""
        self.balance = self.INITIAL_BALANCE
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []

    def reset(self):
        """Rewind to the first row, reset the account and return the first observation."""
        self.current_step = 0
        self._reset_account()
        return self._next_observation()

    def _next_observation(self):
        """Return the float32 feature vector of the row at ``current_step``."""
        return self._observations[self.current_step].copy()

    def step(self, action):
        """Apply ``action`` to the current row and advance to the next one.

        Args:
            action: 0 (hold), 1 (buy) or 2 (sell); NumPy scalars are accepted.

        Returns:
            ``(obs, reward, done, info)`` where ``obs`` is the observation of the
            row *after* the one acted on, and ``info`` describes the row acted on
            (``shares`` is 0 when no trade was executed on this step).
        """
        action = int(action)  # model.predict() hands back a NumPy value
        row = self.data.iloc[self.current_step]
        expected_price = row['ask_px_00']
        actual_price = row['price']
        transaction_time = row['ts_in_delta']

        trade = self._take_action(action)
        reward = 0

        if action != 0:
            transaction_cost = self._calculate_transaction_cost(
                row['Volume'], self.VOLATILITY, self._mean_volume
            )
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            # Annotate only the trade executed on this step; an action that
            # traded zero shares must not overwrite an earlier trade's record.
            if trade is not None:
                trade['reward'] = reward
                trade['transaction_cost'] = transaction_cost
                trade['slippage'] = expected_price - actual_price
                trade['time_penalty'] = self._time_penalty(transaction_time)

        info = {
            'step': self.current_step,
            'action': action,
            'price': actual_price,
            'shares': trade['shares'] if trade is not None else 0,
        }

        # Advance first, then observe, so the agent sees the next state. The
        # cursor is clamped to the last row, which also marks episode end.
        last_index = len(self.data) - 1
        self.current_step = min(self.current_step + 1, last_index)
        done = self.current_step >= last_index
        obs = self._next_observation()

        return obs, reward, done, info

    def _order_size(self, current_price):
        """Return a whole-share order worth a random 0.1%-0.5% of the cash balance."""
        return (self.balance * np.random.uniform(0.001, 0.005)) // current_price

    def _take_action(self, action):
        """Execute a buy or sell at the current row's ``Close``.

        Returns:
            The trade record appended to ``self.trades``, or ``None`` when the
            action was a hold or resulted in zero shares.
        """
        row = self.data.iloc[self.current_step]
        current_price = row['Close']
        current_time = pd.to_datetime(row['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1:  # Buy
            shares = self._order_size(current_price)
            self.balance -= shares * current_price
            self.shares_held += shares
            self.total_shares_traded += shares
        elif action == 2:  # Sell, capped at the current position
            shares = min(self._order_size(current_price), self.shares_held)
            self.balance += shares * current_price
            self.shares_held -= shares
            # NOTE(review): sells *decrease* total_shares_traded, making it a net
            # figure rather than gross volume -- confirm this is intended.
            self.total_shares_traded -= shares
        else:
            return None

        if shares > 0:
            trade_info['shares'] = shares
            self.trades.append(trade_info)
            return trade_info
        return None

    def _time_penalty(self, transaction_time):
        """Scale ``transaction_time`` (divided by 1e9) into a reward penalty."""
        return self.TIME_PENALTY_SCALE * transaction_time / 1e9

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        """Return the negative sum of slippage, time penalty and transaction cost."""
        slippage = expected_price - actual_price
        time_penalty = self._time_penalty(transaction_time)
        reward = - (slippage + time_penalty + transaction_cost)
        return reward

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        """Return ``volatility * sqrt(volume / daily_volume)``.

        ``step`` passes the mean per-row ``Volume`` as ``daily_volume``.
        """
        return volatility * np.sqrt(volume / daily_volume)

    def run(self, policy=None):
        """Play one episode and return ``(cumulative_reward, trades)``.

        Args:
            policy: Optional callable mapping an observation to an action.
                When omitted, actions are sampled uniformly from the action space.
        """
        obs = self.reset()
        for _ in range(len(self.data)):
            action = self.action_space.sample() if policy is None else policy(obs)
            obs, _, done, _ = self.step(action)
            if done:
                break
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        """Print the account summary, then dump the trade log via ``print_trades``."""
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        """Write all trades to ``trades_ppo.csv`` and print one line per trade."""
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_ppo.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

#### Train the PPO agent on the trading environment for a chosen ticker (change `ticker` to train on a different symbol).

In [None]:
# Define the daily trading limit (total number of shares to trade per day)
# Daily trading limit in shares. It is passed to the environment, but the
# environment's limit checks are commented out, so it has no effect.
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
# Keep only the rows for the chosen symbol.
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

env = TradingEnvironment(ticker_data, daily_trading_limit)  # takes (data, daily_trading_limit) only

In [None]:
import pandas as pd
from stable_baselines3 import PPO

# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
# Keep only the rows for the chosen symbol.
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Create the trading environment
env = TradingEnvironment(ticker_data, daily_trading_limit)  # takes (data, daily_trading_limit) only

# Define the best hyperparameters (hard-coded; how they were selected is not shown here)
best_hyperparameters = {'learning_rate': 0.0009931989008886031,'n_steps': 512,'batch_size': 128, 'gamma': 0.9916829193042708,'clip_range': 0.21127653449387027,'n_epochs': 6} # type: ignore

# Create the RL model with the best hyperparameters
model = PPO('MlpPolicy', env, verbose=1, **best_hyperparameters)

# Train the model
model.learn(total_timesteps=10000)

# Save the model
model.save("trading_agent")

# Evaluate the model
# NOTE: evaluation replays the same `env` (and therefore the same data) the
# model was trained on, so this is an in-sample run.
obs = env.reset()
for _ in range(len(ticker_data)):
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        break

# Render the final state (prints the account summary and writes trades_ppo.csv)
env.render()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 3996, Timestamp: 2023-07-03 13:30:01.378986146, Action: 1, Price: 193.8, Shares: 78.0, Reward: -0.10878004241219211, Transaction Cost: 0.056792142412190966, Slippage: 0.030000000000001137, Time Penalty: 0.0219879
Step: 3997, Timestamp: 2023-07-03 13:30:01.378986146, Action: 2, Price: 193.8, Shares: 107.0, Reward: -0.07738611820430272, Transaction Cost: 0.025398218204301576, Slippage: 0.030000000000001137, Time Penalty: 0.0219879
Step: 3998, Timestamp: 2023-07-03 13:30:01.378986146, Action: 2, Price: 193.8, Shares: 83.0, Reward: -0.08790640464463567, Transaction Cost: 0.03591850464463453, Slippage: 0.030000000000001137, Time Penalty: 0.0219879
Step: 3999, Timestamp: 2023-07-03 13:30:01.378986146, Action: 2, Price: 193.8, Shares: 100.0, Reward: -0.07738611820430272, Transaction Cost: 0.025398218204301576, Slippage: 0.030000000000001137, Time Penalty: 0.0219879
Step: 4000, Timestamp: 2023-07-03 13:30:01.378986146, Acti

### TRADING BLOTTER:

#### Preprocess the data for the trading blotter:

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

INITIAL_CASH = 10_000_000  # $10 million

def preprocess_data(df):
    """Add a ``liquidity`` column to ``df`` in place and return the same frame.

    Liquidity is the notional resting at the top of the book: bid size times
    bid price plus ask size times ask price.
    """
    bid_notional = df['bid_sz_00'] * df['bid_px_00']
    ask_notional = df['ask_sz_00'] * df['ask_px_00']
    df['liquidity'] = bid_notional + ask_notional
    return df

def calculate_rsi(data, window=14):
    """Return a simple-moving-average RSI of ``data`` over ``window`` periods.

    Gains and losses are averaged with a plain rolling mean. The leading NaN
    produced by ``diff`` is counted as zero, so the first ``window - 1`` results
    are NaN. A window with no losses yields 100; a flat window yields NaN.
    """
    delta = data.diff()

    # Split moves into non-negative "up" and "down" magnitudes; everything that
    # fails the comparison (including the leading NaN) becomes 0.
    up_moves = delta.where(delta > 0, 0)
    down_moves = -delta.where(delta < 0, 0)

    average_gain = up_moves.rolling(window=window).mean()
    average_loss = down_moves.rolling(window=window).mean()

    relative_strength = average_gain / average_loss
    return 100 - (100 / (1 + relative_strength))

def calculate_vol_and_liquidity(price_df, volume_df, window_size):
    """Return rolling mean/std of price returns and of ``volume_df``.

    Returns:
        A 4-tuple ``(mean_return, std_return, mean_liquidity, std_liquidity)``,
        each computed over a trailing window of ``window_size`` rows.
    """
    returns_window = price_df.pct_change().rolling(window=window_size)
    liquidity_window = volume_df.rolling(window=window_size)

    return (
        returns_window.mean(),
        returns_window.std(),
        liquidity_window.mean(),
        liquidity_window.std(),
    )

def get_percentile(current_value, mean, std):
    """Return the normal-CDF percentile of ``current_value`` given ``mean`` and ``std``.

    When ``std`` is not strictly positive (zero, negative or NaN) there is no
    usable spread, so the neutral value 0.5 is returned.
    """
    if not std > 0:
        return 0.5  # No variation
    return norm.cdf((current_value - mean) / std)

def get_trade_price(base_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction):
    """Return ``base_price`` shifted by a random amount that depends on market regime.

    The volatility and liquidity readings are converted to normal-CDF
    percentiles, which select the range the adjustment is drawn from. Exactly
    one ``np.random.uniform`` draw is made per call. The adjustment is
    subtracted for ``'BUY'`` and added for any other direction.
    """
    vol_percentile = get_percentile(current_vol, mean_vol, std_vol)
    liq_percentile = get_percentile(current_liq, mean_liq, std_liq)

    high_vol = vol_percentile >= 0.9
    low_vol = vol_percentile <= 0.1
    thin_book = liq_percentile < 0.1
    deep_book = liq_percentile >= 0.9

    # Pick the adjustment range for the current regime.
    # NOTE(review): the values are applied as fractions of base_price (0.05 is
    # 5%) although the original name called them "percent" -- confirm the unit.
    if high_vol and thin_book:
        lower, upper = -0.25, -0.15
    elif low_vol and thin_book:
        lower, upper = -0.10, -0.05
    elif high_vol and deep_book:
        lower, upper = -0.05, +0.10
    else:
        lower, upper = -0.05, +0.05  # Default for normal conditions

    adjustment = np.random.uniform(lower, upper)

    if trade_direction == 'BUY':
        return base_price * (1 - adjustment)
    return base_price * (1 + adjustment)  # SELL


  and should_run_async(code)


#### Create trading environment for the blotter

In [None]:
class TradingEnvironmentwithBlotter:
    """Replay tick data with a fixed RSI rule and record every fill in a blotter.

    Rule: buy when RSI < 30, sell (if the symbol is held) when RSI > 70,
    otherwise hold. Orders are sized at a random 0.1%-0.5% of available cash
    and priced by ``get_trade_price``. Each executed trade is appended to
    ``self.trades`` together with its reward components.

    Args:
        data: DataFrame with ``price``, ``symbol``, ``ts_event``, ``ts_in_delta``,
            ``ask_px_00``, ``Volume``, ``Close`` and the top-of-book columns read
            by ``preprocess_data``. A private copy is taken, so the caller's
            frame is left untouched.
        daily_trading_limit: Stored on the instance; not enforced by this class.
        window_size: Rolling window for the volatility and liquidity statistics.
    """

    # Multiplier applied to ``ts_in_delta / 1e9`` in the reward.
    # NOTE(review): TradingEnvironment uses 100 for the same term, so rewards of
    # the two environments are not on the same scale -- confirm which is intended.
    TIME_PENALTY_SCALE = 1000
    VOLATILITY = 0.3  # volatility term of the transaction-cost model

    def __init__(self, data, daily_trading_limit, window_size):
        # Copy first: preprocess_data() and reset() add columns and overwrite
        # 'RSI', which would otherwise modify the caller's (often filtered) frame.
        self.data = preprocess_data(data.copy())
        self.daily_trading_limit = daily_trading_limit
        self.window_size = window_size
        self.state_columns = ['price', 'liquidity', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']
        self.reset()

    def reset(self):
        """Reset the cursor, portfolio and blotter, and recompute derived columns."""
        self.current_step = 0
        self.balance = INITIAL_CASH
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        self.portfolio = {'cash': self.balance, 'holdings': {ticker: 0 for ticker in self.data['symbol'].unique()}}
        # 'RSI' is replaced by the rolling-mean RSI from calculate_rsi().
        self.data['RSI'] = calculate_rsi(self.data['price'])
        self.data['pct_change'] = self.data['price'].pct_change()
        self.data['rolling_mean_vol'], self.data['rolling_std_vol'], self.data['rolling_mean_liq'], self.data['rolling_std_liq'] = calculate_vol_and_liquidity(self.data['price'], self.data['liquidity'], self.window_size)
        # Loop-invariant input of the transaction-cost model.
        self._mean_volume = self.data['Volume'].mean()

    def step(self):
        """Process the row at ``current_step`` and advance the cursor.

        Returns:
            ``True`` once every row has been processed (further calls are
            no-ops that keep returning ``True``), otherwise ``False``.
        """
        if self.current_step >= len(self.data):
            return True

        row = self.data.iloc[self.current_step]
        symbol = row['symbol']
        current_price = row['price']
        current_time = pd.to_datetime(row['ts_event'])
        current_rsi = row['RSI']
        market_state = (row['pct_change'], row['liquidity'],
                        row['rolling_mean_vol'], row['rolling_std_vol'],
                        row['rolling_mean_liq'], row['rolling_std_liq'])
        holdings = self.portfolio['holdings']

        # Default outcome: nothing traded. A NaN RSI fails both comparisons
        # below and therefore also ends up here.
        trade_direction = 'HOLD'
        trade_size = 0
        trade_price = current_price

        if current_rsi < 30:  # Entry signal based on RSI
            trade_direction = 'BUY'
            trade_price = get_trade_price(current_price, *market_state, trade_direction)
            trade_size = (self.portfolio['cash'] * np.random.uniform(0.001, 0.005)) / trade_price
            if self.portfolio['cash'] >= trade_size * trade_price:
                self.portfolio['cash'] -= trade_size * trade_price
                holdings[symbol] += trade_size
            else:
                trade_size = 0  # cancelled: must not be logged as a fill
        elif current_rsi > 70:  # Exit signal based on RSI
            trade_direction = 'SELL'
            if holdings[symbol] > 0:
                # Size is drawn before the price so the random stream is
                # consumed in a fixed order.
                trade_size = min(holdings[symbol], self.portfolio['cash'] * np.random.uniform(0.001, 0.005) / current_price)
                trade_price = get_trade_price(current_price, *market_state, trade_direction)
                self.portfolio['cash'] += trade_size * trade_price
                holdings[symbol] -= trade_size

        if trade_size > 0:
            self._record_trade(row, current_time, trade_direction, trade_price, trade_size)

        self.current_step += 1
        return self.current_step >= len(self.data)

    def _record_trade(self, row, timestamp, direction, price, shares):
        """Compute the reward for an executed trade and append it to the blotter."""
        transaction_cost = self._calculate_transaction_cost(row['Volume'], self.VOLATILITY, self._mean_volume)
        slippage = row['ask_px_00'] - row['price']
        time_penalty = self.TIME_PENALTY_SCALE * row['ts_in_delta'] / 1e9
        reward = - (slippage + time_penalty + transaction_cost)

        self.cumulative_reward += reward
        self.trades.append({
            'step': self.current_step,
            'timestamp': timestamp,
            'action': direction,
            'price': price,
            'shares': shares,
            'symbol': row['symbol'],
            'reward': reward,
            'transaction_cost': transaction_cost,
            'slippage': slippage,
            'time_penalty': time_penalty
        })

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        """Return ``volatility * sqrt(volume / daily_volume)``."""
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        """Replay every row exactly once and return ``(cumulative_reward, trades)``."""
        self.reset()
        for _ in range(len(self.data)):
            if self.step():
                break
        return self.cumulative_reward, self.trades

    def render(self):
        """Print the summary and the blotter, and write it to ``trades_blotter.csv``."""
        print(f'Cumulative reward: {self.cumulative_reward}')
        # After a full run the cursor sits one past the end; value the position
        # at the last available row.
        row = self.data.iloc[min(self.current_step, len(self.data) - 1)]
        print(f'Total portfolio value: {self.portfolio["cash"] + self.portfolio["holdings"][row["symbol"]]*row["Close"]}')
        # get trades in a pandas dataframe
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_blotter.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

#### Run the trading blotter

In [None]:
# Filter data for the specified ticker
ticker = 'AAPL'  # Specify the ticker you want to trade
# Keep only the rows for the chosen symbol.
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Rolling window (in rows) for the volatility and liquidity statistics.
window_size = 60
daily_trading_limit = 1000
# Create the trading environment
env = TradingEnvironmentwithBlotter(ticker_data, daily_trading_limit=1000, window_size=window_size)  # Daily trading limit of 1000 shares

# Run the environment (resets it and calls step() once per row)
cumulative_reward, trades = env.run()

# Render the results (prints the blotter and writes trades_blotter.csv)
env.render()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 49815, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 187.31125220816193, Shares: 0.13492620282188297, Symbol: AAPL, Reward: -0.25111421803554473, Transaction Cost: 0.08031621803554471, Slippage: 0.0, Time Penalty: 0.170798
Step: 49816, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 186.94287190756486, Shares: 0.34572552842459076, Symbol: AAPL, Reward: -0.19619621820430158, Transaction Cost: 0.025398218204301576, Slippage: 0.0, Time Penalty: 0.170798
Step: 49817, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 199.12917046681218, Shares: 0.33091650857639937, Symbol: AAPL, Reward: -0.42478018204301576, Transaction Cost: 0.25398218204301576, Slippage: 0.0, Time Penalty: 0.170798
Step: 49818, Timestamp: 2023-07-03 16:29:54.474421200, Action: SELL, Price: 186.553443722903, Shares: 0.35501132841058003, Symbol: AAPL, Reward: -0.4312691820430067, Transaction Cost: 0.2539821820

In [None]:
# Work on a copy so columns added below do not modify market_features_df.
df=market_features_df.copy()

  and should_run_async(code)


In [None]:
# Convert the integer ts_recv values (epoch nanoseconds) into pandas timestamps.
df['timestamp']=pd.to_datetime(df['ts_recv'])

In [None]:
df.head()

  and should_run_async(code)


Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP,timestamp
35,1688371214386057385,1688371214385893078,1,2,32,T,N,0,194.05,50,130,164307,326232,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,54.544543,0.006271,-0.00313,0.009401,52.525253,61.952862,-266.0,194.065621,194.017,193.968379,0.3,0.175078,0.098615,0.075141,0.072403,97.257397,30.435801,0.196362,166.666667,0.0,194.02,194.021894,2023-07-03 08:00:14.386057385
36,1688371214386063777,1688371214385899379,1,2,32,T,N,0,194.05,50,130,164398,326233,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,54.544543,0.007108,-0.001082,0.00819,38.383838,52.525253,-266.0,194.06899,194.02,193.97101,0.3,0.237539,0.138892,0.097627,0.083783,97.361721,22.989295,0.14832,83.333333,0.0,194.020811,194.025188,2023-07-03 08:00:14.386063777
37,1688371215804852019,1688371215804687301,1,2,32,T,B,0,194.21,10,130,164718,328131,194.0,194.21,3101,29,4,1,AAPL,194.21,10,194.21,194.0,194.05,85.890753,0.020446,0.003223,0.017223,40.40404,43.771044,-256.0,194.125889,194.0305,193.935111,0.21,0.22377,0.153114,0.108864,0.090094,97.458593,19.409454,0.125224,79.268293,0.000824,194.025789,194.025596,2023-07-03 08:00:15.804852019
38,1688371219671476629,1688371219671312224,1,2,32,T,N,0,194.14,10,130,164405,331406,194.0,194.16,3101,400,4,1,AAPL,194.14,10,194.16,194.0,194.21,64.827662,0.025079,0.007594,0.017484,49.494949,42.760943,-266.0,194.142928,194.0375,193.932072,0.21,0.216885,0.164491,0.118978,0.096089,97.548546,16.622008,0.10724,-3.205128,-0.00036,194.028718,194.025873,2023-07-03 08:00:19.671476629
39,1688371223368835585,1688371223368671235,1,2,32,T,B,0,194.13,10,130,164350,334235,194.0,194.13,3101,400,4,1,AAPL,194.13,10,194.13,194.0,194.14,62.470772,0.027625,0.011601,0.016025,57.575758,49.158249,-276.0,194.155247,194.044,193.932753,0.14,0.178442,0.159593,0.12108,0.098285,97.632074,15.068361,0.097216,-113.095238,-5.2e-05,194.03125,194.026071,2023-07-03 08:00:23.368835585


# Transformer Implementation

## Setting up the Environment

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings


# Seed both random number generators used in this notebook so repeated runs
# draw the same numbers.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Silence every warning category for the rest of the session.
warnings.filterwarnings(action="ignore")

In [None]:
# One checkpoint name feeds both the tokenizer and the classifier so the two
# cannot drift apart.
CHECKPOINT = 'distilbert-base-uncased'

# Use the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
# The classification head is created with three output classes.
model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT, num_labels=3)
model.to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

## Data Preparation and Preprocessing for Transformer

In [None]:
ticker = 'AAPL'  # choice of ticker for trading

ticker_data = market_features_df[market_features_df['symbol'] == ticker].copy()

# Label each row by the direction of the NEXT row's Close relative to its own:
#   2 = Buy (next close higher), 0 = Sell (next close lower), 1 = Hold (equal).
next_close = ticker_data['Close'].shift(-1)
ticker_data['label'] = np.select(
    [next_close > ticker_data['Close'], next_close < ticker_data['Close']],
    [2, 0],
    default=1,
)

# The final row has no successor, so its next close is NaN and both
# comparisons above are False; it would otherwise be recorded as a Hold even
# though the outcome is unknown. Drop rows whose next close is missing.
ticker_data = ticker_data[next_close.notna()].copy()

In [None]:
# Show how many rows fall into each class (0 = Sell, 1 = Hold, 2 = Buy).
label_column = ticker_data['label']
label_counts = label_column.value_counts()
print(label_counts)

label
1    45372
0     7130
2     6734
Name: count, dtype: int64


In [None]:
# Order-preserving 80/20 split: shuffle=False keeps the rows in their
# original sequence, so the test partition is the tail of the frame.
TRAIN_ROWS, TEST_ROWS = 3000, 600
train_full, test_full = train_test_split(ticker_data, shuffle=False, test_size=0.2)

# Only the leading rows of each partition are kept to bound the run time.
train_data = train_full.head(TRAIN_ROWS)
test_data = test_full.head(TEST_ROWS)

In [None]:
class Transformer_Trade_Data(Dataset):
    """Dataset that renders each dataframe row as text and tokenizes it.

    Every selected column is written as ``"<column>: <value>"`` and the pieces
    are joined with single spaces. Values that are ``int``/``float`` instances
    are formatted with four decimals; anything else uses its default string
    form. The resulting text is padded/truncated to ``max_length`` tokens.

    Args:
        dataframe: Table holding the feature columns and a ``label`` column.
        tokenizer: Object exposing ``encode_plus`` that returns ``input_ids``
            and ``attention_mask``.
        max_length: Fixed token length of every encoded sample.
        feature_columns: Optional ordered list of columns to render. Defaults
            to every column except ``label`` (the original behaviour). Since
            the text is truncated at ``max_length`` tokens, columns late in
            the order may be cut off; passing an explicit list lets the caller
            choose which ones are rendered first.

    Raises:
        KeyError: If ``feature_columns`` names a column the dataframe lacks.
    """

    def __init__(self, dataframe, tokenizer, max_length, feature_columns=None):
        self.dataframe = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

        # Resolve the column list once instead of on every __getitem__ call.
        if feature_columns is None:
            feature_columns = [col for col in dataframe.columns if col != 'label']
        else:
            feature_columns = list(feature_columns)
            missing = [col for col in feature_columns if col not in dataframe.columns]
            if missing:
                raise KeyError(f"feature_columns not found in dataframe: {missing}")
        self.feature_columns = feature_columns

    def __len__(self):
        """Return the number of rows (samples) in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` tensors for one row."""
        row = self.dataframe.iloc[index]

        # Prepare the text input for the model
        parts = []
        for col in self.feature_columns:
            value = row[col]
            if isinstance(value, (int, float)):
                parts.append(f"{col}: {value:.4f}")
            else:
                parts.append(f"{col}: {value}")
        text_input = ' '.join(parts)

        # Tokenize the input text to a fixed length
        encoded_inputs = self.tokenizer.encode_plus(
            text_input,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            return_attention_mask=True,
            truncation=True
        )

        # Return a dictionary of tensors
        return {
            'input_ids': torch.tensor(encoded_inputs['input_ids'], dtype=torch.long),
            'attention_mask': torch.tensor(encoded_inputs['attention_mask'], dtype=torch.long),
            'labels': torch.tensor(int(row['label']), dtype=torch.long)
        }

## Dataset and DataLoader Creation

In [None]:
# Token length of each encoded row and number of rows per batch.
MAX_TOKENS = 128
BATCH_SIZE = 128

train_dataset = Transformer_Trade_Data(train_data, tokenizer, max_length=MAX_TOKENS)
test_dataset = Transformer_Trade_Data(test_data, tokenizer, max_length=MAX_TOKENS)

# Training batches are reshuffled each epoch; test batches keep their order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

## Model Training and Evaluation Class

In [None]:
import os

# Debugging aid: export CUDA_LAUNCH_BLOCKING=1 into this process's environment.
# NOTE(review): presumably this must be set before the first CUDA call to take
# effect, and an earlier cell already moves the model to the device - confirm.
os.environ.update({'CUDA_LAUNCH_BLOCKING': "1"})


In [None]:
class TransformerModel:
    """Fine-tune and evaluate a sequence classifier on batched, tokenized rows.

    Args:
        model_name: Either an already-built model (what this notebook passes),
            a checkpoint name string to load with
            ``AutoModelForSequenceClassification``, or a previous
            ``TransformerModel`` wrapper (its inner model is reused, so
            re-running the instantiation cell keeps working).
        num_labels: Number of output classes; also fixes the length of the
            class-weight vector.
        train_loader: Iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors, used for training.
        test_loader: Same batch format, used by ``evaluate``.
        device: Device the batches and class weights are moved to.
    """

    def __init__(self, model_name, num_labels, train_loader, test_loader, device='cuda'):
        # The notebook rebinds the name ``model`` to this wrapper; unwrap so a
        # second run of the same cell does not nest wrappers.
        if isinstance(model_name, TransformerModel):
            model_name = model_name.model

        if isinstance(model_name, str):
            # A checkpoint name was given: honour it instead of ignoring it.
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(
                model_name, num_labels=num_labels
            )
        else:
            # A ready model object was given; keep the notebook-level tokenizer.
            self.tokenizer = tokenizer
            self.model = model_name
        self.model.to(device)

        self.num_labels = num_labels
        self.device = device
        self.train_loader = train_loader
        self.test_loader = test_loader

        # Mixed precision only makes sense on CUDA; disable it explicitly on
        # CPU rather than relying on autocast/GradScaler warning and no-op'ing.
        self.use_amp = torch.device(device).type == 'cuda' and torch.cuda.is_available()

        # Initialize optimizer and scheduler (learning rate decays 10% per epoch)
        self.optimizer = AdamW(self.model.parameters(), lr=1e-5)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=1, gamma=0.9)
        self.scaler = GradScaler(enabled=self.use_amp)

        # Compute class weights
        self.class_weights = self.compute_class_weights()

    def compute_class_weights(self):
        """Return balanced class weights as a float tensor of length ``num_labels``.

        The weight of class ``c`` is ``n_samples / (num_labels * count_c)``,
        which matches scikit-learn's ``'balanced'`` heuristic when every class
        occurs. A class absent from the training labels gets weight ``0.0`` so
        the vector always has one entry per logit.

        Raises:
            ValueError: If the loader yields no batches or a label is outside
                ``[0, num_labels)``.
        """
        # atleast_1d covers a batch whose labels collapsed to a 0-d tensor.
        label_batches = [np.atleast_1d(batch['labels'].numpy()) for batch in self.train_loader]
        if not label_batches:
            raise ValueError("train_loader yielded no batches; cannot compute class weights")

        labels = np.concatenate(label_batches)
        counts = np.bincount(labels, minlength=self.num_labels).astype(float)
        if counts.size > self.num_labels:
            raise ValueError(
                f"found label {counts.size - 1} but num_labels is {self.num_labels}"
            )

        weights = np.zeros_like(counts)
        present = counts > 0
        weights[present] = labels.size / (self.num_labels * counts[present])
        return torch.tensor(weights, dtype=torch.float).to(self.device)

    def train(self, num_epochs):
        """Run ``num_epochs`` passes over ``train_loader``, printing loss and accuracy per epoch."""
        for epoch in range(num_epochs):
            self.model.train()
            total_loss = 0.0
            all_preds = []
            all_labels = []

            progress_bar = tqdm(self.train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")

            for batch in progress_bar:
                self.optimizer.zero_grad()

                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                with autocast(enabled=self.use_amp):
                    outputs = self.model(input_ids, attention_mask=attention_mask)
                    logits = outputs.logits
                    # Weighted loss counteracts the imbalance between classes.
                    loss = torch.nn.functional.cross_entropy(logits, labels, weight=self.class_weights)

                # The scaler guards against fp16 gradient underflow when AMP is on.
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()

                batch_loss = loss.item()
                total_loss += batch_loss

                all_preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                progress_bar.set_postfix({'loss': f"{batch_loss:.4f}"})

            epoch_accuracy = accuracy_score(all_labels, all_preds)
            self.scheduler.step()
            print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {total_loss/len(self.train_loader):.4f}, Accuracy: {epoch_accuracy:.4f}")

    def evaluate(self):
        """Print and return the accuracy of the model over ``test_loader``."""
        self.model.eval()
        predictions = []
        actual_labels = []

        with torch.no_grad():
            for batch in self.test_loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                outputs = self.model(input_ids, attention_mask=attention_mask)
                preds = torch.argmax(outputs.logits, dim=1)

                predictions.extend(preds.cpu().tolist())
                actual_labels.extend(labels.cpu().tolist())

        accuracy = accuracy_score(actual_labels, predictions)
        print(f"Test Accuracy: {accuracy:.4f}")
        return accuracy



In [None]:
# Wrap the classifier in the trainer, fine-tune it for four epochs, then
# report accuracy on the held-out loader. Note that ``model`` is rebound to
# the wrapper here; the underlying network is available as ``model.model``.
model = TransformerModel(
    model,
    num_labels=3,
    train_loader=train_loader,
    test_loader=test_loader,
    device=device,
)
model.train(num_epochs=4)
model.evaluate()

Epoch 1/4: 100%|██████████| 8/8 [00:04<00:00,  1.80it/s, loss=1.1015]


Epoch 1/4, Average Loss: 1.0998, Accuracy: 0.3450


Epoch 2/4: 100%|██████████| 8/8 [00:04<00:00,  1.63it/s, loss=1.0982]


Epoch 2/4, Average Loss: 1.1002, Accuracy: 0.3420


Epoch 3/4: 100%|██████████| 8/8 [00:04<00:00,  1.68it/s, loss=1.0980]


Epoch 3/4, Average Loss: 1.0989, Accuracy: 0.3240


Epoch 4/4: 100%|██████████| 8/8 [00:05<00:00,  1.55it/s, loss=1.0976]


Epoch 4/4, Average Loss: 1.0991, Accuracy: 0.4020
Test Accuracy: 0.6700


In [None]:
# Persist only the fine-tuned weights (state dict) to the mounted Drive folder.
MODEL_WEIGHTS_PATH = "/content/gdrive/My Drive/task/transformer_distil_model.pth"
torch.save(model.model.state_dict(), MODEL_WEIGHTS_PATH)

## Transformer Trading Environment

In [None]:
class TradingEnvironmentwithDistilBERT:
    """Replay market rows and trade on a classifier's SELL/HOLD/BUY prediction.

    At each step the current row's ``state_columns`` are rendered as text,
    classified, and the predicted action is turned into an order worth
    0.1%-0.5% of the current cash balance. Orders are capped by available
    cash/holdings and by the remaining share allowance
    (``daily_trading_limit`` minus the shares traded since the last reset).
    The per-trade reward is the negated sum of slippage, a time penalty and a
    volume-based transaction cost.

    Args:
        data: DataFrame with at least ``symbol``, ``Close``, ``Volume``,
            ``ts_event``, ``ask_px_00``, ``bid_px_00``, ``price``,
            ``ts_in_delta`` and every name in ``state_columns``.
        daily_trading_limit: Maximum number of shares traded between resets.
        model: Classifier whose output exposes ``logits``.
        tokenizer: Tokenizer exposing ``encode_plus``.
        device: Device the encoded inputs are moved to.
        initial_cash: Starting cash. Defaults to the notebook-level
            ``INITIAL_CASH`` constant when omitted.

    NOTE(review): the prompt is built from ``state_columns`` only - confirm
    the classifier was trained on text rendered from the same columns.
    """

    # Index of each action matches the classifier's label ids.
    ACTIONS = ('SELL', 'HOLD', 'BUY')

    def __init__(self, data, daily_trading_limit, model, tokenizer, device, initial_cash=None):
        self.data = data
        self.daily_trading_limit = daily_trading_limit
        self.model = model
        self.tokenizer = tokenizer
        self.device = device
        self.initial_cash = INITIAL_CASH if initial_cash is None else initial_cash
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']
        # Loop-invariant normaliser for the transaction-cost term; computing it
        # once avoids a full-column mean on every step.
        self.mean_volume = self.data['Volume'].mean()
        # Inference must not run with dropout active.
        if self.model is not None:
            self.model.eval()
        self.reset()

    def reset(self):
        """Restore the starting cash, clear holdings, counters and the trade log."""
        self.current_step = 0
        self.balance = self.initial_cash
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        self.portfolio = {'cash': self.balance, 'holdings': {symbol: 0 for symbol in self.data['symbol'].unique()}}
        print("Environment reset.")

    def transformer_trading_simulation(self, state):
        """Render ``state`` as text, classify it and return 'SELL', 'HOLD' or 'BUY'."""
        text = ' '.join([f"{col}: {state[col]:.4f}" if isinstance(state[col], (int, float)) else f"{col}: {state[col]}"
                 for col in self.state_columns])
        inputs = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=128,
            padding='max_length',
            return_attention_mask=True,
            truncation=True
        )
        # Add a batch dimension of 1 before sending the inputs to the model.
        input_ids = torch.tensor(inputs['input_ids'], dtype=torch.long).unsqueeze(0).to(self.device)
        attention_mask = torch.tensor(inputs['attention_mask'], dtype=torch.long).unsqueeze(0).to(self.device)

        with torch.no_grad():
            outputs = self.model(input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=1)

        return self.ACTIONS[preds.item()]

    def _size_order(self, action, symbol, price):
        """Return the number of shares to trade for ``action`` (0 if none can trade).

        The target is a notional of 0.1%-0.5% of current cash converted to
        shares at ``price``; it is then capped by affordable shares (BUY) or
        held shares (SELL) and by the remaining share allowance.
        """
        if price <= 0:
            return 0
        remaining_limit = max(self.daily_trading_limit - self.total_shares_traded, 0)
        cash = self.portfolio['cash']
        # Divide by price: the random fraction yields dollars, not shares.
        target_shares = cash * np.random.uniform(0.001, 0.005) / price
        if action == 'BUY':
            cap = min(cash // price, remaining_limit)
        else:
            cap = min(self.portfolio['holdings'][symbol], remaining_limit)
        return max(min(target_shares, cap), 0)

    def _portfolio_value(self, price):
        """Return cash plus all holdings valued at ``price``.

        NOTE(review): one price is applied to every symbol, which is only
        right for single-symbol data - confirm before using multiple tickers.
        """
        return self.portfolio['cash'] + sum(shares * price for shares in self.portfolio['holdings'].values())

    def step(self):
        """Advance one row; return ``(state, reward, done, info)``.

        Returns ``(None, None, True, {})`` once the last usable row is passed.
        """
        if self.current_step >= len(self.data) - 1:
            return None, None, True, {}

        row = self.data.iloc[self.current_step]
        current_price = row['Close']
        current_time = pd.to_datetime(row['ts_event'])
        symbol = row['symbol']

        state = row[self.state_columns]
        action = self.transformer_trading_simulation(state)

        trade_size = 0
        trade_price = current_price
        trade_status = 'skipped'

        if action in ('BUY', 'SELL'):
            trade_size = self._size_order(action, symbol, current_price)
            if trade_size > 0:
                notional = trade_size * current_price
                direction = 1 if action == 'BUY' else -1
                self.portfolio['cash'] -= direction * notional
                self.portfolio['holdings'][symbol] += direction * trade_size
                self.total_shares_traded += trade_size
                trade_status = 'filled'
            else:
                trade_size = 0
                trade_status = 'cancelled'

        reward = 0
        if trade_size > 0:
            expected_price = row['ask_px_00'] if action == 'BUY' else row['bid_px_00']
            actual_price = row['price']
            transaction_time = row['ts_in_delta']
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self.mean_volume)
            slippage = abs(expected_price - actual_price)
            time_penalty = 1000 * transaction_time / 1e9
            # Every component is a cost, so the reward is never positive.
            reward = - (slippage + time_penalty + transaction_cost)

            self.cumulative_reward += reward
            self.trades.append({
                'step': self.current_step,
                'timestamp': current_time,
                'action': action,
                'price': trade_price,
                'shares': trade_size,
                'symbol': symbol,
                'reward': reward,
                'transaction_cost': transaction_cost,
                'slippage': slippage,
                'time_penalty': time_penalty,
                'status': trade_status
            })

        self.current_step += 1
        done = self.current_step >= len(self.data) - 1
        info = {
            'portfolio_value': self._portfolio_value(self.data.iloc[self.current_step]['Close']),
            'action': action,
            'trade_size': trade_size,
            'trade_status': trade_status
        }

        return state, reward, done, info

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        """Return ``volatility * sqrt(volume / daily_volume)``."""
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        """Reset, step until done, and return ``(cumulative_reward, trades)``."""
        self.reset()
        done = False
        while not done:
            _, _, done, _ = self.step()
        return self.cumulative_reward, self.trades

    def render(self):
        """Print the summary and every trade, and write the log to 'trades_distilbert.csv'."""
        print(f'Cumulative reward: {self.cumulative_reward}')
        # Value holdings at the latest visited Close instead of adding raw
        # share counts to a cash amount.
        if len(self.data) > 0:
            last_row = min(self.current_step, len(self.data) - 1)
            total_value = self._portfolio_value(self.data.iloc[last_row]['Close'])
        else:
            total_value = self.portfolio['cash']
        print(f'Total portfolio value: {total_value}')
        trades_df = pd.DataFrame(self.trades)
        trades_df.to_csv('trades_distilbert.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")


In [None]:

# Simulation inputs: the ticker to trade, the share allowance and the
# starting cash (read by the environment on reset).
ticker = 'AAPL'
daily_trading_limit = 1000
INITIAL_CASH = 10_000_000  # $10 million

# First 1000 rows of the chosen ticker.
symbol_mask = market_features_df['symbol'] == ticker
ticker_data = market_features_df.loc[symbol_mask].iloc[:1000]

env = TradingEnvironmentwithDistilBERT(
    data=ticker_data,
    daily_trading_limit=daily_trading_limit,
    model=model.model,  # the underlying network, not the training wrapper
    tokenizer=tokenizer,
    device=device,
)

# Replay the data, then print the summary and the trade log.
cumulative_reward, trades = env.run()
env.render()

Environment reset.
Environment reset.
Cumulative reward: -144.31612272190895
Total portfolio value: 9994226.654272003
Step: 55, Timestamp: 2023-07-03 08:02:38.530006625, Action: BUY, Price: 194.19, Shares: 33915.99915244341, Symbol: AAPL, Reward: -0.5668332062883528, Transaction Cost: 0.3315332062883595, Slippage: 0.06999999999999318, Time Penalty: 0.1653
Step: 56, Timestamp: 2023-07-03 08:02:39.084188366, Action: SELL, Price: 194.18, Shares: 82.41024507122154, Symbol: AAPL, Reward: -0.3575994177411248, Transaction Cost: 0.1933154177411248, Slippage: 0.0, Time Penalty: 0.164284
Step: 57, Timestamp: 2023-07-03 08:02:43.952770224, Action: SELL, Price: 194.19, Shares: 23.91431169352097, Symbol: AAPL, Reward: -0.5374812062883516, Transaction Cost: 0.3315332062883595, Slippage: 0.03999999999999204, Time Penalty: 0.165948
Step: 58, Timestamp: 2023-07-03 08:02:43.952770224, Action: SELL, Price: 194.2, Shares: 31.54948417947289, Symbol: AAPL, Reward: -0.5474812062883425, Transaction Cost: 0.33

## Analysing and Comparing the Performance

In [None]:


class TradingPerformanceAnalyzer:
    """Compare the transformer's trade log with a simple strategy's blotter.

    The simple blotter is always truncated to the number of transformer
    trades so both strategies are compared over the same number of trades.

    Args:
        transformer_trades: List of trade dicts with at least ``step``,
            ``action`` and ``reward`` keys.
        simple_trades_file: Path of a CSV with ``step``, ``action`` and
            ``reward`` columns.
    """

    def __init__(self, transformer_trades, simple_trades_file):
        self.transformer_trades = transformer_trades
        self.simple_trades_df = pd.read_csv(simple_trades_file)

    def _comparable_simple_trades(self):
        """Return the leading blotter rows, at most one per transformer trade."""
        return self.simple_trades_df.iloc[:len(self.transformer_trades)]

    def compare_strategies(self):
        """Print the first 15 actions of both strategies side by side."""
        transformer_actions = [trade['action'] for trade in self.transformer_trades]
        simple_strategy_actions = self._comparable_simple_trades()['action'].tolist()

        print("\n\033[1mStrategy Comparison: Transformer vs Simple\033[0m")
        print("=" * 50)
        if not transformer_actions:
            print("\033[93mNo actions were taken by the transformer model.\033[0m")
            return

        for i, transformer_action in enumerate(transformer_actions[:15]):
            # The blotter may be shorter than the transformer log; show a
            # placeholder instead of raising IndexError.
            simple_action = simple_strategy_actions[i] if i < len(simple_strategy_actions) else 'N/A'
            print(f"Step {i+1:2}: \033[94mTransformer: {transformer_action:>8}\033[0m | "
                  f"\033[91mSimple: {simple_action:>8}\033[0m")

    def analyze_performance(self):
        """Print total, count, profitable share and average reward per strategy."""
        print("\n\033[1mPerformance Summary\033[0m")
        print("=" * 50)

        if len(self.transformer_trades) > 0:
            total_reward_transformer = sum(trade['reward'] for trade in self.transformer_trades)
            total_trades_transformer = len(self.transformer_trades)
            profitable_trades_transformer = sum(1 for trade in self.transformer_trades if trade['reward'] > 0)

            print(f"\033[94mTransformer Model:\033[0m")
            print(f"  \033[96mTotal Profit        : ${total_reward_transformer:.2f}\033[0m")
            print(f"  \033[96mTotal Trades        : {total_trades_transformer}\033[0m")
            print(f"  \033[96mProfitable Trades   : {profitable_trades_transformer} "
                  f"({profitable_trades_transformer / total_trades_transformer * 100:.2f}%)\033[0m")
            print(f"  \033[96mAvg Profit per Trade: ${total_reward_transformer / total_trades_transformer:.2f}\033[0m")
        else:
            print("\033[93mNo trades were executed by the transformer model. Unable to provide performance summary.\033[0m")

        # Every simple-strategy figure comes from the same truncated slice, so
        # the profitable count and the divisor refer to the same trades and
        # the divisor is never zero.
        simple_trades = self._comparable_simple_trades()
        if len(simple_trades) > 0:
            total_reward_simple = simple_trades['reward'].sum()
            total_trades_simple = len(simple_trades)
            profitable_trades_simple = int((simple_trades['reward'] > 0).sum())

            print(f"\n\033[91mSimple Strategy:\033[0m")
            print(f"  \033[95mTotal Profit        : ${total_reward_simple:.2f}\033[0m")
            print(f"  \033[95mTotal Trades        : {total_trades_simple}\033[0m")
            print(f"  \033[95mProfitable Trades   : {profitable_trades_simple} "
                  f"({profitable_trades_simple / total_trades_simple * 100:.2f}%)\033[0m")
            print(f"  \033[95mAvg Profit per Trade: ${total_reward_simple / total_trades_simple:.2f}\033[0m")
        else:
            print("\033[93mNo comparable simple-strategy trades to summarise.\033[0m")

    def visualize_performance(self):
        """Plot per-trade and cumulative rewards of both strategies with Plotly."""
        if len(self.transformer_trades) == 0:
            print("\033[93mNo trades to visualize.\033[0m")
            return

        transformer_steps = [trade['step'] for trade in self.transformer_trades]
        transformer_rewards = [trade['reward'] for trade in self.transformer_trades]
        transformer_cumulative_rewards = np.cumsum(transformer_rewards)

        simple_trades = self._comparable_simple_trades()
        simple_steps = simple_trades['step']
        simple_rewards = simple_trades['reward']
        simple_cumulative_rewards = np.cumsum(simple_rewards)

        # (x, y, mode, legend name, line colour) for each of the four traces.
        traces = [
            (transformer_steps, transformer_rewards, 'lines+markers', 'Trade Rewards (Transformer)', 'blue'),
            (transformer_steps, transformer_cumulative_rewards, 'lines', 'Cumulative Reward (Transformer)', 'darkblue'),
            (simple_steps, simple_rewards, 'lines+markers', 'Trade Rewards (Simple)', 'red'),
            (simple_steps, simple_cumulative_rewards, 'lines', 'Cumulative Reward (Simple)', 'darkred'),
        ]

        fig = go.Figure()
        for x_values, y_values, mode, name, colour in traces:
            fig.add_trace(go.Scatter(
                x=x_values,
                y=y_values,
                mode=mode,
                name=name,
                line=dict(color=colour)
            ))

        fig.update_layout(
            title='Trading Performance Comparison: Transformer vs Simple Strategy',
            xaxis_title='Trading Steps',
            yaxis_title='Reward',
            legend_title='Strategy'
        )

        fig.show()


# Pair the transformer's trade log with the simple strategy's saved blotter.
analyzer = TradingPerformanceAnalyzer(trades, 'trades_blotter.csv')

# Produce the three reports in order: side-by-side actions, the summary
# statistics, and the reward plot.
for report in (
    analyzer.compare_strategies,
    analyzer.analyze_performance,
    analyzer.visualize_performance,
):
    report()


[1mStrategy Comparison: Transformer vs Simple[0m
Step  1: [94mTransformer:      BUY[0m | [91mSimple:      BUY[0m
Step  2: [94mTransformer:     SELL[0m | [91mSimple:     SELL[0m
Step  3: [94mTransformer:     SELL[0m | [91mSimple:      BUY[0m
Step  4: [94mTransformer:     SELL[0m | [91mSimple:      BUY[0m
Step  5: [94mTransformer:     SELL[0m | [91mSimple:      BUY[0m
Step  6: [94mTransformer:     SELL[0m | [91mSimple:      BUY[0m
Step  7: [94mTransformer:     SELL[0m | [91mSimple:     SELL[0m
Step  8: [94mTransformer:     SELL[0m | [91mSimple:     SELL[0m
Step  9: [94mTransformer:     SELL[0m | [91mSimple:     SELL[0m
Step 10: [94mTransformer:     SELL[0m | [91mSimple:     SELL[0m
Step 11: [94mTransformer:     SELL[0m | [91mSimple:     SELL[0m
Step 12: [94mTransformer:     SELL[0m | [91mSimple:     SELL[0m
Step 13: [94mTransformer:     SELL[0m | [91mSimple:      BUY[0m
Step 14: [94mTransformer:     SELL[0m | [91mSimple:      BUY