## CHEME 5660: Building a Trading Bot using Model-Free Reinforcement Learning

### Introduction

### Example setup

In [1]:
import Pkg; Pkg.activate("."); Pkg.resolve(); Pkg.instantiate();

[32m[1m  Activating[22m[39m project at `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Project.toml`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Manifest.toml`


In [2]:
# load req packages -
using DataFrames
using Dates
using FileIO
using JLD2
using PrettyTables
using Distributions
using Statistics

# setup paths -
const _ROOT = pwd();
const _PATH_TO_DATA = joinpath(_ROOT, "data");

In [3]:
include("CHEME-5660-Example-CodeLib.jl");

### Setup constants and other resources

In [4]:
# how many days of historical data are we using?
d = 12;         # we nₐ buy shares of XYZ for d-periods at the open of the training data 
lotsize = 10.0; # how many shares do we buy at the open?
nₐ = 2.0;       # how many shares do we want to buy, sell each day

# what does our training data start?
cursor = d;

# how many steps in an episode?
number_of_steps_per_episodes = 36;

# setup actions states -
actions = [1,2,3]; # buy, sell, hold

# trade-ledger 
ledger = Dict{DateTime,TransactionModel}();

In [5]:
# what is my prior data -
action_distribution = Categorical([0.60,0.10,0.30]); # when exploring, we are biased toward buying

#### Load and partition the OHLC price data set

In [6]:
# load the JLD2 portfolio data file -
price_data_dictionary = load(joinpath(_PATH_TO_DATA, "CHEME-5660-Portfolio-Q-learning-5min-11-20-22.jld2"))["dd"];

# we have these ticker symbols in our data set -
ticker_symbol_array = sort(keys(price_data_dictionary) |> collect);

# Partition the data into a training and prediction set
(price_training_dict, price_prediction_dict) = partition(price_data_dictionary; fraction=0.50);

#### Establish an initial position

In [7]:
# what ticker do we want to explore?
ticker_symbol = "AMD";

# get the df_training -
df_training = price_training_dict[ticker_symbol];

# tmp -
transactions = initialize(df_training; lotsize = lotsize, periods=d);

# load these transactions into my trade-ledger 
(NR,NC) = size(transactions);
for i ∈ 1:NR
    
    # get the time stamp of the trade -
    time = transactions[i,4];
    
    # build transaction object -
    transaction = build(TransactionModel, volume=transactions[i,1], price=transactions[i,3], sense=1);
    ledger[time] =  transaction;
end

In [8]:
ledger

Dict{DateTime, TransactionModel} with 10 entries:
  DateTime("2022-11-14T09:40:00") => TransactionModel(10, 73.9349, 1)
  DateTime("2022-11-14T09:20:00") => TransactionModel(10, 74.0508, 1)
  DateTime("2022-11-14T09:25:00") => TransactionModel(10, 73.9016, 1)
  DateTime("2022-11-14T09:10:00") => TransactionModel(10, 73.531, 1)
  DateTime("2022-11-14T09:50:00") => TransactionModel(10, 74.0052, 1)
  DateTime("2022-11-14T09:15:00") => TransactionModel(10, 74.0974, 1)
  DateTime("2022-11-14T09:00:00") => TransactionModel(10, 73.4995, 1)
  DateTime("2022-11-14T09:35:00") => TransactionModel(10, 73.9788, 1)
  DateTime("2022-11-14T09:30:00") => TransactionModel(10, 73.8187, 1)
  DateTime("2022-11-14T09:05:00") => TransactionModel(10, 73.3636, 1)

In [9]:
Sₒ = vwap(ledger)

73.81814496596536

#### Run a purely random agent

In [10]:
# whate are my timesteps -
timestamp_array = sort(df_training[:,:timestamp]) |> collect

for i ∈ 1:number_of_steps_per_episodes
    
    # pick a random action -
    aᵢ = rand(action_distribution);
    
    # grab the time stamp and price data -
    current_cursor = cursor + i
    event_open_timestamp = timestamp_array[current_cursor];
    event_close_timestamp = timestamp_array[current_cursor + 1];
    price_value = price(df_training, event_close_timestamp);
    
    # we implement the action now, we get the next price -
    sense_flag = 1;
    if (aᵢ == 2)
        sense_flag = -1;
    elseif (aᵢ == 2)
        sense_flag = 0;
    end;
    
    # build a proposed trade model -
    proposed_trade = build(TransactionModel, volume = nₐ, sense = sense_flag, price = price_value);
    
    # confirm this trade -
    if (confirm(ledger, proposed_trade) == true)
        ledger[event_open_timestamp] = proposed_trade;
    end
end

In [11]:
ledger

Dict{DateTime, TransactionModel} with 46 entries:
  DateTime("2022-11-14T11:05:00") => TransactionModel(2, 73.8151, 1)
  DateTime("2022-11-14T12:00:00") => TransactionModel(2, 74.645, -1)
  DateTime("2022-11-14T09:55:00") => TransactionModel(2, 73.9185, 1)
  DateTime("2022-11-14T09:25:00") => TransactionModel(10, 73.9016, 1)
  DateTime("2022-11-14T12:40:00") => TransactionModel(2, 74.7207, 1)
  DateTime("2022-11-14T12:15:00") => TransactionModel(2, 74.707, 1)
  DateTime("2022-11-14T10:00:00") => TransactionModel(2, 74.0079, 1)
  DateTime("2022-11-14T09:40:00") => TransactionModel(10, 73.9349, 1)
  DateTime("2022-11-14T10:25:00") => TransactionModel(2, 74.12, 1)
  DateTime("2022-11-14T11:20:00") => TransactionModel(2, 73.9944, 1)
  DateTime("2022-11-14T09:50:00") => TransactionModel(10, 74.0052, 1)
  DateTime("2022-11-14T12:45:00") => TransactionModel(2, 74.7535, 1)
  DateTime("2022-11-14T10:30:00") => TransactionModel(2, 74.09, 1)
  DateTime("2022-11-14T12:50:00") => TransactionModel(2