## CHEME 5660: Building a Trading Bot using Model-Free Reinforcement Learning

### Introduction

### Example setup

In [1]:
import Pkg; Pkg.activate("."); Pkg.resolve(); Pkg.instantiate();

[32m[1m  Activating[22m[39m project at `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Project.toml`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Manifest.toml`


In [2]:
# load req packages -
using DataFrames
using Dates
using FileIO
using JLD2
using PrettyTables
using Distributions

# setup paths -
# _ROOT is whatever the working directory is when this cell runs (pwd()), so every path
# below is relative to where the notebook was launched from.
const _ROOT = pwd();
# folder the price data file is loaded from: the `data` subdirectory of _ROOT
const _PATH_TO_DATA = joinpath(_ROOT, "data");

In [25]:
include("CHEME-5660-Example-CodeLib.jl");

### Setup constants and other resources

In [13]:
# --- tunable parameters (you get to choose these) -------------------------------------
m̂ = 299;         # number of days of historical data we are using
d̂ = 10;          # number of days over which the initial position is bought at the open
λ̂ = 0.001;       # Boltzmann weighting factor; λ̂ = 0.0 gives unweighted
lotsize = 10.0;   # number of shares bought at the open
nₐ = 1.0;         # number of shares we want to buy or sell each day
                  # NOTE(review): nₐ is not referenced by the cells visible here — confirm use
ϵ = 0.20;         # fraction of the time we explore, versus exploit

# --- derived values ---------------------------------------------------------------------
# first row of the training data that comes after the d̂-day initial position window
cursor = d̂ + 1;

# --- action and state spaces -----------------------------------------------------------
actions = [1, 2, 3]; # 1 = buy, 2 = sell, 3 = hold
states = [1, 2];     # s1 => current share price higher than Sbar, s2 => lower than Sbar

# Q has one row per state and one column per action, and starts out as all zeros -
Q = zeros(Float64, length(states), length(actions));

# trade ledger: integer key => (number of shares => share price), initially empty -
trade_ledger = Dict{Int64,Pair{Float64,Float64}}();

In [5]:
# Prior used to draw random actions while exploring. The probabilities are listed by action
# index: P(1) = 0.60, P(2) = 0.10, P(3) = 0.30. With the action coding 1 = buy, 2 = sell,
# 3 = hold, buying is the most likely draw and selling the least likely.
action_distribution = Categorical([0.60,0.10,0.30]); # when exploring, we are biased toward buying

#### Load, clean and partition the OHLC price data set

In [6]:
# Read the JLD2 portfolio file, take its "dd" entry and pass that through clean(...) -
# NOTE(review): clean is not defined in the visible cells, so what it removes cannot be
# stated here — confirm against its definition.
price_data_dictionary =
    load(joinpath(_PATH_TO_DATA, "CHEME-5660-Portfolio-11-17-22.jld2"))["dd"] |> clean;

# ticker symbols (the dictionary keys) available in the cleaned data set, sorted -
ticker_symbol_array = price_data_dictionary |> keys |> collect |> sort;

# number of ticker symbols in the data set -
Nₐ = length(ticker_symbol_array);

# Split the data into a training set and a prediction set -
# NOTE(review): the second argument (m̂ + 1) presumably controls the size of the training
# partition — confirm against the definition of partition.
(price_training_dict, price_prediction_dict) = partition(price_data_dictionary, m̂ + 1);

Length violation: META was removed; dim(SPY) = 510 days and dim(META) = 260 days


#### Establish initial position

In [14]:
# ticker symbol whose price history we explore -
ticker_symbol = "AMD";

# training data table for that ticker -
df_training = price_training_dict[ticker_symbol];

# Build the initial position from the first d̂ days of the training data -
# NOTE(review): S̄ is presumably the reference share price ("Sbar" in the state definitions)
# and n_total_initial the total number of shares held afterwards — confirm against the
# definition of initialize_position.
S̄, n_total_initial = initialize_position(df_training; lotsize=lotsize, days=d̂);

# Record one purchase of `lotsize` shares at the open price for each of days 1 through d̂ -
for k ∈ 1:d̂
    trade_ledger[k] = Pair(lotsize, df_training[k, :open])
end

#### Run a purely exploratory system

In [16]:
# Step through the training data from `cursor` to its last row, drawing one random action
# per row. Q is never consulted or updated here: this pass is exploration only.
for t ∈ cursor:nrow(df_training)

    # draw an action from the exploration prior (1 = buy, 2 = sell, 3 = hold) -
    aᵢ = rand(action_distribution);

    # any action other than buy or sell leaves the ledger untouched for this row -
    (aᵢ == 1 || aᵢ == 2) || continue

    # buys and sells are both priced at the row's volume weighted average price -
    Sᵢ = df_training[t, :volume_weighted_average_price];

    if aᵢ == 1
        # buying: always enter the transaction in the ledger -
        trade_ledger[t] = lotsize => Sᵢ
        continue
    end

    # selling: the share count is negative, and the trade is only entered in the ledger
    # when confirm(...) approves it -
    # NOTE(review): confirm presumably checks that the ledger holds enough shares to sell —
    # verify against its definition.
    proposed_trade = -lotsize => Sᵢ
    if confirm(trade_ledger, proposed_trade) == true
        trade_ledger[t] = proposed_trade
    end
end

In [24]:
# Look up the ledger entry stored under key 13; entries have the form (shares => price).
# Rows where no trade was recorded have no entry, in which case this throws a KeyError.
trade_ledger[13]

10.0 => 86.7819