## CHEME 5660: Building a Trading Bot using Model-Free Reinforcement Learning

### Introduction

### Example setup

In [1]:
import Pkg; Pkg.activate("."); Pkg.resolve(); Pkg.instantiate();

[32m[1m  Activating[22m[39m project at `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Project.toml`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Manifest.toml`


In [2]:
# load req packages -
using DataFrames
using Dates
using FileIO
using JLD2
using PrettyTables
using Distributions
using Statistics
using DataFrames
using Plots
using Colors

# setup paths -
const _ROOT = pwd();
const _PATH_TO_DATA = joinpath(_ROOT, "data");

In [3]:
include("CHEME-5660-Example-CodeLib.jl");

### Setup constants and other resources

In [4]:
# 5-mim risk-free rate
rÃÑ = 0.0403;
risk_free_rate = ((1+rÃÑ)^(1/19656) - 1);

# how many days of historical data are we using?
d = 1;       # we n‚Çê buy shares of XYZ for d-periods at the open of the training data 
n‚Çê = 1.0;    # how many shares do we want to buy, sell each day
œµÃÇ = 0.5;    # cutoff

# setup actions states -
actions = [1,2,3]  ; # buy, sell, hold
states = [1,2,3,4] ; # states defined below -

#### Load and partition the OHLC price data set

In [5]:
# load the JLD2 portfolio data file -
price_data_dictionary = load(joinpath(_PATH_TO_DATA, "CHEME-5660-Portfolio-Q-learning-5min-11-20-22.jld2"))["dd"];

# we have these ticker symbols in our data set -
ticker_symbol_array = sort(keys(price_data_dictionary) |> collect);

# Partition the data into a training and prediction set
(price_training_dict, price_prediction_dict) = partition(price_data_dictionary; fraction=0.90);

#### Establish an initial position

In [6]:
# what ticker do we want to explore?
ticker_symbol = "AMD";

# get the df_training -
df_training = price_data_dictionary[ticker_symbol];

# get the df_prediction -
# df_prediction = price_prediction_dict[ticker_symbol];

#### Establish state classes

In [7]:
# fit a distribution to vwap data -
vwap_data = df_training[!,:volume_weighted_average_price];
Nd = fit_mle(Normal, vwap_data);

# get parameters -
Œ∏ = params(Nd);

# setup price -
S‚Çí = Œ∏[1];
œÉÃÇ = Œ∏[2];

# print -
println("Long-term price S‚Çí = $(S‚Çí) USD/share with œÉÃÇ = $(œÉÃÇ)")

Long-term price S‚Çí = 74.35207976318632 USD/share with œÉÃÇ = 1.3875013494498032


In [8]:
# test -
s = state(76.8; Œº = S‚Çí, œÉ = œÉÃÇ, œµ=œµÃÇ)

2

In [9]:
# setup œµ sim -
œµ = 0.1;
number_of_trials = 100;
policy_array = Array{Int64,2}(undef, 4, number_of_trials);

# setup categorical distribution for drawing a random action -
action_distribution = Categorical([0.5,0.25,0.25]);

for t ‚àà 1:number_of_trials
    
    # initialize an empty ledger -
    ledger_df = DataFrame(
        time = DateTime[],
        n = Float64[],
        price = Float64[],
        s = Int64[],
        action = Int64[]
    );

    # initialize an empty Q -
    Q_array = Array{Float64,2}(undef, length(states), length(actions));
    fill!(Q_array, 0.0);
    Q_array[1:2,2] .= 10.0;
    Q_array[3:4,1] .= 10.0;

    # build Q model -
    QMODEL = QLearningModel();
    QMODEL.Œ≥ = 0.95;
    QMODEL.Œ± = 0.05;
    QMODEL.ùíÆ = states;
    QMODEL.ùíú = actions;
    QMODEL.Q = Q_array;
    
    # we buy 100 shares at initial price in the data -
    transaction = (
        time = df_training[1,:timestamp],
        n = 100.0,
        action = 1,
        price = price(df_training,1),
        s = state(price(df_training,1); Œº = S‚Çí, œÉ = œÉÃÇ, œµ=œµÃÇ) 
    );
    push!(ledger_df, transaction)
    
    # main random simulation -
    for i ‚àà 2:(nrow(df_training) - 1)


        # get data from the df_training -
        p = price(df_training, i);
        p‚Ä≤ = price(df_training, i+1);

        # convert that to the current state -
        s = state(p; Œº = S‚Çí, œÉ = œÉÃÇ, œµ=œµÃÇ);
        s‚Ä≤ = state(p‚Ä≤; Œº = S‚Çí, œÉ = œÉÃÇ, œµ=œµÃÇ);

        # roll a random number -
        if (rand() < œµ)

            # roll a random action - 
            a·µ¢ = rand(action_distribution);
            if (a·µ¢ == 1) # random action: buy

                # compute a buy action -
                transaction = (
                    time = df_training[i,:timestamp],
                    n = n‚Çê,
                    action = 1,
                    price = p, 
                    s = s
                );
                push!(ledger_df, transaction)

            elseif (a·µ¢ == 2) # random action: sell

                # compute a buy action -
                transaction = (
                    time = df_training[i,:timestamp],
                    n = n‚Çê,
                    action = 2,
                    price = p,
                    s = s
                );
                push!(ledger_df, transaction)

            elseif (a·µ¢ == 3) # random action: hold

                # compute a buy action -
                transaction = (
                    time = df_training[i,:timestamp],
                    n = n‚Çê,
                    action = 3,
                    price = p,
                    s = s
                );
                push!(ledger_df, transaction)
            end
        else

            # ok, what action does my best guess say that I should take?
            policy = œÄ(QMODEL.Q);
            a·µ¢ = policy[s];

            # compute a buy action -
            transaction = (
                time = df_training[i,:timestamp],
                n = n‚Çê,
                action = a·µ¢,
                price = p,
                s = s
            );
            push!(ledger_df, transaction)
        end

        # we've update the ledger - compute the return per share if we sold everything -
        R·µ¢ = liquidate(ledger_df,  p‚Ä≤);

        # update the QMODEL -
        update!(QMODEL,s,a·µ¢,R·µ¢,s‚Ä≤);
    end
    
    pvec = œÄ(Q_array)
    policy_array[1,t] = pvec[1]
    policy_array[2,t] = pvec[2]
    policy_array[3,t] = pvec[3]
    policy_array[4,t] = pvec[4]
end

In [17]:
idx = findall(x->x==1, policy_array[3,:])

96-element Vector{Int64}:
   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
   ‚ãÆ
  88
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100

In [11]:
# # initialize an empty ledger -
# prediction_ledger_df = DataFrame(
#     time = DateTime[],
#     n = Float64[],
#     price = Float64[],
#     s = Int64[],
#     action = Int64[]
# );

# # initial buy transaction -
# transaction = (
#     time = df_prediction[1,:timestamp],
#     n = 100.0,
#     action = 1,
#     price = price(df_prediction,1),
#     s = state(price(df_prediction,1); Œº = S‚Çí, œÉ = œÉÃÇ) 
# );
# push!(prediction_ledger_df, transaction);

In [12]:
policy_array[:,1]

4-element Vector{Int64}:
 2
 2
 1
 3

In [13]:
# wealth simulation -
number_of_prediction_steps = nrow(df_prediction);
# for i ‚àà 2:(number_of_prediction_steps - 1)
    
#     # compute the state -
#     p = price(df_prediction, i);
#     s = state(p; Œº = S‚Çí, œÉ = œÉÃÇ)
    
#     # policy -
#     if (s == 1)
#         a·µ¢ = 2;
#     elseif (s == 2)
#         a·µ¢ = 2;
#     elseif (s == 3)
#         a·µ¢ = 1;
#     elseif (s == 4)
#         a·µ¢ = 1;
#     end
    
#     # compute a buy action -
#     transaction = (
#         time = df_prediction[i,:timestamp],
#         n = n‚Çê,
#         action = a·µ¢,
#         price = p,
#         s = s
#     );
#     push!(prediction_ledger_df, transaction)
# end

LoadError: UndefVarError: df_prediction not defined