This notebook is for testing a DQN agent with a two-hidden-layer neural network in a backtesting environment. Agents/models built in this notebook are the main models.

---

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from support_code.model import DQN
from support_code.env import TradingEnv
from support_code.functions import fetch_data, buy_and_hold, pickle_model, corr

In [None]:
import warnings
# Suppress all warnings for the rest of the session (this also hides deprecation notices).
warnings.simplefilter('ignore')

---
---
---
### Random back testing

Load in data and initialize environment for backtesting a random-action agent:

In [None]:
# Load the data set registered under the name 'sp500_df' (its 'close' column is read during backtesting).
df = fetch_data('sp500_df')

# Initial balance handed to the environment; also drawn as the reference line in the performance plot.
starting_balance = 1_000_000

env = TradingEnv(df, balance_init=starting_balance)
# NOTE(review): presumably a non-zero verbosity makes the environment log while stepping -- confirm in support_code.env.
env.verbose=1

In [None]:
# Passing None re-seeds NumPy's global RNG from fresh entropy, so each run of this notebook
# draws a different random action sequence; use a fixed integer here for repeatable runs.
np.random.seed(None)
# NOTE(review): presumably re-seeds the environment's own RNG in the same way -- confirm in support_code.env.
env.seed(None)

Perform a round of backtesting where actions are chosen at random:

In [None]:
# Run one backtest episode in which every action is drawn uniformly at random.
done = False
obs = env.reset()
start = env.current_step  # step index at which this episode begins

stock_performance = []  # 'close' price read before each action
model_performance = []  # portfolio net worth read before each action
actions = []            # value the environment associates with each sampled action index

while not done:

    # Record price and net worth for the current step *before* acting on it.
    # A single .loc[row, column] lookup avoids building a throwaway row Series.
    stock_performance.append(df.loc[env.current_step, 'close'])
    model_performance.append(env.net_worth)

    # Sample an action index in [0, n) and advance the environment by one step.
    action = np.random.randint(env.action_space.n)
    obs, rewards, done, info = env.step(action)

    # Store the environment's value for the chosen index rather than the raw index.
    # NOTE(review): `_actions` is a private attribute of TradingEnv -- consider a public accessor.
    actions.append(env._actions[action])

end = env.current_step  # step index at which the episode terminated

---

Plot performance of random agent:

In [None]:
# Two stacked panels: the random agent's portfolio value (top) and the price series (bottom).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))

ax1.plot(model_performance, 'b-', label='stochastic model')
ax1.set_ylabel('portfolio value')
# Horizontal reference line at the starting balance, to make gains and losses easy to read.
ax1.axhline(starting_balance, alpha=0.5, color='blue')
ax1.legend()

ax2.plot(stock_performance, 'r-', label='stock history')
ax2.set_ylabel('stock value')
ax2.set_xlabel('time')
ax2.legend()

plt.show()

Plot the random actions:

In [None]:
# Plot the sequence of random actions: red dots mark each step, the blue line connects them.
plt.figure(figsize=(15, 5))
plt.plot(actions, 'r.')
plt.plot(actions, 'b-')
plt.xlabel('step')
plt.ylabel('portion of portfolio bought/sold')

plt.show()

Find the correlation between price and actions:

In [None]:
# Pass the random actions and the recorded prices to the project helper `corr`.
corr(actions, stock_performance)

---
---
---
### Training/backtesting

Initialize environment for training DQN agent:

In [None]:
# Reload the 'sp500_df' data set for the training section.
df = fetch_data('sp500_df')
# Uncomment to inspect the frame's dimensions:
#df.shape

In [None]:
# Re-seed NumPy's global RNG from fresh entropy (None means no fixed seed, so runs are not repeatable).
np.random.seed(None)

# Initial balance handed to the training environment.
starting_balance = 1_000_000

env = TradingEnv(df, balance_init=starting_balance)
# NOTE(review): presumably 0 silences the environment's per-step logging -- confirm in support_code.env.
env.verbose=0
# NOTE(review): presumably re-seeds the environment's own RNG -- confirm in support_code.env.
env.seed(None)

# Uncomment to inspect the spaces the agent is sized to:
#print(env.action_space, env.observation_space)

Initialize a DQN agent with a two-hidden-layer neural network:

In [None]:
# Build the DQN agent, sized to the environment's action count and observation shape.
dqn_solver = DQN(
    action_space=env.action_space.n,
    state_space=env.observation_space.shape,
    batch_size=64,
    memory_size=1_000_000,
    # `alpha` is what the training loop reports as "EXPLORATION".
    # NOTE(review): presumably it decays by `alpha_decay` down to `alpha_min` -- confirm in support_code.model.
    alpha=1.0,
    alpha_decay=0.999,
    alpha_min=0.1,
    # NOTE(review): presumably the reward discount factor -- confirm in support_code.model.
    gamma=0.99,
)

dqn_solver.verbose = 0
# Uncomment to print the underlying network architecture:
#dqn_solver.model.summary()

In [None]:
# Per-episode training records; the training loop appends one dict per episode
# with the keys 'stock', 'model', 'actions' and 'rewards'.
history = []

Perform training by repeating backtests:

In [None]:
# Train the agent by running repeated backtest episodes, with one experience-replay
# pass after each episode.
# NOTE(review): episode numbering starts at 750 -- presumably this resumes an earlier run; confirm.
for i in np.arange(750, 2500):

    # Reset the environment at the top of each episode
    state = env.reset()

    stock_performance = []
    model_performance = []
    reward_trace = []
    actions = []

    start = env.current_step
    done = False

    # The model will iterate until a terminal state is reached
    while not done:

        # Select an action by passing the current observation/state to the DQN
        action = dqn_solver.act(state)

        # The environment takes a step according to that action and returns
        # the new state, the reward, and the terminal status
        next_state, reward, done, info = env.step(action)

        # Commit to the DQN's memory the relevant information
        dqn_solver.remember(state, action, reward, next_state, done)

        # Update the current state
        state = next_state

        # Bookkeeping for this step. Net worth and price are read *after* the step.
        actions.append(action)
        reward_trace.append(reward)
        model_performance.append(env.net_worth)
        # A single .loc[row, column] lookup avoids building a throwaway row Series.
        stock_performance.append(df.loc[env.current_step, 'close'])

    # Computed once and reused for both the report and the history record.
    mean_reward = np.mean(reward_trace)

    banner = '================================================================================================================'
    print(banner)
    print(i+1)
    print('FINAL PROFIT', env.net_worth-env.balance_init)
    # The value reported is the per-step mean, so it is labelled as a mean rather than a total.
    print('MEAN REWARD:', mean_reward)
    print('DAY RANGE:', start, env.current_step)
    print('EXPLORATION:', dqn_solver.alpha)
    print('MEMORY SIZE:', len(dqn_solver.memory))
    print(banner)

    # After each episode, perform experience replay
    test = dqn_solver.replay()

    print('\n\n\n')

    history.append({
        'stock': stock_performance,
        'model': model_performance,
        'actions': actions,
        'rewards': mean_reward,
        })

# Unpack the pair returned by the final replay() call so it can be inspected afterwards.
X, y = test

---

In [None]:
# Index into `history` used by the cells below; -2 selects the second-to-last recorded episode.
i = -2

Calculate buy and hold performance for given training instance:

In [None]:
# Buy-and-hold baseline over the selected episode's price series, using the
# environment's initial balance and fee.
long = buy_and_hold(
    balance_init=env.balance_init,
    back_prices=history[i]['stock'],
    fee=env.fee,
)

Plot model performance and buy and hold performance for given instance:

In [None]:
# Two stacked panels for the selected episode: agent vs. buy-and-hold portfolio
# value (top) and the price series (bottom).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))

ax1.plot(long, 'b--', alpha=0.667, label='buy and hold')
ax1.plot(history[i]['model'], 'b-', label='backtesting model')
# Horizontal reference line at the environment's initial balance.
ax1.axhline(env.balance_init, alpha=0.333, color='blue')
ax1.set_ylabel('portfolio value')
ax1.set_title('training')
ax1.legend()

ax2.plot(history[i]['stock'], 'r-', label='stock history')
ax2.set_ylabel('stock value')
ax2.set_xlabel('time')
ax2.legend()

plt.show()

Plot actions:

In [None]:
# Plot the action index chosen at every step of the selected episode.
episode_actions = history[i]['actions']
n_actions = dqn_solver.action_space

plt.figure(figsize=(15, 5))
plt.plot(episode_actions, 'b-', alpha=0.6667)
plt.plot(episode_actions, 'r.')

# Relabel the integer action indices 0..n-1 with evenly spaced values from -1 to 1.
plt.yticks(np.arange(n_actions), np.linspace(-1, 1, n_actions))
plt.xlabel('step')
plt.ylabel('portion of portfolio bought/sold')
plt.show()

Plot average reward during training:

In [None]:
# Collect the mean reward stored for every recorded episode, in training order.
rewards = np.array([episode['rewards'] for episode in history])

In [None]:
# Plot the per-episode reward collected during training.
plt.figure(figsize=(12, 6))
plt.plot(rewards, 'b-')
plt.xlabel('episode number')
# Each stored value is the mean of that episode's step rewards, so the axis is labelled as a mean.
plt.ylabel('mean reward per episode')
plt.show()

Find correlation between prices and actions:

In [None]:
# Pass the selected episode's prices and chosen action indices to the project helper `corr`.
corr(history[i]['stock'], history[i]['actions'])

---

Save the model for further testing and deployment:

In [None]:
# Persist the trained agent through the project helper `pickle_model`.
# NOTE(review): presumably this writes a pickle file -- only unpickle such files from trusted sources.
pickle_model(dqn_solver, path='model_info_sp500_1')