In [1]:
import os
import time
import random
from collections import deque

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from functions import fetch_data, pickle_model
from env import TradingEnvironment

In [2]:
# Load the five price frames through the project-local `fetch_data` helper.
# The keys are fetched in the same order as the names they are bound to.
df1, df2, df3, df4, df5 = (
    fetch_data(dataset_key)
    for dataset_key in ('fb_df', 'aapl_df', 'nflx_df', 'goog_df', 'sp500_df')
)

---
### Random backtesting

In [None]:
# Environment for the random-policy backtest.
# `stocks` maps ticker -> price frame; shrink it to a single entry
# (e.g. only 'aapl') to backtest one stock.
starting_balance = 1_000_000
stocks = dict(aapl=df2, nflx=df3, goog=df4)

env = TradingEnvironment(stocks, starting_balance)

In [None]:
# Roll out one full episode with uniformly random actions and record, at every
# step, the agent portfolio's net worth, the `long_portfolio` net worth
# (presumably a buy-and-hold benchmark — confirm in env.py) and the reward.
done = False
obs = env.reset()
start = env.current_step  # first step of the episode; used later to slice the dates

agent_performance = []
long_performance = []
rewards = []

while not done:

    # Net worth is sampled *before* acting, so entry i is the state at step i.
    agent_performance.append(env.agent_portfolio.net_worth)
    long_performance.append(env.long_portfolio.net_worth)

    # One independent draw in [-1, 1) per configured ticker. The keys come from
    # `stocks` so that changing the universe in the setup cell is enough; the
    # draw order matches the dict's insertion order.
    actions = {ticker: np.random.uniform(-1, 1) for ticker in stocks}

    obs, reward, done, info = env.step(actions)

    rewards.append(reward)
    

In [None]:
# x-axis for the performance plots: the 'date' column of the AAPL frame,
# label-sliced from the episode's first step to `current_step - 1` with a
# step of -1.
# NOTE(review): presumably the environment walks the frame index downwards,
# hence the negative step — confirm, and check that the slice length equals
# len(agent_performance) or the plots below will fail.
dates = env.stocks['aapl'].loc[start:env.current_step-1:-1, 'date']

In [None]:
# Net worth of the random agent against the `long_portfolio` benchmark.
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(dates, agent_performance, label='model')
ax.plot(dates, long_performance, label='long')
ax.legend()

In [None]:
# Per-step reward collected during the random backtest.
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(rewards)

---
### Training

In [3]:
import matplotlib.pyplot as plt
from IPython.display import clear_output

%matplotlib inline

def live_plot(data_dict, figsize=(15,5)):
    """Redraw the episode-reward curve, replacing the cell's previous output.

    Parameters
    ----------
    data_dict : mapping
        Must provide an ``'episode_reward'`` entry holding one value per
        finished episode.
    figsize : tuple of (width, height), optional
        Figure size in inches passed to ``plt.figure``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the curve was drawn on; the caller is expected to show it.
    """
    # wait=True postpones the clear until new output is ready, avoiding flicker.
    clear_output(wait=True)
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot()

    # Read from the parameter; the previous body referenced an undefined
    # global `data`, which raised NameError on the first call.
    ax.plot(data_dict['episode_reward'])
    ax.set_xlabel('episode')
    ax.set_ylabel('episode reward')

    return ax

In [4]:
import torch
import collections
import datetime as dt
from td3 import Agent

In [None]:
# Parse the date column and keep rows from 2018 onwards.
# Built as a non-mutating chain: `assign` returns a new frame, so re-running
# the cell (when df2 is already a filtered slice) raises no
# SettingWithCopyWarning and the frame object handed to earlier cells is left
# untouched. The row index is preserved exactly as with boolean indexing.
df2 = (
    df2
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .loc[lambda frame: frame['date'].dt.year >= 2018]
)

In [None]:
# Parse the date column and keep rows from 2018 onwards.
# Built as a non-mutating chain: `assign` returns a new frame, so re-running
# the cell (when df3 is already a filtered slice) raises no
# SettingWithCopyWarning and the frame object handed to earlier cells is left
# untouched. The row index is preserved exactly as with boolean indexing.
df3 = (
    df3
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .loc[lambda frame: frame['date'].dt.year >= 2018]
)

In [None]:
# Parse the date column and keep rows from 2018 onwards.
# Built as a non-mutating chain: `assign` returns a new frame, so re-running
# the cell (when df4 is already a filtered slice) raises no
# SettingWithCopyWarning and the frame object handed to earlier cells is left
# untouched. The row index is preserved exactly as with boolean indexing.
df4 = (
    df4
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .loc[lambda frame: frame['date'].dt.year >= 2018]
)

In [5]:
# Training environment over the three price frames (ticker -> frame).
starting_balance = 1_000_000
stocks = dict(aapl=df2, nflx=df3, goog=df4)
env = TradingEnvironment(stocks, starting_balance)

In [6]:
seed = 10

# Seed every random number generator the training run can draw from so that a
# Restart & Run All reproduces the same trajectory.
env.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)  # stdlib RNG; the module is imported at the top of the notebook

# The warm-up phase samples actions with env.action_space.sample().
# NOTE(review): in Gym a space keeps its own RNG that env.seed() does not
# touch — confirm for TradingEnvironment. The guard keeps this a no-op for
# spaces that expose no seed() method.
if hasattr(env.action_space, 'seed'):
    env.action_space.seed(seed)

In [7]:
# Network sizes derived from the environment's spaces.
# np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
# The observation is flattened before it reaches the agent, so the state
# dimension is the product of the observation shape.
state_dim = int(np.prod(env.observation_space.shape))
action_dim = env.action_space.shape[0]
# Upper bound of the first action component, passed to the agent as its action limit.
max_action = float(env.action_space.high[0])

In [8]:
# Toggle for checkpointing: when True the checkpoint directories are created
# in the next cell.
save_models = True

In [9]:
# Create the checkpoint directory tree when checkpointing is enabled.
# os.makedirs creates the missing parent ('./pytorch_models') as well, and
# exist_ok=True makes the call idempotent, so no separate exists() checks
# (and no check-then-create race) are needed.
if save_models:
    os.makedirs('./pytorch_models/all_td3', exist_ok=True)

In [10]:
# Base name for saved checkpoints; embeds the seed so runs with different
# seeds do not overwrite each other. The training loop appends the episode
# number when saving.
file_name = f'TD3_TradingEnv_{seed}'
print(file_name)

TD3_TradingEnv_10


In [11]:
# --- Replay / batching -------------------------------------------------
buffer_size = 1_000_000.0   # kept as a float, as in the original 1e6
batch_size = 100            # forwarded to agent.train

# --- Optimisation (forwarded to agent.train, except lr) ----------------
gamma = 0.99
tau = 0.005
policy_freq = 2
lr = 0.001                  # handed to Agent as `eta`

# --- Noise -------------------------------------------------------------
policy_noise = 0.2          # forwarded to agent.train
noise_clip = 0.5            # forwarded to agent.train
expl_noise = 0.1            # std of the Gaussian noise added to actions in the loop

In [12]:
# Build the TD3 agent from the environment-derived sizes; the learning rate
# is passed through the constructor's `eta` keyword.
agent = Agent(state_dim, action_dim, max_action, eta=lr)

In [13]:
# Schedule
episodes = 1_500            # upper bound of the episode counter
max_steps = 1_000           # per-episode step cap enforced in the loop
starting_step = 10_000.0    # total steps of random actions before the policy takes over
report = 100                # print a summary and save every `report` episodes

# Mutable loop state
total_steps = 0
training = False

In [14]:
# Per-episode metrics keyed by name; defaultdict(list) lets the training loop
# append to a key without initialising it first.
reward_trace = collections.defaultdict(list)

In [15]:
# Main training loop.
# Each episode: roll out the environment (random actions during warm-up, then
# the agent's policy plus Gaussian exploration noise), store every transition
# in the replay buffer, then run one training pass of `steps` iterations.
# range(1, episodes + 1) runs exactly `episodes` episodes; the previous
# np.arange(1, episodes) stopped one short.
for episode in range(1, episodes + 1):

    # flatten() returns a new array; the result must be kept, otherwise the
    # first transition of every episode stores an unflattened observation.
    obs = env.reset().flatten()

    done = False
    episode_reward = []
    steps = 0

    while not done:

        # Switch once from warm-up to the learned policy. `>=` instead of `==`
        # so the switch still happens if the counter ever skips the exact value
        # (e.g. the cell is re-run with total_steps already past it).
        if not training and total_steps >= starting_step:
            print('Begin training')
            training = True

        if not training:
            action = env.action_space.sample()
        else:
            action = agent.select_action(np.array(obs))

            if expl_noise != 0:
                noise = np.random.normal(0, expl_noise, size=env.action_space.shape[0])
                # Keep the noisy action inside the valid action bounds.
                action = (action + noise).clip(env.action_space.low, env.action_space.high)

        new_obs, reward, done, info = env.step(action)

        new_obs = new_obs.flatten()
        # The episode also ends once the per-episode step cap is reached.
        # NOTE(review): a cap-triggered stop is stored below as a terminal
        # transition (done=1); confirm that is intended for the value targets.
        done = done or (steps >= max_steps)

        episode_reward.append(reward)
        agent.replay_buffer.add((obs, new_obs, action, reward, int(done)))

        obs = new_obs
        steps += 1
        total_steps += 1

    # One training pass per episode, with as many iterations as steps taken.
    agent.train(steps, batch_size, gamma, tau, policy_noise, noise_clip, policy_freq)
    reward_trace['episode_reward'].append(sum(episode_reward))
    reward_trace['episode_profit'].append(env.agent_portfolio.profits[-1])
    # Read net worth from the agent portfolio, the attribute used by the
    # backtest cells, rather than from `env.net_worth`.
    reward_trace['episode_net_worth'].append(env.agent_portfolio.net_worth)

    live_plot(reward_trace)
    plt.show()

    if episode % report == 0:

        print('Episode:', episode)
        print('Average score:', np.mean(reward_trace['episode_reward'][-report:]))
        # Nothing is ever appended to reward_trace['exploration_noise'], so
        # indexing it raised IndexError; report the configured value instead.
        print('Exploration noise:', expl_noise)
        print('Training:', training)
        time.sleep(1)  # leave the report visible before the next plot clears the output

        # Checkpoint only when enabled; the target directory is created only
        # under the same flag.
        if save_models:
            agent.save(
                f'{file_name}_{episode}',
                './pytorch_models/all_td3',
            )
        

AssertionError: 