In [35]:
# for compatibility with Python 3
from __future__ import print_function
import os
# os.environ["KERAS_BACKEND"] = "theano"
import numpy as np
from utils.data import read_stock_history, index_to_date, date_to_index, normalize
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [36]:
# Seaborn, useful for graphics
import seaborn as sns

# Import Bokeh modules for interactive plotting
import bokeh.io
#import bokeh.mpl
import bokeh.plotting

# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline

# This enables SVG graphics inline.  There is a bug, so uncomment if it works.
# %config InlineBackend.figure_formats = {'svg',}

# This enables high resolution PNGs. SVG is preferred, but has problems
# rendering vertical and horizontal lines
%config InlineBackend.figure_formats = {'png', 'retina'}
matplotlib.rcParams['figure.figsize'] = (10, 6)
plt.rc('legend', fontsize=20)
# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)

# Set up Bokeh for inline viewing
bokeh.io.output_notebook()

In [37]:
from model.ddpg.actor import ActorNetwork
from model.ddpg.critic import CriticNetwork
from model.ddpg.ddpg import DDPG
from model.ddpg.ornstein_uhlenbeck import OrnsteinUhlenbeckActionNoise

import numpy as np
import tflearn
import tensorflow as tf

from stock_trading import StockActor, StockCritic, obs_normalizer, get_model_path, get_result_path, \
                          test_model, get_variable_scope, test_model_multiple
    
from model.supervised.lstm import StockLSTM
from model.supervised.cnn import StockCNN

In [4]:
import random
from utils.data import create_target_dataset

In [5]:
# Read the full stock universe; the ticker list is what gets sampled below.
history, abbreviation = read_stock_history('utils/datasets/stocks_history_2.h5')

# Draw a reproducible random subset of 100 tickers (global RNG seeded with 30)
# and write them out as a new target dataset.
random.seed(30)
new_list = random.sample(abbreviation, k=100)

create_target_dataset(new_list, filepath='utils/datasets/stocks_history_target3.h5')

In [6]:
# Load the 100-stock dataset and split it chronologically along the time axis.
history, abbreviation = read_stock_history(filepath='utils/datasets/stocks_history_target3.h5')
# Keep only the first four columns of the last (feature) axis.
history = history[:, :, :4]

# Every stock is used. The first 1095 time steps (3 years) are the training data.
num_training_time = 1095
target_stocks = abbreviation
train_rows = [abbreviation.index(ticker) for ticker in target_stocks]
target_history = np.empty(shape=(len(target_stocks), num_training_time, history.shape[2]))
target_history[...] = history[train_rows, :num_training_time, :]

# The remaining time steps (the last 2 years) are the testing data.
testing_stocks = abbreviation
test_rows = [abbreviation.index(ticker) for ticker in testing_stocks]
testing_history = np.empty(shape=(len(testing_stocks), history.shape[1] - num_training_time,
                                  history.shape[2]))
testing_history[...] = history[test_rows, num_training_time:, :]

In [8]:
# Settings shared by the models configured in this notebook.
batch_size = 64       # batch size handed to the actor
action_bound = 1.0    # action bound handed to the actor
tau = 0.001           # tau handed to the actor

In [18]:
# Containers filled by the model-building loop, and the grid it iterates over.
models, model_names = [], []
window_length_lst = [3, 7]      # window lengths to build a model for
predictor_type_lst = ['cnn']    # predictor types to build a model for
use_batch_norm = True

In [42]:
# One class per stock plus one extra slot.
nb_classes = len(target_stocks) + 1
# Show the (stocks, time steps, features) shape of the training and testing splits.
print(target_history.shape, testing_history.shape, sep='\n')

(100, 1095, 4)
(100, 730, 4)


In [30]:
from environment.portfolio import PortfolioEnv
# Environment over the training split with a window of one step, followed by a
# quick look at its window length, stock count and the simulator's cost setting.
env = PortfolioEnv(target_history, target_stocks, window_length=1)
print(env.window_length, env.num_stocks, env.sim.cost, sep='\n')

Start date: 2013-06-04
1
100
0.0025


In [53]:
# Reshape training and testing data into supervised (input, label) samples.
#
# One sample is produced per (time step, current holding) pair, where holding 0
# means "no stock held" and holding k means "stock k-1 is held":
#   X[sample, feature, 0:window_length, stock]  price window, most recent day first
#   X[sample, feature, window_length, stock]    one-hot marker of the held stock
#   Y[sample]                                   one-hot over num_stocks + 1 classes


def _build_supervised_inputs(price_history, window_length, num_stocks, num_features=4):
    """Build the input tensor for every (time step, holding) pair.

    Args:
        price_history: array indexed as [stock, time step, feature].
        window_length: number of consecutive time steps in each sample.
        num_stocks: number of stocks; there are num_stocks + 1 holdings per step.
        num_features: number of feature columns copied from ``price_history``.

    Returns:
        Array of shape (num_steps * (num_stocks + 1), num_features,
        window_length + 1, num_stocks) where
        num_steps = price_history.shape[1] - window_length.
    """
    num_steps = price_history.shape[1] - window_length
    rows_per_step = num_stocks + 1
    inputs = np.zeros((num_steps * rows_per_step, num_features, window_length + 1, num_stocks))
    for step in range(num_steps):
        first_row = step * rows_per_step
        # window[stock, k, feature] == price_history[stock, step + window_length - 1 - k, feature]
        window = price_history[:num_stocks, step:step + window_length, :num_features][:, ::-1, :]
        # The same price window is shared by all holdings of this time step.
        inputs[first_row:first_row + rows_per_step, :, :window_length, :] = window.transpose(2, 1, 0)
        # Holding 0 keeps an all-zero marker row; holding k marks stock k-1.
        for held in range(1, rows_per_step):
            inputs[first_row + held, :, window_length, held - 1] = 1
    return inputs


def _build_supervised_labels(price_history, window_length, num_stocks, trading_cost):
    """Build one-hot labels naming the best stock for every (time step, holding) pair.

    The rate of a stock is column 3 divided by column 0 of the last day of the
    window (presumably close / open -- TODO confirm column order), minus
    ``trading_cost`` unless that stock is the one already held. Class k + 1 is
    set when stock k has the highest rate and that rate exceeds 1.0; otherwise
    class 0 is set. Ties go to the lowest stock index.

    NOTE(review): the rate is taken from day ``step + window_length - 1``, which
    is also the most recent day inside the matching input window -- confirm this
    is intended and not look-ahead leakage.

    Returns:
        Array of shape (num_steps * (num_stocks + 1), num_stocks + 1).
    """
    num_steps = price_history.shape[1] - window_length
    rows_per_step = num_stocks + 1
    labels = np.zeros((num_steps * rows_per_step, rows_per_step))
    stock_idx = np.arange(num_stocks)
    row_idx = np.arange(rows_per_step)
    for step in range(num_steps):
        day = step + window_length - 1
        gross = price_history[:num_stocks, day, 3] / price_history[:num_stocks, day, 0]
        # net[holding, stock]: switching into a stock pays the trading cost ...
        net = np.tile(gross - trading_cost, (rows_per_step, 1))
        # ... but staying in the stock that is already held is free.
        net[stock_idx + 1, stock_idx] = gross
        # NaN rates can never win a "rate > best" comparison; rank them last.
        net[np.isnan(net)] = -np.inf
        best = net.argmax(axis=1)
        beats_cash = net[row_idx, best] > 1.0
        labels[step * rows_per_step + row_idx, np.where(beats_cash, best + 1, 0)] = 1
    return labels


# Both splits are built by the same code for every holding, including holding 0.
# The test inputs previously skipped holding 0, leaving those rows all-zero.
trainX = _build_supervised_inputs(target_history, env.window_length, env.num_stocks)
testX = _build_supervised_inputs(testing_history, env.window_length, env.num_stocks)
trainY = _build_supervised_labels(target_history, env.window_length, env.num_stocks, env.sim.cost)
testY = _build_supervised_labels(testing_history, env.window_length, env.num_stocks, env.sim.cost)

In [48]:
from model.supervised.cnn import StockCNN
# Instantiate the CNN model from scratch (no saved weights are loaded).
# The stock count is derived from the data instead of being hard-coded to 100.
# NOTE(review): window_length=4 is hard-coded and the recorded training run
# below fails with an input-shape mismatch -- confirm the window length and the
# array layout StockCNN expects.
cnn_model = StockCNN(nb_classes=len(target_stocks), window_length=4)
cnn_model.build_model(load_weights=False)

Built model from scratch


In [49]:
# Train the CNN on the training arrays, passing the test arrays as the third
# and fourth arguments.
# NOTE(review): the recorded output of this cell is a ValueError about the
# model's expected input shape, so the arrays and the model configuration
# disagree -- confirm the layout StockCNN.train expects before re-running.
cnn_model.train(trainX, trainY, testX, testY)

ValueError: Error when checking input: expected conv2d_4_input to have shape (101, 4, 1) but got array with shape (101, 7, 1)

In [59]:
# Settings shared by the models configured in this notebook.
batch_size = 64       # batch size handed to the actor
action_bound = 1.0    # action bound handed to the actor
tau = 0.001           # tau handed to the actor

In [60]:
# Containers filled by the model-building loop, and the grid it iterates over.
models, model_names = [], []
window_length_lst = [3, 7]      # window lengths to build a model for
predictor_type_lst = ['cnn']    # predictor types to build a model for
use_batch_norm = True

In [61]:
# One class per stock plus one extra slot.
nb_classes = len(target_stocks) + 1
# Show the (stocks, time steps, features) shape of the training and testing splits.
print(target_history.shape, testing_history.shape, sep='\n')

(100, 1095, 4)
(100, 730, 4)


In [62]:
from environment.portfolio import PortfolioEnv, MultiActionPortfolioEnv
# Environment over the training split; it is the one passed to each DDPG model
# constructed later in the notebook.
# NOTE(review): no window_length is passed here, so the environment presumably
# falls back to its default, while the models are built for the window lengths
# in window_length_lst -- confirm the two are compatible.
env = PortfolioEnv(target_history, target_stocks)

Start date: 2013-03-02


In [63]:
# Build one untrained DDPG agent per (window_length, predictor_type) combination.
# Every combination gets a freshly reset default graph and its own session, and
# weights are initialised from scratch (load_weights=False). Each agent is
# appended to `models` and its name to `model_names`, so re-running this cell
# without resetting those lists adds duplicates.
for window_length in window_length_lst:
    for predictor_type in predictor_type_lst:
        name = 'DDPG_window_{}_predictor_{}'.format(window_length, predictor_type)
        model_names.append(name)
        tf.reset_default_graph()
        # The session is kept open on purpose: the agent stored in `models`
        # holds on to it.
        sess = tf.Session()
        tflearn.config.init_training_mode()
        action_dim = [nb_classes]
        state_dim = [nb_classes, window_length]
        variable_scope = get_variable_scope(window_length, predictor_type, use_batch_norm)
        with tf.variable_scope(variable_scope):
            actor = StockActor(sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size, predictor_type, 
                               use_batch_norm)
            # The critic uses the shared `tau` setting, like the actor, instead
            # of a separately hard-coded 1e-3, so the two cannot drift apart.
            critic = StockCritic(sess=sess, state_dim=state_dim, action_dim=action_dim, tau=tau,
                                 learning_rate=1e-3, num_actor_vars=actor.get_num_trainable_vars(), 
                                 predictor_type=predictor_type, use_batch_norm=use_batch_norm)
            actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

            model_save_path = get_model_path(window_length, predictor_type, use_batch_norm)
            summary_path = get_result_path(window_length, predictor_type, use_batch_norm)

            ddpg_model = DDPG(env, sess, actor, critic, actor_noise, obs_normalizer=obs_normalizer,
                              config_file='config/stock.json', model_save_path=model_save_path,
                              summary_path=summary_path)
            ddpg_model.initialize(load_weights=False, verbose=False)
            models.append(ddpg_model)

Build model from scratch
Build model from scratch


In [64]:
# Train a single agent: `ddpg_model` is whatever the model-building loop
# assigned last, i.e. the final (window_length, predictor_type) combination.
# The other entries of `models` are not trained by this cell.
ddpg_model.train()

'NoneType' object has no attribute 'name'
'NoneType' object has no attribute 'name'
Episode: 0, Reward: 0.89, Qmax: 0.0190
Episode: 1, Reward: 0.97, Qmax: 0.0207
Episode: 2, Reward: 0.61, Qmax: 0.0188
Episode: 3, Reward: 0.45, Qmax: 0.0174
Episode: 4, Reward: 0.59, Qmax: 0.0172
Episode: 5, Reward: 0.48, Qmax: 0.0174
Episode: 6, Reward: 0.87, Qmax: 0.0181
Episode: 7, Reward: 0.66, Qmax: 0.0179
Episode: 8, Reward: 0.51, Qmax: 0.0174
Episode: 9, Reward: 0.47, Qmax: 0.0185
Episode: 10, Reward: 0.75, Qmax: 0.0206
Episode: 11, Reward: 0.72, Qmax: 0.0226
Episode: 12, Reward: 0.84, Qmax: 0.0239
Episode: 13, Reward: 0.47, Qmax: 0.0241
Episode: 14, Reward: 0.65, Qmax: 0.0259
Episode: 15, Reward: 0.61, Qmax: 0.0274
Episode: 16, Reward: 0.67, Qmax: 0.0292
Episode: 17, Reward: 0.57, Qmax: 0.0297
Episode: 18, Reward: 0.51, Qmax: 0.0319
Episode: 19, Reward: 0.42, Qmax: 0.0336
Episode: 20, Reward: 0.53, Qmax: 0.0343
Episode: 21, Reward: 0.68, Qmax: 0.0360
Episode: 22, Reward: 0.48, Qmax: 0.0367
Episod

KeyboardInterrupt: 