An agent is trained to determine the portfolio weights over 10 consecutive days (an episode consists of 10 steps) using continuous control, specifically DDPG.

It **does not** work when the episode varies (`batch_num` > 1) because financial time series are highly non-stationary.

In [81]:
# `gym` must be imported explicitly: this cell calls gym.make(), and without
# the import a fresh kernel (Restart & Run All) fails with a NameError.
import gym

# NOTE(review): PortfolioEnv is not referenced by name in this cell; the import
# is presumably needed for its side effect of registering 'Portfolio-v0' with
# gym -- confirm against portfolio_env before removing it.
from portfolio_env import PortfolioEnv

# Single-stock, close-price-only environment.
# batch_num is kept at 1 because, per the note above, training does not work
# when the episode varies (batch_num > 1).
# NOTE(review): batch_size=10 appears to be the number of steps per episode
# (the test run below reports "steps: 10") -- confirm against portfolio_env.
env = gym.make('Portfolio-v0',
               features=['Close'],
               stocks=['GOOGL'],
               batch_num=1,
               batch_size=10,
               window=10)

In [82]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate, Conv2D, Reshape
from keras.optimizers import Adam
from keras import backend as K
K.set_image_dim_ordering('th')

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess, GaussianWhiteNoiseProcess

# First, we build two networks, for the actor and the critic, separately.
#
# Actor: maps an observation to portfolio weights (softmax output, so the
# weights are non-negative and sum to 1).
# Shapes quoted below are the ones printed by actor.summary() in the recorded
# run, with channels-first ordering (set via K.set_image_dim_ordering('th')).

# The input carries an extra leading axis of size 1 in front of the observation
# shape -- presumably the keras-rl window_length axis (window_length=1 in the
# replay memory configured further down); it is dropped again by the Reshape.
observation_input_raw = Input(shape=(1,)+env.observation_space.shape, name='observation_input')
observation_input = Reshape(env.observation_space.shape)(observation_input_raw)  # -> (None, 1, 2, 10)

# 1x3 convolution along the last axis only.
x = Conv2D(32, (1, 3), activation='relu')(observation_input)  # -> (None, 32, 2, 8)
# Kernel width equals the remaining length of the last axis, collapsing it to 1.
x = Conv2D(16, (1, int(x.shape[-1])))(x)  # -> (None, 16, 2, 1)
# 1x1 convolution reduces the 16 channels to a single score per row.
x = Conv2D(1, (1, 1))(x)  # -> (None, 1, 2, 1)
x = Flatten()(x)  # -> (None, 2)
action = Activation('softmax')(x)
actor = Model(inputs=observation_input_raw, outputs=action)

# Model.summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit an extra "None" line).
actor.summary()

# Critic: estimates Q(observation, action) as a single scalar.
nb_actions = env.action_space.shape[0]
action_input = Input(shape=(nb_actions,), name='action_input')

# Same input handling as the actor: an extra leading axis of size 1 in front of
# the observation shape, removed again by the Reshape.
observation_input_raw = Input(shape=(1,)+env.observation_space.shape, name='observation_input')
observation_input = Reshape(env.observation_space.shape)(observation_input_raw)
x = Conv2D(32, (1, 3), activation='relu')(observation_input)
# Kernel width equals the remaining length of the last axis, collapsing it to 1.
x = Conv2D(16, (1, int(x.shape[-1])))(x)

# Insert the action here: it is reshaped to (1, nb_actions, 1) and stacked onto
# the 16 feature maps along axis 1 as one additional channel.
# NOTE(review): this assumes nb_actions equals the size of the second-to-last
# axis of the feature maps, otherwise the Concatenate shapes will not match.
x = Concatenate(axis=1)([Reshape((1, -1, 1))(action_input), x])
x = Conv2D(1, (1, 1))(x)
x = Flatten()(x)
# the structure above is the same as actor except the inserted action
x = Dense(1)(x)
Q = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input_raw], outputs=Q)

# Model.summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit an extra "None" line).
critic.summary()

# Next, assemble the DDPG agent from the actor and critic and compile it.
# Any built-in Keras optimizer can be passed to compile().

# Replay buffer holding at most 100 entries, with window_length=1.
memory = SequentialMemory(limit=100, window_length=1)

# Gaussian white noise used as the agent's random process.
# NOTE(review): the noise mean is 0.5 rather than 0 -- confirm this offset is
# intentional for this environment.
random_process = GaussianWhiteNoiseProcess(mu=0.5, sigma=1e-3, size=nb_actions)

agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    random_process=random_process,
    nb_steps_warmup_critic=100,
    nb_steps_warmup_actor=100,
    gamma=0.99,
    target_model_update=1e-3,
    batch_size=env.batch_size,
)
agent.compile(Adam(lr=1e-4, clipnorm=1.0), metrics=['mae'])

# Train for 1000 environment steps; the returned History is the cell output.
agent.fit(env, nb_steps=1000, verbose=1)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
observation_input (InputLaye (None, 1, 1, 2, 10)       0         
_________________________________________________________________
reshape_73 (Reshape)         (None, 1, 2, 10)          0         
_________________________________________________________________
conv2d_145 (Conv2D)          (None, 32, 2, 8)          128       
_________________________________________________________________
conv2d_146 (Conv2D)          (None, 16, 2, 1)          4112      
_________________________________________________________________
conv2d_147 (Conv2D)          (None, 1, 2, 1)           17        
_________________________________________________________________
flatten_49 (Flatten)         (None, 2)                 0         
_________________________________________________________________
activation_49 (Activation)   (None, 2)                 0         
Total para

<keras.callbacks.History at 0x144092e80>

In [83]:
# Finally, evaluate the trained agent for a single episode on the same
# environment it was trained on. The call returns a Keras History object,
# which is displayed as the cell output.
# NOTE(review): the 2x2 arrays printed at each step are presumably emitted by
# the environment itself -- confirm in portfolio_env.
agent.test(env, nb_episodes=1)

Testing for 1 episodes ...
[[ 0.         -0.00273799]
 [ 0.02092412  0.97907591]]
[[0.         0.07033118]
 [0.0189471  0.98105294]]
[[0.         0.00624899]
 [0.05702892 0.94297111]]
[[ 0.         -0.00585757]
 [ 0.02701478  0.97298527]]
[[ 0.         -0.00300064]
 [ 0.17282525  0.82717472]]
[[0.         0.03441954]
 [0.15587924 0.84412074]]
[[ 0.         -0.0017722 ]
 [ 0.25957999  0.74042004]]
[[0.         0.03439327]
 [0.23003884 0.76996112]]
[[ 0.         -0.07111026]
 [ 0.14755371  0.85244632]]
[[0.         0.00601182]
 [0.10145867 0.89854127]]
Episode 1: reward: 0.058, steps: 10


<keras.callbacks.History at 0x1443cae48>