In [8]:
import gym
import gym_anytrading
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
import random
import pandas as pd

tf.keras.backend.clear_session()


In [9]:
class Agent:
    def __init__(self, actor_lr, critic_lr, discount_factor=0.99, n_actions=2):
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.discount_factor = discount_factor
        self.n_actions = n_actions
        
        self.actor, self.critic, self.policy = self.build_actor_critic_network()
        self.action_space = [i for i in range(self.n_actions)]
        
    def build_actor_critic_network(self):
        visible = layers.Input(shape=(10,2))
        flatten = layers.Flatten()(visible)
        delta = layers.Input(shape=[1])
        dense1 = layers.Dense(32, activation='relu')(flatten)
        dense2 = layers.Dense(64, activation='relu')(dense1)
        probs = layers.Dense(self.n_actions, activation='softmax')(dense2)
        values = layers.Dense(1, activation='linear')(dense2)
        
        def custom_loss(y_true, y_pred):
            out = K.clip(y_pred, 1e-8, 1-1e-8)
            log_likelihood = y_true * K.log(out)
            
            return K.sum(-log_likelihood * delta)
        actor = keras.Model(inputs=[visible, delta], outputs=[probs])
        actor.compile(optimizer=keras.optimizers.Adam(lr=self.actor_lr), loss=custom_loss)
        critic = keras.Model(inputs=[visible], outputs=[values])
        critic.compile(optimizer=keras.optimizers.Adam(lr=self.critic_lr), loss='mse')
        
        policy = keras.Model(inputs=[visible], outputs=[probs])
        return actor, critic, policy
    def choose_action(self, observation):
        state = observation[np.newaxis, :]
        probabilities = self.policy.predict(state)[0]
        action = np.random.choice(self.action_space, p=probabilities)
        
        return action
    
    def learn(self, state, action, reward, state2, done):
        state = state[np.newaxis, :]
        state2 = state2[np.newaxis, :]
        critic_value = self.critic.predict(state)
        critic_value2 = self.critic.predict(state2)
        
        if done:
            critic_value2 = 0
        
        target = reward + self.discount_factor * critic_value2 
        delta = target - critic_value
        
        actions = np.zeros([1, self.n_actions])
        actions[np.arange(1), action] = 1.0
        
        self.actor.fit([state, delta], actions)
        self.critic.fit(state, target)
        

In [10]:
env = gym.make('forex-v0', frame_bound=(50, 2000), window_size=10)
agent = Agent(actor_lr = 0.002, critic_lr = 0.002)
observation = env.reset()
done = False
while not done:
    action = agent.choose_action(observation)
    observation2, reward, done, info = env.step(action)
    agent.learn(observation, action, reward, observation2, done)
    observation = observation2

plt.cla()
env.render_all()
plt.show()

_SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'input_2:0' shape=(None, 1) dtype=float32>]