In [8]:
# check if GPU is available

import utils_sa
import numpy as np
from QNetwork_agent import Agent_Q
import syntheticCA_singleagent as sca 
from collections import deque


financial_params, ac_params = utils_sa.get_env_param()

In [9]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [10]:
ac_params

0,1,2,3
Total Number of Shares to Sell:,1000000,Fixed Cost of Selling per Share:,$0.062
Starting Price per Share:,$50.00,Trader's Risk Aversion:,1e-06
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [11]:
# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent = Agent_Q(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(), seed= 1225)

# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr = 1e-6

# Set the number of episodes to run the simulation
episodes = 10000

shortfall_hist = np.array([])
shortfall_deque = deque(maxlen=100)
eps=1.0

for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb = tr)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        action = agent.act(cur_state, eps)
        
        # Action is performed and new state, reward, info are received. 
        new_state, reward, done, info = env.step(action)
        
        # current state, action, reward, new state are stored in the experience replay
        agent.step(cur_state, action, reward, new_state, done)
        
        # roll over new state
        cur_state = new_state

        if info.done:
            shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall)
            shortfall_deque.append(info.implementation_shortfall)
            break
        eps = max(0.01, 0.996*eps)
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque)))        

print('\nAverage Implementation Shortfall: ${:,.2f} \n'.format(np.mean(shortfall_hist)))


Episode [100/10000]	Average Shortfall: $2,452,311.91
Episode [200/10000]	Average Shortfall: $2,207,101.86
Episode [300/10000]	Average Shortfall: $2,236,042.74
Episode [400/10000]	Average Shortfall: $3,087,920.07
Episode [500/10000]	Average Shortfall: $2,284,140.30
Episode [600/10000]	Average Shortfall: $2,503,269.96
Episode [700/10000]	Average Shortfall: $2,163,945.99
Episode [800/10000]	Average Shortfall: $2,913,090.60
Episode [900/10000]	Average Shortfall: $2,372,812.54
Episode [1000/10000]	Average Shortfall: $2,032,112.44
Episode [1100/10000]	Average Shortfall: $2,586,947.32
Episode [1200/10000]	Average Shortfall: $2,873,284.42
Episode [1300/10000]	Average Shortfall: $2,528,042.85
Episode [1400/10000]	Average Shortfall: $2,551,133.43
Episode [1500/10000]	Average Shortfall: $3,043,167.47


KeyboardInterrupt: 