In [1]:
"""
Magic commands
"""
%matplotlib inline
%load_ext autoreload
%autoreload 2

"""
Python standard packages
"""
import os
import sys

import numpy as np
import pandas as pd

import openpyxl

import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, clear_output

import scipy.stats as stats
import random
from statsmodels.tsa.stattools import acf
import statsmodels.api as sm
"""
Own packages
"""
# Put the authors' local checkouts at the front of the import path, presumably so
# that the `market_simulation_study` and `codelib` imports below can be resolved.
# NOTE(review): both entries are machine-specific absolute Windows paths (one per
# author); on any other machine they will not point at the project -- consider an
# environment variable or an installed package instead.
sys.path.insert(0, "C:\\code\\speciale\\") # vesl path
sys.path.insert(0, "C:\\Users\\lucas\\OneDrive\\Skrivebord\\repo\\speciale\\") # ljb path

from market_simulation_study.agent import RandomAgent, InvestorAgent, TrendAgent, RLAgent, MarketMakerAgent
from market_simulation_study.agent import Memory, MuPolicyNetwork, ActionValueNetwork, GaussianPolicyNetwork, GaussianBinomialPolicyNetwork, ActorCriticAgent
from market_simulation_study.environment import MarketEnvironment

from torch.optim import Adam

from codelib.plotting import dist_vs_normal_plot, volume_contribution_plot, DefaultStyle, default_colors
sns.set_theme("paper","whitegrid")
DefaultStyle()
"""
Initializations
"""
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)

In [2]:
# ---------------------------------------------------------------------------
# Simulation parameters
# ---------------------------------------------------------------------------

# Population: how many agents of each type take part in the market.
n_random_agents, n_investor_agents, n_trend_agents = 4, 2, 4
n_mm_agents, n_rl_agents = 15, 1

n_agents = sum((n_random_agents, n_investor_agents, n_trend_agents,
                n_rl_agents, n_mm_agents))

time_periods = 1000

# Initial price history: 100 values scattered around 100 (Gaussian noise, std 0.01).
price_list = [100 + np.random.normal(scale=0.01) for _ in range(100)]
# Initial volume entry: two all-zero rows with one column per agent.
ex_list = np.array([[0] * n_agents for _ in range(2)])

fee = slippage = 0
mean_buy_price, mean_sell_price = 99.5, 100.5
delta = 1
atp = 0  # only referenced by the disabled 'all_traded_prices' entry below
total_buy_volume = total_sell_volume = 0

# Initial market state handed to the environment and to every agent.
state0 = {
    "market_prices": price_list,
    "volume": ex_list,
    "fee": fee,
    "mean_buy_price": mean_buy_price,
    "mean_sell_price": mean_sell_price,
    "slippage": slippage,
    "total_buy_volume": total_buy_volume,
    "total_sell_volume": total_sell_volume,
    # "all_traded_prices": atp,
}

def reset1():
    """Build a fresh population of the scripted (non-learning) agents.

    Reads the module-level population sizes (``n_investor_agents``,
    ``n_random_agents``, ``n_trend_agents``, ``n_mm_agents``) and ``delta``.
    Agent ids are handed out in consecutive ranges in the order
    investor -> random -> trend -> market maker.

    Returns
    -------
    list of list
        ``[investor_agents, random_agents, trend_agents, mm_agents]`` -- one
        sub-list per agent type (not flattened).
    """
    inv_intensity = 0.02

    # Investors are configured by hand: the first one is built when at least one
    # investor is requested, the second one only when exactly two are requested.
    investor_agents = []
    if n_investor_agents >= 1:
        first_investor = InvestorAgent(agent_id = 0,
                                       delta = delta,
                                       intensity = inv_intensity / 4,
                                       buy_price_margin = 0.0025,
                                       sell_price_margin = 0.010,
                                       buy_volume = 15,
                                       sell_volume = 30,
                                       n_orders = 10)
        investor_agents.append(first_investor)
    if n_investor_agents == 2:
        second_investor = InvestorAgent(agent_id = 1,
                                        delta = delta,
                                        intensity = inv_intensity / 2,
                                        n_orders = 6,
                                        buy_price_margin = 0.005,
                                        sell_price_margin = 0.020,
                                        buy_volume = 25,
                                        sell_volume = 50,
                                        can_short = True)
        investor_agents.append(second_investor)

    # First id of each remaining group.
    random_id_start = n_investor_agents
    trend_id_start = n_random_agents + n_investor_agents
    mm_id_start = n_random_agents + n_investor_agents + n_trend_agents

    random_agents = []
    for offset in range(n_random_agents):
        random_agents.append(RandomAgent(agent_id = random_id_start + offset,
                                         delta = delta,
                                         noise_range = (0.0001, 0.0003),
                                         mid_price_noise = 0.0025,
                                         n_coin_flips = 3,
                                         coin_bias_sell = 0.5,
                                         coin_bias_buy = 0.5))

    trend_agents = []
    for offset in range(n_trend_agents):
        # Each trend agent gets its own randomly drawn pair of moving-average
        # windows: the first from [10, 15), the second from [20, 30).
        window_one = np.random.randint(10, 15)
        window_two = np.random.randint(20, 30)
        trend_agents.append(TrendAgent(agent_id = trend_id_start + offset,
                                       delta = delta,
                                       moving_average_one = window_one,
                                       moving_average_two = window_two,
                                       price_margin = 0.005))

    mm_agents = []
    for offset in range(n_mm_agents):
        gamma2 = np.random.uniform(0.5, 0.75)
        # Degenerate interval: always 0.1, but the call still consumes a random draw.
        spread_zero = np.random.uniform(0.1, 0.1)
        mm_agents.append(MarketMakerAgent(agent_id = mm_id_start + offset,
                                          delta = delta,
                                          gamma = 0.00005,
                                          gamma2 = gamma2,
                                          spread_zero = spread_zero,
                                          n_volume = 3))

    return [investor_agents, random_agents, trend_agents, mm_agents]

# Build the scripted agents once; the result is a list of per-type sub-lists
# that reset2 later flattens into a single agent list.
agents = reset1()

In [3]:
# =====================
# PARAMETERS
# =====================

# --- Neural networks ---
min_action_value, max_action_value = -0.035, 0.035   # Prices
min_action_value_two, max_action_value_two = 0, 5    # Volumes

action_dims, state_dims = 2, 12
fc1_dims = fc2_dims = 256
learning_rate = 3e-7
norm_sigma = 0.005
n_gradient_steps_per_update = 1
update_every = 10
batch_size = 8

# --- RL agent ---
discount_factor = 0.95
# Id of the learning agent: the combined count of random, investor, trend and
# market-maker agents.
agent_id = sum((n_random_agents, n_investor_agents, n_trend_agents, n_mm_agents))
def reset2(agents):
    """Create the actor-critic RL agent and return one flat list of all agents.

    The policy, Q-function and value-function networks (and one Adam optimiser
    for each) are built from the module-level hyper-parameters, wrapped in an
    ``ActorCriticAgent`` and placed after the scripted agents.

    Parameters
    ----------
    agents : list of list
        Agent groups as returned by ``reset1`` (one sub-list per agent type).
        The argument itself is not modified.

    Returns
    -------
    list
        Every agent from ``agents`` in group order, followed by the newly
        created ``ActorCriticAgent`` as the last element.
    """
    ######################
    # AGENT INITIALIZATION
    ######################
    # policy = GaussianPolicyNetwork(max_action_value     = max_action_value,
    #                                min_action_value     = min_action_value,
    #                                max_action_value_two = max_action_value_two,
    #                                min_action_value_two = min_action_value_two,
    #                                input_dims           = state_dims,
    #                                action_dims          = action_dims,
    #                                fc1_dims             = fc1_dims,
    #                                fc2_dims             = fc2_dims)

    policy = MuPolicyNetwork(max_action_value     = max_action_value,
                             min_action_value     = min_action_value,
                             max_action_value_two = max_action_value_two,
                             min_action_value_two = min_action_value_two,
                             input_dims           = state_dims,
                             action_dims          = action_dims,
                             fc1_dims             = fc1_dims,
                             fc2_dims             = fc2_dims,
                             sigma                = norm_sigma,
                             name                 = 'policy_last_dropout_new_loss_1205NOSAVE',
                             n_volume             = 3,
                             dropout              = 0.2)
    policy_optimiser = Adam(policy.parameters(), lr = learning_rate, weight_decay=1e-5)

    # The Q-function takes the state and the action together as its input.
    qf = ActionValueNetwork(input_dims = state_dims + action_dims,
                            fc1_dims = fc1_dims,
                            fc2_dims = fc2_dims,
                            name = 'qf_last_dropout_new_loss_1205NOSAVE')
    qf_optimiser = Adam(qf.parameters(), lr = learning_rate, weight_decay=1e-5)

    # The value function takes the state only; layer sizes are left at the
    # network's own defaults.
    vf = ActionValueNetwork(input_dims = state_dims,
                            name = 'vf_last_dropout_new_loss_1205NOSAVE')
    vf_optimiser = Adam(vf.parameters(), lr = learning_rate, weight_decay=1e-5)

    rl_agents = [ActorCriticAgent(policy           = policy,
                                  policy_optimiser = policy_optimiser,
                                  qf               = qf,
                                  qf_optimiser     = qf_optimiser,
                                  vf               = vf,
                                  vf_optimiser     = vf_optimiser,
                                  discount_factor  = discount_factor,
                                  agent_id         = agent_id,
                                  init_state       = state0,
                                  batch_size       = batch_size,
                                  position_penalty = 0.1)]

    # Flatten into a brand-new list instead of appending to the caller's list,
    # so the nested structure passed in stays exactly as it was.
    groups = list(agents) + [rl_agents]
    return [agent for group in groups for agent in group]

# Add the RL agent and flatten the per-type groups into the final agent list
# (the RL agent ends up as the last element).
agents = reset2(agents)

In [13]:
# Run configuration. Note that time_periods replaces the value set in the
# parameter cell further up.
time_periods, n_episodes = 500, 1500
save_count = 10            # sample volumes / PnLs every `save_count` steps
stack_autocorrels = False
atps = np.array([np.nan])

# Per-step series (separate list objects, nothing is shared).
rl_profit, market_prices = [], []
inv1_pos, inv2_pos, market_volume = [], [], []
bps, sps = [], []

# One zero-initialised series per agent class, one entry per sampling interval.
mm_volumes, random_volumes, investor_volumes, trend_volumes, rl_volumes = (
    np.zeros(int(time_periods / save_count)) for _ in range(5)
)
mm_pnls, random_pnls, investor_pnls, trend_pnls, rl_pnls = (
    np.zeros(int(time_periods / save_count)) for _ in range(5)
)

# Cross-episode result arrays: first axis is the episode, and the axis of
# length 5 holds one row per agent class.
all_traded_volumes = np.zeros((n_episodes, 5, int(time_periods / save_count)))
all_pnls_volumes = np.zeros((n_episodes, 5, int(time_periods / save_count)))
all_total_volumes = np.zeros((n_episodes, int(time_periods / save_count)))
all_rl_end_rewards = np.zeros(n_episodes)
all_rl_positions = np.zeros((n_episodes, time_periods))

all_policy_loss, all_qf_loss, all_vf_loss = [], [], []
all_mus_1, all_mus_2 = [], []
# Load previously saved experiment results. `.item()` unwraps the single Python
# object stored in the file (presumably a dict, given the `.copy()` below).
# `allow_pickle` is a boolean flag: pass True rather than a truthy string.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
old_data = np.load('rl_market_experiment_data.npy', allow_pickle=True).item()
old_rf_data = old_data.copy()

overwrite_old = False

def run_episode(episode):
    """Simulate one market episode and return its summary statistics.

    A fresh agent population is built with ``reset1``/``reset2``, saved network
    weights are loaded into the RL agent (the last agent in the list) and the
    environment is stepped ``time_periods`` times.  Every ``save_count`` steps
    the summed absolute traded quantity and the PnL of each agent class are
    sampled.

    Side effects: the results are also written into row ``episode`` of the
    module-level ``all_traded_volumes``, ``all_pnls_volumes``,
    ``all_total_volumes``, ``all_rl_end_rewards`` and ``all_rl_positions``
    arrays, and one entry per step is appended to the module-level
    ``inv1_pos``, ``inv2_pos`` and ``market_volume`` lists.  When
    ``stack_autocorrels`` is enabled, the module-level ``all_stacked_returns``
    is (re)assigned as well.

    Parameters
    ----------
    episode : int
        Row index into the module-level result arrays.

    Returns
    -------
    stacked_volumes : np.ndarray
        Five rows (investor, trend, random, market maker, RL), one column per
        sampling interval: each class's share of the interval's total volume.
        Entries are NaN for intervals in which nothing was traded (0 / 0).
    stacked_pnls : np.ndarray
        Same layout; sampled PnL per class divided by the number of agents in
        that class (the RL row is not divided).
    total_volumes : np.ndarray
        Total volume per sampling interval, summed over all five classes.
    rl_end_rewards
        Sum over the rewards stored in the RL agent's memory.
    rl_positions : list
        The RL agent's position after every step.
    """
    # Only assigned when stack_autocorrels is enabled. It has to be module-level:
    # as a function local it would be unbound for every episode other than 0.
    global all_stacked_returns

    agents = reset1()
    agents = reset2(agents)
    agents[-1].load_models(file_qf = "nn_models/qf_last_dropout_new_loss",
                           file_vf = "nn_models/vf_last_dropout_new_loss",
                           file_policy = "nn_models/policy_last_dropout_new_loss")

    # Per-class accumulators, keyed by the value of `agent.agent_class`.
    # The tuple order is also the row order of the stacked outputs.
    class_names = ("Investor", "Trend", "Random", "MM", "ActorCritic")
    n_slots = int(time_periods / save_count)
    volumes = {name: np.zeros(n_slots) for name in class_names}
    pnls = {name: np.zeros(n_slots) for name in class_names}

    agents[-1].reset()
    env = MarketEnvironment(state0, use_last_traded_price = True)
    # Initialize agents
    for j in range(n_agents):
        agents[j].reset()
        agents[j].update(state0)

    # START EPISODE
    rl_positions = []
    for step in range(time_periods):

        # env.step hands back the agent list, so always index `agents` afresh
        # instead of holding on to a reference from before the step.
        agents, state = env.step(agents)

        # NOTE(review): with the current population the list holds exactly
        # n_agents entries, so this loop also reaches the RL agent, which is
        # then updated a second time just below -- confirm that is intended.
        for j in range(n_agents):
            agents[j].update(state)

        #################################
        # RL AGENT UPDATE
        #################################
        # Exploration is switched on for every 10th step only.
        agents[-1].update(state, exploration_mode = (step % 10 == 0))

        rl_positions.append(agents[-1].position)

        #########################################
        # SAVE VOLUMES
        #########################################
        if save_count and step % save_count == 0:
            slot = int(step / save_count)
            for agent in agents:
                agent.calculate_profit_and_loss(state)
                kind = agent.agent_class
                if kind in volumes:  # agents of any other class are skipped
                    volumes[kind][slot] += np.abs(agent.all_trades[:, 1]).sum()
                    pnls[kind][slot] += agent.pnl

        #########################################
        # MODULE-LEVEL SERIES
        #########################################
        inv1_pos.append(agents[0].position)
        inv2_pos.append(agents[1].position)
        market_volume.append(state["volume"])

    #########################################
    # END OF EPISODE
    #########################################
    if stack_autocorrels:
        mp = pd.DataFrame(atps)
        returns = (mp / mp.shift(1)  - 1).dropna()
        episode_returns = returns.values.flatten()
        previous = globals().get("all_stacked_returns")
        if episode == 0 or previous is None:
            all_stacked_returns = episode_returns
        else:
            all_stacked_returns = np.hstack((previous, episode_returns))

    # Turn the sampled running totals into per-interval volumes by differencing
    # consecutive samples (the first sample is kept as it is).
    for series in volumes.values():
        series[1:] = series[1:] - series[:-1]
    total_volumes = (volumes["MM"] + volumes["Investor"] + volumes["Trend"]
                     + volumes["Random"] + volumes["ActorCritic"])

    # Each class's share of the interval volume.
    stacked_volumes = np.vstack([volumes[name] / total_volumes for name in class_names])
    stacked_pnls = np.vstack((pnls["Investor"] / n_investor_agents,
                              pnls["Trend"] / n_trend_agents,
                              pnls["Random"] / n_random_agents,
                              pnls["MM"] / n_mm_agents,
                              pnls["ActorCritic"]))

    rl_end_rewards = np.sum(agents[-1].memory.rewards[:]).detach().numpy()[0]

    all_traded_volumes[episode,:,:] = stacked_volumes
    all_pnls_volumes[episode,:,:] = stacked_pnls
    all_total_volumes[episode,:] = total_volumes
    all_rl_end_rewards[episode] = rl_end_rewards
    all_rl_positions[episode,:] = rl_positions

    return stacked_volumes, stacked_pnls, total_volumes, rl_end_rewards, rl_positions

In [14]:
import time
# Time one episode (index 1) run in the notebook process itself.
# `total` holds the elapsed wall-clock seconds; it is stored but not displayed here.
t0 = time.time()
res = run_episode(1)
t1 = time.time()
total = t1-t0

... loading checkpoint ...
... loading checkpoint ...
... loading checkpoint ...


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [17]:
from tqdm.contrib.concurrent import process_map

# Run several episodes in parallel worker processes and time the whole batch.
# NOTE(review): run_episode is defined inside the notebook; with the "spawn"
# start method (the default on Windows and macOS) the workers may be unable to
# import it -- if so, move run_episode into an importable module.
# Each worker fills its own copy of the module-level result arrays, so the
# per-episode results have to be taken from `results`.
no_episodes_to_run = 5
t0 = time.time()
# range() takes positional arguments only; this yields episodes 0 .. no_episodes_to_run - 1.
results = process_map(run_episode, range(no_episodes_to_run), max_workers = 5)
t1 = time.time()
total = t1-t0
print("time: ", total)

ModuleNotFoundError: No module named 'tqdm'