In [1]:
from UCLSE.rl_trader import RLTrader
from UCLSE.rl_env import RLEnv
from UCLSE.custom_timer import CustomTimer
from UCLSE.environment import yamlLoad
import os
import numpy as np
import pandas as pd

In this notebook we will see how an Reinforcement Learning experiment can easily be setup in BUCLSE.

Firstly we define a dictionary sufficient to define a trading environment as before.

In [2]:
pa=os.getcwd()
config_name='UCLSE\\test\\fixtures\\mkt_cfg.yml'
config_path=os.path.join(pa,config_name)

environ_dic=yamlLoad(config_path)


def geometric_q():
    return np.random.geometric(0.6)

environ_dic['quantity_f']=geometric_q

timer=CustomTimer(start=0,end=600,step=1/80)

environ_dic['timer']=timer

environ_dic

{'start_time': 0.0,
 'end_time': 600.0,
 'supply_price_low': 95,
 'supply_price_high': 95,
 'demand_price_low': 105,
 'demand_price_high': 105,
 'interval': 30,
 'timemode': 'drip-poisson',
 'buyers_spec': {'GVWY': 10, 'SHVR': 10, 'ZIC': 10, 'ZIP': 10},
 'sellers_spec': {'GVWY': 10, 'SHVR': 10, 'ZIC': 10, 'ZIP': 10},
 'n_trials': 1,
 'trade_file': 'UCLSE\\\\test\\\\output\\\\avg_balance.csv',
 'trade_record': 'UCLSE\\\\test\\\\output\\\\transactions.csv',
 'trial': 1,
 'verbose': False,
 'dump_each_trade': True,
 'quantity_f': <function __main__.geometric_q()>,
 'timer': time: 0 time left: 48000.0 start: 0 end: 600 step: 0.0125}

Next we define a RL trader. This has a parent type trader so shares all of the methods needed to intereact with BUCLSE. We will give it some initial inventory at a certain average cost.

In [3]:
rl_trader=RLTrader( ttype='RL', tid='RL', balance=0, n_quote_limit=100
                   ,inventory=1,direction='Long',avg_cost=120,timer=timer)

adding exchange to RL trader  RL


Just check that the trader has inventory

In [4]:
rl_trader.inventory

1

Next we instantiate the RL environment. This is a subclass of the openai gym gym.EnV
This means it has render and step methods.

The RL environment will set up a Market_session object, (and through that associated exchange, traders and supply_demand objects).

The RL environment will actually iterate through a number of steps until the order book has depth = thresh on both sides

In [5]:
lobenv=RLEnv(RL_trader=rl_trader,environ_dic=environ_dic,thresh=4)


using timer start time=0, end time=600, instead
overwriting timer step size from: 0.01 to 0.0125
adding exchange to RL trader  RL


check what time it is:

In [6]:
timer.time

29.4875

The render method returns the lob

In [7]:
lobenv.render()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,tid,tid
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,otype,Ask,Bid
price,time,qid,qty,Unnamed: 4_level_2,Unnamed: 5_level_2
74.0,29.4625,2261,2,,B35
79.0,29.4875,2263,1,,B33
88.0,29.3375,2251,1,,B34
96.0,29.3,2248,1,,B37
97.0,29.325,2250,2,S06,
97.0,29.375,2254,1,S07,
97.0,29.4125,2257,2,S15,
97.0,29.45,2260,2,S08,
104.0,29.175,2238,1,S25,
104.0,29.3125,2249,1,S24,


The RL trader at this point can now submit actions, see results and receive rewards though the step method.

In [8]:
lobenv.action_dic

{(0, 0, 0): Do nothing,
 (1, 0, 0): cancel Bid,
 (1, -1, 1): Cross bid-ask spread and fill Bid quantity 1 at best,
 (1, 0, 1):  submit or replace Bid with spread 0 and quantity 1,
 (1, 1, 1):  submit or replace Bid with spread 1 and quantity 1,
 (1, 1, 2):  submit or replace Bid with spread 2 and quantity 1,
 (1, 1, 3):  submit or replace Bid with spread 3 and quantity 1,
 (1, 1, 4):  submit or replace Bid with spread 4 and quantity 1,
 (1, 1, 5):  submit or replace Bid with spread 5 and quantity 1,
 (-1, 0, 0): cancel Ask,
 (-1, -1, 1): Cross bid-ask spread and fill Ask quantity 1 at best,
 (-1, 0, 1):  submit or replace Ask with spread 0 and quantity 1,
 (-1, 1, 1):  submit or replace Ask with spread 1 and quantity 1,
 (-1, 2, 1):  submit or replace Ask with spread 2 and quantity 1,
 (-1, 3, 1):  submit or replace Ask with spread 3 and quantity 1,
 (-1, 4, 1):  submit or replace Ask with spread 4 and quantity 1,
 (-1, 5, 1):  submit or replace Ask with spread 1 and quantity 1}

Demonstrate how to submit an action with the standard step method. This returns the state of the lob, reward and a boolean stopping variable.

In [9]:
lobenv.sess.process_verbose=True #turn on the verbal messages from the exchange for exposition
print('LOB before')
print(lobenv.render())

print('RL trader does action (1,0,1) which is to add a bid order at 0 spread to best bid for quantity 1')
_,reward,done,_=lobenv.step((1,0,1)) 
print('LOB after')
print(lobenv.render())

LOB before
                        tid     
otype                   Ask  Bid
price time    qid  qty          
74.0  29.4625 2261 2    NaN  B35
79.0  29.4875 2263 1    NaN  B33
88.0  29.3375 2251 1    NaN  B34
96.0  29.3000 2248 1    NaN  B37
97.0  29.3250 2250 2    S06  NaN
      29.3750 2254 1    S07  NaN
      29.4125 2257 2    S15  NaN
      29.4500 2260 2    S08  NaN
104.0 29.1750 2238 1    S25  NaN
      29.3125 2249 1    S24  NaN
114.0 29.4250 2258 1    S21  NaN
117.0 29.4750 2262 2    S30  NaN
RL trader does action (1,0,1) which is to add a bid order at 0 spread to best bid for quantity 1
oid gen RL_29.4875_1
QUID: order.quid=2264
RESPONSE: Addition
QUID: order.quid=2265
RESPONSE: Overwrite
LOB after
                        tid     
otype                   Ask  Bid
price time    qid  qty          
74.0  29.4625 2261 2    NaN  B35
79.0  29.4875 2263 1    NaN  B33
88.0  29.3375 2251 1    NaN  B34
96.0  29.3000 2248 1    NaN  B37
      29.4875 2264 1    NaN   RL
97.0  29.3250 2250 

Check the last few changes happening at exchange

In [10]:
pd.DataFrame(lobenv.sess.exchange.tape).tail(5)

Unnamed: 0,oid,otype,p1_qid,p2_qid,party1,party2,price,qid,qty,tape_time,tid,time,type
2307,-37,Bid,,,,,74.0,2261.0,2,29.4625,B35,29.4625,New Order
2308,-72,Ask,,,,,117.0,2262.0,2,29.475,S30,29.475,New Order
2309,-35,Bid,,,,,79.0,2263.0,1,29.4875,B33,29.4875,New Order
2310,RL_29.4875_1,Bid,,,,,96.0,2264.0,1,29.4875,RL,29.4875,New Order
2311,-72,Ask,,,,,117.0,2265.0,2,29.5,S30,29.5,New Order


Cancel the order with action (-1,0,0)

In [11]:
_,reward,done,_=lobenv.step((1,0,0))
lobenv.render()

QUID: order.quid=2266
RESPONSE: Overwrite


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,tid,tid
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,otype,Ask,Bid
price,time,qid,qty,Unnamed: 4_level_2,Unnamed: 5_level_2
74.0,29.4625,2261,2,,B35
79.0,29.4875,2263,1,,B33
88.0,29.3375,2251,1,,B34
96.0,29.3,2248,1,,B37
97.0,29.325,2250,2,S06,
97.0,29.375,2254,1,S07,
97.0,29.4125,2257,2,S15,
97.0,29.45,2260,2,S08,
104.0,29.175,2238,1,S25,
104.0,29.3125,2249,1,S24,


In [12]:
pd.DataFrame(lobenv.sess.exchange.tape).tail(5)

Unnamed: 0,oid,otype,p1_qid,p2_qid,party1,party2,price,qid,qty,tape_time,tid,time,type
2309,-35,Bid,,,,,79.0,2263.0,1,29.4875,B33,29.4875,New Order
2310,RL_29.4875_1,Bid,,,,,96.0,2264.0,1,29.4875,RL,29.4875,New Order
2311,-72,Ask,,,,,117.0,2265.0,2,29.5,S30,29.5,New Order
2312,RL_29.4875_1,Bid,,,,,96.0,2264.0,1,29.5,RL,29.4875,Cancel
2313,-63,Ask,,,,,112.0,2266.0,1,29.5125,S21,29.5125,New Order


In [13]:
Lift the ask

SyntaxError: invalid syntax (<ipython-input-13-14a8882b0e0d>, line 1)

In [14]:
_,reward,done,_=lobenv.step((1,-1,1))

print(lobenv)
print(reward,done)
lobenv.render()

oid gen RL_29.5125_2
QUID: order.quid=2267
RESPONSE: Addition
Bid  leg 0  lifts best  Ask 97.0
counterparty S06 price 97.0
Partial fill situation
partial fill passive side  2250.000001 Order(tid='S06', otype='Ask', price=97.0, qty=1, time=29.325, qid=2250.000001, oid=-48)
[{'type': 'Trade', 'tape_time': 29.5125, 'price': 97.0, 'party1': 'S06', 'party2': 'RL', 'qty': 1, 'p1_qid': 2250, 'p2_qid': 2267.0}]
ammend trade  Order(tid='S06', otype='Ask', price=97.0, qty=1, time=29.325, qid=2250.000001, oid=-48)
QUID: order.quid=2269
RESPONSE: Overwrite
<RLEnv instance>
-1 True


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,tid,tid
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,otype,Ask,Bid
price,time,qid,qty,Unnamed: 4_level_2,Unnamed: 5_level_2
74.0,29.4625,2261.0,2,,B35
79.0,29.4875,2263.0,1,,B33
88.0,29.3375,2251.0,1,,B34
96.0,29.525,2269.0,1,,B37
97.0,29.325,2250.000001,1,S06,
97.0,29.375,2254.0,1,S07,
97.0,29.4125,2257.0,2,S15,
97.0,29.45,2260.0,2,S08,
104.0,29.175,2238.0,1,S25,
104.0,29.3125,2249.0,1,S24,


In [15]:
pd.DataFrame(lobenv.sess.exchange.tape).tail(5)

Unnamed: 0,oid,otype,p1_qid,p2_qid,party1,party2,price,qid,qty,tape_time,tid,time,type
2313,-63,Ask,,,,,112.0,2266.0,1,29.5125,S21,29.5125,New Order
2314,RL_29.5125_2,Bid,,,,,97.0,2267.0,1,29.5125,RL,29.5125,New Order
2315,-48,Ask,,,,,97.0,2250.000001,1,29.5125,S06,29.325,Ammend
2316,,,2250.0,2267.0,S06,RL,97.0,,1,29.5125,,,Trade
2317,-39,Bid,,,,,96.0,2269.0,1,29.525,B37,29.525,New Order


In [16]:
print(rl_trader)

[TID: RL type: RL balance: 0 blotter:     BS        oid  order qty  order_issue_time  p1_qid  p2_qid party1 party2  price  profit  qty status tid  time type
0  Buy  RL_23.6_3          1              23.6    1900  1910.0    S37     RL     96       0    1   full  RL  23.6  Bid orders: OrderedDict() n_trades: 1 profitpertime: 0.0]


In [17]:
rl_trader.inventory

2