## import package

In [2]:

import pickle
import time
import numpy as np
import pandas as pd
import argparse
import re
from envs import TradingEnv
from agent import DQNAgent
from utils import get_data, get_scaler, maybe_make_dir, plot_all


## config setting

In [2]:
mode='train' # "train" or "test"
stock_name = "tech"
stock_table = "tech_table"
episode=50
batch_size=32
initial_invest=1000000
weights='a trained model weights'

## preprocess setting

In [3]:
maybe_make_dir('weights')
maybe_make_dir('portfolio_val')

timestamp = time.strftime('%Y%m%d%H%M')

data = get_data(stock_name, stock_table)
train = round(data.shape[1]*0.70)
test = train+1
train_data = data[:, :test]
test_data = data[:, test:]

env = TradingEnv(train_data, initial_invest)
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size, mode)
scaler = get_scaler(env)

portfolio_value = []

[[0, 60606.0], [0, 16511.0], [0, 4329.0], [0, 139860.0], [0, 136986.0], [0, 33.0], [0, 121.125], [0, 462.0], [0, 14.300000190734863], [0, 14.600000381469728], [0, 2000000]]
243
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                384       
                                                                 
 dense_1 (Dense)             (None, 32)                1056      
                                                                 
 dense_2 (Dense)             (None, 243)               8019      
                                                                 
Total params: 9,459
Trainable params: 9,459
Non-trainable params: 0
_________________________________________________________________
None


In [4]:

if mode == 'test':
    # remake the env with test data
    env = TradingEnv(test_data, initial_invest)
    # load trained weights
    agent.load(weights)
    # when test, the timestamp is same as time when weights was trained
    timestamp = re.findall(r'\d{12}', weights)[0]
    # daily_portfolio_value = [env.init_invest]
    daily_portfolio_value = []

for e in range(episode):
    state = env.reset()

    # print(state)
    state = scaler.transform([state])
    for time in range(env.n_step):
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        # next_state[-1] = next_state[-1][0] # for test
        next_state = scaler.transform([next_state])
        if mode == 'train':
            agent.remember(state, action, reward, next_state, done)
        if mode == "test":
            daily_portfolio_value.append(info['cur_val'])
        state = next_state
        if done:

            # if args.mode == "test" and e % 100 == 0:
                # plot_all(stock_name, daily_portfolio_value, env, test + 1)
            daily_portfolio_value = []
            print("episode: {}/{}, episode end value: {}".format(
                e + 1, episode, info['cur_val']))
            portfolio_value.append(info['cur_val']) # append episode end portfolio value

            break
        if mode == 'train' and len(agent.memory) > batch_size:
            agent.replay(batch_size)
    if mode == 'train' and (e + 1) % 10 == 0:  # checkpoint weights
        agent.save('weights/{}-dqn.h5'.format(timestamp))

print("mean portfolio_val:", np.mean(portfolio_value))
print("median portfolio_val:", np.median(portfolio_value))
# save portfolio value history to disk
with open('portfolio_val/{}-{}.p'.format(timestamp, mode), 'wb') as fp:
    pickle.dump(portfolio_value, fp)



episode: 1/50, episode end value: 1015988.0100250244
episode: 2/50, episode end value: 944455.9788703918
episode: 3/50, episode end value: 1182673.9769935608


KeyboardInterrupt: 

In [5]:
env.observation_space.shape
env.observation_space.high,env.observation_space.low
# help(env.observation_space)

(array([6.06060000e+04, 1.65110000e+04, 4.32900000e+03, 1.39860000e+05,
        1.36986000e+05, 3.30000000e+01, 1.21125000e+02, 4.62000000e+02,
        1.43000002e+01, 1.46000004e+01, 2.00000000e+06]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))

In [6]:

import itertools
action_combo = list(map(list, itertools.product([0, 1, 2], repeat=10)))
print(env.n_stock,len(action_combo))
action_combo
# plot_all(stock_name, daily_portfolio_value, env, test + 1)

5 59049


[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
 [0, 0, 0, 0, 0, 0, 0, 0, 2, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 2, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 2, 2],
 [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 1, 0, 2],
 [0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
 [0, 0, 0, 0, 0, 0, 0, 1, 1, 2],
 [0, 0, 0, 0, 0, 0, 0, 1, 2, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 2, 1],
 [0, 0, 0, 0, 0, 0, 0, 1, 2, 2],
 [0, 0, 0, 0, 0, 0, 0, 2, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 2, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 2, 0, 2],
 [0, 0, 0, 0, 0, 0, 0, 2, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 2, 1, 1],
 [0, 0, 0, 0, 0, 0, 0, 2, 1, 2],
 [0, 0, 0, 0, 0, 0, 0, 2, 2, 0],
 [0, 0, 0, 0, 0, 0, 0, 2, 2, 1],
 [0, 0, 0, 0, 0, 0, 0, 2, 2, 2],
 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 1, 0, 0, 2],
 [0, 0, 0,

In [None]:
print(env.cur_step,'',env.stock_price[0],env.stock_owned[0])
display(env.stock_price)
display(pd.DataFrame(env.stock_price_history).head())
display(pd.DataFrame(env.stock_price_history[:,512]).head())
display(pd.DataFrame(env.stock_owned))

512  40.99937438964844 0


array([  40.99937439,   75.09999847,   94.80000305,   70.09999847,
         84.69999695,   29.        ,  544.        ,   23.45000076,
        646.        ,   16.14999962,  142.        ,   83.40000153,
         18.39999962,  450.        ,  322.        ,  215.5       ,
        252.        ,   63.09999847,  281.85327148,   83.80000305,
         10.5       ,  109.5       ,   11.80000019,    6.98999977,
         15.95454502,   22.03922272,   20.20464325,   41.81818008,
         41.5       ,   25.39984512,   19.42307663,   32.90000153,
         13.55631828,   23.10000038,   23.45477295,  309.        ,
       4630.        ,  212.        ,  108.5       ,   68.40000153,
         66.        ,   20.32172012, 1010.        ,   89.09999847,
         47.09999847,  273.10604858])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,512
0,29.571323,30.218575,30.097216,30.178122,30.906281,30.987186,31.108545,30.339935,30.420841,30.623108,...,40.332718,40.951756,40.475574,40.380337,40.570808,40.761284,41.094612,41.142231,40.951756,40.999374
1,67.199997,67.5,67.5,67.300003,67.900002,67.599998,67.599998,67.900002,68.099998,68.699997,...,73.699997,74.0,73.599998,75.0,74.599998,74.400002,74.5,74.199997,75.0,75.099998
2,98.099998,98.5,98.5,98.400002,99.5,102.5,101.0,101.0,101.5,102.0,...,92.699997,94.5,93.699997,94.0,93.699997,94.0,94.800003,95.199997,94.800003,94.800003
3,77.0,78.199997,78.0,78.0,79.5,81.699997,82.699997,82.0,81.099998,82.0,...,68.900002,70.300003,69.800003,69.900002,70.0,69.800003,70.400002,70.400002,70.099998,70.099998
4,103.5,103.5,104.5,104.0,105.5,110.0,108.0,109.0,108.5,109.0,...,82.5,84.300003,84.0,83.900002,83.900002,84.900002,85.400002,85.199997,84.699997,84.699997


Unnamed: 0,0
0,40.999374
1,75.099998
2,94.800003
3,70.099998
4,84.699997


Unnamed: 0,0
0,0
1,0
2,0
3,0
4,200
5,200
6,200
7,200
8,200
9,0


In [None]:
print(len(env._get_obs()), env.n_stock+env.n_stock+1)
print(sum(env.stock_owned*env.stock_price),env.cash_in_hand)

for a,b in zip(env.stock_owned,env.stock_price):
    print(a,b)


93 93
1228229.9991607666 6398.248481750488
0 40.99937438964844
0 75.0999984741211
0 94.8000030517578
0 70.0999984741211
200 84.69999694824219
200 29.0
200 544.0
200 23.450000762939453
200 646.0
0 16.149999618530273
0 142.0
0 83.4000015258789
1000 18.39999961853028
1000 450.0
1000 322.0
800 215.5
0 252.0
0 63.09999847412109
0 281.853271484375
0 83.80000305175781
0 10.5
0 109.5
0 11.800000190734863
0 6.989999771118164
0 15.954545021057127
0 22.03922271728516
0 20.20464324951172
0 41.81818008422852
0 41.5
0 25.399845123291016
0 19.423076629638672
0 32.900001525878906
0 13.556318283081056
0 23.100000381469727
0 23.45477294921875
0 309.0
0 4630.0
0 212.0
0 108.5
0 68.4000015258789
0 66.0
0 20.321720123291016
0 1010.0
0 89.0999984741211
0 47.09999847412109
0 273.1060485839844


In [None]:
action_vec = action_combo[8]
print(action_vec)
for i,a in enumerate(action_vec):
    print(i,a,f'action is {a}')
    for j in range(i, 4 * i):
        print(f' ({i},{a})  i,a \n',j,'← j')


[0, 0, 2, 2]
0 0 action is 0
1 0 action is 0
 (1,0)  i,a 
 1 ← j
 (1,0)  i,a 
 2 ← j
 (1,0)  i,a 
 3 ← j
2 2 action is 2
 (2,2)  i,a 
 2 ← j
 (2,2)  i,a 
 3 ← j
 (2,2)  i,a 
 4 ← j
 (2,2)  i,a 
 5 ← j
 (2,2)  i,a 
 6 ← j
 (2,2)  i,a 
 7 ← j
3 2 action is 2
 (3,2)  i,a 
 3 ← j
 (3,2)  i,a 
 4 ← j
 (3,2)  i,a 
 5 ← j
 (3,2)  i,a 
 6 ← j
 (3,2)  i,a 
 7 ← j
 (3,2)  i,a 
 8 ← j
 (3,2)  i,a 
 9 ← j
 (3,2)  i,a 
 10 ← j
 (3,2)  i,a 
 11 ← j


In [3]:
np.random.rand()

0.6528565952146332