<a href="https://colab.research.google.com/github/zzzevaka/qlearn/blob/master/Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/zzzevaka/qlearn.git

Cloning into 'qlearn'...
remote: Enumerating objects: 14, done.[K
remote: Counting objects: 100% (14/14), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 14 (delta 2), reused 13 (delta 1), pack-reused 0[K
Unpacking objects: 100% (14/14), done.


In [0]:
import numpy as np
import pandas as pd

In [0]:
from qlearn.agents.dqn_agent import DQNAgent
from qlearn.envs.trading_env import TradingEnv

In [0]:
data = pd.read_csv('qlearn/brent.csv', index_col='index', parse_dates=['index'])

In [7]:
diff_data = data.diff()[1:]
def normalize(x):
    for k, v in x.items():
        if v < 0:
            x[k] = 1
        elif v > 0:
            x[k] = 3
        else:
            x[k] = 2
    return x

diff_data = diff_data.apply(normalize, axis=1)
diff_data.shape

(107360, 5)

In [0]:
def reshape_with_window(df, difference_values, columns, shape3d=False):
    m, f = df.shape
    columns = list(columns)
    ret_df = df[columns].values
    if shape3d:
        ret_df = ret_df.reshape(m, 1, f)
    max_diff_value = max(difference_values)
    for t in difference_values:
        new_columns = ['{}_{}'.format(c, t) for c in columns]
        tmp_df = df.drop(columns, axis=1)
        tmp_df[new_columns] = df[columns].shift(t)
        tmp_df = tmp_df.values
        if shape3d:
            tmp_df = tmp_df.reshape(m, 1, f)
        ret_df = np.hstack([tmp_df, ret_df])
    return ret_df[max_diff_value:]

In [0]:
reshape_columns = ['<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<VOL>']
df = reshape_with_window(diff_data, [1,2,3,4,5,6,7], reshape_columns, True)

In [14]:
X = df[:-1]
Y = diff_data[-df.shape[0]:]['<CLOSE>'][1:]

print(X.shape)
print(Y.shape)

(107352, 8, 5)
(107352,)


In [0]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, shuffle=False)

In [0]:
train_env = TradingEnv(X_train, Y_train, order_close_threshold=0.1, order_tax=0.015)
state_size = train_env.observation_space.shape[1]
action_size = train_env.action_space.n

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Flatten
from tensorflow.keras.optimizers import Adam

def create_model(df):
    model = Sequential()
    model.add(LSTM(20, input_shape=df.shape[1:]))
    model.add(Dropout(0.1))
    model.add(Flatten(input_shape=df.shape[1:]))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(action_size, activation='softmax'))
    model.compile(loss='mse', optimizer='Adam', metrics=['accuracy'])
    
    return model

model = create_model(X_train)

In [26]:
agent = DQNAgent(model=model, action_size=action_size)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 20)                2080      
_________________________________________________________________
dropout_3 (Dropout)          (None, 20)                0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 20)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                210       
_________________________________________________________________
dropout_4 (Dropout)          (None, 10)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 10)                110       
_________________________________________________________________
dropout_5 (Dropout)          (None, 10)               

In [0]:
import logging
logging.basicConfig(level=logging.INFO)

In [28]:
EPISODES = 2
BATCH_SIZE = 512

for e in range(EPISODES):
    state = train_env.reset()
    actions = [0,0,0]
    for step in range(train_env.n_step):
        action = agent.act(state)
        actions[action] += 1
        next_state, reward, done, info = train_env.step(action)
        agent.remember(state, action, reward, next_state, done)
        
        if done:
            logging.info(f'episode: {e+1}/{EPISODES}, end value: {train_env.wallet_amount}. actions: {actions}')
        if len(agent.memory) > BATCH_SIZE:
            agent.replay(BATCH_SIZE)

DEBUG:root:step: 1, reward: -1.03, amount: 179.4
DEBUG:root:step: 2, reward: -1.03, amount: 161.89000000000001
DEBUG:root:step: 3, reward: 0.97, amount: 177.41000000000003
DEBUG:root:step: 4, reward: -1.03, amount: 159.90000000000003
DEBUG:root:step: 5, reward: -1.03, amount: 144.45000000000005
DEBUG:root:step: 6, reward: -1.03, amount: 130.03000000000006
DEBUG:root:step: 7, reward: 0, amount: 130.03000000000006
DEBUG:root:step: 8, reward: 0, amount: 130.03000000000006
DEBUG:root:step: 9, reward: 1.9849999999999999, amount: 155.83500000000006
DEBUG:root:step: 10, reward: -2.045, amount: 125.16000000000007
DEBUG:root:step: 11, reward: 0, amount: 125.16000000000007
DEBUG:root:step: 12, reward: -2.015, amount: 100.98000000000008
DEBUG:root:step: 13, reward: -2.0149999999999997, amount: 80.83000000000007
DEBUG:root:step: 14, reward: 1.985, amount: 96.71000000000006
DEBUG:root:step: 15, reward: 1.9549999999999998, amount: 114.30500000000006
DEBUG:root:step: 16, reward: -1.015, amount: 103.1

KeyboardInterrupt: ignored