In [1]:
import numpy as np
from tqdm import tqdm
import logging
import coloredlogs
from keras.models import load_model

from agent import Agent, State
from utils import (
    get_state,
    get_data,
    format_currency,
    format_position,
    show_train_result,
    switch_k_backend_device,
    get_norm_candle,
    get_npast_data,
    get_data_small,
)

# Toggles the per-trade debug log lines emitted by the train/eval loops.
debug = True

# Send DEBUG-and-above records to training.log; force=True lets this cell be
# re-run without stacking up handlers from earlier executions.
logging.basicConfig(
    filename="training.log",
    encoding="utf-8",
    level=logging.DEBUG,
    force=True,
)
# Also mirror the log records, colourised, to the notebook output.
coloredlogs.install(level="DEBUG")
# switch_k_backend_device()

In [2]:
# --- Agent configuration -------------------------------------------------
# window_size = 10
strategy = "t-dqn"          # passed to Agent(strategy=...)
pretrained = False          # passed to Agent(pretrained=...)
model_name = "DQN-Trader"   # passed to Agent(model_name=...)

# --- Training configuration ----------------------------------------------
batch_size = 32   # replay batch size handed to train_model
ep_count = 100    # number of train/validate episodes to run

In [3]:
# Load the hourly candles; column -3 is the price the agent trades at.
filename = "./data/btc_hourly_2018-2022.csv"
data = get_data(filename)
price_data = data[:, -3]

# Chronological split: the first 70% of rows train, the remainder validates.
split_idx = round(0.7 * len(data))
train_data = data[:split_idx]
val_data = data[split_idx:]

# Price change across the first validation step (handed to show_train_result).
initial_offset = val_data[1, -3] - val_data[0, -3]

# data = get_data_small(filename)
# state = get_state(data, 0, window_size + 1)
# print(state)

In [4]:
def create_state(data, t, lstm_model, balance, invested):
    """Assemble the agent's ``State`` for row ``t`` of ``data``.

    The state combines the portfolio (``balance`` and ``invested``) with two
    market features: the candle returned by ``get_norm_candle`` for ``t`` and
    the first scalar of ``lstm_model``'s prediction on the window returned by
    ``get_npast_data(data, t, 5)``.
    """
    candle_features = get_norm_candle(data, t)
    past_window = get_npast_data(data, t, 5)
    # predict() returns a nested result; only element [0][0] is used.
    forecast = lstm_model.predict(past_window)[0][0]
    return State(balance, invested, candle_features, forecast)

# Perception model: a previously trained LSTM whose prediction is fed into
# every State as an extra feature.
LSTM_MODEL_PATH = "../time_series_forecast/models/lstm_model_10"
lstm_model = load_model(LSTM_MODEL_PATH)





In [5]:
# Starting point of every episode: row 0 of the full dataset, 1000 in cash
# and nothing invested.
init_state = create_state(data, t=0, lstm_model=lstm_model, balance=1000, invested=0)

# The agent's input dimension is taken from the state itself.
agent = Agent(
    init_state.size(),
    strategy=strategy,
    pretrained=pretrained,
    model_name=model_name,
)

9


In [6]:
def train_model(agent, episode, data, ep_count=100, batch_size=32):
    """Run one training episode over ``data`` and return its summary.

    For every step ``t`` the agent picks an action for the current state, the
    trade is applied to the next state's portfolio, the transition is stored
    via ``agent.remember`` and, once the agent's memory holds more than
    ``batch_size`` transitions, one experience-replay update is performed.
    The agent is checkpointed on every 10th episode.

    Args:
        agent: Object providing ``choose_action``, ``remember``, ``memory``,
            ``train_experience_replay`` and ``save``.
        episode: Episode number; used for the progress label and to decide
            whether to checkpoint.
        data: 2-D array of candles. Column ``-3`` is used as the trade price.
        ep_count: Total number of episodes (progress label only).
        batch_size: Replay batch size.

    Returns:
        Tuple ``(episode, ep_count, total_profit, mean_loss)``. ``mean_loss``
        is NaN when no replay update ran during the episode.
    """
    total_profit = 0
    data_length = len(data) - 1

    # Price trades from the rows being replayed instead of the notebook-level
    # price_data, which only lines up with `data` when `data` is a prefix of
    # the full dataset.
    prices = data[:, -3]

    avg_loss = []

    # NOTE(review): init_state is built from row 0 of the full dataset; this
    # matches `data` only while `data` starts at that same row -- confirm.
    state = init_state

    for t in tqdm(range(data_length), total=data_length, leave=True, desc='Episode {}/{}'.format(episode, ep_count)):
        reward = 0
        # The next state starts with the current portfolio; the chosen trade
        # (if any) is then applied to it below.
        next_state = create_state(data, t + 1, lstm_model, state.balance, state.invested)

        # select an action: action[0] is the kind (1 = buy, 2 = sell,
        # anything else = hold), action[1] scales the amount traded.
        action = agent.choose_action(state)

        # BUY
        if action[0] == 1:
            dollar_amount = action[1] * state.balance
            btc_amount = dollar_amount / prices[t]

            next_state.balance -= dollar_amount
            next_state.invested += btc_amount
            if debug:
                logging.debug("# train # Buy at: {} | Amount: {}".format(format_currency(prices[t]), format_currency(dollar_amount)))

        # SELL
        elif action[0] == 2:
            btc_amount = action[1] * state.invested
            dollar_amount = prices[t] * btc_amount

            next_state.balance += dollar_amount
            next_state.invested -= btc_amount

            # Profit is measured as net worth relative to the starting
            # portfolio, both valued at the current price.
            curr_net = next_state.get_net_worth(prices[t])
            init_net = init_state.get_net_worth(prices[t])

            # The reward is the change in cumulative profit since the
            # previous sell.
            new_profit = curr_net - init_net
            delta = new_profit - total_profit
            reward = delta  # max(delta, 0)
            total_profit += delta
            if debug:
                logging.debug("-- train -- Sell at: {} | Amount: {} | Profit: {}".format(
                    format_currency(prices[t]), format_currency(dollar_amount), format_position(total_profit)))

        # HOLD: nothing to apply, reward stays 0.

        done = (t == data_length - 1)
        agent.remember(state.to_array(), action[0], reward, next_state.to_array(), done)

        if len(agent.memory) > batch_size:
            loss = agent.train_experience_replay(batch_size)
            avg_loss.append(loss)

        state = next_state

    if episode % 10 == 0:
        agent.save(episode)

    # Avoid numpy's "Mean of empty slice" warning when the episode was too
    # short for any replay update; the reported value is NaN either way.
    mean_loss = np.mean(np.array(avg_loss)) if avg_loss else float("nan")
    return (episode, ep_count, total_profit, mean_loss)


def evaluate_model(agent, data, debug):
    """Replay ``data`` with the current agent and report the resulting profit.

    The walk mirrors the training loop (choose an action, apply the trade to
    the next state's portfolio) but performs no replay updates.

    Args:
        agent: Object providing ``choose_action`` and a ``memory`` list.
        data: 2-D array of candles. Column ``-3`` is used as the trade price.
        debug: When true, every step is written to the debug log.

    Returns:
        Tuple ``(total_profit, history)`` where ``history`` is a list of
        ``(data[t], "BUY" | "SELL" | "HOLD")`` pairs, one per step. With fewer
        than two rows there is nothing to replay and ``(0, [])`` is returned.
    """
    total_profit = 0
    data_length = len(data) - 1

    history = []

    # Nothing to step through: return an empty result instead of falling off
    # the end of the function (callers unpack a 2-tuple).
    if data_length < 1:
        return total_profit, history

    # Price trades from the rows being evaluated. The notebook-level
    # price_data is indexed from the start of the full dataset and would
    # score validation trades at unrelated prices.
    prices = data[:, -3]

    # Start from the first row of *this* dataset with the starting portfolio,
    # so the first decision is based on the market features it is traded on.
    state = create_state(data, 0, lstm_model, init_state.balance, init_state.invested)

    for t in range(data_length):
        reward = 0
        next_state = create_state(data, t + 1, lstm_model, state.balance, state.invested)

        # select an action: action[0] is the kind (1 = buy, 2 = sell,
        # anything else = hold), action[1] scales the amount traded.
        action = agent.choose_action(state)

        # BUY
        if action[0] == 1:
            dollar_amount = action[1] * state.balance
            btc_amount = dollar_amount / prices[t]

            next_state.balance -= dollar_amount
            next_state.invested += btc_amount
            history.append((data[t], "BUY"))
            if debug:
                logging.debug("# Validation # Buy at: {} | Amount: {}".format(format_currency(prices[t]), format_currency(dollar_amount)))

        # SELL
        elif action[0] == 2:
            btc_amount = action[1] * state.invested
            dollar_amount = prices[t] * btc_amount

            next_state.balance += dollar_amount
            next_state.invested -= btc_amount

            # Profit is net worth relative to the starting portfolio, both
            # valued at the current price.
            curr_net = next_state.get_net_worth(prices[t])
            init_net = init_state.get_net_worth(prices[t])

            new_profit = curr_net - init_net
            delta = new_profit - total_profit
            reward = delta  # max(delta, 0)
            total_profit += delta
            history.append((data[t], "SELL"))
            if debug:
                logging.debug("# Validation # Sell at: {} | Amount: {} | Profit: {}".format(
                    format_currency(prices[t]), format_currency(dollar_amount), format_position(total_profit)))

        # HOLD
        else:
            history.append((data[t], "HOLD"))
            if debug:
                logging.debug("# Validation # HOLD")

        done = (t == data_length - 1)
        # NOTE(review): this stores validation transitions in the same memory
        # the training loop samples from -- confirm that is intended.
        agent.memory.append((state.to_array(), action[0], reward, next_state.to_array(), done))

        state = next_state

    return total_profit, history

In [7]:
# One training pass followed by one validation pass per episode.
for episode in range(1, ep_count + 1):
    train_result = train_model(
        agent,
        episode,
        train_data,
        ep_count=ep_count,
        batch_size=batch_size,
    )
    # Only the profit is reported; the per-step history is discarded here.
    val_result, _ = evaluate_model(agent, val_data, debug)
    show_train_result(train_result, val_result, initial_offset)

Episode 1/100:   0%|          | 0/23280 [00:00<?, ?it/s][32m2022-05-29 20:53:18[0m [35mLAPTOP-81OAJ65V[0m [34mroot[31004][0m [1;30mDEBUG[0m [32m-- train -- Sell at: $8740.99 | Amount: $0.00 | Profit: +$0.00[0m
[32m2022-05-29 20:53:18[0m [35mLAPTOP-81OAJ65V[0m [34mroot[31004][0m [1;30mDEBUG[0m [32m# train # Buy at: $8728.49 | Amount: $623.71[0m
Episode 1/100:   0%|          | 3/23280 [00:00<19:39, 19.74it/s][32m2022-05-29 20:53:18[0m [35mLAPTOP-81OAJ65V[0m [34mroot[31004][0m [1;30mDEBUG[0m [32m# train # Buy at: $8708.32 | Amount: $6.19[0m
[32m2022-05-29 20:53:18[0m [35mLAPTOP-81OAJ65V[0m [34mroot[31004][0m [1;30mDEBUG[0m [32m# train # Buy at: $8795.90 | Amount: $164.39[0m
Episode 1/100:   0%|          | 5/23280 [00:00<21:02, 18.44it/s][32m2022-05-29 20:53:18[0m [35mLAPTOP-81OAJ65V[0m [34mroot[31004][0m [1;30mDEBUG[0m [32m-- train -- Sell at: $8760.00 | Amount: $521.93 | Profit: +$1.62[0m
[32m2022-05-29 20:53:18[0m [35mLAPTOP-81OAJ65V

KeyboardInterrupt: 