In [2]:
import importlib
import logging
import os
import sys
import time

import numpy as np
import pandas as pd
import tqdm

from utils import *

from importlib import reload
import utils
reload(utils)
from utils import *

def _format_float_3dp(value):
    """Render a float with exactly three digits after the decimal point."""
    return '%.3f' % value


# Show floats in DataFrame/Series output with three decimals.
pd.set_option('display.float_format', _format_float_3dp)


In [3]:

def hold(actions):
    """Handle a 'Hold' decision, selling instead when 'Sell' was the runner-up.

    Reads the notebook-level globals ``agent``, ``stock_prices`` and ``t``
    (the current step of the training loop) instead of taking them as
    parameters.

    Args:
        actions: Action values indexed 0 = Hold, 1 = Buy, 2 = Sell. The
            runner-up entry is overwritten in place when a sale is triggered.

    Returns:
        ``('Hold', actions)`` when a sale was triggered, otherwise ``None``.
    """
    # For three actions, position 1 of the ascending argsort is the
    # second-highest value, i.e. the next most probable action.
    runner_up = np.argsort(actions)[1]
    if runner_up != 2 or len(agent.inventory) <= 0:
        return None

    # Encourage selling for profit and liquidity.
    # NOTE(review): profitability is checked against the cheapest lot, but
    # sell() removes the oldest lot (inventory[0]); the realized profit can
    # therefore differ from this figure -- confirm this is intended.
    best_case_profit = stock_prices[t] - min(agent.inventory)
    if not best_case_profit > 0:
        return None

    sell(t)
    actions[runner_up] = 1  # reset this action's value to the highest
    return 'Hold', actions

def buy(t):
    """Buy one share at the price of step ``t`` when the balance exceeds it.

    Operates on the notebook-level globals ``agent`` and ``stock_prices``.

    Args:
        t: Index into ``stock_prices`` for the current step.

    Returns:
        A ``'Buy: $<price>'`` message when the purchase happened, otherwise
        ``None`` (the balance must be strictly greater than the price).
    """
    price = stock_prices[t]
    if not agent.balance > price:
        return None

    agent.balance -= price
    agent.inventory.append(price)  # remember the purchase price of this lot
    return 'Buy: ${:.2f}'.format(price)

def sell(t):
    """Sell the oldest held share at the price of step ``t``.

    Operates on the notebook-level globals ``agent`` and ``stock_prices`` and
    overwrites the global ``reward`` with the realized profit of the sale.

    Args:
        t: Index into ``stock_prices`` for the current step.

    Returns:
        A ``'Sell: $<price> | Profit: $<profit>'`` message when a share was
        sold, otherwise ``None`` (empty inventory).
    """
    global reward

    if len(agent.inventory) == 0:
        return None

    price = stock_prices[t]
    agent.balance += price
    cost_basis = agent.inventory.pop(0)  # first in, first out
    realized = price - cost_basis
    reward = realized
    return 'Sell: ${:.2f} | Profit: ${:.2f}'.format(price, realized)


In [4]:

# --- Experiment configuration ---
model_name = 'DQN'
stock_name = '0050_2008_2018'
window_size = 10
num_episode = 5
initial_balance = 50000

# --- Data and bookkeeping shared with the training loop ---
stock_prices = stock_close_prices(stock_name)
stock_margin = stock_margins(stock_name)
trading_period = len(stock_prices) - 1  # training period: last valid step index of the input stock data
returns_across_episodes = []
num_experience_replay = 0
action_dict = {0: 'Hold', 1: 'Buy', 2: 'Sell'}

# --- Logging ---
# The file handler opens the log file as soon as it is configured, which
# fails with FileNotFoundError on a fresh checkout without a logs/ directory.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(filename=f'logs/{model_name}_training_{stock_name}.log', filemode='w',
                    format='[%(asctime)s.%(msecs)03d %(filename)s:%(lineno)3s] %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)

logging.info(f'Trading Object:           {stock_name}')
logging.info(f'Trading Period:           {trading_period} days')
logging.info(f'Window Size:              {window_size} days')
logging.info(f'Training Episode:         {num_episode}')
logging.info(f'Model Name:               {model_name}')
logging.info('Initial Portfolio Value: ${:,}'.format(initial_balance))

# select learning model: the agent class is loaded from agents/<model_name>
model = importlib.import_module(f'agents.{model_name}')
# NOTE(review): state_dim is 16 here; presumably 13 market features plus
# 3 portfolio features -- confirm against generate_combined_state.
agent = model.Agent(state_dim=13 + 3, balance=initial_balance,model_name=model_name)

saved_models\DQN_16_dim.h5


In [5]:

import utils
reload(utils)
from utils import *

# Train for `num_episode` episodes; every episode walks the whole price series
# once, acting at each step and learning from replayed experience.
for e in tqdm.tqdm(range(1, num_episode + 1)):
    logging.info(f'\nEpisode: {e}/{num_episode}')

    agent.reset() # reset to initial balance and hyperparameters
    # Per the original note: the price state and the portfolio state are
    # concatenated horizontally to form the input state.
    state = generate_combined_state(0, window_size, stock_prices, stock_margin, agent.balance, len(agent.inventory))

    for t in range(1, trading_period + 1):
        if t % 1000 == 0:
            logging.info(f'\n-------------------Period: {t}/{trading_period}-------------------')

        # sell() overwrites this global with the realized profit of a sale.
        reward = 0
        # NOTE(review): next_state is built before the trade below executes,
        # so the balance and inventory size passed here are pre-trade values.
        next_state = generate_combined_state(t, window_size, stock_prices, stock_margin, agent.balance, len(agent.inventory))

        # Value the pre-trade holdings at the previous step's price. Trades
        # execute at stock_prices[t] and therefore never change the portfolio
        # value themselves; valuing both snapshots at stock_prices[t] made
        # every recorded return rate zero.
        previous_portfolio_value = len(agent.inventory) * stock_prices[t - 1] + agent.balance

        if model_name == 'DDPG':
            actions = agent.act(state, t)
            action = np.argmax(actions)
        else:
            actions = agent.model.predict(state)[0]
            action = agent.act(state)

        # execute position
        logging.info('Step: {}\tHold signal: {:.4} \tBuy signal: {:.4} \tSell signal: {:.4}'.format(t, actions[0], actions[1], actions[2]))
        if action != np.argmax(actions):
            logging.info(f"\t\t'{action_dict[action]}' is an exploration.")
        if action == 0: # hold
            execution_result = hold(actions)
        elif action == 1: # buy
            execution_result = buy(t)
        elif action == 2: # sell
            execution_result = sell(t)

        # check execution result
        if execution_result is None:
            # Nothing was traded: charge the risk-free return the idle cash
            # could have earned (missing opportunity).
            reward -= treasury_bond_daily_return_rate() * agent.balance
        else:
            if isinstance(execution_result, tuple): # hold() returned ('Hold', actions)
                actions = execution_result[1]
                execution_result = execution_result[0]
            logging.info(execution_result)

        # calculate reward
        current_portfolio_value = len(agent.inventory) * stock_prices[t] + agent.balance
        unrealized_profit = current_portfolio_value - agent.initial_portfolio_value
        reward += unrealized_profit

        agent.portfolio_values.append(current_portfolio_value)
        agent.return_rates.append((current_portfolio_value - previous_portfolio_value) / previous_portfolio_value)

        done = t == trading_period
        agent.remember(state, actions, reward, next_state, done)

        # update state
        state = next_state

        # experience replay
        if len(agent.memory) > agent.buffer_size:
            num_experience_replay += 1
            loss, mini_batch = agent.experience_replay()
            logging.info('Episode: {}\tLoss: {:.2f}\tAction: {}\tReward: {:.2f}\tBalance: {:.2f}\tNumber of Stocks: {}'.format(e, loss, action_dict[action], reward, agent.balance, len(agent.inventory)))
            agent.tensorboard.on_batch_end(num_experience_replay, {'loss': loss, 'portfolio value': current_portfolio_value})

        if done:
            portfolio_return = evaluate_portfolio_performance(agent, logging)
            returns_across_episodes.append(portfolio_return)


  0%|          | 0/5 [00:37<?, ?it/s]


KeyboardInterrupt: 

In [25]:
import random

# Scratch inspection cell. `mini_batch` is assigned by the experience-replay
# step of the training loop; as a bare name that is not the cell's last
# expression it only checks that the variable exists.
mini_batch
# Draw 20 distinct indices out of 0..59 (the value shown as the cell output).
random.sample(range(60), k=20)

[27, 5, 57, 49, 15, 14, 36, 55, 24, 37, 10, 7, 39, 17, 22, 13, 18, 6, 43, 23]

In [13]:
    # Persist the trained network(s) of the selected model type. `e` is the
    # episode counter left over from the training loop.
    save_dir = 'saved_models'
    # Create the target directory up front so saving does not depend on it
    # already existing.
    os.makedirs(save_dir, exist_ok=True)

    model_saved = True
    if model_name == 'DQN':
        agent.model.save(os.path.join(save_dir, f'{model_name}_{agent.state_dim}_dim.h5'))
    elif model_name == 'DDQN':
        agent.model.save(os.path.join(save_dir, f'DDQN_ep{e}.h5'))
        agent.model_target.save(os.path.join(save_dir, f'DDQN_ep{e}_target.h5'))
    elif model_name == 'DDPG':
        agent.actor.model.save_weights(os.path.join(save_dir, f'DDPG_ep{e}_actor.h5'))
        agent.critic.model.save_weights(os.path.join(save_dir, f'DDPG_ep{e}_critic.h5'))
    else:
        # Previously this case fell through and still logged 'model saved'.
        model_saved = False
        logging.warning(f'No save routine for model {model_name!r}; nothing was saved')

    if model_saved:
        logging.info('model saved')