In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch

from DataLoader.DataLoader import DataLoader
from DataLoader.DataBasedAgent import DataBasedAgent
from DataLoader.DataRLAgent import DataRLAgent
import DeepRLAgent.VanillaInput.Train as Train
from PatternDetectionInCandleStick.Evaluation import Evaluation
import distinctipy

import utils
from importlib import reload
import re
from utils import setup_logger

# Re-execute the project modules so that edits made on disk are picked up
# without restarting the kernel.
Train = reload(Train)
DeepRL = Train.Train
utils = reload(utils)
# Import the helpers only AFTER the reload. `setup_logger` is re-imported here
# too; otherwise it would stay bound to the function object of the pre-reload
# module while every other helper comes from the reloaded one.
from utils import add_train_portfo, add_test_portfo, plot_return, calc_return, plot_action_point, calc_bh, setup_logger
pd.options.display.max_colwidth = 100

# Device handed to the data agents built in `train`; fixed to CPU here.
device = "cpu"
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CURRENT_PATH = os.getcwd()

In [2]:
def train(
    DATASET_NAME, 
    split_point='2018-01-01', 
    begin_date='2010-01-01', 
    end_date='2020-08-24', 
    initial_investment=1000,
    transaction_cost=0.0001,
    load_from_file=True,
    reward_type="profit",
    seed=42, 
    state_mode=1,
    n_episodes=5,
    lamb=0.0001,
    GAMMA=0.7, 
    n_step=5, 
    BATCH_SIZE=10, 
    ReplayMemorySize=20,
    TARGET_UPDATE=5,
    window_size=None, 
    train_portfolios=None,
    test_portfolios=None,
    arms=None,
    show_all = False,
    ratio_threshold=0.9,
):
    """Train the DQN agent on one dataset, evaluate it and log how it ranks.

    The function builds the data loader and data agents, seeds the RNGs,
    trains `DeepRL` over the given `arms`, evaluates the resulting model on
    the train and test splits, optionally evaluates every per-arm model found
    on disk, and finally computes the summary metrics via `calc_bh` /
    `calc_return`.

    Parameters
    ----------
    DATASET_NAME : name of the dataset; forwarded to `DataLoader`/`DeepRL`
        and used in model names and result paths.
    split_point, begin_date, end_date, load_from_file : forwarded to
        `DataLoader`; `begin_date`/`end_date` are also part of the result path.
    initial_investment : forwarded to `agent.test` and `calc_bh`.
    transaction_cost : forwarded to the data agents and to `DeepRL`.
    reward_type : unused by this function (kept for call compatibility).
    seed : seeds `numpy`, `random` and `torch`; also part of model names and
        result paths.
    state_mode, window_size, GAMMA, n_step, BATCH_SIZE, ReplayMemorySize,
        TARGET_UPDATE : forwarded to the data agents and/or `DeepRL`.
    n_episodes : forwarded to `agent.train`; also part of model names.
    lamb : unused by this function (kept for call compatibility).
    train_portfolios, test_portfolios : dicts that receive the portfolio
        series (mutated in place). A fresh dict is created when omitted.
    arms : list of arm dicts (each needs at least "index", "theta", "name"
        and "lamb"); must be non-empty by the time the best arm is selected.
        A fresh list is created when omitted.
    show_all : when true, also evaluate every `*_{seed}.pkl` model stored in
        the run's `train` directory.
    ratio_threshold : forwarded to `agent.train`.

    Returns
    -------
    The object returned by `calc_return` (indexed below as
    `indexes.T["sharpe_train"][model_name]`).
    """
    # Mutable default arguments are shared between calls; create fresh
    # containers per call so results never leak from one run into the next.
    if train_portfolios is None:
        train_portfolios = {}
    if test_portfolios is None:
        test_portfolios = {}
    if arms is None:
        arms = []

    data_loader = DataLoader(DATASET_NAME, split_point=split_point, begin_date=begin_date, end_date=end_date, load_from_file=load_from_file)
    
    dataTrain_agent = DataRLAgent(data_loader.data_train, state_mode, 'action_encoder_decoder', device, GAMMA, n_step, BATCH_SIZE, window_size, transaction_cost)
    dataTest_agent = DataRLAgent(data_loader.data_test, state_mode, 'action_encoder_decoder', device, GAMMA, n_step, BATCH_SIZE, window_size, transaction_cost)
    # NOTE: these two are the buy-and-hold (B&H) baselines. They are not
    # referenced again below; kept because their construction may have side
    # effects on the loader's data - TODO confirm before removing.
    dataTrain_base = DataBasedAgent(data_loader.data_train, data_loader.patterns, 'action_deepRL', device, GAMMA, n_step, BATCH_SIZE, transaction_cost)
    dataTest_base = DataBasedAgent(data_loader.data_test, data_loader.patterns, 'action_deepRL', device, GAMMA, n_step, BATCH_SIZE, transaction_cost)
    
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)

    agent = DeepRL(data_loader, dataTrain_agent, dataTest_agent, 
                DATASET_NAME,  state_mode, window_size, transaction_cost,
                BATCH_SIZE=BATCH_SIZE, GAMMA=GAMMA, ReplayMemorySize=ReplayMemorySize,
                TARGET_UPDATE=TARGET_UPDATE, n_step=n_step, arms=arms)

    agent.train(arms, n_episodes, ratio_threshold, seed, begin_date, end_date)

    # Evaluate the final model (empty model_dir) on both splits.
    agent_eval = agent.test(initial_investment=initial_investment, test_type='train', model_dir="")
    train_portfolio = agent_eval.get_daily_portfolio_value()
    
    agent_test = agent.test(initial_investment=initial_investment, test_type='test', model_dir="")
    test_portfolio = agent_test.get_daily_portfolio_value()

    # Select the best arm: the one with the highest "theta".
    max_index = int(sorted(arms, key=lambda x: x["theta"], reverse=True)[0]["index"])
    arm = arms[max_index]
    final_reward_type = f"{arm['name']}_{arm['lamb']}_{seed}"

    final_model_name = f'DQN-stock:{DATASET_NAME}-final_reward:{final_reward_type}-epochs:{n_episodes}-seed:{seed}'

    add_train_portfo(train_portfolios, final_model_name, train_portfolio)
    add_test_portfo(test_portfolios, final_model_name, test_portfolio)

    if show_all:
        path = f"./Results/{DATASET_NAME}/{begin_date}~{end_date}/{seed}/train"
        # os.listdir returns entries in arbitrary order; sort so the
        # portfolios are always added in the same order.
        for _dir in sorted(os.listdir(path)):
            if f"_{seed}.pkl" not in _dir: continue
            if _dir == f"model_{seed}.pkl": continue

            # Escape the dot so only a literal ".pkl" ends the capture, and
            # skip files that are not named like a per-arm model instead of
            # failing with an IndexError after evaluating them.
            name_match = re.search(r"model_(.*?)\.pkl", _dir)
            if name_match is None: continue
            # Separate local name: do not overwrite the `reward_type` parameter.
            arm_reward_type = name_match.group(1)
            
            model_dir = f"{path}/{_dir}"
            agent_eval = agent.test(initial_investment=initial_investment, test_type='train', model_dir=model_dir)
            train_portfolio = agent_eval.get_daily_portfolio_value()
            
            agent_test = agent.test(initial_investment=initial_investment, test_type='test', model_dir=model_dir)
            test_portfolio = agent_test.get_daily_portfolio_value()

            model_name = f'DQN-stock:{DATASET_NAME}-reward:{arm_reward_type}-epochs:{n_episodes}-seed:{seed}'

            add_train_portfo(train_portfolios, model_name, train_portfolio)
            add_test_portfo(test_portfolios, model_name, test_portfolio)

    # plot_action_point(
    #     "test", 
    #     dataTrain_agent, 
    #     dataTest_agent, 
    #     data_loader, 
    #     "DQN", 
    #     DATASET_NAME, 
    #     begin=0, end=100
    # )
    
    calc_bh(train_portfolios, test_portfolios, data_loader, initial_investment)
    indexes = calc_return(data_loader, train_portfolios, test_portfolios)
    
    # Rank the final model by its train-split Sharpe value among all
    # evaluated portfolios.
    sharpe_train = indexes.T["sharpe_train"]
    final_sharpe = sharpe_train[final_model_name]
    top_3_sharpes = sharpe_train.sort_values(ascending=False).values[:3]
    flag_biggest = bool(final_sharpe == sharpe_train.max())
    flag_top_3 = bool(final_sharpe in top_3_sharpes)
    
    path = f"./Results/{DATASET_NAME}/{begin_date}~{end_date}/{seed}/train_log/"
    logger, handler = setup_logger(f'{DATASET_NAME}-{seed}-final', f'{path}/{seed}.log')
    try:
        # NOTE(review): the first message is labelled "final reward type" but
        # logs the final model's train Sharpe value; text left unchanged.
        logger.info(f"symbol: {DATASET_NAME}, seed: {seed}, final reward type: {final_sharpe}")
        logger.info(f"symbol: {DATASET_NAME}, seed: {seed}, top 3: {top_3_sharpes}")
        logger.info(f"result: biggest: {flag_biggest}, top 3: {flag_top_3}")
    finally:
        # Always detach AND close the handler so repeated calls do not leak
        # open log resources (assumes `handler` is a logging.Handler, as it
        # is passed to logger.removeHandler).
        logger.removeHandler(handler)
        handler.close()
    return indexes


In [3]:
# Two independent, initially empty containers for the portfolio series of the
# train split and the test split.
train_portfolios = dict()
test_portfolios = dict()

In [4]:
# Baseline experiment configuration. `kwargs` holds the default keyword
# arguments for `train`; individual runs override entries via `kwargs.update`.
DATASET_NAME = 'AAPL'
initial_investment = 1000

kwargs = dict(
    DATASET_NAME=DATASET_NAME,
    begin_date='2016-01-01',
    end_date='2019-01-01',
    split_point='2018-01-01',
    load_from_file=True,
    transaction_cost=0.0,
    initial_investment=initial_investment,
    state_mode=1,
    seed=42,
    GAMMA=0.7,
    n_step=5,
    BATCH_SIZE=10,
    ReplayMemorySize=20,
    TARGET_UPDATE=5,
    window_size=None,
    train_portfolios=train_portfolios,
    test_portfolios=test_portfolios,
    lamb=0.0,
)

# NOTE reward_types: profit, sharpe, volatility, regularized
# NOTE sharpe: lamb: 0.01; volatility: lamb: 10

def add_arm(arms, name, lamb):
    """Append a freshly initialised arm record to `arms` (in place).

    Parameters
    ----------
    arms : list of arm dicts; the new arm's "index" is its position in it.
    name : value stored under "name".
    lamb : value stored under "lamb".

    The remaining fields start at fixed values: "theta" 0, "a" 1, "b" 1,
    "used" 0 and empty "sharpe_list" / "cumreturn_list" lists.
    """
    # Build a plain dict and append it. The previous version had a stray
    # trailing comma that wrapped the dict in a 1-tuple and only worked
    # because list.extend unpacked it again.
    arm = {
        "index": len(arms),
        "name": name,
        "theta": 0,
        "a": 1,
        "b": 1,
        "sharpe_list": [],
        "cumreturn_list": [],
        "lamb": lamb,
        "used": 0,
    }
    arms.append(arm)


# os.listdir returns entries in arbitrary order, so `files[:1]` used to pick
# an arbitrary dataset. Sort the listing to make the selection reproducible,
# and skip hidden entries (names starting with ".") so that sorting cannot
# promote e.g. an editor/OS metadata entry to the first position.
files = sorted(entry for entry in os.listdir("./Data/") if not entry.startswith("."))
ls = []  # one `indexes` result per (dataset, seed) run
for _file in files[:1]:
    # NOTE: each seed acts as one independent simulation run.
    for seed in range(20):
        print(_file, seed)
        
        # Fresh containers per run so portfolios from earlier seeds are not
        # mixed into this run's metrics.
        train_portfolios = {}
        test_portfolios = {}

        # Optional cleanup of previously stored models (disabled):
        # model_files = os.listdir("./Results/AAPL/Train/")
        # for m_file in model_files: os.remove(f"./Results/AAPL/Train/{m_file}")

        # Candidate arms handed to `train`; all start with lamb 0.
        arms = []
        add_arm(arms, "old_profit", 0)
        add_arm(arms, "future_profit_1", 0)
        add_arm(arms, "future_profit_10", 0)

        # Override the baseline configuration for this run.
        kwargs.update({
            "DATASET_NAME": _file,
            "reward_type": "",
            "seed": seed,
            "n_episodes": 20,
            "arms": arms,
            "show_all": True,
            "ratio_threshold": 3,
            "train_portfolios": train_portfolios,
            "test_portfolios": test_portfolios,
        })
        
        indexes = train(**kwargs)
        ls.append(indexes)

AAPL 0
AAPL 1
AAPL 2
AAPL 3
AAPL 4
AAPL 5
AAPL 6
AAPL 7
AAPL 8


KeyboardInterrupt: 

In [5]:


# Per-symbol summary on the test split: median annualised Sharpe ratio and
# median total return over all stored portfolios whose key contains the
# symbol, printed next to the same figures for the raw "Close" series.
a = []  # symbols for which at least one portfolio was found
for _file in files:
    symbol = _file.replace(".csv", "")
    # NOTE(review): substring match - a short symbol also matches keys of
    # longer symbols that contain it; confirm this is acceptable.
    keys = [key for key in test_portfolios.keys() if symbol in key]
    
    # Deliberately not named `ls`: that name holds the per-seed results of
    # the training cell and must not be overwritten by this summary.
    sharpe_values = []
    return_values = []
    for key in keys:
        portfolio = test_portfolios[key]
        profit_percentage = pd.DataFrame(portfolio).pct_change(1)
        total_return = portfolio[-1] / portfolio[0] - 1 
        daily_std = profit_percentage.std()[0]
        if daily_std > 0:
            # Annualise with sqrt(252).
            sharpe = np.sqrt(252) * profit_percentage.mean()[0] / daily_std
        else:
            # Flat portfolio: avoid dividing by zero.
            sharpe = 0
        sharpe_values.append(sharpe)
        return_values.append(total_return)
    if len(sharpe_values) == 0: continue
    df = pd.read_csv(f"./Data/{_file}/{_file}.csv")
    # Presumably row 2013 is where the evaluation period starts in the raw
    # file - TODO confirm and derive it from the split date instead.
    df = df.iloc[2013:]
    pct = df["Close"].pct_change(1)
    bh_return = df["Close"].iloc[-1] / df["Close"].iloc[0] - 1
    bh_sharpe = np.sqrt(252) * pct.mean() / pct.std()
    a.append(symbol)
    print(f"stock: {symbol}, sharpe: {round(np.median(sharpe_values), 4)}, bh_sharpe: {round(bh_sharpe, 4)}, total return: {round(np.median(return_values), 4)}, bh_return: {round(bh_return, 4)}")



In [52]:
# colors = distinctipy.get_colors(len(train_portfolios.items()))
# plot_return("train", DATASET_NAME, data_loader, train_portfolios, test_portfolios, colors, indexes)
# plot_return("test", DATASET_NAME, data_loader, train_portfolios, test_portfolios, colors, indexes)

In [80]:
# indexes.loc[(indexes.index != 'mdd_date_train') & (indexes.index != 'mdd_date_test')].mean(axis=1).round(4)