In [9]:
import warnings
warnings.filterwarnings("ignore")

import re
import os
import random
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch

from DataLoader.DataLoader import DataLoader
from DataLoader.DataBasedAgent import DataBasedAgent
from DataLoader.DataRLAgent import DataRLAgent
import DeepRLAgent.VanillaInput.Train as Train
from PatternDetectionInCandleStick.Evaluation import Evaluation
import distinctipy

from importlib import reload

# Re-import the training module so that edits made to it on disk are picked up
# without restarting the kernel.
Train = reload(Train)
# Short alias for the trainer class exposed by the reloaded module.
DeepRL = Train.Train
from utils_best_arm import add_train_portfo, add_test_portfo, plot_return, calc_return, plot_action_point, setup_logger
# Allow up to 100 characters per column when DataFrames are displayed.
pd.options.display.max_colwidth = 100

# Device handed to the RL data agents; fixed to CPU here. Swap in the commented
# line below to use CUDA when it is available.
device = "cpu"
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Working directory at the time this cell is executed.
CURRENT_PATH = os.getcwd()

In [10]:
def find_best_arm(_file, begin_date, end_date, n_seeds=100, results_dir="./Results"):
    """Return the reward type that was selected most often across the per-seed training logs.

    For every seed in ``range(n_seeds)`` the log file
    ``{results_dir}/{_file}/{begin_date}~{end_date}/{seed}/train_log/{seed}.log``
    is read and its first ``final reward type: <reward>_<seed>`` line is parsed.

    Args:
        _file: Dataset directory name under ``results_dir``.
        begin_date: Begin date string used in the results directory name.
        end_date: End date string used in the results directory name.
        n_seeds: Number of seeds (``0 .. n_seeds - 1``) whose logs are inspected.
        results_dir: Root directory that holds the per-dataset results.

    Returns:
        The reward type string with the highest count. On a tie, the reward
        type that was encountered first (i.e. at the lowest seed) wins.

    Raises:
        ValueError: if ``n_seeds`` is smaller than 1.
        FileNotFoundError: if a seed's log file does not exist (raised by ``open``).
        IndexError: if a log file contains no "final reward type" line for its seed.
    """
    # TODO: find best arm of all seeds
    if n_seeds < 1:
        raise ValueError(f"n_seeds must be at least 1, got {n_seeds}")

    counts = collections.Counter()
    for seed in range(n_seeds):
        path = f"{results_dir}/{_file}/{begin_date}~{end_date}/{seed}/train_log/{seed}.log"
        with open(path, "r", encoding="utf8") as f:
            content = f.read()

        # The logged value has the form "<reward>_<seed>"; the lazy group strips the seed suffix.
        match = re.search(f"final reward type: (.*?)_{seed}\n", content)
        if match is None:
            # Same exception type as indexing an empty findall() result, but with context.
            raise IndexError(f"no 'final reward type' entry for seed {seed} in {path}")
        counts[match.group(1)] += 1

    # Counter keeps insertion order, so ties resolve to the reward seen first.
    return counts.most_common(1)[0][0]


# Example lookup: the reward type selected most often across the seed logs
# for AAPL on the 2016-01-01 ~ 2019-01-01 window.
_file = "AAPL"
begin_date, end_date = "2016-01-01", "2019-01-01"
find_best_arm(_file, begin_date, end_date)

'regularized_0.1'

In [11]:
def train(
    DATASET_NAME,
    split_point='2018-01-01',
    begin_date='2010-01-01',
    end_date='2020-08-24',
    initial_investment=1000,
    transaction_cost=0.0001,
    load_from_file=True,
    reward_type="profit",
    seed=42,
    state_mode=1,
    n_episodes=5,
    lamb=0.0001,
    GAMMA=0.7,
    n_step=5,
    BATCH_SIZE=10,
    ReplayMemorySize=20,
    TARGET_UPDATE=5,
    window_size=None,
    train_portfolios=None,
    test_portfolios=None,
    arms=None,
    show_all=False,
    ratio_threshold=0.9,
):
    """Evaluate a saved DQN model on the test split and return its daily returns.

    Despite its name, this function performs no training in the code shown
    here: it builds the data loader and agents, then calls ``agent.test`` with
    the model file
    ``./Results/{DATASET_NAME}/{begin_date}~{end_date}/{seed}/train/model_{reward_type}_{seed}.pkl``.

    Args:
        DATASET_NAME: Dataset name passed to ``DataLoader`` and used in the results path.
        split_point: Date string passed to ``DataLoader`` as the train/test split.
        begin_date: Begin date string passed to ``DataLoader`` and used in the results path.
        end_date: End date string passed to ``DataLoader`` and used in the results path.
        initial_investment: Initial investment forwarded to ``agent.test``.
        transaction_cost: Transaction cost forwarded to the data agents and the DQN agent.
        load_from_file: Forwarded to ``DataLoader``.
        reward_type: Reward name used to select the saved model file.
        seed: Seed for ``numpy``, ``random`` and ``torch``; also part of the model path.
        state_mode, GAMMA, n_step, BATCH_SIZE, window_size: Forwarded to the data agents / DQN agent.
        ReplayMemorySize, TARGET_UPDATE: Forwarded to the DQN agent.
        arms: List forwarded to the DQN agent; ``None`` means an empty list.
        n_episodes, lamb, train_portfolios, test_portfolios, show_all, ratio_threshold:
            Accepted so callers can pass a shared kwargs dict, but not used by this function.

    Returns:
        A tuple ``(data_loader, result)`` where ``result`` is a dict with
        ``"name"`` (a descriptive model label) and ``"portfo"`` (a list of
        day-over-day fractional changes of the test portfolio value, with the
        first entry set to 0).
    """
    # Use a fresh list per call instead of a mutable default that would be shared between calls.
    if arms is None:
        arms = []

    data_loader = DataLoader(DATASET_NAME, split_point=split_point, begin_date=begin_date, end_date=end_date, load_from_file=load_from_file)

    dataTrain_agent = DataRLAgent(data_loader.data_train, state_mode, 'action_encoder_decoder', device, GAMMA, n_step, BATCH_SIZE, window_size, transaction_cost)
    dataTest_agent = DataRLAgent(data_loader.data_test, state_mode, 'action_encoder_decoder', device, GAMMA, n_step, BATCH_SIZE, window_size, transaction_cost)

    # NOTE(review): seeding happens after the data agents are constructed; if their
    # constructors draw random numbers, those draws are not covered by `seed` - confirm.
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)

    agent = DeepRL(data_loader, dataTrain_agent, dataTest_agent,
                DATASET_NAME,  state_mode, window_size, transaction_cost,
                BATCH_SIZE=BATCH_SIZE, GAMMA=GAMMA, ReplayMemorySize=ReplayMemorySize,
                TARGET_UPDATE=TARGET_UPDATE, n_step=n_step, arms=arms)

    path = f"./Results/{DATASET_NAME}/{begin_date}~{end_date}/{seed}/train"

    model_path = f"{path}/model_{reward_type}_{seed}.pkl"
    agent_test = agent.test(initial_investment=initial_investment, test_type='test', model_path=model_path)
    test_portfolio = agent_test.get_daily_portfolio_value()
    # Convert portfolio values into day-over-day fractional changes; the first day has no
    # predecessor, so its NaN is replaced by 0.
    test_portfolio = pd.Series(test_portfolio).pct_change(1).fillna(0).values.tolist()
    model_name = f'DQN-stock:{DATASET_NAME}-reward:{reward_type}-seed:{seed}'
    return data_loader, {"name": model_name, "portfo": test_portfolio}

In [32]:
initial_investment = 1000

# Settings shared by every train() call; the per-window entries are filled in inside the loop.
kwargs = {
    "load_from_file": True,
    "transaction_cost": 0.0000,
    "initial_investment": initial_investment,
    "state_mode": 1,
    "GAMMA": 0.7,
    "n_step": 5,
    "BATCH_SIZE": 10,
    "ReplayMemorySize": 20,
    "TARGET_UPDATE": 5,
    "window_size": None,
    "lamb": 0.0,
}

# Date templates; "{}" receives the two-digit year of each rolling window.
_begin_date = '20{}-01-01'
_end_date = '20{}-01-01'
_split_point = '20{}-01-01'

# Candidate reward arms. This list is not referenced by the loop below, which passes
# its own single-entry "arms" value to train().
arms = [
    { "name": "profit", "lamb": 0},
    { "name": "regularized", "lamb": 0.01},
    { "name": "regularized", "lamb": 0.05},
    { "name": "regularized", "lamb": 0.1},
    { "name": "regularized", "lamb": 0.2},
    { "name": "sharpe", "lamb": 0.01 },
    { "name": "volatility", "lamb": 10 },
]

results = []
files = sorted(os.listdir("./Data/"))
# Only the 10th entry (index 9) of the sorted data directory is processed in this run.
for _file in files[9:10]:

    # The best arm depends only on the dataset and the date window, not on the seed being
    # evaluated, so it is looked up once per window here rather than once per seed
    # (each lookup re-reads the training log of every seed).
    windows = []
    for year in range(4):
        begin_date = _begin_date.format(16+year)
        end_date = _end_date.format(19+year)
        split_point = _split_point.format(18+year)
        arm = find_best_arm(_file, begin_date, end_date)
        windows.append((begin_date, end_date, split_point, arm))

    results2 = []
    for seed in tqdm(range(100)):

        # Concatenated daily returns of the model over all windows, and the matching close prices.
        ls = []
        bhs = []

        train_portfolios = {}
        test_portfolios = {}

        for begin_date, end_date, split_point, arm in windows:
            kwargs.update({
                "begin_date": begin_date,
                "end_date": end_date,
                "split_point": split_point,
                "DATASET_NAME": _file,
                "reward_type": arm,
                "seed": seed,
                "n_episodes": 140,
                # NOTE: this "arms" value has no effect here
                "arms": [{"name": "profit", "lamb": 0}],
                "show_all": True,
                "ratio_threshold": 3,
                "train_portfolios": train_portfolios,
                "test_portfolios": test_portfolios,
            })

            data_loader, model = train(**kwargs)
            ls.extend(model["portfo"])
            bh = data_loader.data_test_with_date["close"]
            bhs.append(bh)

        add_test_portfo(test_portfolios, seed, ls)
        # The buy-and-hold baseline is computed once (on the first seed); bh_percentage is
        # then reused unchanged for all later seeds of the same dataset.
        if seed == 0:
            bhs = pd.concat(bhs, axis=0)
            bh_percentage = bhs.pct_change(1).fillna(0).values
            add_test_portfo(test_portfolios, 'B&H', bh_percentage)
        indexes = calc_return(bh_percentage, test_portfolios)
        results2.append(indexes)

    # Columns: one per seed plus the single "B&H" column contributed by seed 0.
    results2_df = pd.concat(results2, axis=1)
    results2_bh = results2_df["B&H"]
    del results2_df["B&H"]
    # Summarise the per-seed metrics by their median and place them next to the baseline.
    final = pd.concat([
        results2_bh,
        results2_df.median(axis=1)
    ], axis=1)
    final.columns = [f"{_file}-B&H", f"{_file}-concat"]
    results.append(final)


100%|██████████| 100/100 [05:23<00:00,  3.23s/it]


In [33]:
# Place the per-dataset summary tables side by side (one pair of columns per dataset).
results_df = pd.concat(results, axis=1)
results_df

Unnamed: 0,GS-B&H,GS-concat
sortino_test,0.716385,4.669564
sharpe_test,0.522315,2.860998
risk_test,0.338601,0.234699
mdd_test,0.487488,0.258174
downrisk_test,0.246873,0.143141
cumreturn_test,0.613265,12.232069


In [7]:
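# Uncomment to persist the table. Note: this overwrites the CSV with only the
# datasets that were processed in the current run.
# results_df.to_csv("concat-single-reward.csv")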

In [36]:
# Load the previously saved summary table; the first CSV column holds the metric names
# and becomes the index.
df = pd.read_csv("concat-single-reward.csv", index_col=[0])
df

Unnamed: 0,AAPL-B&H,AAPL-concat,AMGN-B&H,AMGN-concat,AXP-B&H,AXP-concat,BA-B&H,BA-concat,CAT-B&H,CAT-concat,CRM-B&H,CRM-concat,CSCO-B&H,CSCO-concat,CVX-B&H,CVX-concat,DIS-B&H,DIS-concat,GS-B&H,GS-concat
sortino_test,1.72205,3.423331,0.660905,4.175568,0.771248,3.464067,0.130443,2.645072,0.604627,3.617785,1.077725,3.772771,0.852705,4.223911,0.312703,3.389748,0.630877,3.988766,0.716385,4.669564
sharpe_test,1.277563,2.278933,0.466487,2.337278,0.556007,2.082764,0.095264,1.684173,0.449483,2.361963,0.78762,2.315632,0.668477,2.719119,0.252304,2.043845,0.437243,2.255794,0.522315,2.860998
risk_test,0.328437,0.248308,0.267469,0.193599,0.379724,0.304021,0.518244,0.417461,0.333272,0.23735,0.367264,0.284562,0.292008,0.188063,0.362109,0.247447,0.314345,0.230827,0.338601,0.234699
mdd_test,0.385159,0.264536,0.245339,0.106565,0.496385,0.343494,0.779201,0.572681,0.433589,0.168368,0.357158,0.202288,0.419511,0.144414,0.557739,0.257398,0.431062,0.194985,0.487488,0.258174
downrisk_test,0.243663,0.164526,0.188788,0.109506,0.273751,0.181743,0.37848,0.265422,0.247756,0.152236,0.268403,0.17327,0.228919,0.119539,0.292166,0.149022,0.217864,0.128429,0.246873,0.143141
cumreturn_test,3.311191,7.375308,0.428277,4.681843,0.752791,9.457958,-0.288336,10.839464,0.455552,6.644689,1.433962,11.163046,0.839542,6.303179,0.105502,5.702894,0.424589,5.95571,0.613265,12.232069
