In [70]:
import pandas as pd
import os
import optuna
import numpy as np

In [71]:
folder = "/mnt/c/SierraChart/Data"
file = "lsc-01-volumebars-350.txt"

fn = os.path.join(folder, file)

In [72]:
df = pd.read_csv(fn)
df.tail().T

Unnamed: 0,1111,1112,1113,1114,1115
Date,2022/11/1,2022/11/1,2022/11/1,2022/11/1,2022/11/1
Time,14:12:40.433,14:28:36.909,14:56:25.721,15:31:18.338,17:26:38.862
Open,87.28,87.04,87.15,87.28,87.55
High,87.3,87.18,87.31,87.64,87.69
Low,87.04,87.0,87.03,87.18,87.54
Last,87.04,87.14,87.28,87.55,87.62
Volume,350,350,350,350,42
# of Trades,283,302,243,220,22
OHLC Avg,87.17,87.09,87.19,87.41,87.6
HLC Avg,87.13,87.11,87.21,87.46,87.62


In [73]:
def clean_data(df):
    # fix colum names
    df.columns = [x.strip() for x in df.columns]
    
    # create dt column
    df['dt'] = df['Date'].str.cat(df.Time, sep=" ")
    df['dt'] = pd.to_datetime(df.dt).dt.tz_localize("US/Mountain")
    df.set_index("dt", inplace=True)
    df.sort_index(inplace=True)
    
    # print(df.columns)
    
    keep_cols = [
        # 'Date', 'Time', 
        'Open', 'High', 'Low', 'Last', 
        # 'Volume', 
        # '# of Trades',
        # 'OHLC Avg', 'HLC Avg', 'HL Avg', 'Bid Volume', 'Ask Volume', 
        'ADX',
        'Buy', 
        'Sell', 
        'MACD', 
        'MA of MACD', 
        'MACD Diff', 
        # 'Line', 
        'EOT', 
        # 'Null',
        # '0.89', 
        # '-0.89', 
        # 'TR',
        # 'Open.1', 
        # 'High.1', 
        # 'Low.1', 
        # 'Last.1', 
        # 'Open.2',
        # 'High.2', 
        # 'Low.2', 
        # 'Last.2', 
        # 'dt'
    ]
    
    float_columns = ['Open', 'High', 'Low', 'Last', ]
    for col in float_columns:
        df[col] = df[col].astype(float)
    
    return df[keep_cols]


def format_column_names(df):
    df.columns = [x.lower().replace(" ", "_") for x in df.columns]
    return df


def gen_features(df):
    from forecasters.features.macd import detect_crossovers_vline
    df['buysell_macd'] = detect_crossovers_vline(df.macd, y_value=0.0)
    return df

# extract data
df = pd.read_csv(fn)

# transform data
df = clean_data(df)
df = format_column_names(df)
df = gen_features(df)

In [75]:
df.tail()

Unnamed: 0_level_0,open,high,low,last,adx,buy,sell,macd,ma_of_macd,macd_diff,eot,buysell_macd
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-11-01 14:12:40.433000-06:00,87.28,87.3,87.04,87.04,10.827,0.0,0.0,-0.071,-0.051,-0.019,0.79,0
2022-11-01 14:28:36.909000-06:00,87.04,87.18,87.0,87.14,10.411,0.0,0.0,-0.072,-0.059,-0.013,0.78,0
2022-11-01 14:56:25.721000-06:00,87.15,87.31,87.03,87.28,10.629,87.01,0.0,-0.052,-0.06,0.008,0.77,0
2022-11-01 15:31:18.338000-06:00,87.28,87.64,87.18,87.55,12.039,0.0,0.0,-0.007,-0.049,0.042,0.78,0
2022-11-01 17:26:38.862000-06:00,87.55,87.69,87.54,87.62,13.502,0.0,0.0,0.038,-0.029,0.067,0.81,1


In [76]:
def frange(start, stop, step=1):
    while start < stop:
        yield start
        start += step

def geo_mean(iterable):
    a = np.array(iterable)
    return a.prod()**(1.0/len(a))

def run_simulation(df: pd.DataFrame, take_profit: float, stop_loss=float):
    SLIPPAGE = 0.02 # bid-ask spread + commission

    results = {}
    
    positions = 0
    positions_entry_price = 0.0
    positions_entry_dateime = None
    trade_pnl = []
    trade_time = []
    
    for i, row in df.iterrows():
        if positions != 0:
            diff = (row['last'] - positions_entry_price) - SLIPPAGE
            diff_seconds = (row.name - positions_entry_dateime).seconds
            
            if diff >= take_profit:
                to_flat_qty = -positions
                positions += to_flat_qty
                trade_pnl.append(diff)
                trade_time.append(diff_seconds)
                
            if diff <= -stop_loss:
                to_flat_qty = -positions
                positions += to_flat_qty
                trade_pnl.append(diff)
                trade_time.append(diff_seconds)
                
        if row.buysell_macd > 0 and positions < 1:
            positions += 1
            positions_entry_price = row['last']
            positions_entry_dateime = row.name
        elif row.buysell_macd < 0 and positions > -1:
            positions -= 1
            positions_entry_price = row['last']
            positions_entry_dateime = row.name
        else:
            continue
            
    trade_pnl = np.array(trade_pnl)
    results['trade_pnl'] = trade_pnl
    results['trade_pnl_total'] = trade_pnl.sum()
    total_trades = trade_pnl.size
    winning_trades_percent = trade_pnl[np.where(trade_pnl > 0.0)].size / total_trades
    loosing_trades_percent = 1.0 - winning_trades_percent
    results['trades_winning_percent'] = winning_trades_percent
    results['trades_loosing_percent'] = loosing_trades_percent
    results['trades_pnl_median'] = np.median(trade_pnl)
    results['trades_pnl_mean'] = np.mean(trade_pnl)
    results['trades_pnl_std'] = np.std(trade_pnl)

    trade_time_mins = np.array(trade_time)/60.0
    results['trade_time_diff'] = trade_time_mins
    results['trade_time_geomean'] = geo_mean(trade_time_mins)
    return results

# run_simulation(
#     df=df, 
#     take_profit=0.33, #study.best_params['take_profit'], 
#     stop_loss=0.33, #study.best_params['stop_loss']
# )

In [77]:
def objective(trial):
    take_profit = trial.suggest_float('take_profit', 0.05, 0.6)
    stop_loss   = trial.suggest_float('stop_loss', 0.05, 0.6)
    results = run_simulation(df, take_profit=take_profit, stop_loss=stop_loss)
    return_value = (
        results['trade_pnl_total'],
        results['trades_pnl_std']
    )
    return return_value

optuna.logging.set_verbosity(optuna.logging.WARNING)
study = optuna.create_study(directions=['maximize','minimize'])
study.optimize(objective, n_trials=1000)

# study.best_trials

In [157]:
trial = study.best_trials[0]

results = run_simulation(
    df=df, 
    take_profit=trial.params['take_profit'], 
    stop_loss=trial.params['stop_loss']
)

print(sum(results['trade_pnl']))

8.34000000000002


In [158]:
from optuna.visualization import plot_parallel_coordinate

optuna.visualization.plot_pareto_front(study, target_names=["trades_pnl_total", "trades_pnl_std"])

In [80]:
optuna.visualization.plot_param_importances(
    study, target=lambda t: t.values[0], target_name="trades_pnl_total"
)

In [81]:
optuna.visualization.plot_param_importances(
    study, target=lambda t: t.values[1], target_name="trades_pnl_std"
)

In [159]:
rows = []

# print(study.best_trials[0])

for x in study.best_trials:
    row = []
    row.append(x.number)
    for _, v in x.params.items():
        row.append(v)
    for v in x.values:
        row.append(v)
    rows.append(row)

headers = ['number']
for k, _ in study.best_trials[0].params.items():
    headers.append(k)

for i, v in enumerate(study.best_trials[0].values):
    headers.append(f"value_{i}")

params_df = pd.DataFrame(rows, columns=headers)
# params_df['value_2'] = np.log(params_df['value_0']/2)
# params_df['value_3'] = params_df['value_1'] / params_df['value_2'] 
params_df.sort_values(['number'], ascending=False)[0:50]
params_df['value_loss_to_profit'] = params_df['stop_loss'] / params_df['take_profit']
# params_df.head(20).value_loss_to_profit.hist(bins=20)

params_df.head(20)

Unnamed: 0,number,take_profit,stop_loss,value_0,value_1,value_loss_to_profit
0,54,0.304391,0.530411,8.34,0.550256,1.742535
1,81,0.252802,0.534918,7.39,0.543838,2.115957
2,140,0.155326,0.43825,5.56,0.493331,2.821478
3,152,0.155326,0.412187,5.06,0.486972,2.653688
4,162,0.419386,0.412546,9.84,0.592679,0.983689
5,170,0.10032,0.222811,2.13,0.414335,2.221012
6,179,0.304662,0.536906,8.34,0.550256,1.762301
7,194,0.252802,0.534918,7.39,0.543838,2.115957
8,196,0.304662,0.534918,8.34,0.550256,1.755775
9,209,0.252802,0.542321,7.39,0.543838,2.145242


In [160]:
run_simulation(
    df=df, 
    take_profit=0.30,
    stop_loss=0.50,
)

{'trade_pnl': array([ 0.81,  0.54,  0.75,  0.66,  0.41, -0.94,  0.32,  0.5 ,  0.43,
        -1.18,  0.49,  0.9 ,  0.6 , -0.7 , -0.58,  0.44,  0.36,  0.32,
        -0.74,  0.43,  0.82,  0.59, -0.76,  0.48,  0.31, -0.7 ,  0.32,
         0.4 ,  0.42, -0.51,  0.62, -0.56,  0.38, -0.52,  0.34,  0.3 ,
         0.31,  0.6 ,  0.52,  0.51, -0.55, -0.84,  0.42, -0.86, -0.73,
         0.53,  0.39,  0.36, -0.57, -0.63,  0.46,  0.36,  0.33,  0.54,
         0.44,  0.34,  0.33,  0.41,  0.35, -0.53,  0.33, -0.61,  0.39,
        -0.88, -0.52,  0.31, -0.59,  0.34, -0.56,  0.41,  0.45]),
 'trade_pnl_total': 7.310000000000006,
 'trades_winning_percent': 0.6901408450704225,
 'trades_loosing_percent': 0.3098591549295775,
 'trades_pnl_median': 0.35000000000000453,
 'trades_pnl_mean': 0.10295774647887332,
 'trades_pnl_std': 0.5487794250603515,
 'trade_time_diff': array([ 35.05      , 287.33333333, 119.        , 515.25      ,
        512.3       ,  49.33333333, 159.56666667,  49.66666667,
          8.3       ,