In [None]:
import pandas as pd
import os
import optuna
import numpy as np

In [None]:
# Location of the exported bar data to analyse.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
folder = "/mnt/c/SierraChart/Data"
file = "lsc-01-volumebars-350.txt"

# Full path of the export; read by the pd.read_csv calls in the cells below.
fn = os.path.join(folder, file)

In [None]:
# Load the raw export as-is and show its last rows transposed
# (one column per bar), which is easier to read for a wide frame.
df = pd.read_csv(fn)
df.tail().T

In [None]:
def clean_data(df):
    """Return a cleaned, time-indexed copy of the raw export.

    The input frame is left untouched; all work happens on a copy so the
    function can be re-run on the same raw frame with the same result.

    Steps:
      1. Strip surrounding whitespace from the column names.
      2. Combine the ``Date`` and ``Time`` text columns into a timestamp,
         localise it to ``US/Mountain`` and use it as the (sorted) index,
         named ``dt``.
      3. Cast the OHLC price columns to ``float``.
      4. Keep only the columns used by the rest of the notebook.

    Args:
        df: raw frame as returned by ``pd.read_csv`` on the export file.
            Must contain ``Date``, ``Time`` and every column in ``keep_cols``
            (column names may carry leading/trailing spaces).

    Returns:
        A new ``pd.DataFrame`` indexed by ``dt`` with the columns listed in
        ``keep_cols``, in that order.

    Raises:
        KeyError: if a required column is missing.
    """
    # Columns retained for the analysis.  Other columns present in the
    # export and intentionally dropped: Date, Time, Volume, # of Trades,
    # OHLC Avg, HLC Avg, HL Avg, Bid Volume, Ask Volume, Line, Null, 0.89,
    # -0.89, TR, Open.1, High.1, Low.1, Last.1, Open.2, High.2, Low.2, Last.2.
    keep_cols = [
        'Open', 'High', 'Low', 'Last',
        'ADX',
        'Buy',
        'Sell',
        'MACD',
        'MA of MACD',
        'MACD Diff',
        'EOT',
    ]
    float_columns = ['Open', 'High', 'Low', 'Last']

    # rename() returns a new frame, so the caller's frame is never modified.
    cleaned = df.rename(columns=str.strip)

    # Build the timestamp index from the textual Date / Time columns.
    stamps = cleaned['Date'].str.cat(cleaned['Time'], sep=" ")
    cleaned['dt'] = pd.to_datetime(stamps).dt.tz_localize("US/Mountain")
    cleaned = cleaned.set_index("dt").sort_index()

    cleaned = cleaned.astype({col: float for col in float_columns})

    # Explicit copy: downstream cells add columns to the result, and a plain
    # column selection can trigger pandas' SettingWithCopyWarning.
    return cleaned[keep_cols].copy()


def format_column_names(df):
    """Normalise column labels to lower-case with underscores.

    Every label is lower-cased and each space is replaced by ``_``.
    The frame is relabelled in place and the same object is returned so the
    call can be used in an assignment.
    """
    def _normalise(label):
        return "_".join(label.lower().split(" "))

    df.columns = list(map(_normalise, df.columns))
    return df


def gen_features(df, forecasters_path='/home/weston/work/lsc/forecasters'):
    """Add the ``buysell_macd`` signal column derived from ``df.macd``.

    The column holds whatever ``detect_crossovers_vline(df.macd, y_value=0.0)``
    returns; that helper lives in the external ``forecasters`` package, so its
    exact output is defined there, not in this notebook.

    Args:
        df: frame with a ``macd`` column.  It is modified in place.
        forecasters_path: directory added to ``sys.path`` so the
            ``forecasters`` package can be imported.  Defaults to the
            original hard-coded checkout location.

    Returns:
        The same ``df`` object, with the new ``buysell_macd`` column.

    Raises:
        ImportError: if the ``forecasters`` package cannot be imported.
    """
    import sys

    # Register the checkout only once; inserting unconditionally would grow
    # sys.path by one duplicate entry on every call / cell re-run.
    if forecasters_path not in sys.path:
        sys.path.insert(0, forecasters_path)
    from forecasters.features.macd import detect_crossovers_vline

    df['buysell_macd'] = detect_crossovers_vline(df.macd, y_value=0.0)
    return df

# Extract the raw export, then transform it in three stages:
# column selection / typing, column-name normalisation, feature generation.
df = (
    pd.read_csv(fn)
    .pipe(clean_data)
    .pipe(format_column_names)
    .pipe(gen_features)
)

In [None]:
# Preview the last rows of the transformed frame.
df.tail()

In [None]:
def frange(start, stop, step=1):
    """Yield ``start, start + step, start + 2*step, ...`` while below ``stop``.

    A ``range``-like generator that also accepts non-integer steps.  Each
    value is computed as ``start + k * step`` instead of by repeated
    addition, so floating-point error does not accumulate (repeated addition
    makes ``frange(0, 1, 0.1)`` emit an extra value just below 1).

    Args:
        start: first value produced (if it is below ``stop``).
        stop: exclusive upper bound.
        step: increment between values; must be positive whenever
            ``start < stop``.

    Yields:
        The successive values, in increasing order.

    Raises:
        ValueError: on first iteration, if ``step <= 0`` while
            ``start < stop`` (the sequence would never terminate).
    """
    if start < stop and step <= 0:
        raise ValueError("frange() step must be positive when start < stop")

    index = 0
    value = start
    while value < stop:
        yield value
        index += 1
        value = start + index * step

def geo_mean(iterable):
    """Return the geometric mean of the values in ``iterable``.

    The mean is computed in the log domain (``exp(mean(log|x|))``) rather
    than as ``prod(x) ** (1/n)``: the raw product overflows to ``inf`` (or
    wraps around for integer input) once there are more than a few hundred
    moderately sized values.

    Args:
        iterable: array-like of numbers.

    Returns:
        * ``nan`` for empty input (the original raised ``ZeroDivisionError``),
        * ``0.0`` if any value is zero,
        * ``nan`` if the product of the values is negative (odd number of
          negative values),
        * otherwise the geometric mean as a float.
    """
    a = np.asarray(list(iterable), dtype=float)
    if a.size == 0:
        return float("nan")
    if np.any(a == 0):
        # A single zero makes the whole product, and hence the mean, zero.
        return 0.0

    magnitude = np.exp(np.log(np.abs(a)).mean())
    # A negative product has no real n-th root in general.
    if np.count_nonzero(a < 0) % 2:
        return float("nan")
    return magnitude

def run_simulation(df: pd.DataFrame, take_profit: float, stop_loss: float):
    """Back-test the ``buysell_macd`` signal with fixed take-profit / stop-loss exits.

    The frame is walked row by row while holding at most one unit, long (+1)
    or short (-1).  For every row:

    1. An open position is marked against the row's ``last`` price.  Its
       P&L is ``direction * (last - entry_price) - SLIPPAGE``, so a short
       gains when the price falls.  The position is closed when that P&L
       reaches ``take_profit`` or drops to ``-stop_loss``.
    2. The row's ``buysell_macd`` value is then applied: a positive value
       opens a long when flat and closes an open short; a negative value
       opens a short when flat and closes an open long.  A position closed
       this way is recorded as a trade as well.

    Args:
        df: frame with columns ``last`` and ``buysell_macd`` whose index
            values are timestamps (their differences must provide
            ``total_seconds()``).
        take_profit: P&L (in price units, after slippage) at which an open
            position is closed for a gain.
        stop_loss: loss (positive number, in price units, after slippage) at
            which an open position is closed.

    Returns:
        dict with
          * ``trade_pnl``: array of per-trade P&L,
          * ``trade_pnl_total``: their sum,
          * ``trades_winning_percent`` / ``trades_loosing_percent``:
            fraction (0..1) of trades with P&L > 0 and its complement,
          * ``trades_pnl_median`` / ``trades_pnl_mean`` / ``trades_pnl_std``,
          * ``trade_time_diff``: per-trade holding time in minutes,
          * ``trade_time_geomean``: geometric mean of the holding times.
        When no trade was closed the total is ``0.0`` and every ratio /
        statistic is ``nan`` instead of raising ``ZeroDivisionError``.
    """
    SLIPPAGE = 0.02  # bid-ask spread + commission
    nan = float("nan")

    results = {}

    positions = 0
    positions_entry_price = 0.0
    positions_entry_datetime = None
    trade_pnl = []
    trade_time = []

    for timestamp, row in df.iterrows():
        price = row['last']
        signal = row['buysell_macd']

        if positions != 0:
            # positions is +1 or -1, so it doubles as the trade direction.
            diff = positions * (price - positions_entry_price) - SLIPPAGE
            if diff >= take_profit or diff <= -stop_loss:
                trade_pnl.append(diff)
                # total_seconds() keeps whole days; .seconds would drop them.
                trade_time.append((timestamp - positions_entry_datetime).total_seconds())
                positions = 0

        if signal > 0 and positions < 1:
            direction = 1
        elif signal < 0 and positions > -1:
            direction = -1
        else:
            continue

        if positions != 0:
            # Opposite signal while in a trade: flatten and realise the P&L
            # so the closed trade is not silently lost from the statistics.
            trade_pnl.append(positions * (price - positions_entry_price) - SLIPPAGE)
            trade_time.append((timestamp - positions_entry_datetime).total_seconds())
            positions = 0
        else:
            positions = direction
            positions_entry_price = price
            positions_entry_datetime = timestamp

    trade_pnl = np.array(trade_pnl, dtype=float)
    total_trades = trade_pnl.size
    results['trade_pnl'] = trade_pnl
    results['trade_pnl_total'] = trade_pnl.sum()

    if total_trades:
        winning_trades_percent = np.count_nonzero(trade_pnl > 0.0) / total_trades
        results['trades_winning_percent'] = winning_trades_percent
        results['trades_loosing_percent'] = 1.0 - winning_trades_percent
        results['trades_pnl_median'] = np.median(trade_pnl)
        results['trades_pnl_mean'] = np.mean(trade_pnl)
        results['trades_pnl_std'] = np.std(trade_pnl)
    else:
        # No closed trades: the ratios and statistics are undefined.
        results['trades_winning_percent'] = nan
        results['trades_loosing_percent'] = nan
        results['trades_pnl_median'] = nan
        results['trades_pnl_mean'] = nan
        results['trades_pnl_std'] = nan

    trade_time_mins = np.array(trade_time, dtype=float) / 60.0
    results['trade_time_diff'] = trade_time_mins
    results['trade_time_geomean'] = geo_mean(trade_time_mins) if total_trades else nan
    return results

# run_simulation(
#     df=df, 
#     take_profit=0.33, #study.best_params['take_profit'], 
#     stop_loss=0.33, #study.best_params['stop_loss']
# )

In [None]:
def objective(trial):
    """Optuna objective: simulate one (take_profit, stop_loss) pair.

    Both parameters are sampled as floats in [0.05, 0.6] and the simulation
    is run on the notebook-level ``df``.

    Returns:
        Tuple ``(total P&L, standard deviation of per-trade P&L)``.
    """
    sampled = {
        name: trial.suggest_float(name, 0.05, 0.6)
        for name in ('take_profit', 'stop_loss')
    }
    outcome = run_simulation(df, **sampled)
    return outcome['trade_pnl_total'], outcome['trades_pnl_std']

# Silence per-trial log lines; only warnings and errors are shown.
optuna.logging.set_verbosity(optuna.logging.WARNING)
# Two objectives, in the order returned by objective():
# maximise total P&L, minimise the per-trade P&L standard deviation.
# NOTE(review): no sampler seed is set, so results presumably differ between runs.
study = optuna.create_study(directions=['maximize','minimize'])
study.optimize(objective, n_trials=1000)

# study.best_trials

In [None]:
# Take the first entry of study.best_trials and replay the simulation with its
# parameters. NOTE(review): for a two-objective study best_trials is the set of
# Pareto-optimal trials, so index 0 is just the first one listed, not "the" best.
trial = study.best_trials[0]

results = run_simulation(
    df=df, 
    take_profit=trial.params['take_profit'], 
    stop_loss=trial.params['stop_loss']
)

# Total P&L over all closed trades for that parameter set.
print(sum(results['trade_pnl']))

In [None]:
# NOTE(review): plot_parallel_coordinate is imported here but not used in this cell.
from optuna.visualization import plot_parallel_coordinate

# Pareto front of the two objectives; target_names label the axes in the
# order the objective values are returned (total P&L, P&L std).
optuna.visualization.plot_pareto_front(study, target_names=["trades_pnl_total", "trades_pnl_std"])

In [None]:
# Parameter importances with respect to the first objective value
# (values[0], the total P&L returned by objective()).
optuna.visualization.plot_param_importances(
    study, target=lambda t: t.values[0], target_name="trades_pnl_total"
)

In [None]:
# Parameter importances with respect to the second objective value
# (values[1], the per-trade P&L standard deviation returned by objective()).
optuna.visualization.plot_param_importances(
    study, target=lambda t: t.values[1], target_name="trades_pnl_std"
)

In [None]:
# Tabulate every trial in study.best_trials: one row per trial holding its
# number, the sampled parameters and the objective values.
rows = [
    [best.number, *best.params.values(), *best.values]
    for best in study.best_trials
]

# Column names are taken from the first trial: 'number', then the parameter
# names, then value_0 .. value_{n-1} for the objective values.
reference = study.best_trials[0]
headers = (
    ['number']
    + list(reference.params)
    + [f"value_{idx}" for idx in range(len(reference.values))]
)

params_df = pd.DataFrame(rows, columns=headers)
# NOTE(review): the sorted slice below is neither assigned nor displayed,
# so this statement has no visible effect.
params_df.sort_values(['number'], ascending=False)[0:50]
# Ratio of stop-loss distance to take-profit distance for each trial.
params_df['value_loss_to_profit'] = params_df['stop_loss'] / params_df['take_profit']

params_df.head(20)

In [None]:
# Run the simulation once with hand-picked parameters and display the full
# results dict for inspection.
run_simulation(
    df=df, 
    take_profit=0.30,
    stop_loss=0.50,
)