In [None]:
from pathlib import Path
import datetime

import talib
import matplotlib.pyplot as plt
import polars as pl
import numpy as np
import lightgbm as lgb
from sklearn import svm, neural_network

import crypto
import data_fetcher

In [None]:
# Base strategies

def ma_strech(df, window_size=10, deviation_rate=0.001, suffix="_mas"):
    """Add moving-average deviation signals to an OHLC frame.

    Columns added (in this order):
      * ``MA_{window_size}``       - rolling mean of ``close``.
      * ``sell_point{suffix}``     - ``(close - MA) / close > deviation_rate``.
      * ``buy_point{suffix}``      - ``(MA - close) / close > deviation_rate``.
      * ``target_price{suffix}``   - copy of ``close``.
      * ``buy_executed{suffix}``   - buy point AND target price above the
        next row's ``low``.
      * ``sell_executed{suffix}``  - sell point AND target price below the
        next row's ``high``.

    Args:
        df: polars DataFrame with ``close``, ``low`` and ``high`` columns.
        window_size: rolling-mean window length, in rows.
        deviation_rate: deviation threshold, as a fraction of ``close``.
        suffix: appended to every signal column name.

    Returns:
        A new DataFrame with the columns above appended.
    """
    ma_name = f"MA_{window_size}"
    sell_name = f"sell_point{suffix}"
    buy_name = f"buy_point{suffix}"
    target_name = f"target_price{suffix}"

    close = pl.col("close")
    moving_avg = pl.col(ma_name)

    # Stage 1: the moving average must exist before the signals reference it.
    with_ma = df.with_columns(
        close.rolling_mean(window_size=window_size).alias(ma_name)
    )

    # Stage 2: raw entry/exit signals and the price we would order at.
    with_signals = with_ma.with_columns(
        ((close - moving_avg) / close > deviation_rate).alias(sell_name),
        ((moving_avg - close) / close > deviation_rate).alias(buy_name),
        close.alias(target_name),
    )

    # Stage 3: a signal counts as executed only if the *next* row's range
    # reaches the target price (shift(-1) looks one row ahead).
    next_low = pl.col("low").shift(-1)
    next_high = pl.col("high").shift(-1)
    target = pl.col(target_name)
    return with_signals.with_columns(
        ((target > next_low) & pl.col(buy_name)).alias(f"buy_executed{suffix}"),
        ((target < next_high) & pl.col(sell_name)).alias(f"sell_executed{suffix}"),
    )


def breakout(df, window_size=30, suffix="_bo"):
    """Add rolling-extreme breakout signals to an OHLC frame.

    Columns added (in this order):
      * ``buy_point{suffix}``      - ``close`` is within 1e-5 of its rolling
        maximum over ``window_size`` rows.
      * ``sell_point{suffix}``     - ``close`` is within 1e-5 of its rolling
        minimum over ``window_size`` rows.
      * ``target_price{suffix}``   - copy of ``close``.
      * ``buy_executed{suffix}``   - buy point AND target price above the
        next row's ``low``.
      * ``sell_executed{suffix}``  - sell point AND target price below the
        next row's ``high``.

    Args:
        df: polars DataFrame with ``close``, ``low`` and ``high`` columns.
        window_size: rolling max/min window length, in rows.
        suffix: appended to every signal column name.

    Returns:
        A new DataFrame with the columns above appended.
    """
    buy_name = f"buy_point{suffix}"
    sell_name = f"sell_point{suffix}"
    target_name = f"target_price{suffix}"

    close = pl.col("close")
    gap_to_high = close.rolling_max(window_size=window_size) - close
    gap_to_low = close - close.rolling_min(window_size=window_size)

    # Raw signals: the bar closes (numerically) at the window's extreme.
    with_signals = df.with_columns(
        (gap_to_high < 1e-5).alias(buy_name),
        (gap_to_low < 1e-5).alias(sell_name),
        close.alias(target_name),
    )

    # A signal counts as executed only if the *next* row's range reaches the
    # target price (shift(-1) looks one row ahead).
    target = pl.col(target_name)
    next_low = pl.col("low").shift(-1)
    next_high = pl.col("high").shift(-1)
    return with_signals.with_columns(
        ((target > next_low) & pl.col(buy_name)).alias(f"buy_executed{suffix}"),
        ((target < next_high) & pl.col(sell_name)).alias(f"sell_executed{suffix}"),
    )

In [None]:
def get_df(symbol, start_date, end_date, interval):
    """Fetch OHLC bars, attach features and drop rows containing NaN floats.

    Args:
        symbol: instrument identifier forwarded to the fetcher.
        start_date: start of the requested range, forwarded to the fetcher.
        end_date: end of the requested range, forwarded to the fetcher.
        interval: bar length in minutes.

    Returns:
        The frame produced by ``crypto.features.calc_features`` with every
        row removed in which any Float32/Float64 column is NaN.
    """
    bar_length = datetime.timedelta(minutes=interval)
    ohlc = data_fetcher.gmo.GMOFethcer().fetch_ohlc(
        symbol, interval=bar_length, start_date=start_date, end_date=end_date
    )
    featured = crypto.features.calc_features(ohlc)

    # Keep a row only when none of its float columns is NaN
    # (presumably indicator warm-up rows - confirm against calc_features).
    floats_not_nan = pl.col(pl.Float32, pl.Float64).is_not_nan()
    return featured.filter(pl.all_horizontal(floats_not_nan))

def calc_profits(df, suffix):
    """Append a ``profits{suffix}`` column computed by the trade simulator.

    Calls ``crypto.simulate.simulate_long_trade`` with the
    ``sell_executed{suffix}`` and ``buy_executed{suffix}`` column names, the
    ``target_price{suffix}`` column name twice, and ``wall_timestep=100``.
    NOTE(review): the meaning of each positional argument and of
    ``wall_timestep`` is defined by the simulator, not visible here - confirm.

    Args:
        df: frame that already holds the executed-signal and target-price
            columns for ``suffix``.
        suffix: strategy suffix used to build the column names.

    Returns:
        ``df`` with the simulator's result added as ``profits{suffix}``.
    """
    price_col = f"target_price{suffix}"
    simulated = crypto.simulate.simulate_long_trade(
        df,
        f"sell_executed{suffix}",
        f"buy_executed{suffix}",
        price_col,
        price_col,
        wall_timestep=100,
    )
    profits = pl.Series(simulated).alias(f"profits{suffix}")
    return df.with_columns(profits)

In [None]:
# --- Experiment configuration ------------------------------------------------
symbol = "BTC_JPY"  # instrument passed to the data fetcher
interval = 1        # bar length in minutes
suffix = "_mas"     # column-name suffix shared by the base strategy and profit columns

# Feature columns fed to the regressor, in model-input order.
# NOTE(review): the names look like TA-Lib indicator outputs and are presumably
# produced by crypto.features.calc_features - confirm.
feat_keys = [
    "BBANDS_upperband",
    "BBANDS_middleband",
    "BBANDS_lowerband",
    "DEMA",
    "EMA",
    "HT_TRENDLINE",
    "KAMA",
    "MA",
    "MIDPOINT",
    "SMA",
    "T3",
    "TEMA",
    "TRIMA",
    "WMA",
    "LINEARREG",
    "LINEARREG_INTERCEPT",
    "AD",
    "ADOSC",
    "APO",
    "HT_PHASOR_inphase",
    "HT_PHASOR_quadrature",
    "LINEARREG_SLOPE",
    "MACD_macd",
    "MACD_macdsignal",
    "MACD_macdhist",
    "MINUS_DM",
    "MOM",
    "OBV",
    "PLUS_DM",
    "STDDEV",
    "TRANGE",
    "ADX",
    "ADXR",
    "AROON_aroondown",
    "AROON_aroonup",
    "AROONOSC",
    "BOP",
    "CCI",
    "DX",
    "MFI",
    "MINUS_DI",
    "PLUS_DI",
    "RSI",
    "STOCH_slowk",
    "STOCH_slowd",
    "STOCHF_fastk",
    "STOCHF_fastd",
    "STOCHRSI_fastk",
    "STOCHRSI_fastd",
    "TRIX",
    "ULTOSC",
    "WILLR",
    "ATR",
    "NATR",
    "HT_DCPERIOD",
    "HT_DCPHASE",
    "HT_SINE_sine",
    "HT_SINE_leadsine",
    "HT_TRENDMODE",
    "BETA",
    "CORREL",
    "LINEARREG_ANGLE",
]


def base_alg_func(x, suffix):
    """Base strategy whose executed buy signals the regressor learns to filter."""
    # Alternative base strategy:
    #   return breakout(x, window_size=10, suffix=suffix)
    return ma_strech(x, window_size=10, deviation_rate=0.0005, suffix=suffix)


In [None]:
def train(symbol, start_date, end_date, interval, feat_keys, base_alg_func, suffix=None):
    """Fit a LightGBM regressor that predicts the profit of executed buy signals.

    The frame for ``[start_date, end_date]`` is loaded with ``get_df``, the
    base strategy adds its signal columns, ``calc_profits`` adds
    ``profits{suffix}``, and only rows where ``buy_executed{suffix}`` is true
    are used as training samples.

    Args:
        symbol: instrument identifier forwarded to ``get_df``.
        start_date: start of the training window.
        end_date: end of the training window.
        interval: bar length in minutes.
        feat_keys: names of the feature columns used as model input.
        base_alg_func: callable ``(df, suffix=...) -> df`` that adds the
            ``buy_executed{suffix}`` / ``sell_executed{suffix}`` /
            ``target_price{suffix}`` columns.
        suffix: column-name suffix of the base strategy. When ``None``
            (default) the notebook-level ``suffix`` variable is used, which
            is what this function always did before the argument existed.

    Returns:
        The fitted ``lgb.LGBMRegressor``.

    Raises:
        ValueError: if the window contains no executed buy signal.
    """
    if suffix is None:
        # The parameter shadows the notebook global, so look it up explicitly.
        suffix = globals()["suffix"]

    df = get_df(symbol=symbol, start_date=start_date, end_date=end_date, interval=interval)
    df = base_alg_func(df, suffix=suffix)
    df = calc_profits(df, suffix)

    train_df = df.filter(pl.col(f"buy_executed{suffix}"))
    if train_df.height == 0:
        raise ValueError(
            f"no executed buy signals between {start_date} and {end_date}; nothing to train on"
        )

    train_x = train_df.select(*feat_keys).to_numpy()
    # select(...).to_numpy() yields an (n, 1) column; estimators expect a 1-D
    # target, so flatten it instead of relying on an implicit conversion.
    train_y = train_df.select(f"profits{suffix}").to_numpy().ravel()

    regr = lgb.LGBMRegressor()
    regr.fit(train_x, train_y)
    # Alternatives that were tried:
    # regr = svm.SVR()
    # regr.fit(train_x, train_y)
    # regr = neural_network.MLPRegressor(hidden_layer_sizes=(100,), solver="adam", max_iter=10000, batch_size=1000)
    # regr.fit(train_x, train_y * 100000)
    return regr

In [None]:
def test(regressor, start_date, end_date, interval, feat_keys, base_alg_func, symbol=None, suffix=None):
    """Evaluate a fitted regressor on a window and plot cumulative profits.

    The frame is built exactly like in training (``get_df`` -> base strategy
    -> ``calc_profits``) and restricted to rows where ``buy_executed{suffix}``
    is true. Three curves are plotted over those rows:

      * ``bf``    - cumulative sum of every executed trade's profit.
      * ``close`` - ``close`` relative to its first value, minus 1.
      * ``pred``  - cumulative sum of profits, counting only trades whose
        predicted profit is positive.

    Args:
        regressor: fitted model exposing ``predict``.
        start_date: start of the evaluation window.
        end_date: end of the evaluation window.
        interval: bar length in minutes.
        feat_keys: names of the feature columns used as model input.
        base_alg_func: callable ``(df, suffix=...) -> df`` adding the base
            strategy's signal columns.
        symbol: instrument identifier. When ``None`` (default) the
            notebook-level ``symbol`` variable is used, as before.
        suffix: column-name suffix of the base strategy. When ``None``
            (default) the notebook-level ``suffix`` variable is used, as
            before.

    Returns:
        ``(test_x, test_y, pred_test_y)`` where ``test_y`` is the ``(n, 1)``
        array of realised profits and ``pred_test_y`` the model's predictions.

    Raises:
        ValueError: if the window contains no executed buy signal.
    """
    # The parameters shadow the notebook globals, so look them up explicitly
    # when the caller did not pass them.
    if symbol is None:
        symbol = globals()["symbol"]
    if suffix is None:
        suffix = globals()["suffix"]

    test_df = get_df(symbol=symbol, start_date=start_date, end_date=end_date, interval=interval)
    test_df = base_alg_func(test_df, suffix=suffix)
    test_df = calc_profits(test_df, suffix)
    test_df = test_df.filter(pl.col(f"buy_executed{suffix}"))
    if test_df.height == 0:
        # Without this guard the failure surfaces later as an opaque error
        # (at the latest on test_df["close"][0]).
        raise ValueError(
            f"no executed buy signals between {start_date} and {end_date}; nothing to evaluate"
        )

    test_x = test_df.select(*feat_keys).to_numpy()
    test_y = test_df.select(f"profits{suffix}").to_numpy()

    pred_test_y = regressor.predict(test_x)

    # Keep a trade's realised profit only when the model predicts a gain.
    test_y_selected = np.where(pred_test_y > 0, test_y[..., 0], 0.0)

    plt.plot(test_y.cumsum(), label="bf")
    plt.plot(test_df["close"] / test_df["close"][0] - 1.0, label="close")
    plt.plot(test_y_selected.cumsum(), label="pred")
    plt.legend()
    plt.grid()
    plt.show()

    return test_x, test_y, pred_test_y

In [None]:
# Walk-forward evaluation: train on a 60-day window, test on the 60 days that
# start one day after it, then slide everything forward by 30 days.
TRAIN_SPAN = datetime.timedelta(days=60)
TEST_GAP = datetime.timedelta(days=1)
TEST_SPAN = datetime.timedelta(days=60)
WINDOW_STEP = datetime.timedelta(days=30)

# Evaluate "now" once so every iteration is checked against the same instant.
run_time = datetime.datetime.now()

train_start_date = datetime.datetime(2024, 1, 1)
# Only run windows whose test period lies entirely in the past. The start date
# is advanced at the END of the body so the first window (starting on the date
# above) is evaluated and the last one never reaches into the future.
while train_start_date + TRAIN_SPAN + TEST_GAP + TEST_SPAN <= run_time:
    train_end_date = train_start_date + TRAIN_SPAN
    test_start_date = train_end_date + TEST_GAP
    test_end_date = test_start_date + TEST_SPAN

    regr = train(
        symbol,
        start_date=train_start_date, end_date=train_end_date, interval=interval,
        feat_keys=feat_keys, base_alg_func=base_alg_func
    )

    test_x, test_y, pred_y = test(
        regr, start_date=test_start_date, end_date=test_end_date, interval=interval,
        feat_keys=feat_keys, base_alg_func=base_alg_func
    )

    train_start_date += WINDOW_STEP

In [None]:
# Candlestick pattern identifiers, in alphabetical order.
# NOTE(review): these look like TA-Lib "Pattern Recognition" function names;
# nothing visible in this notebook consumes the list yet - confirm its use.
pr_lists = [
    "CDL2CROWS", "CDL3BLACKCROWS", "CDL3INSIDE",
    "CDL3LINESTRIKE", "CDL3OUTSIDE", "CDL3STARSINSOUTH",
    "CDL3WHITESOLDIERS", "CDLABANDONEDBABY", "CDLADVANCEBLOCK",
    "CDLBELTHOLD", "CDLBREAKAWAY", "CDLCLOSINGMARUBOZU",
    "CDLCONCEALBABYSWALL", "CDLCOUNTERATTACK", "CDLDARKCLOUDCOVER",
    "CDLDOJI", "CDLDOJISTAR", "CDLDRAGONFLYDOJI",
    "CDLENGULFING", "CDLEVENINGDOJISTAR", "CDLEVENINGSTAR",
    "CDLGAPSIDESIDEWHITE", "CDLGRAVESTONEDOJI", "CDLHAMMER",
    "CDLHANGINGMAN", "CDLHARAMI", "CDLHARAMICROSS",
    "CDLHIGHWAVE", "CDLHIKKAKE", "CDLHIKKAKEMOD",
    "CDLHOMINGPIGEON", "CDLIDENTICAL3CROWS", "CDLINNECK",
    "CDLINVERTEDHAMMER", "CDLKICKING", "CDLKICKINGBYLENGTH",
    "CDLLADDERBOTTOM", "CDLLONGLEGGEDDOJI", "CDLLONGLINE",
    "CDLMARUBOZU", "CDLMATCHINGLOW", "CDLMATHOLD",
    "CDLMORNINGDOJISTAR", "CDLMORNINGSTAR", "CDLONNECK",
    "CDLPIERCING", "CDLRICKSHAWMAN", "CDLRISEFALL3METHODS",
    "CDLSEPARATINGLINES", "CDLSHOOTINGSTAR", "CDLSHORTLINE",
    "CDLSPINNINGTOP", "CDLSTALLEDPATTERN", "CDLSTICKSANDWICH",
    "CDLTAKURI", "CDLTASUKIGAP", "CDLTHRUSTING",
    "CDLTRISTAR", "CDLUNIQUE3RIVER", "CDLUPSIDEGAP2CROWS",
    "CDLXSIDEGAP3METHODS",
]