In [None]:
from pathlib import Path
import datetime
import pickle

import lightgbm as lgb
import numpy as np
import polars as pl
import matplotlib.pyplot as plt
import talib

import data_fetcher
import stock

In [None]:
# Market / data settings.
symbol = "BTC_JPY"
# Candle width in minutes; converted to a timedelta for the fetcher below.
interval_minutes = 1
# Fraction of the rows (taken from the start of the frame) used as the training split.
train_ratio = 0.7
# NOTE(review): presumably the price tick size. This notebook-level value is not
# forwarded to simulate_trade() by preprocess() - confirm which tick size is intended.
pips = 0.01

# NOTE(review): neither of these is read by preprocess(), which uses its own values
# (0.08 loss rate, wall_time=100) - confirm which settings are intended.
max_loss_rate = 0.1
max_hold_timestep = 10

# Fetch the OHLC data for `symbol` at the configured interval.
fetcher = data_fetcher.gmo.GMOFethcer()
all_df = fetcher.fetch_ohlc(symbol, interval=datetime.timedelta(minutes=interval_minutes))

In [None]:
# Compute the feature columns, then keep only rows in which no Float32/Float64
# column is NaN.
all_df = (
    stock.crypto.feature.calc_features(all_df)
    .filter(pl.all_horizontal(pl.col(pl.Float32, pl.Float64).is_not_nan()))
)

# Split by row position: the first `train_ratio` of the rows is the training
# split, the remainder is the test split.
train_num = int(len(all_df) * train_ratio)
train_df = all_df.slice(0, train_num)
test_df = all_df.slice(train_num)

In [None]:
def calc_profit_long(buy_executed, sell_executed, buy_price, sell_price, losscut, low, close, wall_time):
    """Compute the return of a long trade opened on every bar with an executed buy.

    All sequence arguments are indexed by bar and must have the same length.

    For each bar ``i`` where ``buy_executed[i]`` is truthy, the position is
    entered at ``buy_price[i]`` and the following bars ``i+1 .. i+wall_time``
    (clamped to the available data) are scanned in order:

    * if ``low[j]`` drops below ``losscut[i]``, the exit price is the loss-cut
      price (checked before the sell on the same bar);
    * otherwise, if ``sell_executed[j]`` is truthy, the exit price is
      ``sell_price[j]``.

    If neither happens, the exit price is the close of bar ``i + wall_time``
    (or of the last bar when that index is out of range).

    Returns:
        numpy.ndarray of length ``len(buy_executed)`` holding
        ``exit_price / entry_price - 1.0`` on entry bars and 0 elsewhere.
    """
    total = len(buy_executed)
    last_index = total - 1
    returns = np.zeros(total)

    for entry in range(total):
        if not buy_executed[entry]:
            continue

        entry_price = buy_price[entry]
        stop_price = losscut[entry]
        # Default exit: close at the end of the holding window.
        exit_price = close[min(entry + wall_time, last_index)]

        window_end = min(entry + wall_time + 1, total)
        for step in range(entry + 1, window_end):
            if low[step] < stop_price:
                # Loss-cut takes priority over a sell fill on the same bar.
                exit_price = stop_price
                break
            if sell_executed[step]:
                exit_price = sell_price[step]
                break

        returns[entry] = (exit_price / entry_price) - 1.0

    return returns

def calc_profit_short(buy_executed, sell_executed, buy_price, sell_price, losscut, high, close, wall_time):
    """Compute the return of a short trade opened on every bar with an executed sell.

    All sequence arguments are indexed by bar and must have the same length.

    For each bar ``i`` where ``sell_executed[i]`` is truthy, the position is
    entered at ``sell_price[i]`` and the following bars ``i+1 .. i+wall_time``
    (clamped to the available data) are scanned in order:

    * if ``high[j]`` rises above ``losscut[i]``, the buy-back price is the
      loss-cut price (checked before the buy on the same bar);
    * otherwise, if ``buy_executed[j]`` is truthy, the buy-back price is
      ``buy_price[j]``.

    If neither happens, the buy-back price is the close of bar
    ``i + wall_time`` (or of the last bar when that index is out of range).

    Returns:
        numpy.ndarray of length ``len(buy_executed)`` holding
        ``entry_price / buy_back_price - 1.0`` on entry bars and 0 elsewhere.
    """
    total = len(buy_executed)
    last_index = total - 1
    returns = np.zeros(total)

    for entry in range(total):
        if not sell_executed[entry]:
            continue

        entry_price = sell_price[entry]
        # Default exit: buy back at the close at the end of the holding window.
        cover_price = close[min(entry + wall_time, last_index)]
        stop_price = losscut[entry]

        window_end = min(entry + wall_time + 1, total)
        for step in range(entry + 1, window_end):
            if stop_price < high[step]:
                # Loss-cut takes priority over a buy fill on the same bar.
                cover_price = stop_price
                break
            if buy_executed[step]:
                cover_price = buy_price[step]
                break

        returns[entry] = (entry_price / cover_price) - 1.0

    return returns

def simulate_trade(
        df: pl.DataFrame,
        buy_price_key: str,
        sell_price_key: str,
        buy_losscut_key: str,
        sell_losscut_key: str,
        wall_time: int = 10,
        pips: float = 1.0
):
    """Execution simulation for limit orders.

    The four price columns named by the ``*_key`` arguments are first rounded
    to the nearest multiple of ``pips`` (overwriting the columns in place in
    the returned frame). Then the following columns are added:

    * ``buy_executed``: rounded buy price is strictly above the bar's ``low``.
    * ``sell_executed``: rounded sell price is strictly below the bar's ``high``.
    * ``buy_executed_price`` / ``sell_executed_price``: the rounded order price
      on bars where the order executed; other bars are back-filled from the
      next later executed bar.

    Profit is not calculated here.

    Args:
        df: Frame containing ``low``, ``high`` and the four key columns.
        buy_price_key: Column with the buy order price.
        sell_price_key: Column with the sell order price.
        buy_losscut_key: Column with the loss-cut price for long positions.
        sell_losscut_key: Column with the loss-cut price for short positions.
        wall_time: Accepted for interface compatibility; not used in this function.
        pips: Price increment the prices are rounded to.

    Returns:
        A new ``pl.DataFrame`` with the rounded prices and the added columns.
    """
    def round_to_tick(column_name):
        # Round a price column to the market's price increment, keeping its name.
        return ((pl.col(column_name) / pips).round() * pips).alias(column_name)

    price_columns = (buy_price_key, sell_price_key, buy_losscut_key, sell_losscut_key)
    rounded_df = df.with_columns(*[round_to_tick(name) for name in price_columns])

    # Fill conditions, evaluated against the already-rounded prices.
    buy_filled = pl.col(buy_price_key) > pl.col("low")
    sell_filled = pl.col(sell_price_key) < pl.col("high")

    buy_fill_price = (
        pl.when(buy_filled)
        .then(pl.col(buy_price_key))
        .otherwise(None)
        .fill_null(strategy="backward")
    )
    sell_fill_price = (
        pl.when(sell_filled)
        .then(pl.col(sell_price_key))
        .otherwise(None)
        .fill_null(strategy="backward")
    )

    return rounded_df.with_columns(
        buy_filled.alias("buy_executed"),
        sell_filled.alias("sell_executed"),
        buy_fill_price.alias("buy_executed_price"),
        sell_fill_price.alias("sell_executed_price"),
    )

In [None]:
def preprocess(
        df: pl.DataFrame,
        max_loss_rate: float = 0.08,
        atr_multiplier: float = 0.8,
        wall_time: int = 100,
        pips: float = 1.0,
):
    """Add order prices, execution flags and per-row trade profits to ``df``.

    Steps:

    1. Target prices: the previous row's ``close`` minus / plus
       ``atr_multiplier`` times the previous row's ``ATR`` (``shift()`` moves
       the values down by one row, so the first row is null).
    2. Loss-cut prices: ``max_loss_rate`` below the buy target and above the
       sell target.
    3. ``simulate_trade`` rounds the prices and adds the execution columns.
    4. ``calc_profit_long`` / ``calc_profit_short`` compute the per-row returns,
       stored as ``buy_profits`` and ``sell_profits``.

    The defaults reproduce the values that used to be hard-coded here. Settings
    defined at notebook level are not picked up automatically; pass them
    explicitly if they should apply.

    Args:
        df: Frame with at least ``close``, ``ATR``, ``low`` and ``high`` columns.
        max_loss_rate: Relative distance of the loss-cut price from the target price.
        atr_multiplier: Multiple of ``ATR`` used as the offset from ``close``.
        wall_time: Maximum number of rows a position is held before closing at ``close``.
        pips: Price increment forwarded to ``simulate_trade``.

    Returns:
        A new ``pl.DataFrame`` with the added columns.
    """
    df = df.with_columns(
        (pl.col("close") - pl.col("ATR") * atr_multiplier).shift().alias("buy_target_price"),
        (pl.col("close") + pl.col("ATR") * atr_multiplier).shift().alias("sell_target_price")
    ).with_columns(
        (pl.col("buy_target_price") * (1.0 - max_loss_rate)).alias("buy_losscut"),
        (pl.col("sell_target_price") * (1.0 + max_loss_rate)).alias("sell_losscut")
    )
    df = simulate_trade(
        df,
        buy_price_key="buy_target_price",
        sell_price_key="sell_target_price",
        buy_losscut_key="buy_losscut",
        sell_losscut_key="sell_losscut",
        pips=pips,
    )

    # The profit functions index element by element in Python loops. Plain
    # lists give the same values (nulls become None) but are much cheaper to
    # index than a polars Series.
    buy_executed = df["buy_executed"].to_list()
    sell_executed = df["sell_executed"].to_list()
    buy_price = df["buy_executed_price"].to_list()
    sell_price = df["sell_executed_price"].to_list()
    close = df["close"].to_list()

    buy_profits = calc_profit_long(
        buy_executed=buy_executed,
        sell_executed=sell_executed,
        buy_price=buy_price,
        sell_price=sell_price,
        losscut=df["buy_losscut"].to_list(),
        low=df["low"].to_list(),
        close=close,
        wall_time=wall_time
    )
    sell_profits = calc_profit_short(
        buy_executed=buy_executed,
        sell_executed=sell_executed,
        buy_price=buy_price,
        sell_price=sell_price,
        losscut=df["sell_losscut"].to_list(),
        high=df["high"].to_list(),
        close=close,
        wall_time=wall_time
    )
    df = df.with_columns(
        pl.Series("buy_profits", buy_profits),
        pl.Series("sell_profits", sell_profits)
    )
    return df

In [None]:
# Add target prices, execution flags and per-row profits to each split.
# The two splits are processed independently of each other.
train_df = preprocess(train_df)
test_df = preprocess(test_df)

In [None]:
# Share of training rows on which the sell order / the buy order executed
# (sum of the boolean flags divided by the row count).
train_df["sell_executed"].sum() / len(train_df), train_df["buy_executed"].sum() / len(train_df)

In [None]:
# Baseline on the training split: place an order at every trading opportunity.
# Cumulative buy / sell profits are compared with the relative change of the
# close price since the first row.
plt.plot(np.cumsum(train_df["buy_profits"].to_numpy()), label="buy")
plt.plot(np.cumsum(train_df["sell_profits"].to_numpy()), label="sell")
plt.plot(train_df["close"] / train_df["close"][0] - 1.0, label="price")
# Build the legend after all lines are drawn so the price line is included.
plt.legend()

In [None]:
# Baseline on the test split: place an order at every trading opportunity.
# Cumulative buy / sell profits are compared with the relative change of the
# close price since the first row.
plt.plot(np.cumsum(test_df["buy_profits"].to_numpy()), label="buy")
plt.plot(np.cumsum(test_df["sell_profits"].to_numpy()), label="sell")
plt.plot(test_df["close"] / test_df["close"][0] - 1.0, label="price")
# Build the legend after all lines are drawn so the price line is included.
plt.legend()

In [None]:
# Names of the columns used as model inputs. Each name must exist as a column
# in train_df / test_df. Commented-out entries are currently excluded from the
# feature set.
train_features = [
    'ADX',
    'ADXR',
    'APO',
    'AROON_aroondown',
    'AROON_aroonup',
    'AROONOSC',
    'CCI',
    'DX',
    'MACD_macd',
    'MACD_macdsignal',
    'MACD_macdhist',
    'MFI',
#     'MINUS_DI',
#     'MINUS_DM',
    'MOM',
#     'PLUS_DI',
#     'PLUS_DM',
    'RSI',
    'STOCH_slowk',
    'STOCH_slowd',
    'STOCHF_fastk',
#     'STOCHRSI_fastd',
    'ULTOSC',
    'WILLR',
#     'ADOSC',
#     'NATR',
    'HT_DCPERIOD',
    'HT_DCPHASE',
    'HT_PHASOR_inphase',
    'HT_PHASOR_quadrature',
    'HT_TRENDMODE',
    'BETA',
    'LINEARREG',
    'LINEARREG_ANGLE',
    'LINEARREG_INTERCEPT',
    'LINEARREG_SLOPE',
    'STDDEV',
    'BBANDS_upperband',
    'BBANDS_middleband',
    'BBANDS_lowerband',
    'DEMA',
    'EMA',
    'HT_TRENDLINE',
    'KAMA',
    'MA',
    'MIDPOINT',
    'T3',
    'TEMA',
    'TRIMA',
    'WMA',
]

In [None]:
# Feature matrix and regression target from the training split.
# The target is the per-row long-trade profit ("buy_profits").
train_x = train_df.select(*train_features).to_numpy()
train_y = train_df["buy_profits"].to_numpy()

# Fit a LightGBM regressor with a fixed random_state and a single job.
estimator = lgb.LGBMRegressor(n_jobs=1, random_state=1)
estimator.fit(train_x, train_y)

In [None]:
# In-sample check: take the long trade only on rows where the predicted profit
# is positive, then plot the cumulative realised profit of those trades.
train_pred = estimator.predict(train_x)
trained_df = train_df.with_columns(
    pl.Series("train_pred", train_pred > 0)
).with_columns(
    pl.when(pl.col("train_pred")).then(pl.col("buy_profits")).otherwise(0).alias("train_pred_profits")
)
# Series.cumsum() is the deprecated polars spelling; cum_sum() matches the
# name used elsewhere in this notebook.
plt.plot(trained_df["train_pred_profits"].cum_sum())

In [None]:
# Out-of-sample check: take the long trade only on test rows where the predicted
# profit is positive, then plot the cumulative realised profit of those trades.
test_pred = estimator.predict(test_df.select(*train_features).to_numpy())
tested_df = test_df.with_columns(
    pl.Series("test_pred", test_pred > 0)
).with_columns(
    pl.when(pl.col("test_pred")).then(pl.col("buy_profits")).otherwise(0).alias("test_pred_profits")
)
# Series.cumsum() is the deprecated polars spelling; cum_sum() matches the
# name used elsewhere in this notebook.
plt.plot(tested_df["test_pred_profits"].cum_sum())

In [None]:
# Profit trajectory on the test data, compared with the relative change of the
# close price since the first test row.
plt.plot(tested_df["test_pred_profits"].cum_sum(), label="profit")
plt.plot(tested_df["close"] / tested_df["close"][0] - 1.0, label="price")
# Without this call the labels above are never displayed.
plt.legend()

In [None]:
# Profit rate per day on the test split: total profit divided by the number of
# days covered. The day count is derived from the bar width (interval_minutes)
# rather than assuming 1-minute bars (1440 per day).
tested_df["test_pred_profits"].sum() / (len(tested_df) * interval_minutes / (24 * 60))

In [None]:
# Persist the fitted estimator with pickle under a timestamped file name.
# NOTE(review): the output directory is a machine-specific absolute path;
# consider making it configurable.
output_dir = Path("/home/kitamura/work/stock/outputs/20241123_crypto_train")
# parents=True so that a missing intermediate directory does not abort the save.
output_dir.mkdir(parents=True, exist_ok=True)
output_path = output_dir / "model_{}.pkl".format(datetime.datetime.now().isoformat())
with open(output_path, "wb") as f:
    pickle.dump(estimator, f)

print("save model to {}".format(output_path))