既存の特徴量を使って売買タイミングを計る（ベースのアルゴリズムをつくる）のではなく、  
機械学習アルゴリズムで売買タイミングの判定まで一気に行う手法を検討する。

In [None]:
from pathlib import Path
import datetime

import talib
import matplotlib.pyplot as plt
import polars as pl
import numpy as np
import lightgbm as lgb
from sklearn import svm, neural_network

import crypto
import data_fetcher

In [None]:
def get_df(symbol, start_date, end_date, interval):
    """Fetch OHLC bars for *symbol*, attach features, and drop NaN rows.

    Args:
        symbol: Instrument identifier forwarded to the fetcher.
        start_date: Start of the requested range, forwarded to the fetcher.
        end_date: End of the requested range, forwarded to the fetcher.
        interval: Bar length in minutes (converted to a ``timedelta``).

    Returns:
        The output of ``crypto.features.calc_features`` restricted to rows
        in which no Float32/Float64 column is NaN.
    """
    bar_length = datetime.timedelta(minutes=interval)
    ohlc = data_fetcher.gmo.GMOFethcer().fetch_ohlc(
        symbol,
        interval=bar_length,
        start_date=start_date,
        end_date=end_date,
    )
    featured = crypto.features.calc_features(ohlc)

    # Keep a row only when every floating-point column holds a non-NaN value.
    float_columns = pl.col(pl.Float32, pl.Float64)
    return featured.filter(pl.all_horizontal(float_columns.is_not_nan()))

def calc_profits(df, suffix):
    """Append a ``profits{suffix}`` column produced by the long-trade simulator.

    Args:
        df: Frame passed to ``crypto.simulate.simulate_long_trade``; the
            columns named below (with *suffix* appended) are handed to it.
        suffix: String appended to every column name used here.

    Returns:
        *df* with the simulator output added as ``profits{suffix}``.
    """
    sell_flag = f"sell_executed{suffix}"
    buy_flag = f"buy_executed{suffix}"
    # NOTE(review): the same price column is passed for both price arguments,
    # exactly as before — confirm against simulate_long_trade's signature.
    price = f"target_price{suffix}"

    simulated = crypto.simulate.simulate_long_trade(
        df, sell_flag, buy_flag, price, price, wall_timestep=100
    )
    return df.with_columns(pl.Series(simulated).alias(f"profits{suffix}"))

In [None]:
# Load 60 days of BTC_JPY bars starting 2024-01-01; `interval` is in minutes.
symbol, interval = "BTC_JPY", 1
start_date = datetime.datetime(year=2024, month=1, day=1)
end_date = start_date + datetime.timedelta(days=60)
df = get_df(symbol, start_date, end_date, interval)

In [None]:
# Turn the price movement into a score: where the current close sits inside
# the price range of the maximum holding period
# (near the high -> -1.0, near the low -> 1.0).
window_size = 60  # maximum holding period
pl.Config.set_tbl_rows(100)

# Shifting the rolling result up by window_size - 1 rows aligns each row with
# the window that starts at that row (assuming polars' default trailing window).
lookahead_shift = 1 - window_size
close_col = pl.col("close")
forward_max = close_col.rolling_max(window_size=window_size).shift(lookahead_shift)
forward_min = close_col.rolling_min(window_size=window_size).shift(lookahead_shift)
df = df.with_columns(
    forward_max.alias("rolling_max"),
    forward_min.alias("rolling_min"),
)

# Position of the close within [rolling_min, rolling_max], mapped to [1.0, -1.0].
price_range = pl.col("rolling_max") - pl.col("rolling_min")
relative_position = (pl.col("close") - pl.col("rolling_min")) / price_range
df = df.with_columns((1.0 - relative_position * 2.0).alias("score"))

In [None]:
# Plot rows [start_idx, end_idx): close and its rolling max/min, each expressed
# as a change relative to the first close, then the score scaled by 0.01
# (presumably so it is readable on the same axis).
start_idx = 100
end_idx = 200
for column in ("close", "rolling_max", "rolling_min"):
    plt.plot(df[column][start_idx:end_idx] / df["close"][0] - 1)
plt.plot(df["score"][start_idx:end_idx] * 0.01)

In [None]:
# 機械学習に使う特徴量を選択

In [None]:
start_idx = 100
end_idx = 200

# Baseline curve: close expressed as a change relative to the first row.
close_change = df["close"][start_idx:end_idx] / df["close"][0] - 1.0
plt.plot(close_change)

# Columns previously overlaid one at a time (currently disabled):
#   DEMA, HT_TRENDLINE, KAMA, MA, MIDPOINT, SMA, T3
# plt.plot((df["ADOSC"] * df["close"])[start_idx:end_idx] * 0.01, label="ad")

# Feature columns to overlay on the plot. Candidates tried so far
# (move a name into the list to plot it):
#   "BBANDS_upperband", "BBANDS_lowerband", "DEMA",
#   "LINEARREG", "LINEARREG_SLOPE", "LINEARREG_INTERCEPT",
#   "ADOSC", "APO", "MACD_macd", "MACD_macdhist",
#   "MINUS_DM", "PLUS_DM", "ADX", "MOM", "OBV", "AROOONOSC"
labels = []
for label in labels:
    feature_slice = df[label][start_idx:end_idx]
    plt.plot(feature_slice, label=label)

plt.legend()
plt.grid()

In [None]:
# Scratch cell for eyeballing a single TA-Lib indicator.
# Alternatives tried previously:
# res = talib.STDDEV(df["close"])
# res1 = talib.OBV(df["close"], df["volume"])
# res1, res2 = talib.AROON(high=df["high"], low=df["low"], timeperiod=14)
ohlc_inputs = {name: df[name] for name in ("open", "high", "low", "close")}
res = talib.BOP(**ohlc_inputs)

# start_idx / end_idx are reused from the previous cell; to override:
# start_idx = 0
# end_idx = 200 #len(df)
# plt.plot(df["close"][start_idx:end_idx], label="close")
scaled_feature = res[start_idx:end_idx] * 100
plt.plot(scaled_feature, label="feat")
# plt.plot(res1[start_idx:end_idx], label="feat1")
# plt.plot(res2[start_idx:end_idx], label="feat2")
plt.legend()
plt.grid()