In [None]:
# Preprocess the crypto data
import datetime

import polars as pl
import matplotlib.pyplot as plt
import talib
import joblib
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import cross_val_score, KFold, TimeSeriesSplit

import stock

In [None]:
# Load BTC_JPY OHLC rows through the project's GMO fetcher. The 15-minute
# timedelta is handed to fetch_ohlc (presumably the candle width - confirm
# against the fetcher implementation).
bar_interval = datetime.timedelta(minutes=15)
fetcher = stock.io.gmo.GMOFethcer()
df = fetcher.fetch_ohlc("BTC_JPY", bar_interval)

In [None]:
# Piecewise-constant maker fee keyed on the row timestamp:
#   before 2020-08-05 06:00 ->  0.0
#   before 2020-09-09 06:00 -> -0.00035
#   before 2020-11-04 06:00 -> -0.00025
#   afterwards              ->  0.0
row_time = pl.col("datetime")
maker_fee_expr = (
    pl.when(row_time < datetime.datetime(2020, 8, 5, 6, 0, 0))
    .then(0.0)
    .when(row_time < datetime.datetime(2020, 9, 9, 6, 0, 0))
    .then(-0.00035)
    .when(row_time < datetime.datetime(2020, 11, 4, 6, 0, 0))
    .then(-0.00025)
    .otherwise(0.0)
    .alias("maker_fee")
)

# Add the fee column first, then let the project helper derive the features.
df = stock.crypto.feature.calc_features(df.with_columns(maker_fee_expr))

In [None]:
# Price tick used to quantise limit-order distances.
pips = 0.0001

# Half of ATR expressed in whole ticks, never less than one tick.
half_atr_ticks = (pl.col("ATR") * 0.5 / pips).round().clip(lower_bound=1.0)

df = df.with_columns(
    (half_atr_ticks * pips).alias("limit_price_dist"),
).with_columns(
    # Limit prices sit symmetrically around the close of the bar.
    (pl.col("close") - pl.col("limit_price_dist")).alias("buy_price"),
    (pl.col("close") + pl.col("limit_price_dist")).alias("sell_price"),
)

In [None]:
# Display the current frame as the cell output (sanity check of the new columns).
df

In [None]:
def calc_force_entry_price(entry_price: np.ndarray, low: np.ndarray, pips: float):
    """Find, for every bar, the next bar whose limit order would be filled.

    Bar ``i`` counts as executable when
    ``round(low[i + 1] / pips) < round(entry_price[i] / pips)``, i.e. the
    following bar's low trades strictly below the order price once both are
    quantised to ``pips``. The last bar has no following bar and is therefore
    never executable. Comparisons involving NaN are False, so bars with a NaN
    price are never executable either.

    Args:
        entry_price: 1-D array of limit prices, one per bar.
        low: 1-D array of bar lows with the same length as ``entry_price``.
        pips: Tick size used to quantise both prices before comparing.

    Returns:
        A ``(fep, fet)`` tuple of float arrays with the same length as
        ``entry_price``:

        * ``fep[j]`` is ``entry_price[i]`` for the first executable bar
          ``i >= j``, or NaN when no such bar exists.
        * ``fet[j]`` is ``i - j + 1`` for that bar; when no executable bar
          exists it is ``len(entry_price) - 1 - j``.
    """
    entry_price = np.asarray(entry_price)
    low = np.asarray(low)
    n = entry_price.shape[0]

    fep = np.full(n, np.nan)
    if n == 0:
        # Nothing to scan; return empty arrays.
        return fep, np.zeros(0)

    is_executable = np.round(low[1:] / pips) < np.round(entry_price[:-1] / pips)

    # fill_idx[i] is i when bar i is executable, otherwise the sentinel n.
    positions = np.arange(n)
    fill_idx = np.full(n, n)
    fill_idx[:-1] = np.where(is_executable, positions[:-1], n)

    # A reversed running minimum yields, for every bar, the first executable
    # bar at or after it. This replaces the per-bar slice increments with a
    # single O(n) pass while producing the same values.
    next_fill = np.minimum.accumulate(fill_idx[::-1])[::-1]
    has_fill = next_fill < n

    fep[has_fill] = entry_price[next_fill[has_fill]]
    fet = np.where(has_fill, next_fill - positions + 1, n - 1 - positions).astype(float)
    return fep, fet

In [None]:
# Restrict the frame to the study window; is_between is called with its
# default bound handling.
period_start = datetime.datetime(2018, 9, 5)
period_end = datetime.datetime(2021, 3, 31)
in_period = pl.col("datetime").is_between(period_start, period_end)
df = df.filter(in_period)

In [None]:
# Pull the columns needed by the fill scan out as NumPy arrays.
buy_price = df["buy_price"].to_numpy()
sell_price = df["sell_price"].to_numpy()
low = df["low"].to_numpy()
high = df["high"].to_numpy()

# Buy side: filled when the next low trades below the buy price.
buy_fep, buy_fet = calc_force_entry_price(buy_price, low, pips)
# Sell side: the same scan on negated prices turns "high above the sell price"
# into the "low below the price" test; sell_fep is negated back further down.
sell_fep, sell_fet = calc_force_entry_price(-sell_price, -high, pips)

horizon = 1

# Reusable expressions (evaluated lazily inside with_columns).
buy_price_ticks = (pl.col("buy_price")  / pips).round()
sell_price_ticks = (pl.col("sell_price")  / pips).round()
next_low_ticks = (pl.col("low").shift(-1) / pips).round()
next_high_ticks = (pl.col("high").shift(-1) / pips).round()
buy_filled = pl.col("buy_executed") > 0.5
sell_filled = pl.col("sell_executed") > 0.5
fee = pl.col("maker_fee")

df = (
    df.with_columns(
        pl.Series("buy_fep", buy_fep),
        pl.Series("buy_fet", buy_fet),
        pl.Series("sell_fep", -sell_fep),
        pl.Series("sell_fet", sell_fet),
    )
    .with_columns(
        # 1.0 when the order placed on this bar is hit by the next bar, else 0.0.
        # Only the buy side carries an explicit NaN guard on its price.
        pl.when(pl.col("buy_price").is_not_nan())
        .then(buy_price_ticks > next_low_ticks)
        .otherwise(0)
        .cast(pl.Float32)
        .alias("buy_executed"),
        (sell_price_ticks < next_high_ticks).cast(pl.Float32).alias("sell_executed"),
    )
    .with_columns(
        # Targets: relative move from this bar's entry price to the next bar's
        # opposite-side forced price, minus twice the maker fee; 0 without a fill.
        pl.when(buy_filled)
        .then(pl.col("sell_fep").shift(-1) / pl.col("buy_price") - 1 - 2 * fee)
        .otherwise(0)
        .alias("y_buy"),
        pl.when(sell_filled)
        .then(-(pl.col("buy_fep").shift(-1) / pl.col("sell_price") - 1) - 2 * fee)
        .otherwise(0)
        .alias("y_sell"),
        # Entry price relative to the close plus the fee; left null without a fill.
        pl.when(buy_filled)
        .then(pl.col("buy_price") / pl.col("close") - 1 + fee)
        .alias("buy_cost"),
        pl.when(sell_filled)
        .then(-(pl.col("sell_price") / pl.col("close") - 1) + fee)
        .alias("sell_cost"),
    )
)

In [None]:
# Rolling mean (1000-row window) of the execution flags for both sides.
fill_rate_window = 1000
fill_rates = df.select(
    [
        pl.col(flag).rolling_mean(fill_rate_window)
        for flag in ("buy_executed", "sell_executed")
    ]
)
fill_rates.plot()

In [None]:
# Histogram of the bars-until-fill counters for both sides.
bars_to_fill = df.select(pl.col("buy_fet", "sell_fet"))
bars_to_fill.plot.hist()

In [None]:
# Running totals of the buy and sell targets over the whole frame.
cumulative_targets = df.select(pl.col("y_buy", "y_sell").cum_sum())
cumulative_targets.plot()

In [None]:
# Names of the feature columns fed to the models, kept in sorted order.
# Entries that are commented out are deliberately excluded from training.
train_features = sorted(
    [
        "ADX",
        "ADXR",
        "APO",
        "AROON_aroondown",
        "AROON_aroonup",
        "AROONOSC",
        "CCI",
        "DX",
        "MACD_macd",
        "MACD_macdsignal",
        "MACD_macdhist",
        "MFI",
        # "MINUS_DI",
        # "MINUS_DM",
        "MOM",
        # "PLUS_DI",
        # "PLUS_DM",
        "RSI",
        "STOCH_slowk",
        "STOCH_slowd",
        "STOCHF_fastk",
        # "STOCHRSI_fastd",
        "ULTOSC",
        "WILLR",
        # "ADOSC",
        # "NATR",
        "HT_DCPERIOD",
        "HT_DCPHASE",
        "HT_PHASOR_inphase",
        "HT_PHASOR_quadrature",
        "HT_TRENDMODE",
        "BETA",
        "LINEARREG",
        "LINEARREG_ANGLE",
        "LINEARREG_INTERCEPT",
        "LINEARREG_SLOPE",
        "STDDEV",
        "BBANDS_upperband",
        "BBANDS_middleband",
        "BBANDS_lowerband",
        "DEMA",
        "EMA",
        "HT_TRENDLINE",
        "KAMA",
        "MA",
        "MIDPOINT",
        "T3",
        "TEMA",
        "TRIMA",
        "WMA",
    ]
)


In [None]:
train_df["y_buy"].to_numpy()[cv_indices[0][0]]

In [None]:
train_df = df.filter(
   pl.all_horizontal(pl.col(pl.Float32, pl.Float64).is_not_nan())
)

# model = lgb.LGBMRegressor(n_jobs=1, random_state=1)
# model.fit(train_df.select(train_features), df["y_buy"])
# joblib.dump(model, "model_y_buy.xz", compress=True)
# model.fit(train_df.select(train_features), df["y_sell"])
# joblib.dump(model, "model_y_sell.xz", compress=True)

cv_indices = list(KFold().split(train_df))

def cross_val_predict(x, y, cv):
   estimator = lgb.LGBMRegressor(n_jobs=1, random_state=1)
   y_pred = y.copy()
   y_pred[:] = np.nan
   for train_idx, val_idx in cv:
      estimator.fit(x[train_idx], y[train_idx])
      y_pred[val_idx] = estimator.predict(x[val_idx])
   return y_pred

train_df = train_df.with_columns(
    pl.Series("y_pred_buy", cross_val_predict(train_df.select(train_features).to_numpy(), train_df["y_buy"].to_numpy(), cv=cv_indices)),
    pl.Series("y_pred_sell", cross_val_predict(train_df.select(train_features).to_numpy(), train_df["y_sell"].to_numpy(), cv=cv_indices))
)
train_df = train_df.filter(
   pl.all_horizontal(pl.col(pl.Float32, pl.Float64).is_not_nan())
)
train_df.select(
   pl.when(pl.col("y_pred_buy") > 0).then(pl.col("y_buy")).otherwise(0.0).cumsum().alias("buy"),
   pl.when(pl.col("y_pred_sell") > 0).then(pl.col("y_sell")).otherwise(0.0).cumsum().alias("sell"),
).with_columns(
   (pl.col("buy") + pl.col("sell")).alias("total")
).plot()
