# In [None]:
import warnings
warnings.filterwarnings('ignore')

import logging, joblib
logging.getLogger('lightgbm').setLevel(logging.WARNING)
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)

import pandas as pd
import numpy as np
from statsmodels.tsa.seasonal import STL
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit
from tqdm import tqdm
import matplotlib.pyplot as plt
import os, contextlib, sys

# Paths to the CSV splits
TRAIN_PATH = '/kaggle/input/data-btc/fix2/train.csv'
VAL_PATH   = '/kaggle/input/data-btc/fix2/val.csv'
TEST_PATH  = '/kaggle/input/data-btc/fix2/test.csv'


# 1) Load splits
def _load_split(path):
    """Read one split CSV and drop its two time columns.

    The 'timestamp' and 'close_time' columns are discarded right away, so they
    are not parsed as dates first (parsing a column that is dropped on the same
    line is wasted work).
    NOTE(review): rows are presumably already in chronological order, since
    nothing downstream sorts them -- confirm against the data export.
    """
    return pd.read_csv(path).drop(columns=['timestamp', 'close_time'])


df_tr = _load_split(TRAIN_PATH)
df_va = _load_split(VAL_PATH)
df_te = _load_split(TEST_PATH)

# 2) Constants
# Forecast horizons: display name -> number of rows the 'close' column is
# shifted by to build the label (presumably one row per hour -- TODO confirm).
horizons = {'1h':1,'2h':2,'3h':3,'6h':6,'12h':12,'1d':24,'3d':72,'7d':168,'15d':360,'30d':720}
# Seasonal period (in rows) passed to STL.
PERIOD   = 24
# Columns fed to the LightGBM residual model.
FEATURES = ['close','trend','season']

# 3) Helpers
def prepare(df, h):
    """Return a labelled copy of *df* for forecasting *h* rows ahead.

    A ``label`` column is added that holds the ``close`` value found *h* rows
    later. Rows for which no such value exists (the label is NaN) are removed
    and the index is renumbered from 0. The input frame is not modified.
    """
    future_close = df['close'].shift(-h)
    labelled = df.assign(label=future_close)
    has_label = labelled['label'].notna()
    return labelled[has_label].reset_index(drop=True)

def fit_stl_quiet(series, period):
    """Fit a robust STL decomposition of *series* and return the fit result.

    While the model is built and fitted, Python-level ``sys.stderr`` is
    redirected to the null device so anything written there is discarded.
    """
    with open(os.devnull, 'w') as sink:
        with contextlib.redirect_stderr(sink):
            fitted = STL(series, period=period, robust=True).fit()
    return fitted

def walk_forward(history_df, test_df, model_params, use_tune, label, max_points=10):
    """Walk-forward evaluation of the STL + LightGBM residual model.

    For each of the first ``max_points`` rows of *test_df*:

    1. STL is re-fitted on every close seen so far plus the new row's close.
    2. A fresh ``LGBMRegressor`` is trained on the history rows to predict
       ``label - (trend + season)`` from the ``FEATURES`` columns.
    3. The forecast for the new row is ``trend + season + predicted residual``.
    4. The new row (close and true label) joins the history for the next step.

    The actual/predicted series are plotted and RMSE / MAPE are printed.

    NOTE(review): step 4 adds the true label of test row i to the training data
    of step i+1. When labels are closes shifted by h > 1 rows (as ``prepare``
    builds them), that value would presumably not be observable yet at that
    time -- confirm this look-ahead is intended.

    Args:
        history_df: frame with ``close`` and ``label`` columns used as the
            initial training history (no other columns are read).
        test_df: frame with ``close`` and ``label`` columns to evaluate on.
        model_params: keyword arguments for ``LGBMRegressor``; only used when
            *use_tune* is true. ``None`` is treated as "no extra parameters".
        use_tune: whether to apply *model_params*.
        label: name used in the plot title and the printed metrics.
        max_points: maximum number of test rows to evaluate (default 10).

    Returns:
        ``(rmse, mape)`` where ``mape`` is expressed in percent.

    Raises:
        ValueError: if there is no test row to evaluate.
    """
    pts = min(max_points, len(test_df))
    if pts <= 0:
        raise ValueError(f'{label}: no test rows to evaluate')

    # Build the full close/label sequences once and slice per step, instead of
    # re-copying the whole history with np.append on every iteration.
    n_start = len(history_df)
    close_all = np.concatenate(
        [history_df['close'].to_numpy(), test_df['close'].to_numpy()[:pts]]
    )
    label_all = np.concatenate(
        [history_df['label'].to_numpy(), test_df['label'].to_numpy()[:pts]]
    )
    trues = label_all[n_start:n_start + pts]

    # Guard against use_tune=True with model_params=None (``**None`` raises).
    params = dict(model_params) if (use_tune and model_params) else {}

    preds = []
    for step in range(pts):
        n_hist = n_start + step          # rows available for training
        stl = fit_stl_quiet(close_all[:n_hist + 1], PERIOD)
        trend = np.asarray(stl.trend)
        season = np.asarray(stl.seasonal)

        train_X = pd.DataFrame({
            'close':  close_all[:n_hist],
            'trend':  trend[:n_hist],
            'season': season[:n_hist],
        })[FEATURES]
        # Target is what STL's trend + seasonal parts leave unexplained.
        train_y = label_all[:n_hist] - (trend[:n_hist] + season[:n_hist])

        mdl = LGBMRegressor(random_state=42, **params)
        mdl.fit(train_X, train_y)

        # Features of the newly observed row (last element of the STL fit).
        feat = pd.DataFrame({
            'close':  [close_all[n_hist]],
            'trend':  [trend[n_hist]],
            'season': [season[n_hist]],
        })
        rp = mdl.predict(feat)[0]
        preds.append(trend[n_hist] + season[n_hist] + rp)

    # ``squared=False`` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE works on every version.
    rmse = float(np.sqrt(mean_squared_error(trues, preds)))
    mape = mean_absolute_percentage_error(trues, preds)*100

    plt.figure(figsize=(9,4))
    plt.plot(trues, label='Actual')
    plt.plot(preds, '--', label='Predicted')
    # Report the number of points actually plotted (10 unless the test set is shorter).
    plt.title(f'{label}: Actual vs Pred ({pts})')
    plt.legend(); plt.tight_layout(); plt.show()

    print(f'{label}: RMSE={rmse:.2f}, MAPE={mape:.2f}%')
    return rmse, mape

def tune_lgbm(X, y, n_trials=20, device='gpu', seed=42):
    """Tune LightGBM hyper-parameters with Optuna on a 3-fold time-series split.

    Each trial trains one ``LGBMRegressor`` per ``TimeSeriesSplit`` fold and is
    scored by the mean validation MSE across folds (lower is better).

    Args:
        X: feature DataFrame (rows are selected positionally with ``.iloc``).
        y: target values aligned with *X*; converted to a NumPy array so that
            fold indices are always positional, even if a Series is passed.
        n_trials: number of Optuna trials (default 20).
        device: LightGBM ``device`` used while tuning (default ``'gpu'``).
        seed: seed for the Optuna sampler so repeated runs explore the same
            trials (the models themselves keep ``random_state=42``).

    Returns:
        dict of the best trial's parameters plus the fixed settings that shape
        the model (``bagging_freq``, ``verbose``), so a model built from the
        result matches what was tuned. ``device`` is not included, leaving the
        caller free to fit the final model on any hardware.
    """
    y = np.asarray(y)

    # LightGBM ignores bagging_fraction unless bagging_freq is non-zero, so
    # without this the tuned bagging_fraction would have no effect.
    fixed_params = {'bagging_freq': 1, 'verbose': -1}
    splitter = TimeSeriesSplit(n_splits=3)

    def objective(trial):
        params = {
            'n_estimators': trial.suggest_int('n_estimators',100,500),
            'num_leaves':    trial.suggest_int('num_leaves',16,64),
            'max_depth':     trial.suggest_int('max_depth',3,10),
            'learning_rate': trial.suggest_float('learning_rate',1e-3,1e-1,log=True),
            'feature_fraction': trial.suggest_float('feature_fraction',0.6,1.0),
            'bagging_fraction': trial.suggest_float('bagging_fraction',0.6,1.0),
            'min_child_samples': trial.suggest_int('min_child_samples',5,30),
            'device': device,
            **fixed_params,
        }
        fold_mse = []
        for train_idx, valid_idx in splitter.split(X):
            model = LGBMRegressor(random_state=42, **params)
            model.fit(X.iloc[train_idx], y[train_idx])
            fold_mse.append(
                mean_squared_error(y[valid_idx], model.predict(X.iloc[valid_idx]))
            )
        return float(np.mean(fold_mse))

    study = optuna.create_study(
        direction='minimize',
        sampler=optuna.samplers.TPESampler(seed=seed),
    )
    study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
    return {**study.best_trial.params, **fixed_params}

# ===== PHASE 1: TRAIN → VAL (no tuning) =====
# For every horizon, evaluate an untuned model on the validation split using
# the training split as the initial history.
print("===== FASE 1: TRAIN → VAL =====")
for label, h in tqdm(horizons.items(), desc='Phase1'):
    df_tr_p = prepare(df_tr, h)
    df_va_p = prepare(df_va, h)
    # No STL fit is needed here: walk_forward reads only the 'close' and
    # 'label' columns and re-fits STL itself at every step, so trend/season/
    # residual columns computed on the training frame would never be used.
    print(f'  [Eval] {label} on VAL:')
    walk_forward(df_tr_p, df_va_p, model_params=None, use_tune=False, label=label)

# ===== PHASE 2: (TRAIN+VAL) → TEST (with tuning & save) =====
# For every horizon: tune LightGBM on train+val residuals, fit and save the
# final model, then evaluate it on the first test rows without re-training.
print("\n===== FASE 2: (TRAIN+VAL) → TEST =====")
# The concatenated history does not depend on the horizon, so build it once
# (prepare() works on a copy and leaves it untouched).
df_hist_raw = pd.concat([df_tr, df_va], ignore_index=True)
for label, h in tqdm(horizons.items(), desc='Phase2'):
    # prepare
    df_hist_p = prepare(df_hist_raw, h)
    df_te_p   = prepare(df_te, h)

    # Residual target: what a single STL fit over the whole history leaves
    # unexplained in the future close.
    stl_hist = fit_stl_quiet(df_hist_p['close'], PERIOD)
    df_hist_p['trend'], df_hist_p['season'] = stl_hist.trend, stl_hist.seasonal
    df_hist_p['residual'] = df_hist_p['label'] - (df_hist_p['trend']+df_hist_p['season'])
    Xhv, yhv = df_hist_p[FEATURES], df_hist_p['residual'].values

    # tuning
    print(f'  [Tune] {label}')
    best_params = tune_lgbm(Xhv, yhv)
    print(f'    best_lgbm = {best_params}')

    # final train & save
    final_model = LGBMRegressor(random_state=42, **best_params)
    final_model.fit(Xhv, yhv)
    save_path = f'/kaggle/working/LGBM_{label}.pkl'
    joblib.dump(final_model, save_path)
    print(f'    Saved model to {save_path}')

    # evaluation
    print(f'  [Eval] {label} on TEST:')
    pts = min(10, len(df_te_p))
    if pts == 0:
        # A horizon longer than the test split leaves no labelled rows;
        # the model is already saved, so skip scoring instead of crashing.
        print(f'    No labelled test rows for {label}; skipping evaluation.\n')
        continue

    # Unlike walk_forward, the saved final_model is reused at every step;
    # only the STL decomposition is refreshed as new closes arrive.
    n_start = len(df_hist_p)
    close_all = np.concatenate(
        [df_hist_p['close'].to_numpy(), df_te_p['close'].to_numpy()[:pts]]
    )
    trues = df_te_p['label'].to_numpy()[:pts]
    preds = []
    for step in range(pts):
        n_hist = n_start + step          # position of the newly observed close
        stl = fit_stl_quiet(close_all[:n_hist + 1], PERIOD)
        trend_now = stl.trend[n_hist]
        season_now = stl.seasonal[n_hist]
        feat = pd.DataFrame({
            'close':  [close_all[n_hist]],
            'trend':  [trend_now],
            'season': [season_now]
        })
        rp = final_model.predict(feat)[0]
        preds.append(trend_now + season_now + rp)

    # ``squared=False`` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE works on every version.
    rmse = float(np.sqrt(mean_squared_error(trues, preds)))
    mape = mean_absolute_percentage_error(trues, preds)*100

    plt.figure(figsize=(9,4))
    plt.plot(trues, label='Actual')
    plt.plot(preds, '--', label='Predicted')
    # Report the number of points actually plotted (10 unless the test set is shorter).
    plt.title(f'{label}: Actual vs Pred ({pts})')
    plt.legend(); plt.tight_layout(); plt.show()

    print(f'{label}: RMSE={rmse:.2f}, MAPE={mape:.2f}%\n')
