# Research Notebook
Runs the same pipeline as `main.py` (load → features → labels → split → train → evaluate).

In [None]:
# Install requirements if needed: run the command in a terminal, or uncomment the
# line below (%pip installs into the environment of the running kernel).
# %pip install -r ../requirements.txt

import sys, os
# Make the project importable whether the kernel starts in the project root or
# one directory below it (the install hint above refers to ../requirements.txt).
# The import cell uses both `config` and `src.<module>`, so the directory that
# CONTAINS `src` has to be on sys.path as well as `src` itself.
_cwd = os.path.abspath(os.getcwd())
proj_root = next(
    (d for d in (_cwd, os.path.dirname(_cwd)) if os.path.isdir(os.path.join(d, 'src'))),
    _cwd,  # no `src` found nearby: fall back to the working directory, as before
)
src_path = os.path.join(proj_root, 'src')
# Insert the root first so that `src_path` ends up ahead of it on sys.path.
for _path in (proj_root, src_path):
    if _path not in sys.path:
        sys.path.insert(0, _path)

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seed both the stdlib and the NumPy global generators with the same value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
%matplotlib inline

In [None]:
# Import project pipeline pieces
from config import Config
from src.data_loader import load_data
from src.features import engineer_features
from src.labeling import add_labels
from src.splitter import time_split, get_feature_target_matrices
from src.modeling import train_model, evaluate_on_test
from src.visualization_trades import plot_test_trades
from src.equity_curve import build_equity_curve, plot_equity_curve

print('imports OK')

In [None]:
# Load data (will generate a small synthetic CSV if missing)
# `cfg` and `df` are notebook-level names: later cells read `cfg.sl`, `cfg.tp`,
# `cfg.horizon`, `cfg.train_frac`, `cfg.val_frac` and pass `df` to
# `engineer_features`.
cfg = Config()
df = load_data(cfg)
print(f'Loaded {len(df)} rows')
# Last expression of the cell, so the notebook renders the result of df.head().
df.head()

In [None]:
# Build the feature frame from the loaded data, then attach labels using the
# stop-loss / take-profit / horizon values held in `cfg`.
df_feat = engineer_features(df)
df_lab = add_labels(
    df_feat,
    cfg.sl,
    cfg.tp,
    cfg.horizon,
)

# Show how many rows fall into each label value.
print('Label distribution:')
label_counts = df_lab['label'].value_counts()
print(label_counts)

df_lab.head()

In [None]:
# Split the labelled frame with the configured fractions and report the sizes.
df_train, df_val, df_test = time_split(df_lab, cfg.train_frac, cfg.val_frac)
print(f'Split sizes: train={len(df_train)}, val={len(df_val)}, test={len(df_test)}')

# One (features, target) pair per partition, produced in train -> val -> test order.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    get_feature_target_matrices(part) for part in (df_train, df_val, df_test)
)

# Fit using the train and validation matrices, then predict/evaluate on test.
clf = train_model(X_train, y_train, X_val, y_val, cfg)
y_pred = evaluate_on_test(clf, X_test, y_test, cfg)

print('Done training and evaluation')

In [None]:
# Visualise the test-set predictions, then the equity curve derived from them.
plot_test_trades(
    df_test=df_test,
    y_test=y_test,
    y_pred=y_pred,
    n=8000,
    start_idx=None,
    title='TEST set trades (raw predictions)',
)

eq_df = build_equity_curve(
    df_test=df_test,
    y_test=y_test,
    y_pred=y_pred,
    start_capital=10000,
    risk_mode='percent',
    risk_value=0.01,
    R=cfg.tp/cfg.sl,  # take-profit to stop-loss ratio from the config
    cost_per_trade_R=0.02,
    ignore_true_zero=True,
)
plot_equity_curve(eq_df, title='Test Set Equity Curve')

In [None]:
# Marimo (optional)
# Best-effort use of the optional `marimo` package: any exception raised inside
# the try block (including a failed import) is deliberately ignored, so this
# cell never stops a full notebook run.
try:
    import marimo as m
    if hasattr(m, "md"):
        # The value returned by m.md(...) is neither assigned nor displayed here.
        m.md("**Marimo** loaded.")
except Exception:
    pass

## Controls
Adjust parameters and re-run the pipeline.

In [None]:
# Interactive runner: uses ipywidgets when available, falls back to manual call.
from IPython.display import display, clear_output

# marimo is optional: `m` is the module when it imports, otherwise None, and
# `marimo_available` mirrors that.
try:
    import marimo as m
except Exception:
    m = None
marimo_available = m is not None

# ipywidgets is optional too; `have_widgets` records whether the import worked.
try:
    import ipywidgets as widgets
except Exception:
    have_widgets = False
else:
    have_widgets = True


def run_pipeline_with_cfg(cfg):
    """Run load -> features -> labels -> split -> train -> evaluate for `cfg`.

    Args:
        cfg: configuration object. This function reads `cfg.sl`, `cfg.tp`,
            `cfg.horizon`, `cfg.train_frac` and `cfg.val_frac` (plus
            `cfg.min_prob_trade` when marimo is available) and passes `cfg`
            itself to `load_data`, `train_model` and `evaluate_on_test`.

    Returns:
        tuple: `(clf, y_pred)` - the object returned by `train_model` and the
        value returned by `evaluate_on_test`.

    Errors raised while plotting the trades or while building/plotting the
    equity curve do not abort the run: they are reported on stdout together
    with the exception type and message. Errors in the earlier pipeline steps
    propagate to the caller.
    """
    df = load_data(cfg)
    df_feat = engineer_features(df)
    df_lab = add_labels(df_feat, cfg.sl, cfg.tp, cfg.horizon)
    df_train, df_val, df_test = time_split(df_lab, cfg.train_frac, cfg.val_frac)
    X_train, y_train = get_feature_target_matrices(df_train)
    X_val, y_val = get_feature_target_matrices(df_val)
    X_test, y_test = get_feature_target_matrices(df_test)
    clf = train_model(X_train, y_train, X_val, y_val, cfg)
    y_pred = evaluate_on_test(clf, X_test, y_test, cfg)
    try:
        plot_test_trades(df_test=df_test, y_test=y_test, y_pred=y_pred, n=8000, start_idx=None, title='TEST set trades (raw predictions)')
    except Exception as exc:
        # Plotting stays best-effort, but the cause is shown instead of being discarded.
        print(f'Could not plot test trades in this environment. ({type(exc).__name__}: {exc})')
    try:
        # cfg.tp / cfg.sl raises ZeroDivisionError when sl == 0; the handler below
        # reports that rather than hiding it behind a generic message.
        eq_df = build_equity_curve(df_test=df_test, y_test=y_test, y_pred=y_pred, start_capital=10000, risk_mode='percent', risk_value=0.01, R=cfg.tp/cfg.sl, cost_per_trade_R=0.02, ignore_true_zero=True)
        plot_equity_curve(eq_df, title='Test Set Equity Curve')
    except Exception as exc:
        print(f'Could not build/plot equity curve in this environment. ({type(exc).__name__}: {exc})')
    if marimo_available and hasattr(m, 'md'):
        # NOTE(review): the value returned by m.md(...) is not used or displayed here.
        m.md(f'**Ran pipeline** — SL={cfg.sl}, TP={cfg.tp}, horizon={cfg.horizon}, min_prob={cfg.min_prob_trade}')
    return clf, y_pred

if not have_widgets:
    print('ipywidgets not available. To get interactive controls install ipywidgets or call run_pipeline_with_cfg(cfg) manually.')
else:
    # One input widget per tunable setting, initialised from the current `cfg`.
    sl_w = widgets.FloatText(value=cfg.sl, description='SL')
    tp_w = widgets.FloatText(value=cfg.tp, description='TP')
    horizon_w = widgets.IntText(value=cfg.horizon, description='Horizon')
    min_prob_w = widgets.FloatSlider(
        value=cfg.min_prob_trade, min=0.0, max=1.0, step=0.01, description='min_prob'
    )
    n_estim_w = widgets.IntSlider(
        value=cfg.n_estimators, min=10, max=1000, step=10, description='n_estim'
    )
    # A falsy max_depth in the config (e.g. None or 0) is shown as 10.
    max_depth_w = widgets.IntSlider(
        value=cfg.max_depth or 10, min=1, max=50, step=1, description='max_depth'
    )
    run_btn = widgets.Button(description='Run Pipeline')
    out = widgets.Output()

    def on_run(_):
        """Button callback: build a fresh Config from the widget values and run the pipeline."""
        with out:
            clear_output()
            new_cfg = Config()
            overrides = (
                ('sl', float(sl_w.value)),
                ('tp', float(tp_w.value)),
                ('horizon', int(horizon_w.value)),
                ('min_prob_trade', float(min_prob_w.value)),
                ('n_estimators', int(n_estim_w.value)),
                ('max_depth', int(max_depth_w.value)),
            )
            for attr_name, attr_value in overrides:
                setattr(new_cfg, attr_name, attr_value)
            run_pipeline_with_cfg(new_cfg)

    run_btn.on_click(on_run)

    # Layout: SL/TP/horizon row, probability slider, model-size row, button, output area.
    top_row = widgets.HBox([sl_w, tp_w, horizon_w])
    model_row = widgets.HBox([n_estim_w, max_depth_w])
    display(widgets.VBox([top_row, min_prob_w, model_row, run_btn, out]))