# Signal Research Template

Workflow: **Load data → Define signal → Backtest → Evaluate → Sweep → Track**

Edit the signal logic in the "Define Signal" cell and re-run from there.

In [6]:
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath('')), 'src'))
# If DataPortal is in the parent repo, add it too:
# sys.path.insert(0, '/Users/russellfloyd/Dropbox/NRT/nrt_dev/trend_crypto/src')

import polars as pl
from datetime import datetime, timezone

## 1. Load Data

Option A: From DuckDB via DataPortal  
Option B: From a parquet file  
Option C: Synthetic bars for prototyping

In [9]:
# --- Option A: DuckDB (uncomment) ---
# from common.config import DataConfig
# from data.portal import DataPortal
# data_cfg = DataConfig(
#     db_path="/Users/russellfloyd/Dropbox/NRT/nrt_dev/data/market.duckdb",
#     table="bars_1d_clean",
#     symbol="BTC-USD",
#     start=datetime(2015, 7, 20, tzinfo=timezone.utc),
#     end=datetime(2025, 11, 30, tzinfo=timezone.utc),
#     timeframe="1d",
# )
# portal = DataPortal(data_cfg, strict_validation=False)
# bars = portal.load_bars()

# --- Option B: Parquet ---
# bars = pl.read_parquet("path/to/bars.parquet")

# --- Option C: Synthetic (works without DuckDB) ---
from datetime import timedelta
import math

N = 2000
start = datetime(2018, 1, 1, tzinfo=timezone.utc)
price = 10000.0
rows = []
for i in range(N):
    ts = start + timedelta(days=i)
    # Trending + mean-reverting synthetic price
    trend = 0.0003 * i
    cycle = 500 * math.sin(2 * math.pi * i / 365)
    noise = 100 * math.sin(i * 0.7) * math.cos(i * 0.3)
    c = price + trend * price + cycle + noise
    o = c - 50 * math.sin(i * 0.5)
    rows.append({"ts": ts, "symbol": "BTC-USD", "open": o,
                 "high": max(o, c) + abs(noise) * 0.3,
                 "low": min(o, c) - abs(noise) * 0.3,
                 "close": c, "volume": 1e6 + i * 100})

bars = pl.DataFrame(rows)
print(f"Loaded {bars.height} bars: {bars['ts'].min()} to {bars['ts'].max()}")
bars.head(3)

Loaded 2000 bars: 2018-01-01 00:00:00+00:00 to 2023-06-23 00:00:00+00:00


ts,symbol,open,high,low,close,volume
"datetime[μs, UTC]",str,f64,f64,f64,f64,f64
2018-01-01 00:00:00 UTC,"""BTC-USD""",10000.0,10000.0,10000.0,10000.0,1000000.0
2018-01-02 00:00:00 UTC,"""BTC-USD""",10049.179868,10091.614484,10030.716528,10073.151144,1000100.0
2018-01-03 00:00:00 UTC,"""BTC-USD""",10062.469932,10128.943284,10038.07013,10104.543482,1000200.0


## 2. Define Signal

This is the cell you edit and re-run. Build any signal you want using Polars expressions on `bars`.

In [11]:
# --- Example: MA spread signal ---
FAST, SLOW = 5, 40

signal_df = bars.with_columns([
    pl.col("close").rolling_mean(FAST).alias("ma_fast"),
    pl.col("close").rolling_mean(SLOW).alias("ma_slow"),
]).with_columns(
    (pl.col("ma_fast") - pl.col("ma_slow")).alias("signal_raw"),
).with_columns(
    # Normalize: signal > 0 means long
    pl.when(pl.col("signal_raw") > 0).then(1.0).otherwise(0.0).alias("signal"),
    # Forward return for IC evaluation
    pl.col("close").pct_change().shift(-1).alias("fwd_ret"),
).drop_nulls(subset=["signal", "fwd_ret"])

print(f"Signal: {signal_df.height} observations")
print(f"Long fraction: {signal_df.filter(pl.col('signal') > 0).height / signal_df.height:.1%}")
signal_df.select(["ts", "close", "ma_fast", "ma_slow", "signal", "fwd_ret"]).tail(5)

Signal: 1999 observations
Long fraction: 60.9%


ts,close,ma_fast,ma_slow,signal,fwd_ret
"datetime[μs, UTC]",f64,f64,f64,f64,f64
2023-06-18 00:00:00 UTC,16118.843257,16071.734873,16183.676109,0.0,-0.001845
2023-06-19 00:00:00 UTC,16089.105943,16084.92235,16178.139375,0.0,-0.001615
2023-06-20 00:00:00 UTC,16063.127658,16091.884121,16172.33969,0.0,0.000709
2023-06-21 00:00:00 UTC,16074.519941,16092.125319,16168.262132,0.0,0.002828
2023-06-22 00:00:00 UTC,16119.97344,16093.114048,16166.901086,0.0,0.002507


## 3. Evaluate Signal Quality (IC / Hit Rate)

In [13]:
from research.alpha_pipeline import evaluate_alpha

result = evaluate_alpha(
    name=f"ma_spread_{FAST}_{SLOW}",
    signal=signal_df["signal"],
    forward_returns=signal_df["fwd_ret"],
)
print(f"IC:        {result.ic_mean:.4f}")
print(f"IC IR:     {result.ic_ir:.4f}")
print(f"Hit rate:  {result.hit_rate:.1%}")
print(f"Turnover:  {result.turnover:.4f}")
print(f"Sharpe:    {result.sharpe:.2f}")

IC:        0.0570
IC IR:     2.5507
Hit rate:  32.7%
Turnover:  0.0270
Sharpe:    9.87


## 4. Quick Backtest

In [None]:
from research.api import quick_backtest

equity_df, summary = quick_backtest(
    bars,
    strategy_mode="ma_crossover_long_only",
    fast=FAST, slow=SLOW,
    fee_bps=10, slippage_bps=5,
)

print(f"Total return:  {summary['total_return']:.4f}")
print(f"Sharpe:        {summary['sharpe']:.4f}")
print(f"Max drawdown:  {summary['max_drawdown']:.4f}")
print(f"Trade count:   {summary.get('trade_count', 'N/A')}")

## 5. Parameter Sweep

In [None]:
from research.api import quick_sweep

sweep_results = quick_sweep(
    bars,
    param_grid={
        "fast": [3, 5, 8, 12, 20],
        "slow": [20, 40, 60, 100],
    },
    fee_bps=10, slippage_bps=5,
)

# Show top results by Sharpe
sweep_results.sort("sharpe", descending=True).head(10)

## 6. Walk-Forward Validation

In [None]:
from research.optimizer import ParameterOptimizer

def evaluate_fn(bars_slice, params):
    _, s = quick_backtest(
        bars_slice,
        fast=params["fast"], slow=params["slow"],
        fee_bps=10, slippage_bps=5,
    )
    return s["sharpe"]

opt = ParameterOptimizer(
    bars=bars,
    evaluate_fn=evaluate_fn,
    param_grid={"fast": [3, 5, 8, 12], "slow": [20, 40, 60]},
    n_splits=5,
    train_frac=0.7,
    gap=20,  # purge gap in bars
)
opt_result = opt.optimize()

print(f"Best params:      {opt_result.best_params}")
print(f"Best train metric: {opt_result.best_metric:.4f}")
print(f"Deflated Sharpe:   {opt_result.deflated_sharpe:.4f}")
print(f"Trials tested:     {opt_result.n_trials}")

# Show all results
pl.DataFrame(opt_result.all_results).sort("avg_test_metric", descending=True)

## 7. Track Experiment

In [None]:
from research.experiment import ExperimentTracker

tracker = ExperimentTracker("experiments/signal_research")
run = tracker.start_run(
    run_name=f"ma_{FAST}_{SLOW}",
    params={"fast": FAST, "slow": SLOW, "fee_bps": 10},
)
tracker.log_metrics({
    "sharpe": float(summary["sharpe"]),
    "total_return": float(summary["total_return"]),
    "max_drawdown": float(summary["max_drawdown"]),
    "ic": result.ic_mean,
    "hit_rate": result.hit_rate,
})
tracker.finish_run()
print(f"Logged run: {run.run_id}")

# Leaderboard
print("\nLeaderboard (top 5 by Sharpe):")
for r in tracker.leaderboard(metric="sharpe", top_n=5):
    print(f"  {r.run_name:20s}  sharpe={r.metrics.get('sharpe', 0):.4f}  "
          f"ret={r.metrics.get('total_return', 0):.4f}  "
          f"dd={r.metrics.get('max_drawdown', 0):.4f}")

## 8. Generate HTML Tearsheet

In [None]:
from monitoring.dashboard import generate_html_tearsheet

path = generate_html_tearsheet(
    equity_df, summary,
    output_path=f"artifacts/tearsheets/ma_{FAST}_{SLOW}.html",
    title=f"MA({FAST},{SLOW}) Backtest",
)
print(f"Tearsheet: {path}")