In [None]:
import polars as pl
import numpy as np                          # Numerical computing library
from datetime import datetime, timedelta    # Date and time operations
from tqdm import tqdm                       # Progress bar
import torch                                # PyTorch framework
import torch.nn as nn                       # Neural network modules
import research                             # Model building and training utilities
import binance                              

##################################################
# Configuration
##################################################
# Polars display options used when frames are printed in this notebook.
pl.Config.set_tbl_width_chars(200)
pl.Config.set_fmt_str_lengths(100)
pl.Config.set_tbl_cols(-1)

# Reproducibility: the model weights are randomly initialised on every retrain,
# so fix the RNG seeds up front to make Restart & Run All give the same numbers.
# NOTE(review): if `research` draws randomness from another source (e.g. the
# stdlib `random` module), seed that as well — not visible from this notebook.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# Market / sampling setup
sym = 'BTCUSDT'
time_interval = '12h'
max_lags = 4
forecast_horizon = 1

# Model inputs and target.
# NOTE: only lags 1-3 are used as features even though max_lags = 4 lags are computed.
features = ['close_log_return_lag_1','close_log_return_lag_2','close_log_return_lag_3']
target = 'close_log_return'
maker_fee = binance.MAKER_FEE
taker_fee = binance.TAKER_FEE

# Validation period boundaries
validation_start_date = datetime(2024, 10, 1, 0, 0)
validation_end_date = datetime(2025, 10, 1, 0, 0)


In [5]:
class LinearModel(nn.Module):
    """Linear regressor: a single affine layer from `input_features` inputs to one output."""

    def __init__(self, input_features):
        """Create the layer; `input_features` is the number of input columns."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_features, out_features=1)

    def forward(self, x):
        """Apply the affine layer to `x` and return its output."""
        prediction = self.linear(x)
        return prediction

In [None]:
# Sliding window parameters
training_window_hours = 365 * 24  # Training window length: 1 year, expressed in hours (bars are `time_interval` wide)
step_size_hours = 12  # Retrain every 12 hours (must match time_interval)

# Load all data once (training window + validation period)
all_data_start = validation_start_date - timedelta(hours=training_window_hours)
# all_data_end = validation_end_date  # Full validation period # For testing, replace with the next line
# Quick-test horizon: only the first 10 steps of the validation period.
# Expressed via step_size_hours so it stays in sync if the step changes.
all_data_end = validation_start_date + timedelta(hours=step_size_hours * 10)

# Stage 1: raw OHLC bars for the whole span
ts_raw = research.load_ohlc_timeseries_range(
    sym, 
    time_interval, 
    start_date=all_data_start, 
    end_date=all_data_end
)
# Stage 2: add the log-return target and its lagged features
ts_features = research.add_log_return_features(ts_raw, 'close', forecast_horizon, max_no_lags=max_lags)
# Stage 3: drop rows containing nulls (e.g. rows without a full set of lags)
ts_all = ts_features.drop_nulls()

# Fail early with a clear message instead of deep inside the training loop.
assert len(ts_all) > 0, "No usable rows loaded; check the date range and data source"

print(f"Loaded {len(ts_all)} rows from {all_data_start} to {all_data_end}")


Loading BTCUSDT: 100%|██████████| 371/371 [00:20<00:00, 18.18day/s]

Loaded 737 rows from 2023-10-02 00:00:00 to 2024-10-06 00:00:00





In [None]:
# Accumulators for the walk-forward results (one entry per validated step)
predictions = []
actuals = []
timestamps = []

# Iterate through validation period with sliding window
current_date = validation_start_date
iteration_count = 0

# Calculate total iterations and create progress bar.
# The loop runs while current_date < all_data_end, i.e. ceil(span / step) times,
# so use ceiling division; plain truncation under-counts when the span is not
# an exact multiple of the step.
validation_span = all_data_end - validation_start_date
step_delta = timedelta(hours=step_size_hours)
total_iterations = max(0, -(-validation_span // step_delta))  # ceiling division on timedeltas
pbar = tqdm(total=total_iterations, desc="Training & Validating", unit="iteration")

def retrain_model(ts_train, no_epochs=5000, test_size=0.01):
    """Fit a fresh LinearModel on one training window and return it.

    Args:
        ts_train: Training rows for the current window; must contain the
            `features` and `target` columns configured for this notebook.
        no_epochs: Number of epochs forwarded to `research.learn_model_trades`.
        test_size: Hold-out fraction forwarded to `research.learn_model_trades`.
            Kept minimal by default because validation is done separately by
            the walk-forward loop.

    Returns:
        The LinearModel instance that was passed to the trainer.

    Raises:
        ValueError: If `ts_train` has no rows.
    """
    if len(ts_train) == 0:
        raise ValueError("retrain_model: training window is empty; check the loaded date range")

    model = LinearModel(len(features))
    # The trainer's return value is ignored; it is expected to update `model`
    # in place (NOTE(review): confirm against research.learn_model_trades).
    research.learn_model_trades(
        ts_train, 
        features, 
        target, 
        model, 
        test_size=test_size,
        loss=nn.L1Loss(),
        no_epochs=no_epochs,
        log=False
    )
    return model


# Walk-forward validation: at each step, retrain on the window that ends just
# before current_date, then predict the row stamped current_date.
window_delta = timedelta(hours=training_window_hours)  # loop-invariant
advance_delta = timedelta(hours=step_size_hours)       # loop-invariant

while current_date < all_data_end:
    iteration_count += 1

    # Row to predict, taken from the pre-loaded data. Look it up BEFORE training
    # so a missing bar does not cost a full retrain and then crash on .item().
    ts_point = ts_all.filter(pl.col('datetime') == current_date)

    if len(ts_point) == 1:
        # Define training sliding window: [train_start, train_end), so the row
        # being predicted is never part of the training data.
        train_start = current_date - window_delta
        train_end = current_date

        # Slice training data from pre-loaded timeseries
        ts_train = ts_all.filter(
            (pl.col('datetime') >= train_start) & 
            (pl.col('datetime') < train_end)
        )

        model = retrain_model(ts_train)

        X_pred = torch.tensor(ts_point[features].to_numpy(), dtype=torch.float32)

        model.eval()
        with torch.no_grad():
            y_hat = model(X_pred)

        # These we will use to build up the dataframe of results
        predictions.append(y_hat.item())
        actuals.append(ts_point[target].item())
        timestamps.append(current_date)
    else:
        # Missing (0 rows) or duplicated (>1 rows) bar: .item() would raise, so
        # report it and skip this step instead of aborting the whole run.
        tqdm.write(f"Skipping {current_date}: expected 1 row in ts_all, found {len(ts_point)}")

    # Move window forward
    current_date += advance_delta

    # Update progress bar (the displayed date is the NEXT step's date)
    pbar.update(1)
    pbar.set_postfix({
        'date': current_date.strftime('%Y-%m-%d'),
        'predictions': len(predictions)
    })

pbar.close()

# Summary line, so the cell's code matches its recorded output and
# iteration_count is actually reported.
print(f"\n✓ Validation complete! Total iterations: {iteration_count}, Predictions: {len(predictions)}")

Training & Validating: 100%|██████████| 10/10 [01:04<00:00,  6.45s/iteration, date=2024-10-06, predictions=10]


✓ Validation complete! Total iterations: 10, Predictions: 10





In [None]:
# Build the per-step results table and derive the trade metrics from it.
# Reusable expressions:
#   direction    - sign of the prediction, used as the trading signal
#   trade_return - realised log return of trading in that direction
direction = pl.col('y_hat').sign()
trade_return = direction * pl.col('y')

trades = pl.DataFrame({
    'timestamp': timestamps,
    'y': actuals,
    'y_hat': predictions,
})
trades = trades.with_columns(
    # A trade is "won" when predicted and realised returns share the same sign
    (direction == pl.col('y').sign()).alias('is_won'),
    direction.alias('signal'),
    trade_return.alias('trade_log_return'),
    # Running sum of the per-trade log returns
    trade_return.cum_sum().alias('equity_curve'),
)

# Add transaction fees
validation_results = research.add_tx_fees_log(trades, maker_fee, taker_fee)

In [9]:
# Plot the 'equity_curve' column of validation_results.
# NOTE(review): 'equity_curve' is computed before add_tx_fees_log is applied;
# whether the plotted curve reflects fees depends on that helper — confirm.
research.plot_column(validation_results, 'equity_curve', title='Sliding Window Validation')