# OHLCV Loader & Fine-Tune Integration Test

This notebook verifies the new `OHLCVLoader` (Dataset) and `OHLCVMoiraiModule` integration.

Steps:
1. Initialize `OHLCVLoader` pointing to real 5m data (Stock 'A').
2. Verify data packing (7 channels).
3. Initialize `OHLCVMoiraiModule` with custom scaler.
4. Run a forward pass to verify normalization logic.

In [9]:
from pathlib import Path
import numpy as np
import torch
from torch.utils.data import DataLoader

from uni2ts.data.ohlcvloader import OHLCVLoader
from uni2ts.model.moirai.custom_module import OHLCVMoiraiModule
from uni2ts.distribution import StudentTOutput

## 1. Initialize Loader

In [10]:
# Path to your real data
data_path = Path("/opt/uni2ts/data/processed_equities/5m")
window_size = 512
stride = 256

print(f"Loading data from: {data_path}")

# Initialize loader
loader = OHLCVLoader(
    data_path=data_path,
    window_size=window_size,
    stride=stride,
    max_stocks=1, # Load just the first one to be quick
    freq='5min',
    verbose=True
)
loader[0]

Loading data from: /opt/uni2ts/data/processed_equities/5m

OHLCVLoader Initialized
  Data path: /opt/uni2ts/data/processed_equities/5m
  Window size: 512
  Stride: 256
  Frequency: 5min
  Timezone: America/New_York
Indexing windows for 1 files...


{'target': tensor([[5.6331e+01, 5.6331e+01, 5.6331e+01,  ..., 1.4651e+05, 0.0000e+00,
          0.0000e+00],
         [5.6331e+01, 5.6465e+01, 5.5794e+01,  ..., 9.8559e+04, 5.0000e+00,
          0.0000e+00],
         [5.6241e+01, 5.6241e+01, 5.5302e+01,  ..., 1.0667e+05, 1.0000e+01,
          0.0000e+00],
         ...,
         [4.6942e+01, 4.6942e+01, 4.6942e+01,  ..., 0.0000e+00,        nan,
          2.0000e+00],
         [4.6942e+01, 4.6942e+01, 4.6942e+01,  ..., 0.0000e+00,        nan,
          2.0000e+00],
         [4.6942e+01, 4.6942e+01, 4.6942e+01,  ..., 0.0000e+00,        nan,
          2.0000e+00]]),
 'observed_mask': tensor([[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         ...,
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]]),
 'sample_id': tensor([0, 0, 0

In [None]:
# Path to your real data
data_path = Path("/opt/uni2ts/data/processed_equities/5m")
window_size = 512
stride = 256

print(f"Loading data from: {data_path}")

# Initialize loader
loader = OHLCVLoader(
    data_path=data_path,
    window_size=window_size,
    stride=stride,
    max_stocks=1, # Load just the first one to be quick
    freq='5min',
    verbose=True
)

print(f"\nLoader created with {len(loader)} samples")

# Get one sample
sample = loader[0]
target = sample['target']
variate_id = sample['variate_id']
observed_mask = sample['observed_mask']

print("\nSample Shapes:")
print(f"Target (Packed): {target.shape} (Expected: [Time, 7])")
print(f"Variate ID: {variate_id.shape}")
print(f"Observed Mask: {observed_mask.shape}")

assert target.shape == (window_size, 7), f"Expected shape ({window_size}, 7), got {target.shape}"
assert (variate_id[0] == torch.arange(7)).all(), "Variate IDs should be 0..6"

# Check for masked values (handling of NaNs)
has_masked_values = (~observed_mask).any().item()
print(f"Has masked values: {has_masked_values}")

if has_masked_values:
     print("Correctly identifying missing/NaN values via mask.")
else:
     print("Warning: No masked values found. Check if data contains gaps/NaNs.")

print("\nLoader Test Passed! Features packed correctly.")

## 2. Test OHLCVMoiraiModule Integration

In [None]:
# Initialize Module
module = OHLCVMoiraiModule(
    distr_output=StudentTOutput(output_domain='real'),
    d_model=64,
    num_layers=2,
    patch_sizes=[16],
    max_seq_len=window_size,
    attn_dropout_p=0.1,
    dropout_p=0.1,
    scaling=True
)

print("Module initialized. Scaler type:", type(module.scaler))
print("Scaler config:")
print(f"  Open Index: {module.scaler.open_idx}")
print(f"  High Index: {module.scaler.high_idx}")
print(f"  Low Index: {module.scaler.low_idx}")
print(f"  Volume Index: {module.scaler.volume_idx}")
print(f"  Minutes Index: {module.scaler.minutes_idx}")
print(f"  Dow Index: {module.scaler.day_of_week_idx}")

# Prepare batch
batch_size = 4
dataloader = DataLoader(loader, batch_size=batch_size)
batch = next(iter(dataloader))

# Forward pass
# Note: MoiraiModule expects inputs, but we just want to test scaler behavior first
# We can access the scaler directly
loc, scale = module.scaler(
    batch['target'],
    batch['observed_mask'],
    batch['sample_id'],
    batch['variate_id']
)

print("\nScaler Output Stats:")
print(f"Loc shape: {loc.shape}")
print(f"Scale shape: {scale.shape}")

# Verify OHL Grouping (Indices 0, 1, 2 should share stats per window)
loc_ohl = loc[:, :, :3] # Batch, Time, 0-2

print("\nChecking OHL Grouping:")
diff_oh = (loc[:, :, 0] - loc[:, :, 1]).abs().max().item()
diff_ol = (loc[:, :, 0] - loc[:, :, 2]).abs().max().item()
print(f"Max difference between Open and High loc: {diff_oh}")
print(f"Max difference between Open and Low loc: {diff_ol}")

assert diff_oh < 1e-5, "Open and High should share normalization stats!"
assert diff_ol < 1e-5, "Open and Low should share normalization stats!"

print("\nSuccess! Module correctly uses OHLCVPackedScaler with packed data.")