In [27]:
import torch
import numpy as np
from einops import rearrange, repeat
from uni2ts.module.packed_scaler import GroupedPackedStdScaler, PackedStdScaler, PackedMidRangeScaler

# Pin torch's global RNG so any random draws are repeatable between runs
SEED = 42
torch.manual_seed(SEED)


<torch._C.Generator at 0x7dc7a80d7250>

In [28]:
# ============================================
# STEP 1: Create sample OHLCV data
# ============================================
# One row per timestep; columns are [Open, High, Low, Close, Volume],
# giving a tensor of shape [time_steps, 5].
ohlcv_rows = [
    [100.0, 105.0, 99.0, 104.0, 1000000],
    [104.0, 108.0, 103.0, 107.0, 1200000],
    [107.0, 110.0, 106.0, 109.0, 900000],
    [109.0, 112.0, 108.0, 111.0, 1100000],
    [111.0, 114.0, 110.0, 113.0, 950000],
]
ohlcv_data = torch.tensor(ohlcv_rows, dtype=torch.float32)

# Split column-wise for display: the first four columns are prices,
# the last column is volume (kept 2-D via the 4: slice).
price_columns = ohlcv_data[:, :4]
volume_column = ohlcv_data[:, 4:]

print("Original OHLCV data shape:", ohlcv_data.shape)  # [5, 5]
print("OHLC (prices):\n", price_columns)
print("Volume:\n", volume_column)



Original OHLCV data shape: torch.Size([5, 5])
OHLC (prices):
 tensor([[100., 105.,  99., 104.],
        [104., 108., 103., 107.],
        [107., 110., 106., 109.],
        [109., 112., 108., 111.],
        [111., 114., 110., 113.]])
Volume:
 tensor([[1000000.],
        [1200000.],
        [ 900000.],
        [1100000.],
        [ 950000.]])


In [None]:

# ============================================
# STEP 2: Reshape to packed format
# ============================================
# MOIRAI consumes tokens laid out as [batch, (dim * time), patch_size].
# This walkthrough does no real patching, so the patch axis has length 1.

time_steps, num_variates = ohlcv_data.shape  # 5 timesteps, 5 variates (OHLCV)
patch_size = 1

# [time, dim] -> [time, dim, 1] -> [(dim time), 1]
# In "(dim time)" the variate axis is the outer one, so each variate's
# timesteps are stored contiguously, one variate after another
# (all Open values, then all High values, and so on).
target = rearrange(
    ohlcv_data.unsqueeze(-1),
    "time dim patch -> (dim time) patch",
)

print("\nPacked target shape:", target.shape)  # [25, 1]
print("First 5 values (Open for all timesteps):", target[0:5])
print("Next 5 values (High for all timesteps):", target[5:10])



Packed target shape: torch.Size([25, 1])
First 5 values (Open for all timesteps): tensor([[100.],
        [104.],
        [107.],
        [109.],
        [111.]])
Next 5 values (High for all timesteps): tensor([[105.],
        [108.],
        [110.],
        [112.],
        [114.]])


In [33]:

# ============================================
# STEP 3: Create observed_mask
# ============================================
# Nothing is missing in this example, so the mask is True everywhere
# and has the same shape as the packed target.
observed_mask = torch.ones_like(target, dtype=torch.bool)

# ============================================
# STEP 4: Create sample_id
# ============================================
# One id per packed token, naming the sample that token belongs to.
# There is a single sample here, so every token gets the same id.
# The id is 1 rather than 0 -- presumably 0 is reserved (e.g. for padding);
# TODO confirm against the scaler implementation.
num_tokens = target.shape[0]
sample_id = torch.ones(num_tokens, dtype=torch.long)

# ============================================
# STEP 5: Create variate_id
# ============================================
# One id per packed token, naming the variate (0-4 for OHLCV) it belongs to.
# The pattern mirrors the "(dim time)" packing of the target:
# [0,0,0,0,0, 1,1,1,1,1, 2,2,2,2,2, 3,3,3,3,3, 4,4,4,4,4]
variate_index = torch.arange(num_variates)
variate_id = repeat(variate_index, "dim -> (dim time)", time=time_steps)

print("\nVariate IDs:", variate_id)
print("Unique variate IDs:", variate_id.unique())



Variate IDs: tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4,
        4])
Unique variate IDs: tensor([0, 1, 2, 3, 4])


In [None]:

# ============================================
# STEP 6: Create group mapping
# ============================================
# group_mapping[v] is the group of variate v:
#   variates 0-3 (Open, High, Low, Close) -> group 0
#   variate 4 (Volume)                    -> group 1
group_mapping = torch.tensor([0, 0, 0, 0, 1])

print("\nGroup mapping:", group_mapping)
for variate_idx, variate_name in enumerate(("Open", "High", "Low", "Close", "Volume")):
    print(f"Variate {variate_idx} ({variate_name}) -> Group", group_mapping[variate_idx])

# ============================================
# STEP 7: Initialize and run the scaler
# ============================================
scaler = GroupedPackedStdScaler(
    group_mapping=group_mapping,
    correction=1,  # Bessel's correction for unbiased std
    minimum_scale=1e-5,
)

# Prepend a batch axis of size 1 to every packed input:
# [seq_len, ...] -> [1, seq_len, ...]
target_batch, observed_mask_batch, sample_id_batch, variate_id_batch = (
    packed.unsqueeze(0) for packed in (target, observed_mask, sample_id, variate_id)
)

print("\nBatch shapes:")
print("  target:", target_batch.shape)  # [1, 25, 1]
print("  observed_mask:", observed_mask_batch.shape)
print("  sample_id:", sample_id_batch.shape)
print("  variate_id:", variate_id_batch.shape)


Group mapping: tensor([0, 0, 0, 0, 1])
Variate 0 (Open) -> Group tensor(0)
Variate 1 (High) -> Group tensor(0)
Variate 2 (Low) -> Group tensor(0)
Variate 3 (Close) -> Group tensor(0)
Variate 4 (Volume) -> Group tensor(1)

Batch shapes:
  target: torch.Size([1, 25, 1])
  observed_mask: torch.Size([1, 25, 1])
  sample_id: torch.Size([1, 25])
  variate_id: torch.Size([1, 25])


In [38]:
# Run the scaler on the batched, packed inputs
scaler_inputs = dict(
    target=target_batch,
    observed_mask=observed_mask_batch,
    sample_id=sample_id_batch,
    variate_id=variate_id_batch,
)
loc, scale = scaler(**scaler_inputs)

# ============================================
# STEP 8: Access and interpret output tensors
# ============================================
banner = "=" * 60
print("\n" + banner)
print("OUTPUT TENSORS")
print(banner)

# Both outputs come back with the same shape as target_batch:
# one loc value and one scale value per packed token.
print("\nLocation (mean) tensor shape:", loc.shape)  # [1, 25, 1]
print("Scale (std) tensor shape:", scale.shape)      # [1, 25, 1]




OUTPUT TENSORS

Location (mean) tensor shape: torch.Size([1, 25, 1])
Scale (std) tensor shape: torch.Size([1, 25, 1])


In [42]:
# Display the full loc tensor returned by the grouped scaler (one value per packed token)
loc

tensor([[[1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0750e+02],
         [1.0300e+06],
         [1.0300e+06],
         [1.0300e+06],
         [1.0300e+06],
         [1.0300e+06]]])

In [44]:
# Display variate_id again -- presumably to line the loc entries up with their variates
variate_id

tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4,
        4])

In [47]:


# Extract values for each variate
variate_names = ['Open', 'High', 'Low', 'Close', 'Volume']

# Packed position of the first token of every variate. loc/scale should be
# identical across all tokens of one variate, so the first token is
# representative. The position is looked up in variate_id instead of being
# hard-coded as i * 5: a fixed stride is only right for exactly 5 timesteps
# and would silently read the wrong variate for any longer series.
variate_start = [
    int(torch.where(variate_id == i)[0][0]) for i in range(num_variates)
]

print("\n--- Per-Variate Statistics ---")
for i in range(num_variates):
    # loc and scale for this variate, read at its first packed token
    variate_loc = loc[0, variate_start[i], 0].item()
    variate_scale = scale[0, variate_start[i], 0].item()

    # Original (unscaled) column for this variate
    variate_data = ohlcv_data[:, i]

    print(f"\nVariate {i} ({variate_names[i]}):")
    print(f"  Group: {group_mapping[i]}")
    print(f"  Mean (loc): {variate_loc:.4f}")
    print(f"  Std (scale): {variate_scale:.4f}")
    print(f"  Original data: {variate_data.tolist()}")

# Verify that OHLC share the same statistics (same group)
price_variates = range(4)  # Open, High, Low, Close
volume_variate = 4         # Volume

print("\n--- Group Statistics Verification ---")
ohlc_locs = [loc[0, variate_start[i], 0].item() for i in price_variates]
ohlc_scales = [scale[0, variate_start[i], 0].item() for i in price_variates]
print(f"OHLC means: {[f'{x:.4f}' for x in ohlc_locs]} (should all be equal)")
print(f"OHLC stds: {[f'{x:.4f}' for x in ohlc_scales]} (should all be equal)")

volume_loc = loc[0, variate_start[volume_variate], 0].item()
volume_scale = scale[0, variate_start[volume_variate], 0].item()
print(f"\nVolume mean: {volume_loc:.4f}")
print(f"Volume std: {volume_scale:.4f}")


--- Per-Variate Statistics ---

Variate 0 (Open):
  Group: 0
  Mean (loc): 107.5000
  Std (scale): 4.0846
  Original data: [100.0, 104.0, 107.0, 109.0, 111.0]

Variate 1 (High):
  Group: 0
  Mean (loc): 107.5000
  Std (scale): 4.0846
  Original data: [105.0, 108.0, 110.0, 112.0, 114.0]

Variate 2 (Low):
  Group: 0
  Mean (loc): 107.5000
  Std (scale): 4.0846
  Original data: [99.0, 103.0, 106.0, 108.0, 110.0]

Variate 3 (Close):
  Group: 0
  Mean (loc): 107.5000
  Std (scale): 4.0846
  Original data: [104.0, 107.0, 109.0, 111.0, 113.0]

Variate 4 (Volume):
  Group: 1
  Mean (loc): 1030000.0000
  Std (scale): 120415.9453
  Original data: [1000000.0, 1200000.0, 900000.0, 1100000.0, 950000.0]

--- Group Statistics Verification ---
OHLC means: ['107.5000', '107.5000', '107.5000', '107.5000'] (should all be equal)
OHLC stds: ['4.0846', '4.0846', '4.0846', '4.0846'] (should all be equal)

Volume mean: 1030000.0000
Volume std: 120415.9453


In [48]:

# ============================================
# STEP 9: Normalize the data
# ============================================
# Standardize every packed token with the loc/scale computed for it
normalized = (target_batch - loc) / scale

section_rule = "=" * 60
print("\n" + section_rule)
print("NORMALIZED DATA")
print(section_rule)

# Undo the packing for display:
# [1, (dim time), 1] -> [(dim time), 1] -> [time, dim, 1] -> [time, dim]
normalized_unbatched = normalized.squeeze(0)
normalized_reshaped = rearrange(
    normalized_unbatched,
    "(dim time) patch -> time dim patch",
    dim=num_variates,
    time=time_steps,
).squeeze(-1)

print("\nNormalized OHLCV data:\n", normalized_reshaped)

# Verify normalization: statistics are pooled per group, so the four price
# columns are checked together and the volume column on its own.
ohlc_normalized = normalized_reshaped[:, :4]
volume_normalized = normalized_reshaped[:, 4]

print("\n--- Verification ---")
print("OHLC normalized mean:", ohlc_normalized.mean().item())
print("OHLC normalized std:", ohlc_normalized.std().item())
print("Volume normalized mean:", volume_normalized.mean().item())
print("Volume normalized std:", volume_normalized.std().item())


NORMALIZED DATA

Normalized OHLCV data:
 tensor([[-1.8362, -0.6121, -2.0810, -0.8569, -0.2491],
        [-0.8569,  0.1224, -1.1017, -0.1224,  1.4118],
        [-0.1224,  0.6121, -0.3672,  0.3672, -1.0796],
        [ 0.3672,  1.1017,  0.1224,  0.8569,  0.5813],
        [ 0.8569,  1.5913,  0.6121,  1.3465, -0.6644]])

--- Verification ---
OHLC normalized mean: 4.470348535789981e-09
OHLC normalized std: 0.9999996423721313
Volume normalized mean: 0.0
Volume normalized std: 1.0


# Standard PackedStdScaler

In [49]:
import torch
from einops import rearrange, repeat
from uni2ts.module.packed_scaler import PackedStdScaler

# Use the same OHLCV data as above
print("\n" + "="*60)
print("STANDARD PACKEDSTDSCALER (Individual Normalization)")
print("="*60)

# Initialize standard scaler (normalizes each variate independently)
std_scaler = PackedStdScaler()

# Run the standard scaler on the same batched, packed inputs
std_loc, std_scale = std_scaler(target_batch, observed_mask_batch, sample_id_batch, variate_id_batch)

print("\nOutput shapes:")
print("  loc:", std_loc.shape)  # [1, 25, 1]
print("  scale:", std_scale.shape)  # [1, 25, 1]

variate_names = ['Open', 'High', 'Low', 'Close', 'Volume']

# Packed position of the first token of every variate, looked up in
# variate_id. A hard-coded i * 5 stride is only right for exactly 5 timesteps
# and would read the wrong variate for any longer series.
variate_start = [
    int(torch.where(variate_id == i)[0][0]) for i in range(num_variates)
]

# Extract and display per-variate statistics
print("\n--- Per-Variate Statistics (Individual Normalization) ---")
for i in range(num_variates):
    variate_loc = std_loc[0, variate_start[i], 0].item()
    variate_scale = std_scale[0, variate_start[i], 0].item()
    variate_data = ohlcv_data[:, i]

    print(f"\nVariate {i} ({variate_names[i]}):")
    print(f"  Mean (loc): {variate_loc:.4f}")
    print(f"  Std (scale): {variate_scale:.4f}")
    print(f"  Original data: {variate_data.tolist()}")

# Verify that each variate has its own statistics (one entry per variate)
print("\n--- Individual Statistics Verification ---")
std_locs = [std_loc[0, variate_start[i], 0].item() for i in range(num_variates)]
std_scales = [std_scale[0, variate_start[i], 0].item() for i in range(num_variates)]
print(f"All means: {[f'{x:.4f}' for x in std_locs]} (should be different)")
print(f"All stds: {[f'{x:.4f}' for x in std_scales]} (should be different)")

# Normalize with standard scaler
std_normalized = (target_batch - std_loc) / std_scale

# Reshape back to [time, dim] for viewing
std_normalized_reshaped = rearrange(
    std_normalized.squeeze(0),
    "(dim time) patch -> time dim patch",
    dim=num_variates,
    time=time_steps
).squeeze(-1)

print("\nNormalized OHLCV data (individual):\n", std_normalized_reshaped)



STANDARD PACKEDSTDSCALER (Individual Normalization)

Output shapes:
  loc: torch.Size([1, 25, 1])
  scale: torch.Size([1, 25, 1])

--- Per-Variate Statistics (Individual Normalization) ---

Variate 0 (Open):
  Mean (loc): 106.2000
  Std (scale): 4.3244
  Original data: [100.0, 104.0, 107.0, 109.0, 111.0]

Variate 1 (High):
  Mean (loc): 109.8000
  Std (scale): 3.4929
  Original data: [105.0, 108.0, 110.0, 112.0, 114.0]

Variate 2 (Low):
  Mean (loc): 105.2000
  Std (scale): 4.3244
  Original data: [99.0, 103.0, 106.0, 108.0, 110.0]

Variate 3 (Close):
  Mean (loc): 108.8000
  Std (scale): 3.4929
  Original data: [104.0, 107.0, 109.0, 111.0, 113.0]

Variate 4 (Volume):
  Mean (loc): 1030000.0000
  Std (scale): 120415.9453
  Original data: [1000000.0, 1200000.0, 900000.0, 1100000.0, 950000.0]

--- Individual Statistics Verification ---
All means: ['106.2000', '109.8000', '105.2000', '108.8000', '1030000.0000'] (should be different)
All stds: ['4.3244', '3.4929', '4.3244', '3.4929', '120

# Mid Point Scaler

In [None]:
import pandas as pd
import torch
from einops import rearrange, repeat

# Read the parquet file into a DataFrame and display it.
# NOTE(review): absolute, machine-specific path -- adjust for your environment.
parquet_path = '/opt/uni2ts/data/processed_equities/5m/A.parquet'
df = pd.read_parquet(parquet_path)
df


Original OHLCV data:
OHLC (prices): tensor([[100., 105.,  99., 104.],
        [104., 108., 103., 107.],
        [107., 110., 106., 109.],
        [109., 112., 108., 111.],
        [111., 114., 110., 113.]])
Volume: tensor([[1000000.],
        [1200000.],
        [ 900000.],
        [1100000.],
        [ 950000.]])

Price range: tensor(99.) to tensor(114.)
Volume range: tensor(900000.) to tensor(1200000.)


In [None]:

# Pull the five OHLCV columns out of the DataFrame as a float32 tensor of
# shape [rows, 5]. This rebinds ohlcv_data, replacing the toy example above.
ohlcv_columns = ['open', 'high', 'low', 'close', 'volume']
ohlcv_values = df[ohlcv_columns].values
ohlcv_data = torch.tensor(ohlcv_values, dtype=torch.float32)

# Use the same packing and scaling code as above
# ... (rest of the code from the example)
