In [1]:
import pandas as pd
from quantrl_lab.data import (
    DataSourceRegistry,
    DataProcessor,
    IndicatorRegistry,
    AlpacaDataLoader, 
    YfinanceDataloader, 
    AlphaVantageDataLoader
)

from quantrl_lab.data.indicators.technical_indicators import *
# Remove pandas' column/row display limits and leave the display width unset (None)
# so that wide, indicator-heavy frames are rendered without truncation.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.width = None

#### What each data source supports (as of the latest implementation)

In [2]:
def _report_supported_features(label, loader):
    """Print and return the `supported_features` attribute of a data loader.

    Args:
        label: Name shown in the printed line.
        loader: Data loader instance exposing a `supported_features` attribute.

    Returns:
        The value of `loader.supported_features`.
    """
    features = loader.supported_features
    print(f"Supported features for {label}: {features}")
    return features


# Each loader is created with its default arguments and reported immediately,
# so the printed lines appear in the same order the loaders are constructed.
data_source_apc = AlpacaDataLoader()
supported_features = _report_supported_features("AlpacaDataLoader", data_source_apc)

data_source_yf = YfinanceDataloader()
supported_features = _report_supported_features("YfinanceDataLoader", data_source_yf)

data_source_av = AlphaVantageDataLoader()
supported_features = _report_supported_features("AlphaVantageDataLoader", data_source_av)

Supported features for AlpacaDataLoader: ['historical_bars', 'news', 'live_data', 'streaming', 'connection_managed', 'instrument_discovery']
Supported features for YfinanceDataLoader: ['historical_bars', 'connection_managed', 'instrument_discovery']
Supported features for AlphaVantageDataLoader: ['historical_bars', 'news', 'connection_managed', 'instrument_discovery']


In [3]:
# Ask the registry for the names of all registered indicators, then show them.
available_indicators = IndicatorRegistry.list_all()
print(f"Available indicators:  {available_indicators}")

Available indicators:  ['SMA', 'EMA', 'RSI', 'MACD', 'ATR', 'BB', 'STOCH', 'OBV']


In [4]:
# Calling DataSourceRegistry() with no arguments uses the default data source
# configuration. To override it, pass your own `sources` mapping instead, e.g.:
#
#     DataSourceRegistry(
#         sources={"primary_source": AlpacaDataLoader, "news_source": AlpacaDataLoader}
#     )
#
#     DataSourceRegistry(
#         sources={"primary_source": AlphaVantageDataLoader, "news_source": AlphaVantageDataLoader}
#     )
data_loader = DataSourceRegistry()

In [5]:
# Request parameters for the historical OHLCV download (symbol MU, "1d" timeframe).
ohlcv_request = {
    "symbols": "MU",
    "start": "2023-01-01",
    "end": "2025-01-01",
    "timeframe": "1d",
}
olhcv_df = data_loader.get_historical_ohlcv_data(**ohlcv_request)

In [6]:
# Fetch news for MU over the same start/end dates used for the OHLCV request.
news_window = {"start": "2023-01-01", "end": "2025-01-01"}
news_df = data_loader.get_news_data("MU", **news_window)

Output()

In [None]:
# Preview the first rows of the fetched news data as a quick sanity check.
news_df.head()

In [7]:
# Give the processor both inputs: the OHLCV bars and the news data.
data_processor = DataProcessor(
    olhcv_data=olhcv_df,
    news_data=news_df,
)

#### You can specify the technical indicators in any of the following formats

1. Simple string format
indicators = ["SMA", "RSI", "MACD", "BB"] # each indicator uses its default parameters

2. Dictionary Format with Parameters
indicators = [
    {"SMA": {"window": 20}},
    {"RSI": {"window": 14}},
    {"MACD": {"fast": 12, "slow": 26, "signal": 9}},
    {"BB": {"window": 20, "num_std": 2}}
]

3. Mixed Format
indicators = [
    "SMA",  # Uses default parameters
    {"RSI": {"window": 21}},  # Custom parameters
    "MACD"  # Uses default parameters
]

4. Complex Multi-Parameter Example
indicators = [
    {"SMA": {"window": [10, 20, 50]}},
    {"EMA": {"window": [12, 26]}},
    {"RSI": {"window": 14}},
    {"MACD": {"fast": 12, "slow": 26, "signal": 9}},
    {"BB": [
        {"window": 20, "num_std": 2},
        {"window": 20, "num_std": 2.5}
    ]}
]

In [8]:
# Indicator configuration covering several parameter settings per indicator.
# Parameter grids are written as tuples and expanded into the dict format below.

# Window lengths shared by the moving averages (SMA, EMA).
moving_average_windows = (5, 10, 20, 50)

# Window lengths shared by RSI and ATR.
rsi_atr_windows = (7, 14, 21, 28)

# MACD settings as (fast, slow, signal).
macd_grid = (
    (12, 26, 9),  # Standard MACD
    (5, 35, 5),   # Fast MACD
    (8, 21, 5),   # Custom MACD
    (19, 39, 9),  # Slow MACD
)

# Bollinger Bands settings as (window, num_std).
bollinger_grid = (
    (10, 2.0),
    (20, 2.0),  # Standard BB
    (20, 2.5),
    (50, 2.0),
)

# Stochastic Oscillator settings as (k_window, d_window, smooth_k).
stochastic_grid = (
    (14, 3, 1),  # Fast Stochastic
    (14, 3, 3),  # Slow Stochastic
    (21, 5, 3),  # Custom Stochastic
)

indicators = [
    # list(...) gives every entry its own independent window list.
    {"SMA": {"window": list(moving_average_windows)}},
    {"EMA": {"window": list(moving_average_windows)}},
    {"RSI": {"window": list(rsi_atr_windows)}},
    {"MACD": [
        {"fast": fast, "slow": slow, "signal": signal}
        for fast, slow, signal in macd_grid
    ]},
    {"ATR": {"window": list(rsi_atr_windows)}},
    {"BB": [
        {"window": window, "num_std": num_std}
        for window, num_std in bollinger_grid
    ]},
    {"STOCH": [
        {"k_window": k_window, "d_window": d_window, "smooth_k": smooth_k}
        for k_window, d_window, smooth_k in stochastic_grid
    ]},
    # On-Balance Volume is given as a bare name (no parameters).
    "OBV",
]

In [14]:
# Run the processing pipeline with the indicator configuration defined above.
# fillna_strategy="neutral": missing sentiment values are filled with a neutral value (0).
pipeline_output = data_processor.data_processing_pipeline(
    indicators=indicators,
    fillna_strategy="neutral",
)
processed_data, metadata = pipeline_output

# Print the run metadata, one "name: value" pair per line.
print("\nMetadata:")
for field_name, field_value in metadata.items():
    print(f"  {field_name}: {field_value}")


Metadata:
  symbol: MU
  date_range: {'start': '2023-01-03', 'end': '2024-12-31'}
  fillna_strategy: neutral
  technical_indicators: [{'SMA': {'window': [5, 10, 20, 50]}}, {'EMA': {'window': [5, 10, 20, 50]}}, {'RSI': {'window': [7, 14, 21, 28]}}, {'MACD': [{'fast': 12, 'slow': 26, 'signal': 9}, {'fast': 5, 'slow': 35, 'signal': 5}, {'fast': 8, 'slow': 21, 'signal': 5}, {'fast': 19, 'slow': 39, 'signal': 9}]}, {'ATR': {'window': [7, 14, 21, 28]}}, {'BB': [{'window': 10, 'num_std': 2.0}, {'window': 20, 'num_std': 2.0}, {'window': 20, 'num_std': 2.5}, {'window': 50, 'num_std': 2.0}]}, {'STOCH': [{'k_window': 14, 'd_window': 3, 'smooth_k': 1}, {'k_window': 14, 'd_window': 3, 'smooth_k': 3}, {'k_window': 21, 'd_window': 5, 'smooth_k': 3}]}, 'OBV']
  news_sentiment_applied: True
  columns_dropped: ['Date', 'Timestamp', 'Symbol']
  original_shape: (502, 10)
  final_shape: (453, 50)
