In [1]:
import pandas as pd
from quantrl_lab.data import (
    DataSourceRegistry,
    DataProcessor,
    IndicatorRegistry,
    AlpacaDataLoader, 
    YfinanceDataloader, 
    AlphaVantageDataLoader
)

from quantrl_lab.data.indicators.technical_indicators import *
# Lift pandas' display limits so wide/long frames are rendered without truncation.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.width = None

#### What each data source supports (as of the latest implementation)

In [2]:
# Instantiate each loader with its default arguments and print the feature
# list it advertises, one loader at a time.
loaders = {}
for label, loader_cls in (
    ("AlpacaDataLoader", AlpacaDataLoader),
    ("YfinanceDataLoader", YfinanceDataloader),
    ("AlphaVantageDataLoader", AlphaVantageDataLoader),
):
    loader = loader_cls()
    supported_features = loader.supported_features
    print(f"Supported features for {label}: {supported_features}")
    loaders[label] = loader

# Keep the individual loader instances available under their usual names.
data_source_apc, data_source_yf, data_source_av = loaders.values()

Supported features for AlpacaDataLoader: ['historical_bars', 'news', 'live_data', 'streaming', 'connection_managed', 'instrument_discovery']
Supported features for YfinanceDataLoader: ['historical_bars', 'connection_managed', 'instrument_discovery']
Supported features for AlphaVantageDataLoader: ['historical_bars', 'news', 'connection_managed', 'instrument_discovery']


In [3]:
# Show the names of all indicators currently known to the registry.
print("Available indicators: ", IndicatorRegistry.list_all())

Available indicators:  ['SMA', 'EMA', 'RSI', 'MACD', 'ATR', 'BB', 'STOCH', 'OBV']


In [4]:
# Called without arguments, DataSourceRegistry uses its default data source
# configuration. To override it, pass your own `sources` mapping instead, e.g.:
#
#   sources = {"primary_source": AlpacaDataLoader, "news_source": AlpacaDataLoader}
#   sources = {"primary_source": AlphaVantageDataLoader, "news_source": AlphaVantageDataLoader}
data_loader = DataSourceRegistry()

In [5]:
# Fetch daily ("1d") OHLCV bars for MU between 2023-01-01 and 2025-01-01.
olhcv_df = data_loader.get_historical_ohlcv_data(
    **{
        "symbols": "MU",
        "start": "2023-01-01",
        "end": "2025-01-01",
        "timeframe": "1d",
    }
)

In [6]:
# Fetch news for MU over the same date range as the price data.
news_df = data_loader.get_news_data("MU", start="2023-01-01", end="2025-01-01")

Output()

In [7]:
# Preview the first five news rows.
news_df.head(5)

Unnamed: 0,author,content,created_at,headline,id,images,source,summary,symbols,updated_at,url
0,Benzinga Insights,,2024-12-31T17:35:11Z,10 Information Technology Stocks Whale Activit...,42745926,"[{'size': 'large', 'url': 'https://cdn.benzing...",benzinga,,"[AAPL, AMD, CLSK, KC, MSTR, MU, NVDA, ORCL, PL...",2024-12-31T17:35:12Z,https://www.benzinga.com/insights/options/24/1...
1,Mark Putrino,,2024-12-30T18:17:10Z,Stock Of The Day: Micron At Risk Of Breaking K...,42728975,"[{'size': 'large', 'url': 'https://cdn.benzing...",benzinga,MU stock could drop as it breaks support at $8...,[MU],2024-12-30T18:17:11Z,https://www.benzinga.com/trading-ideas/technic...
2,Benzinga Insights,,2024-12-30T14:45:39Z,Market Whales and Their Recent Bets on Micron ...,42722860,"[{'size': 'large', 'url': 'https://cdn.benzing...",benzinga,,[MU],2024-12-30T14:45:40Z,https://www.benzinga.com/insights/options/24/1...
3,Anusuya Lahiri,,2024-12-30T13:47:25Z,Taiwan Semiconductor's Kaohsiung Expansion To ...,42721526,"[{'size': 'large', 'url': 'https://cdn.benzing...",benzinga,Taiwan Semiconductor plans to build two new wa...,"[AMD, MU, NVDA, SOXQ, TSM, USD]",2024-12-30T13:47:26Z,https://www.benzinga.com/24/12/42721526/taiwan...
4,Michael Cohen,,2024-12-28T15:00:23Z,"Benzinga Bulls And Bears: Quantum Computing, T...",42713893,"[{'size': 'large', 'url': 'https://cdn.benzing...",benzinga,Benzinga examined the prospects for many inves...,"[AAPL, AMD, BTCUSD, DOGEUSD, ETHUSD, F, HMC, H...",2024-12-28T15:00:24Z,https://www.benzinga.com/trading-ideas/long-id...


In [8]:
# Bundle the price bars and the news frame into a single processor.
data_processor = DataProcessor(
    news_data=news_df,
    olhcv_data=olhcv_df,
)

#### You can specify the technical indicators in any of the following ways

1. Simple string format
indicators = ["SMA", "RSI", "MACD", "BB"] # using default window size

2. Dictionary Format with Parameters
indicators = [
    {"SMA": {"window": 20}},
    {"RSI": {"window": 14}},
    {"MACD": {"fast": 12, "slow": 26, "signal": 9}},
    {"BB": {"window": 20, "num_std": 2}}
]

3. Mixed Format
indicators = [
    "SMA",  # Uses default parameters
    {"RSI": {"window": 21}},  # Custom parameters
    "MACD"  # Uses default parameters
]

4. Complex Multi-Parameter Example
indicators = [
    {"SMA": {"window": [10, 20, 50]}},
    {"EMA": {"window": [12, 26]}},
    {"RSI": {"window": 14}},
    {"MACD": {"fast": 12, "slow": 26, "signal": 9}},
    {"BB": [
        {"window": 20, "num_std": 2},
        {"window": 20, "num_std": 2.5}
    ]}
]

In [9]:
# Indicator configuration that sweeps several parameter settings per indicator.
# Each entry is either a bare indicator name (no parameters given) or a
# one-key dict mapping the indicator name to its parameters: a single dict
# whose "window" lists several sizes, or a list of complete parameter dicts.
indicators = [
    # Moving averages (simple and exponential) over the same set of windows.
    {"SMA": dict(window=[5, 10, 20, 50])},
    {"EMA": dict(window=[5, 10, 20, 50])},

    # Relative Strength Index over several look-back windows.
    {"RSI": dict(window=[7, 14, 21, 28])},

    # MACD as (fast, slow, signal) triples: standard, fast, custom, slow.
    {"MACD": [
        dict(zip(("fast", "slow", "signal"), combo))
        for combo in ((12, 26, 9), (5, 35, 5), (8, 21, 5), (19, 39, 9))
    ]},

    # Average True Range over several look-back windows.
    {"ATR": dict(window=[7, 14, 21, 28])},

    # Bollinger Bands as (window, num_std) pairs; (20, 2.0) is the standard setup.
    {"BB": [
        dict(zip(("window", "num_std"), combo))
        for combo in ((10, 2.0), (20, 2.0), (20, 2.5), (50, 2.0))
    ]},

    # Stochastic Oscillator as (k_window, d_window, smooth_k): fast, slow, custom.
    {"STOCH": [
        dict(zip(("k_window", "d_window", "smooth_k"), combo))
        for combo in ((14, 3, 1), (14, 3, 3), (21, 5, 3))
    ]},

    # On-Balance Volume takes no parameters, so the bare name is enough.
    "OBV",
]

In [10]:
# Run the processing pipeline with the indicator configuration above.
# fillna_strategy="neutral": rows with no sentiment available are filled
# with the neutral value (0).
processed_data = data_processor.data_processing_pipeline(
    fillna_strategy="neutral",
    indicators=indicators,
)

Device set to use cpu


In [11]:
# Preview the first five rows of the processed feature frame.
processed_data.head(5)

Unnamed: 0,Open,High,Low,Close,Volume,Trade_count,VWAP,SMA_5,SMA_10,SMA_20,SMA_50,EMA_5,EMA_10,EMA_20,EMA_50,RSI_7,RSI_14,RSI_21,RSI_28,MACD_line_12_26,MACD_signal_9,MACD_line_5_35,MACD_signal_5,MACD_line_8_21,MACD_line_19_39,ATR_7,ATR_14,ATR_21,ATR_28,BB_middle_10,BB_upper_10_2.0,BB_lower_10_2.0,BB_bandwidth_10,BB_middle_20,BB_upper_20_2.0,BB_lower_20_2.0,BB_bandwidth_20,BB_upper_20_2.5,BB_lower_20_2.5,BB_middle_50,BB_upper_50_2.0,BB_lower_50_2.0,BB_bandwidth_50,STOCH_%K_14_1,STOCH_%D_3,STOCH_%K_14_3,STOCH_%K_21_3,STOCH_%D_5,OBV,sentiment_score
0,53.33,54.27,52.88,54.13,16450150.0,148058.0,53.658406,54.566,55.532,57.0395,58.3376,54.637723,55.451931,56.611859,56.82071,29.686963,38.391911,45.890576,50.268689,-1.204459,-0.005189,-2.446257,-1.389417,-1.546069,-0.523198,1.804313,1.887277,1.935478,1.971536,55.532,57.887359,53.176641,0.084829,57.0395,61.070509,53.008491,0.176676,62.078261,52.000739,58.3376,64.215068,52.460132,0.201498,20.147059,15.973301,19.356404,14.038764,13.301875,-55206082.0,0.928469
1,53.69,56.71,53.61,56.57,16594461.0,142413.0,55.712984,54.76,55.546,56.776,58.4616,55.281816,55.655216,56.607873,56.810878,55.175868,49.592193,52.207407,54.469922,-1.049236,-0.103168,-1.773611,-1.332152,-1.217623,-0.495082,1.989411,1.9739,1.990931,2.011838,55.546,57.926611,53.165389,0.085717,56.776,60.115394,53.436606,0.147043,60.950243,52.601757,58.4616,63.89802,53.02518,0.185983,56.029412,22.382453,32.594981,23.892044,14.691408,-38611621.0,0.494082
2,56.8,57.72,56.215,56.66,35727014.0,160291.0,56.752924,55.106,55.534,56.6065,58.5108,55.74121,55.837904,56.612837,56.804961,55.864368,49.953604,52.422521,54.616575,-0.908485,-0.175348,-1.292248,-1.204425,-0.948971,-0.464068,1.92021,1.940407,1.967792,1.993737,55.534,57.888026,53.179974,0.084778,56.6065,59.569061,53.643939,0.13084,60.309702,52.903298,58.5108,63.83313,53.18847,0.181926,65.271967,33.033621,47.149479,33.752654,18.440677,-2884607.0,0.861282
3,56.7,57.865,56.08,57.64,11888401.0,86738.0,57.166535,55.854,55.615,56.538,58.5694,56.37414,56.165558,56.710662,56.837708,63.070414,53.834973,54.751095,56.209341,-0.70968,-0.217524,-0.693015,-1.006669,-0.611158,-0.38623,1.900894,1.929306,1.959087,1.986282,55.615,58.210573,53.019427,0.093341,56.538,59.324702,53.751298,0.123223,60.021378,53.054622,58.5694,63.78436,53.35444,0.178078,82.879027,49.268198,68.060135,50.896941,26.709942,9003794.0,0.0
4,58.38,59.52,57.595,58.63,16093704.0,131385.0,58.541587,56.726,55.922,56.5895,58.6066,57.126094,56.613638,56.893456,56.907994,69.02987,57.427004,56.984229,57.762294,-0.46686,-0.227669,-0.027886,-0.747764,-0.229953,-0.268248,1.904338,1.928999,1.957464,1.984094,55.922,59.140209,52.703791,0.115096,56.5895,59.494403,53.684597,0.128332,60.220629,52.958371,58.6066,63.795641,53.417559,0.17708,86.83432,64.512684,78.328438,65.0629,37.528661,25097498.0,0.70052
