In [178]:
import os
import pickle
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler, RobustScaler

## Data Acquisition

In [179]:
def fetch_data(ticker, start_date="2005-01-01", end_date="2025-01-19"):
    """Download daily price history for a single ticker through yfinance.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        start_date: Forwarded to ``history`` as ``start``.
        end_date: Forwarded to ``history`` as ``end``.

    Returns:
        The history frame with its index moved into a regular column
        (``reset_index``), or ``None`` when the download raises or yields
        no rows. Callers are expected to skip ``None`` results.
    """
    try:
        history = yf.Ticker(ticker).history(interval="1d", start=start_date, end=end_date)
        # NOTE(review): yfinance appears to report unknown or delisted symbols
        # with an empty frame instead of an exception - confirm against the
        # installed version. An empty frame is useless downstream, so it is
        # reported the same way as a failed request.
        if history is None or history.empty:
            print(f"Error fetching data for {ticker}: no rows returned")
            return None
        return history.reset_index()
    except Exception as e:
        # Best-effort fetch: report the problem and let the caller carry on.
        print(f"Error fetching data for {ticker}: {e}")
        return None

def fetch_multiple_data(tickers, start_date="2005-01-01", end_date="2025-01-19"):
    """Fetch history for several tickers and collect the successful ones.

    Each ticker is announced on stdout and downloaded with ``fetch_data``.
    Tickers for which ``fetch_data`` returns ``None`` are left out.

    Returns:
        dict mapping ticker -> fetched frame, in the order of ``tickers``.
    """
    frames_by_ticker = {}
    for symbol in tickers:
        print(f"Fetching data for {symbol}...")
        frame = fetch_data(symbol, start_date, end_date)
        if frame is None:
            # Failed download: skip this symbol.
            continue
        frames_by_ticker[symbol] = frame
    return frames_by_ticker


## Cleanup

In [180]:
def reorder_change_columns(df):
    """Return ``df`` with 'OC_pct_change' and 'HL_pct_change' as the first two columns.

    All remaining columns follow in their existing order. Raises ``KeyError``
    (from pandas) if either percent-change column is missing.
    """
    leading = ['OC_pct_change', 'HL_pct_change']
    trailing = [name for name in df.columns if name not in leading]
    return df[leading + trailing]


## Technical Indicators Functions

In [181]:
def calculate_rsi(df, period=14):
    """Relative Strength Index of 'Close' using simple rolling means.

    Upward and downward daily moves are averaged over ``period`` rows with a
    plain rolling mean, and the ratio is mapped to ``100 - 100 / (1 + ratio)``.
    The first ``period`` rows are NaN.
    """
    price_change = df['Close'].diff()
    avg_up = price_change.clip(lower=0).rolling(window=period).mean()
    avg_down = (-price_change).clip(lower=0).rolling(window=period).mean()
    strength_ratio = avg_up / avg_down
    return 100 - (100 / (1 + strength_ratio))

def calculate_bollinger_band_width(df, window=20, num_std=2):
    """Relative width of the Bollinger Bands around the rolling mean of 'Close'.

    The bands sit ``num_std`` rolling standard deviations above and below the
    ``window``-row moving average; the result is (upper - lower) / average.
    """
    closes = df['Close'].rolling(window=window)
    center = closes.mean()
    spread = closes.std()
    top = center + num_std * spread
    bottom = center - num_std * spread
    return (top - bottom) / center

def calculate_adx(df, window=14):
    """Average Directional Index computed from 'High', 'Low' and 'Close'.

    True range and the +/- directional movements are smoothed with an
    exponentially weighted mean (``alpha = 1 / window``, ``adjust=False``,
    ``min_periods = window``); the directional index built from them is
    smoothed once more with the same settings and returned.
    """
    def _smooth(series):
        # Same smoothing settings are used for every series in this indicator.
        return series.ewm(alpha=1/window, min_periods=window, adjust=False).mean()

    highs, lows, closes = df['High'], df['Low'], df['Close']
    last_close = closes.shift(1)

    # True range: the widest of the three candidate ranges per row.
    candidates = pd.concat(
        [highs - lows, (highs - last_close).abs(), (lows - last_close).abs()],
        axis=1,
    )
    true_range = candidates.max(axis=1)

    rise = highs - highs.shift(1)
    fall = lows.shift(1) - lows
    # A move only counts when it is positive and larger than the opposite move.
    plus_dm = rise.where((rise > fall) & (rise > 0), 0)
    minus_dm = fall.where((fall > rise) & (fall > 0), 0)

    smoothed_range = _smooth(true_range)
    smoothed_plus = _smooth(plus_dm)
    smoothed_minus = _smooth(minus_dm)

    plus_di = 100 * (smoothed_plus / smoothed_range)
    minus_di = 100 * (smoothed_minus / smoothed_range)
    dx = 100 * ( (plus_di - minus_di).abs() / (plus_di + minus_di) )
    return _smooth(dx)

def calculate_volume_roc(df, period=20):
    """Percentage change of 'Volume' relative to its value ``period`` rows earlier."""
    current = df['Volume']
    earlier = current.shift(period)
    return ((current - earlier) / earlier) * 100

def calculate_price_zscore(df, window=20):
    """Rolling z-score of 'Close': distance from the rolling mean in rolling-std units."""
    closes = df['Close']
    rolling = closes.rolling(window=window)
    center = rolling.mean()
    spread = rolling.std()
    return (closes - center) / spread

def calculate_skewness(df, window=20):
    """Rolling skewness of 'Close' over ``window`` rows."""
    closes = df['Close']
    return closes.rolling(window=window).skew()

def calculate_macd(df, fast_period=12, slow_period=26, signal_period=9):
    """MACD line, signal line and histogram of 'Close'.

    Returns:
        DataFrame with columns 'MACD' (fast EMA minus slow EMA),
        'MACD_Signal' (EMA of the MACD line) and 'MACD_Hist' (their difference),
        indexed like ``df``.
    """
    def _ema(series, span):
        return series.ewm(span=span, adjust=False).mean()

    closes = df['Close']
    line = _ema(closes, fast_period) - _ema(closes, slow_period)
    signal = _ema(line, signal_period)
    result = {
        'MACD': line,
        'MACD_Signal': signal,
        'MACD_Hist': line - signal,
    }
    return pd.DataFrame(result)

def calculate_stochastic(df, k_period=14, d_period=3):
    """Stochastic oscillator (%K and %D) from 'High', 'Low' and 'Close'.

    %K places the close within the rolling ``k_period`` low/high range on a
    0-100 scale; %D is the ``d_period`` rolling mean of %K.

    Returns:
        DataFrame with columns 'Stochastic_%K' and 'Stochastic_%D'.
    """
    floor = df['Low'].rolling(window=k_period).min()
    ceiling = df['High'].rolling(window=k_period).max()
    fast = 100 * (df['Close'] - floor) / (ceiling - floor)
    slow = fast.rolling(window=d_period).mean()
    columns = {'Stochastic_%K': fast, 'Stochastic_%D': slow}
    return pd.DataFrame(columns)

def calculate_std(df, window=20):
    """Rolling standard deviation of 'Close' over ``window`` rows."""
    closes = df['Close']
    return closes.rolling(window=window).std()

def calculate_fibonacci_retracement(df, fib_levels=[0.236, 0.382, 0.5, 0.618, 0.786]):
    """Fibonacci retracement levels between the overall high and overall low.

    The high is the maximum of 'High' and the low the minimum of 'Low' over
    the whole frame, so every row receives the same value per level.
    Column names are ``Fib_<int(level * 100)>`` (the percentage is truncated,
    e.g. 0.236 -> 'Fib_23').

    Returns:
        DataFrame indexed like ``df`` with one constant column per level.
    """
    top = df['High'].max()
    bottom = df['Low'].min()
    span = top - bottom
    n_rows = len(df)

    columns = {}
    for ratio in fib_levels:
        columns[f'Fib_{int(ratio*100)}'] = [top - span * ratio] * n_rows
    return pd.DataFrame(columns, index=df.index)

## Compute Indicators

In [182]:
def compute_technical_indicators(df, period_ranges=[14, 20, 50, 200]):
    """Assemble the indicator table for one price frame.

    The result shares ``df``'s index and holds, in order: 'Date'; for each
    period the RSI, Bollinger width, ADX, volume ROC, price z-score, skewness
    and standard deviation (columns suffixed ``_<period>``); then the MACD,
    stochastic and Fibonacci columns, which do not depend on ``period_ranges``.
    """
    table = pd.DataFrame(index=df.index)
    table['Date'] = df['Date']

    # (column prefix, builder taking the period) in output column order.
    per_period = (
        ('RSI', lambda span: calculate_rsi(df, span)),
        ('BB_Width', lambda span: calculate_bollinger_band_width(df, window=span)),
        ('ADX', lambda span: calculate_adx(df, window=span)),
        ('Volume_ROC', lambda span: calculate_volume_roc(df, span)),
        ('Price_Z_Score', lambda span: calculate_price_zscore(df, window=span)),
        ('Skewness', lambda span: calculate_skewness(df, window=span)),
        ('Std', lambda span: calculate_std(df, window=span)),
    )
    for span in period_ranges:
        for prefix, build in per_period:
            table[f'{prefix}_{span}'] = build(span)

    # Period-independent indicator groups are appended as whole frames.
    for build_group in (calculate_macd, calculate_stochastic, calculate_fibonacci_retracement):
        table = table.join(build_group(df))

    return table

def add_calendar_features(df):
    """Return a copy of ``df`` with calendar columns derived from 'Date'.

    'Date' is parsed with ``pd.to_datetime(..., utc=True)`` and the following
    columns are added: is_trading_day (1 for Monday-Friday), day_of_week,
    day_of_month, week_of_year (ISO week), month, quarter, is_month_end,
    is_quarter_end, is_year_end and days_to_month_end.

    The input frame is left untouched; earlier versions wrote the new columns
    (and the re-parsed 'Date') straight into the caller's frame.
    """
    out = df.copy()
    out['Date'] = pd.to_datetime(out['Date'], utc=True)
    stamp = out['Date'].dt
    weekday = stamp.dayofweek

    # Vectorised weekday test instead of a per-row Python lambda.
    out['is_trading_day'] = (weekday < 5).astype(int)
    out['day_of_week'] = weekday
    out['day_of_month'] = stamp.day
    out['week_of_year'] = stamp.isocalendar().week
    out['month'] = stamp.month
    out['quarter'] = stamp.quarter
    out['is_month_end'] = stamp.is_month_end.astype(int)
    out['is_quarter_end'] = stamp.is_quarter_end.astype(int)
    out['is_year_end'] = stamp.is_year_end.astype(int)
    out['days_to_month_end'] = stamp.days_in_month - stamp.day
    return out

def drop_unnecessary_columns(
    df,
    columns_to_drop=["Dividends", "Stock Splits", "Capital Gains", "Open", "High", "Low", "Close"],
):
    """Return ``df`` without the listed columns; names that are absent are ignored."""
    trimmed = df.drop(columns=columns_to_drop, errors='ignore')
    return trimmed

def prepare_dataset(df, period_ranges=[14, 20, 50, 200], remove_features=["Date", "Dividends", "Stock Splits", 
                                                                             "Capital Gains", "Open", "High", "Low", "Close", "Volume"]):
    """Join technical-indicator and calendar features onto a price frame.

    Args:
        df: Price frame; must contain the columns the indicator functions read
            plus 'Date'.
        period_ranges: Look-back periods forwarded to
            ``compute_technical_indicators``.
        remove_features: Column names removed from the indicator table before
            it is joined onto ``df``.

    Returns:
        ``df`` joined with the indicator/calendar columns, with every row that
        contains a NaN removed and the 'Date' column dropped.
    """
    indicators = compute_technical_indicators(df, period_ranges)
    indicators = add_calendar_features(indicators)
    indicators = drop_unnecessary_columns(indicators, remove_features)

    # The indicator table carries its own 'Date'. DataFrame.join raises on
    # overlapping column names when no suffix is given, so a caller-supplied
    # ``remove_features`` that omits 'Date' used to crash here. Drop whatever
    # still overlaps so ``df``'s own columns win.
    overlapping = [name for name in indicators.columns if name in df.columns]
    if overlapping:
        indicators = indicators.drop(columns=overlapping)

    merged = df.join(indicators)
    return merged.dropna().drop(columns=["Date"], errors='ignore')

## Compute Targets

In [183]:
def add_percent_change_features(df):
    """Return a copy of ``df`` with the two intraday change ratios added.

    'HL_pct_change' is (High - Low) / Low and 'OC_pct_change' is
    (Close - Open) / Open; both are fractions, not multiplied by 100.
    The input frame is not modified.
    """
    low_to_high = (df['High'] - df['Low']) / df['Low']
    open_to_close = (df['Close'] - df['Open']) / df['Open']
    return df.assign(HL_pct_change=low_to_high, OC_pct_change=open_to_close)



## Normalization

In [184]:
def normalize_dataframe_with_outlier_handling(df, outlier_columns=["Volume", "BB_Width_14", "BB_Width_20", "BB_Width_50", "BB_Width_200", 
                                                                   "Volume_ROC_14", "Volume_ROC_20", "Volume_ROC_50", "Volume_ROC_200"]):
    """Scale every column of ``df`` and return the result as a new frame.

    Columns named in ``outlier_columns`` are first log-compressed (``log1p``
    when the column has no negative values, otherwise a sign-preserving
    ``log1p`` of the magnitude) and then passed through ``RobustScaler``.
    All other columns are passed through ``MinMaxScaler``. Each column is
    fitted independently on its own values.
    """
    scaled = df.copy()

    minmax = MinMaxScaler()
    robust = RobustScaler()

    for name in scaled.columns:
        if name not in outlier_columns:
            # Ordinary column: rescale to the scaler's default range.
            scaled[name] = minmax.fit_transform(scaled[[name]])
            continue

        column = scaled[name]
        if column.min() >= 0:
            compressed = np.log1p(column)
        else:
            # Keep the sign, compress the magnitude.
            compressed = np.sign(column) * np.log1p(np.abs(column))

        scaled[name] = robust.fit_transform(compressed.values.reshape(-1, 1))
    return scaled

## Sliding Window Creation

In [185]:
def create_sliding_windows(data, window_size, target_days, target_columns, mode='forward'):
    """Cut a frame into (feature window, following target rows) pairs.

    For every start row ``s`` the features are rows ``s .. s+window_size-1``
    of all non-target columns and the targets are the next ``target_days``
    rows of ``target_columns``.

    Args:
        data: DataFrame holding both feature and target columns.
        window_size: Number of consecutive rows in each feature window.
        target_days: Number of rows following the window used as targets.
        target_columns: Column names used as targets and excluded from the
            features.
        mode: 'forward' or 'backward'. Both spellings enumerate exactly the
            same windows (indexing from the window start versus from the
            first target row), so the argument is only validated.

    Returns:
        ``(X, y)`` with shapes ``(n, window_size, n_features)`` and
        ``(n, target_days, n_targets)``. When the frame is too short for a
        single window, ``n`` is 0 and the trailing dimensions are kept so the
        result can still be concatenated with other tickers' windows.

    Raises:
        ValueError: If ``mode`` is not 'forward' or 'backward'.
    """
    if mode not in ('forward', 'backward'):
        raise ValueError("Mode must be either 'forward' or 'backward'.")

    # Split features/targets once instead of once per window.
    features = data.drop(columns=target_columns).to_numpy()
    targets = data[target_columns].to_numpy()

    n_windows = len(data) - window_size - target_days + 1
    if n_windows <= 0:
        empty_X = np.empty((0, window_size) + features.shape[1:], dtype=features.dtype)
        empty_y = np.empty((0, target_days) + targets.shape[1:], dtype=targets.dtype)
        return empty_X, empty_y

    starts = range(n_windows)
    X = np.stack([features[s:s + window_size] for s in starts])
    y = np.stack([targets[s + window_size:s + window_size + target_days] for s in starts])
    return X, y

def merge_sliding_window_data(data_dict, window_size, target_days, target_columns, mode='forward'):
    """Build sliding windows for every frame in ``data_dict`` and stack them.

    Windows are created per frame with ``create_sliding_windows`` and then
    concatenated along the first axis in dictionary order; the ticker keys
    themselves are not kept in the output.

    Returns:
        ``(merged_X, merged_y)`` arrays covering all frames.
    """
    window_pairs = [
        create_sliding_windows(frame, window_size, target_days, target_columns, mode)
        for frame in data_dict.values()
    ]
    merged_X = np.concatenate([features for features, _ in window_pairs], axis=0)
    merged_y = np.concatenate([targets for _, targets in window_pairs], axis=0)
    return merged_X, merged_y



## Saving the Data

In [186]:
def save_data(data, filename, folder_path):
    """
    Save the given data to a pickle file.

    Args:
        data: Any picklable object.
        filename: Name of the file created inside ``folder_path``.
        folder_path: Target directory; created (including parents) when it
            does not exist. An empty string means the current directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    # os.makedirs("") raises, so an empty folder is simply not created.
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
    with open(os.path.join(folder_path, filename), "wb") as file:
        pickle.dump(data, file)

## Final Pipeline

In [187]:
def _prepare_ticker_frames(raw_frames, period_ranges, remove_features):
    """Turn each raw price frame into a normalized, model-ready feature frame."""
    prepared = {}
    for ticker, frame in raw_frames.items():
        # Targets: low-high and open-close percent changes.
        frame = add_percent_change_features(frame)
        # Technical indicators and calendar features; NaN rows are removed.
        frame = prepare_dataset(frame, period_ranges)
        # Remove raw price/volume columns that should not be model inputs.
        frame = drop_unnecessary_columns(frame, remove_features)
        # Put the two percent-change (target) columns first.
        frame = reorder_change_columns(frame)
        prepared[ticker] = normalize_dataframe_with_outlier_handling(frame)
    return prepared


def _build_window_sets(frames, window_sizes, target_days, target_columns, mode, label):
    """Create merged sliding-window arrays for every window size and report their shapes."""
    X_by_size, y_by_size = {}, {}
    for window_size in window_sizes:
        X, y = merge_sliding_window_data(frames, window_size, target_days, target_columns, mode=mode)
        X_by_size[window_size] = X
        y_by_size[window_size] = y
        print(f"{label} - Window size {window_size}: X shape {X.shape}, y shape {y.shape}")
    return X_by_size, y_by_size


def main_pipeline():
    """Fetch, featurize, normalize, window and pickle the base and ensemble data.

    Base tickers and the ensemble ticker go through the same preparation; for
    each configured window size the merged arrays are written to the "data"
    folder as ``X_<size>days_i.pkl`` / ``y_<size>days_i.pkl`` (base) and
    ``X_<size>days_a.pkl`` / ``y_<size>days_a.pkl`` (ensemble).

    Raises:
        ValueError: If no data could be fetched for the base or the ensemble
            tickers.
    """
    # Configuration
    base_tickers = ["GDXJ", "GDX", "SLV"]
    ensemble_tickers = ["GLD"]
    start_date = "2005-01-01"
    end_date = "2025-01-02"
    period_ranges = [14, 20, 50, 200]
    remove_features = ["Dividends", "Stock Splits", "Capital Gains",
                       "Open", "High", "Low", "Close", "Volume"]
    window_sizes = [14, 30, 60, 90, 180]
    target_days = 7
    target_columns = ['HL_pct_change', 'OC_pct_change']  # Targets from the original data
    mode_base = 'forward'      # For individual models
    mode_ensemble = 'backward' # For ensemble models

    # 1. Data Acquisition
    base_data = fetch_multiple_data(base_tickers, start_date, end_date)
    ensemble_data = fetch_multiple_data(ensemble_tickers, start_date, end_date)
    if not base_data or not ensemble_data:
        # Fail early with a readable message instead of an opaque
        # concatenate error further down.
        raise ValueError("No data fetched for the base or ensemble tickers; nothing to process.")

    # 2./3. Add targets, compute features, drop raw columns, reorder, normalize
    base_data = _prepare_ticker_frames(base_data, period_ranges, remove_features)
    ensemble_data = _prepare_ticker_frames(ensemble_data, period_ranges, remove_features)

    # 4. Create Sliding Window Datasets
    base_X_dict, base_y_dict = _build_window_sets(
        base_data, window_sizes, target_days, target_columns, mode_base, "Base data")
    ensemble_X_dict, ensemble_y_dict = _build_window_sets(
        ensemble_data, window_sizes, target_days, target_columns, mode_ensemble, "Ensemble data")

    # 5. Save Processed Data
    for window_size in window_sizes:
        save_data(base_X_dict[window_size], f"X_{window_size}days_i.pkl", "data")
        save_data(base_y_dict[window_size], f"y_{window_size}days_i.pkl", "data")
        save_data(ensemble_X_dict[window_size], f"X_{window_size}days_a.pkl", "data")
        save_data(ensemble_y_dict[window_size], f"y_{window_size}days_a.pkl", "data")

In [188]:
def Testing_pipeline_for_gld(start_date="2005-01-01", end_date="2025-01-19"):
    """Walk GLD through every preparation stage, printing a preview after each.

    Returns:
        Tuple ``(raw, with_pct_change, prepared, normalized, final)`` holding
        the frame produced by each stage, in that order.
    """
    def _preview(caption, frame):
        # Caption line followed by the first five rows and a blank line.
        print(caption)
        print(frame.head(), "\n")

    # Step 1: raw download.
    print("Step 1: Fetching raw GLD data...")
    gld_data = fetch_data("GLD", start_date, end_date)
    _preview("Fetched Data (first 5 rows):", gld_data)

    # Step 2: target columns.
    print("Step 2: Adding percent change features...")
    gld_pct = add_percent_change_features(gld_data)
    _preview("Data after adding percent change features (first 5 rows):", gld_pct)

    # Step 3: indicators and calendar features.
    print("Step 3: Preparing dataset (technical indicators, calendar features, dropping unnecessary columns)...")
    gld_prepared = prepare_dataset(gld_pct)
    _preview("Prepared Data (first 5 rows):", gld_prepared)

    # Step 4: scaling.
    print("Step 4: Normalizing data with outlier handling...")
    gld_normalized = normalize_dataframe_with_outlier_handling(gld_prepared)
    _preview("Normalized Data (first 5 rows):", gld_normalized)

    # Step 5: drop raw columns, then move the percent-change columns to the front.
    print("Step 5: Dropping unnecessary columns...")
    finaldf = drop_unnecessary_columns(gld_normalized)
    print("Final Data (first 5 rows):")
    finaldf = reorder_change_columns(finaldf)
    print(finaldf.head(), "\n")

    return gld_data, gld_pct, gld_prepared, gld_normalized, finaldf




In [189]:
# Run the GLD walkthrough with its default date range and keep the frame
# produced by each stage (raw, percent-change, prepared, normalized, final).
datagld , data_pct, data_prepared, data_normalized, finaldf = Testing_pipeline_for_gld()

Step 1: Fetching raw GLD data...
Fetched Data (first 5 rows):
                       Date       Open       High        Low      Close  \
0 2005-01-03 00:00:00-05:00  42.980000  43.169998  42.740002  43.020000   
1 2005-01-04 00:00:00-05:00  42.799999  42.910000  42.459999  42.740002   
2 2005-01-05 00:00:00-05:00  42.750000  42.880001  42.599998  42.669998   
3 2005-01-06 00:00:00-05:00  42.480000  42.560001  42.070000  42.150002   
4 2005-01-07 00:00:00-05:00  42.090000  42.389999  41.700001  41.840000   

    Volume  Dividends  Stock Splits  Capital Gains  
0  4750400        0.0           0.0            0.0  
1  3456800        0.0           0.0            0.0  
2  2033600        0.0           0.0            0.0  
3  2556400        0.0           0.0            0.0  
4  4492700        0.0           0.0            0.0   

Step 2: Adding percent change features...
Data after adding percent change features (first 5 rows):
                       Date       Open       High        Low      C

In [190]:
# Raw GLD frame: the first value returned by Testing_pipeline_for_gld.
datagld

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains
0,2005-01-03 00:00:00-05:00,42.980000,43.169998,42.740002,43.020000,4750400,0.0,0.0,0.0
1,2005-01-04 00:00:00-05:00,42.799999,42.910000,42.459999,42.740002,3456800,0.0,0.0,0.0
2,2005-01-05 00:00:00-05:00,42.750000,42.880001,42.599998,42.669998,2033600,0.0,0.0,0.0
3,2005-01-06 00:00:00-05:00,42.480000,42.560001,42.070000,42.150002,2556400,0.0,0.0,0.0
4,2005-01-07 00:00:00-05:00,42.090000,42.389999,41.700001,41.840000,4492700,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
5039,2025-01-13 00:00:00-05:00,246.339996,246.789993,245.149994,245.740005,8448600,0.0,0.0,0.0
5040,2025-01-14 00:00:00-05:00,245.619995,247.039993,245.520004,247.029999,4914800,0.0,0.0,0.0
5041,2025-01-15 00:00:00-05:00,248.210007,248.899994,247.009995,248.880005,6345000,0.0,0.0,0.0
5042,2025-01-16 00:00:00-05:00,250.559998,251.389999,250.270004,250.600006,9236000,0.0,0.0,0.0


In [191]:
# Raw frame plus the HL_pct_change / OC_pct_change columns.
data_pct

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,HL_pct_change,OC_pct_change
0,2005-01-03 00:00:00-05:00,42.980000,43.169998,42.740002,43.020000,4750400,0.0,0.0,0.0,0.010061,0.000931
1,2005-01-04 00:00:00-05:00,42.799999,42.910000,42.459999,42.740002,3456800,0.0,0.0,0.0,0.010598,-0.001402
2,2005-01-05 00:00:00-05:00,42.750000,42.880001,42.599998,42.669998,2033600,0.0,0.0,0.0,0.006573,-0.001871
3,2005-01-06 00:00:00-05:00,42.480000,42.560001,42.070000,42.150002,2556400,0.0,0.0,0.0,0.011647,-0.007768
4,2005-01-07 00:00:00-05:00,42.090000,42.389999,41.700001,41.840000,4492700,0.0,0.0,0.0,0.016547,-0.005940
...,...,...,...,...,...,...,...,...,...,...,...
5039,2025-01-13 00:00:00-05:00,246.339996,246.789993,245.149994,245.740005,8448600,0.0,0.0,0.0,0.006690,-0.002436
5040,2025-01-14 00:00:00-05:00,245.619995,247.039993,245.520004,247.029999,4914800,0.0,0.0,0.0,0.006191,0.005741
5041,2025-01-15 00:00:00-05:00,248.210007,248.899994,247.009995,248.880005,6345000,0.0,0.0,0.0,0.007652,0.002699
5042,2025-01-16 00:00:00-05:00,250.559998,251.389999,250.270004,250.600006,9236000,0.0,0.0,0.0,0.004475,0.000160


In [192]:
# Output of prepare_dataset: indicator and calendar columns joined on, NaN rows removed.
data_prepared

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,HL_pct_change,OC_pct_change,...,is_trading_day,day_of_week,day_of_month,week_of_year,month,quarter,is_month_end,is_quarter_end,is_year_end,days_to_month_end
398,64.739998,65.220001,64.400002,64.750000,5052000,0.0,0.0,0.0,0.012733,0.000154,...,1,2,2,31,8,3,0,0,0,29
399,64.000000,64.330002,63.549999,64.120003,3840600,0.0,0.0,0.0,0.012274,0.001875,...,1,3,3,31,8,3,0,0,0,28
400,64.870003,64.980003,64.000000,64.279999,4811100,0.0,0.0,0.0,0.015313,-0.009095,...,1,4,4,31,8,3,0,0,0,27
401,64.470001,64.809998,64.250000,64.500000,2493800,0.0,0.0,0.0,0.008716,0.000465,...,1,0,7,32,8,3,0,0,0,24
402,63.980000,64.570000,63.849998,63.970001,5574200,0.0,0.0,0.0,0.011276,-0.000156,...,1,1,8,32,8,3,0,0,0,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5039,246.339996,246.789993,245.149994,245.740005,8448600,0.0,0.0,0.0,0.006690,-0.002436,...,1,0,13,3,1,1,0,0,0,18
5040,245.619995,247.039993,245.520004,247.029999,4914800,0.0,0.0,0.0,0.006191,0.005741,...,1,1,14,3,1,1,0,0,0,17
5041,248.210007,248.899994,247.009995,248.880005,6345000,0.0,0.0,0.0,0.007652,0.002699,...,1,2,15,3,1,1,0,0,0,16
5042,250.559998,251.389999,250.270004,250.600006,9236000,0.0,0.0,0.0,0.004475,0.000160,...,1,3,16,3,1,1,0,0,0,15


In [193]:
# Prepared frame after normalize_dataframe_with_outlier_handling.
data_normalized

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,HL_pct_change,OC_pct_change,...,is_trading_day,day_of_week,day_of_month,week_of_year,month,quarter,is_month_end,is_quarter_end,is_year_end,days_to_month_end
398,0.042199,0.040859,0.044171,0.041665,-0.661424,0.0,0.0,0.0,0.087272,0.397217,...,0.0,0.50,0.033333,0.576923,0.636364,0.666667,0.0,0.0,0.0,0.966667
399,0.038508,0.036424,0.039928,0.038532,-1.037915,0.0,0.0,0.0,0.083638,0.406808,...,0.0,0.75,0.066667,0.576923,0.636364,0.666667,0.0,0.0,0.0,0.933333
400,0.042847,0.039663,0.042174,0.039328,-0.728520,0.0,0.0,0.0,0.107693,0.345655,...,0.0,1.00,0.100000,0.576923,0.636364,0.666667,0.0,0.0,0.0,0.900000
401,0.040852,0.038816,0.043422,0.040422,-1.630925,0.0,0.0,0.0,0.055472,0.398950,...,0.0,0.00,0.200000,0.596154,0.636364,0.666667,0.0,0.0,0.0,0.800000
402,0.038408,0.037620,0.041425,0.037787,-0.526342,0.0,0.0,0.0,0.075742,0.395484,...,0.0,0.25,0.233333,0.596154,0.636364,0.666667,0.0,0.0,0.0,0.766667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5039,0.948025,0.945588,0.946297,0.941530,0.044738,0.0,0.0,0.0,0.039433,0.382778,...,0.0,0.00,0.400000,0.038462,0.000000,0.000000,0.0,0.0,0.0,0.600000
5040,0.944433,0.946833,0.948143,0.947944,-0.699234,0.0,0.0,0.0,0.035484,0.428357,...,0.0,0.25,0.433333,0.038462,0.000000,0.000000,0.0,0.0,0.0,0.566667
5041,0.957352,0.956101,0.955580,0.957142,-0.348477,0.0,0.0,0.0,0.047046,0.411403,...,0.0,0.50,0.466667,0.038462,0.000000,0.000000,0.0,0.0,0.0,0.533333
5042,0.969074,0.968509,0.971851,0.965694,0.167108,0.0,0.0,0.0,0.021901,0.397246,...,0.0,0.75,0.500000,0.038462,0.000000,0.000000,0.0,0.0,0.0,0.500000


In [194]:
# Normalized frame after dropping the default raw columns and moving the percent-change columns first.
finaldf

Unnamed: 0,OC_pct_change,HL_pct_change,Volume,RSI_14,BB_Width_14,ADX_14,Volume_ROC_14,Price_Z_Score_14,Skewness_14,Std_14,...,is_trading_day,day_of_week,day_of_month,week_of_year,month,quarter,is_month_end,is_quarter_end,is_year_end,days_to_month_end
398,0.397217,0.087272,-0.661424,0.438988,0.975596,0.143026,-0.458661,0.717195,0.577635,0.118341,...,0.0,0.50,0.033333,0.576923,0.636364,0.666667,0.0,0.0,0.0,0.966667
399,0.406808,0.083638,-1.037915,0.396061,0.610136,0.134597,-0.465465,0.680336,0.483882,0.094605,...,0.0,0.75,0.066667,0.576923,0.636364,0.666667,0.0,0.0,0.0,0.933333
400,0.345655,0.107693,-0.728520,0.496568,0.653114,0.133970,-0.440972,0.696171,0.485374,0.097396,...,0.0,1.00,0.100000,0.576923,0.636364,0.666667,0.0,0.0,0.0,0.900000
401,0.398950,0.055472,-1.630925,0.576443,0.762470,0.133389,-0.477186,0.701183,0.459121,0.104644,...,0.0,0.00,0.200000,0.596154,0.636364,0.666667,0.0,0.0,0.0,0.800000
402,0.395484,0.075742,-0.526342,0.474090,0.756820,0.126701,-0.335635,0.631359,0.459720,0.104272,...,0.0,0.25,0.233333,0.596154,0.636364,0.666667,0.0,0.0,0.0,0.766667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5039,0.382778,0.039433,0.044738,0.621762,-0.394603,0.053238,0.323436,0.687640,0.626194,0.221990,...,0.0,0.00,0.400000,0.038462,0.000000,0.000000,0.0,0.0,0.0,0.600000
5040,0.428357,0.035484,-0.699234,0.599939,-0.321582,0.052544,-0.432897,0.740383,0.570400,0.239805,...,0.0,0.25,0.433333,0.038462,0.000000,0.000000,0.0,0.0,0.0,0.566667
5041,0.411403,0.047046,-0.348477,0.667598,-0.225809,0.061783,0.410110,0.803135,0.559572,0.263439,...,0.0,0.50,0.466667,0.038462,0.000000,0.000000,0.0,0.0,0.0,0.533333
5042,0.397246,0.021901,0.167108,0.685678,-0.067883,0.082044,0.869909,0.829354,0.570792,0.302418,...,0.0,0.75,0.500000,0.038462,0.000000,0.000000,0.0,0.0,0.0,0.500000


In [195]:
# Fetch all configured tickers, build features and sliding windows,
# and pickle the resulting arrays into the "data" folder.
main_pipeline()

Fetching data for GDXJ...
Fetching data for GDX...
Fetching data for SLV...
Fetching data for GLD...
Base data - Window size 14: X shape (11941, 14, 48), y shape (11941, 7, 2)
Base data - Window size 30: X shape (11893, 30, 48), y shape (11893, 7, 2)
Base data - Window size 60: X shape (11803, 60, 48), y shape (11803, 7, 2)
Base data - Window size 90: X shape (11713, 90, 48), y shape (11713, 7, 2)
Base data - Window size 180: X shape (11443, 180, 48), y shape (11443, 7, 2)
Ensemble data - Window size 14: X shape (4615, 14, 48), y shape (4615, 7, 2)
Ensemble data - Window size 30: X shape (4599, 30, 48), y shape (4599, 7, 2)
Ensemble data - Window size 60: X shape (4569, 60, 48), y shape (4569, 7, 2)
Ensemble data - Window size 90: X shape (4539, 90, 48), y shape (4539, 7, 2)
Ensemble data - Window size 180: X shape (4449, 180, 48), y shape (4449, 7, 2)
