#Prepare

##Google Drive



In [None]:
# Mount the user's Google Drive into the Colab filesystem; every data path used
# later in this notebook lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


##Import stock_data

In [None]:
import pandas as pd

# Pickled container keyed by ticker; each value is that ticker's price/indicator DataFrame.
file_path = '/content/drive/MyDrive/Quant Trading/Stock Price/all_stock_data.pkl'
df_stocks = pd.read_pickle(file_path)

# Preview the first ticker's frame as a quick sanity check of the load.
display(df_stocks[list(df_stocks)[0]].head())

Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,K_5,D_5,K_14,D_14,MFI_5,MFI_14,MFI_21,BB_Middle_20,BB_Upper_20,BB_Lower_20
0,1980-03-17,3.145833,3.302083,3.125,3.125,219600.0,AMD,3.145833,3.145833,3.145833,...,,,,,0.0,0.0,0.0,,,
1,1980-03-18,3.03125,3.125,2.9375,3.125,727200.0,AMD,3.107639,3.125,3.13492,...,,,,,0.0,0.0,0.0,,,
2,1980-03-19,3.041667,3.083333,3.020833,3.03125,295200.0,AMD,3.085648,3.109848,3.126039,...,,,,,28.990665,28.990665,28.990665,,,
3,1980-03-20,3.010417,3.0625,3.010417,3.041666984558105,159600.0,AMD,3.060571,3.09177,3.115027,...,,,,,25.085655,25.085655,25.085655,,,
4,1980-03-21,2.916667,3.020833,2.90625,3.010416984558105,130800.0,AMD,3.012603,3.059933,3.096136,...,2.631661,,,,22.651101,22.651101,22.651101,,,


#Price Target Set

## Price Target Set: Initialize / Save / Load / Display

### Price Target Set Initialization (to 0)

In [None]:
# Axes of the target grid: every (ticker, target type, horizon, threshold)
# combination gets its own label series further down.
stock_tickers = [*df_stocks.keys()]
target_types = [
    'close_up',
    'close_down',
    'high_reach',
    'low_reach',
]
time_lengths = [
    '1d',
    '5d',
    '20d',
    '100d',
    '250d',
]
price_change_definitions = [
    '50pct',
    '20pct',
    '10pct',
    '5pct',
    '2pct',
]

# Echo the configuration (only the first five tickers, to keep the output short).
print(f"Stock Tickers: {stock_tickers[:5]}...")
print(f"Target Types: {target_types}")
print(f"Time Lengths: {time_lengths}")
print(f"Price Change Definitions: {price_change_definitions}")

Stock Tickers: ['AMD', 'GLD', 'GS', 'INTC', 'JPM']...
Target Types: ['close_up', 'close_down', 'high_reach', 'low_reach']
Time Lengths: ['1d', '5d', '20d', '100d', '250d']
Price Change Definitions: ['50pct', '20pct', '10pct', '5pct', '2pct']


In [None]:
# Nested layout: ticker -> target type -> time length -> price change definition -> Series.
stock_price_targets_data = {}

for stock_ticker in stock_tickers:
    # The ticker's own trading dates become the index of every placeholder series.
    stock_dates = pd.to_datetime(df_stocks[stock_ticker]['Date'])

    # A fresh all-zero integer Series is built for every leaf, so no two leaves
    # share the same object.
    stock_price_targets_data[stock_ticker] = {
        target_type: {
            time_length: {
                price_change_definition: pd.Series(0, index=stock_dates, dtype=int)
                for price_change_definition in price_change_definitions
            }
            for time_length in time_lengths
        }
        for target_type in target_types
    }

print("stock_price_targets_data initialized successfully with four levels.")
# Spot-check one leaf (AMD -> close_up -> 1d -> 50pct) to confirm the structure.
print(f"Sample Series (AMD, close_up, 1d, 50pct) head:\n{stock_price_targets_data['AMD']['close_up']['1d']['50pct'].head()}")
print(f"Sample Series (AMD, close_up, 1d, 50pct) tail:\n{stock_price_targets_data['AMD']['close_up']['1d']['50pct'].tail()}")

stock_price_targets_data initialized successfully with four levels.
Sample Series (AMD, close_up, 1d, 50pct) head:
Date
1980-03-17    0
1980-03-18    0
1980-03-19    0
1980-03-20    0
1980-03-21    0
dtype: int64
Sample Series (AMD, close_up, 1d, 50pct) tail:
Date
2026-02-06    0
2026-02-09    0
2026-02-10    0
2026-02-11    0
NaT           0
dtype: int64


###Save Price Target Set

In [None]:
import pickle

# Persist the whole nested price-target dictionary to Drive as a single pickle file.
# NOTE(review): in the visible notebook order this cell sits before the
# "Calculate ..." sections, so a plain top-to-bottom run would save the
# zero-initialized placeholders; re-run it after the targets are computed.

# Define the path to save the dictionary
save_path = '/content/drive/MyDrive/Quant Trading/Stock Price/stock_price_targets_data.pkl'

# Save the stock_price_targets_data dictionary using pickle
with open(save_path, 'wb') as f:
    pickle.dump(stock_price_targets_data, f)

print(f"stock_price_targets_data dictionary successfully saved to: {save_path}")

stock_price_targets_data dictionary successfully saved to: /content/drive/MyDrive/Quant Trading/Stock Price/stock_price_targets_data.pkl


###Load Price Target Set

In [None]:
import pickle

# Restore the nested price-target dictionary from Drive, replacing whatever
# `stock_price_targets_data` currently holds in the kernel.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were written by this notebook.

# Define the path to load the dictionary
load_path = '/content/drive/MyDrive/Quant Trading/Stock Price/stock_price_targets_data.pkl'

# Load the stock_price_targets_data dictionary
with open(load_path, 'rb') as f:
    stock_price_targets_data = pickle.load(f)

print(f"stock_price_targets_data dictionary successfully loaded from: {load_path}")

stock_price_targets_data dictionary successfully loaded from: /content/drive/MyDrive/Quant Trading/Stock Price/stock_price_targets_data.pkl


###Display Price Target Set

In [None]:
import random
import pandas as pd

# Selection to inspect: one leaf of the price-target dictionary plus a reference date.
selected_stock_ticker = 'MU'
selected_target_type = 'close_up'
selected_time_length = '5d'
selected_price_change_definition = '5pct'
selected_date = pd.to_datetime('2026-01-01')   # You can change any of them

print(f"Displaying sample for: ")
print(f"  Stock Ticker: {selected_stock_ticker}")
print(f"  Target Type: {selected_target_type}")
print(f"  Time Length: {selected_time_length}")
print(f"  Price Change Definition: {selected_price_change_definition}")
print(f"  Date: {selected_date}")

# Access the corresponding Pandas Series
sample_series = stock_price_targets_data[selected_stock_ticker][selected_target_type][selected_time_length][selected_price_change_definition]

# Date arithmetic below needs a DatetimeIndex. Convert on a copy so that merely
# displaying a sample never rewrites the index of the series stored in the dictionary.
if not isinstance(sample_series.index, pd.DatetimeIndex):
    sample_series = sample_series.copy()
    sample_series.index = pd.to_datetime(sample_series.index)

# Position of the index entry closest to selected_date (smallest absolute time difference)
diffs = abs(sample_series.index - selected_date)
closest_date_idx = diffs.argmin()
closest_date = sample_series.index[closest_date_idx]

print(f"  Closest date found in series index: {closest_date}")

# Window of up to 5 rows before and 5 rows after the closest date, clipped to the series bounds
start_idx = max(0, closest_date_idx - 5)
end_idx = min(len(sample_series) - 1, closest_date_idx + 5)
target_window = sample_series.iloc[start_idx : end_idx + 1]

# Get the subset of dates being displayed
displayed_dates = target_window.index

# Date-indexed copy of the original stock DataFrame for the selected ticker
original_stock_df = df_stocks[selected_stock_ticker].copy()
original_stock_df['Date'] = pd.to_datetime(original_stock_df['Date'])
original_stock_df.set_index('Date', inplace=True)

# Select the price rows with a boolean membership filter instead of a strict
# label lookup: a date that exists in the target series but not in the price
# frame then simply shows NaN prices after the left merge, rather than raising
# KeyError before anything is displayed.
original_stock_data_subset = original_stock_df.loc[
    original_stock_df.index.isin(displayed_dates), ['Close', 'High', 'Low']
]

# Combine the target series and original stock data into a single DataFrame for display
combined_display_df = pd.DataFrame({
    'Target Value': target_window
}).merge(original_stock_data_subset, left_index=True, right_index=True, how='left')

print(f"\nCombined Sample Data around {selected_date.year}:")
display(combined_display_df)

Displaying sample for: 
  Stock Ticker: MU
  Target Type: close_up
  Time Length: 5d
  Price Change Definition: 5pct
  Date: 2026-01-01 00:00:00
  Closest date found in series index: 2025-12-31 00:00:00

Combined Sample Data around 2026:


Unnamed: 0_level_0,Target Value,Close,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-12-23,0.0,276.158417,281.746156,272.21003
2025-12-24,1.0,286.56424,289.183177,277.138054
2025-12-26,1.0,284.675018,290.75255,283.305576
2025-12-29,1.0,294.369995,294.5,278.5
2025-12-30,1.0,292.630005,298.829987,292.01001
2025-12-31,1.0,285.410004,293.170013,284.179993
2026-01-02,1.0,315.420013,315.600006,294.859985
2026-01-05,1.0,312.149994,325.529999,309.549988
2026-01-06,0.0,343.429993,344.549988,318.059998
2026-01-07,0.0,339.549988,346.299988,337.179993


##Calculate Close Price Target

###Define Helper Functions

In [None]:
import numpy as np
import pandas as pd

def calculate_future_close_change(df, time_length_str):
    """
    Calculates the percentage change of the 'Close' price from the current row to the
    row `N` positions later, where `N` is the integer in `time_length_str`.

    Args:
        df (pd.DataFrame): DataFrame containing stock data with a 'Close' column.
        time_length_str (str): String indicating the future time length (e.g., '1d', '5d').
                               The 'd' is stripped and the remainder parsed as an int.

    Returns:
        pd.Series: Series of future close price percentage changes, indexed like `df`.
                   NaN where no row exists `N` positions ahead (the last `N` rows),
                   where either close is NaN, or where the current close is 0.

    Raises:
        ValueError: If `time_length_str` without its 'd' is not an integer.
    """
    # Extract number of rows to look ahead from time_length_str (e.g., '5d' -> 5)
    days_to_shift = int(time_length_str.replace('d', ''))

    current_close = df['Close']

    # Shifting by a negative amount aligns each row with the close N rows later
    future_close = current_close.shift(-days_to_shift)

    percentage_change = ((future_close - current_close) / current_close) * 100

    # A zero current close makes the division yield +/-inf (or NaN for 0/0), and an
    # infinite change would later be turned into a definite 0/1 label. Report it as
    # undefined (NaN) instead, the same way calculate_future_high_low_reach does.
    return percentage_change.where(current_close != 0)

def parse_percentage_string(percentage_str):
    """
    Turn a threshold label such as '50pct' into its numeric value (50.0).

    Args:
        percentage_str (str): Threshold label; every 'pct' occurrence is removed
                              before the remainder is parsed.

    Returns:
        float: The number that was written in front of 'pct'.
    """
    numeric_part = percentage_str.replace('pct', '')
    return float(numeric_part)

# Confirmation message so the cell output shows that both helper definitions in this cell ran.
print("Helper functions `calculate_future_close_change` and `parse_percentage_string` defined.")

Helper functions `calculate_future_close_change` and `parse_percentage_string` defined.


###Calculate close_up

In [None]:
for stock_ticker in stock_tickers:
    # Work on a date-indexed copy so the frame held in df_stocks is left untouched
    current_df_stock = df_stocks[stock_ticker].copy()
    current_df_stock['Date'] = pd.to_datetime(current_df_stock['Date'])
    current_df_stock.set_index('Date', inplace=True)

    for time_length in time_lengths:
        # Percentage change of the close over the horizon; NaN where it cannot be computed
        future_close_changes = calculate_future_close_change(current_df_stock, time_length)
        has_future_close = future_close_changes.notna()

        for price_change_definition in price_change_definitions:
            threshold_percentage = parse_percentage_string(price_change_definition)

            # 'close_up' label: 1.0 when the close rises by at least the threshold,
            # 0.0 when it does not, NaN when the future change is unknown.
            # The series is float on purpose: NaN cannot be stored in a plain int
            # series, and the previous `.astype(int, errors='ignore')` silently did
            # nothing whenever a NaN was present, so the dtype depended on the data.
            target_series_close_up = (
                (future_close_changes >= threshold_percentage)
                .astype(float)
                .where(has_future_close)
            )

            # Store the calculated target series in the dictionary
            stock_price_targets_data[stock_ticker]['close_up'][time_length][price_change_definition] = target_series_close_up

        print(f"Calculated close_up targets for {stock_ticker} - {time_length}")

print("All 'close_up' target variables calculated and populated.")

Calculated close_up targets for AMD - 1d
Calculated close_up targets for AMD - 5d
Calculated close_up targets for AMD - 20d
Calculated close_up targets for AMD - 100d
Calculated close_up targets for AMD - 250d
Calculated close_up targets for GLD - 1d
Calculated close_up targets for GLD - 5d
Calculated close_up targets for GLD - 20d
Calculated close_up targets for GLD - 100d
Calculated close_up targets for GLD - 250d
Calculated close_up targets for GS - 1d
Calculated close_up targets for GS - 5d
Calculated close_up targets for GS - 20d
Calculated close_up targets for GS - 100d
Calculated close_up targets for GS - 250d
Calculated close_up targets for INTC - 1d
Calculated close_up targets for INTC - 5d
Calculated close_up targets for INTC - 20d
Calculated close_up targets for INTC - 100d
Calculated close_up targets for INTC - 250d
Calculated close_up targets for JPM - 1d
Calculated close_up targets for JPM - 5d
Calculated close_up targets for JPM - 20d
Calculated close_up targets for JPM 

###Calculate close_down

In [None]:
for stock_ticker in stock_tickers:
    # Work on a date-indexed copy so the frame held in df_stocks is left untouched
    current_df_stock = df_stocks[stock_ticker].copy()
    current_df_stock['Date'] = pd.to_datetime(current_df_stock['Date'])
    current_df_stock.set_index('Date', inplace=True)

    for time_length in time_lengths:
        # Percentage change of the close over the horizon; NaN where it cannot be computed
        future_close_changes = calculate_future_close_change(current_df_stock, time_length)
        has_future_close = future_close_changes.notna()

        for price_change_definition in price_change_definitions:
            threshold_percentage = parse_percentage_string(price_change_definition)

            # 'close_down' label: 1.0 when the close falls by at least the threshold,
            # 0.0 when it does not, NaN when the future change is unknown.
            # The series is float on purpose: NaN cannot be stored in a plain int
            # series, and the previous `.astype(int, errors='ignore')` silently did
            # nothing whenever a NaN was present, so the dtype depended on the data.
            target_series_close_down = (
                (future_close_changes <= -threshold_percentage)
                .astype(float)
                .where(has_future_close)
            )

            # Store the calculated target series in the dictionary
            stock_price_targets_data[stock_ticker]['close_down'][time_length][price_change_definition] = target_series_close_down

        print(f"Calculated close_down targets for {stock_ticker} - {time_length}")

print("All 'close_down' target variables calculated and populated.")

Calculated close_down targets for AMD - 1d
Calculated close_down targets for AMD - 5d
Calculated close_down targets for AMD - 20d
Calculated close_down targets for AMD - 100d
Calculated close_down targets for AMD - 250d
Calculated close_down targets for GLD - 1d
Calculated close_down targets for GLD - 5d
Calculated close_down targets for GLD - 20d
Calculated close_down targets for GLD - 100d
Calculated close_down targets for GLD - 250d
Calculated close_down targets for GS - 1d
Calculated close_down targets for GS - 5d
Calculated close_down targets for GS - 20d
Calculated close_down targets for GS - 100d
Calculated close_down targets for GS - 250d
Calculated close_down targets for INTC - 1d
Calculated close_down targets for INTC - 5d
Calculated close_down targets for INTC - 20d
Calculated close_down targets for INTC - 100d
Calculated close_down targets for INTC - 250d
Calculated close_down targets for JPM - 1d
Calculated close_down targets for JPM - 5d
Calculated close_down targets for 

##Calculate Reach Price Target

###Define Helper Function

In [None]:
import numpy as np
import pandas as pd

def calculate_future_high_low_reach(df, time_length_str):
    """
    Calculates, for every row, how far the highest 'High' and the lowest 'Low' of the
    next `N` rows are from the current 'Close', in percent. `N` is the integer in
    `time_length_str`; the current row itself is not part of the window.

    Args:
        df (pd.DataFrame): DataFrame containing stock data with 'Close', 'High', and 'Low' columns.
        time_length_str (str): String indicating the future time length (e.g., '1d', '5d').

    Returns:
        tuple: A tuple containing two Pandas Series indexed like `df`:
               - max_high_percentage_change: Series of maximum future high percentage changes.
               - min_low_percentage_change: Series of minimum future low percentage changes.
               Both are NaN for the last `N` rows (the look-ahead window is incomplete
               there) and wherever the current close is 0 or NaN.

    Raises:
        ValueError: If `time_length_str` without its 'd' is not an integer.
    """
    # Extract number of days from time_length_str (e.g., '5d' -> 5)
    days_to_shift = int(time_length_str.replace('d', ''))

    # Positional mask of rows that still have `days_to_shift` rows after them.
    # Without it, the row exactly `days_to_shift` from the end would be computed from
    # only `days_to_shift - 1` future rows (min_periods=1 tolerates the NaN that the
    # shift introduces) and so receive a value based on a truncated window.
    has_full_window = np.arange(len(df)) + days_to_shift < len(df)

    def _future_window_extreme(prices, extreme):
        """Max ('max') or min (otherwise) of `prices` over the next `days_to_shift` rows."""
        # shift(-1) drops the current row from its own window
        upcoming = prices.shift(-1)
        # Trailing window over the shifted series; min_periods=1 keeps tolerating
        # isolated NaN prices inside the window
        window = upcoming.rolling(window=days_to_shift, min_periods=1)
        rolled = window.max() if extreme == 'max' else window.min()
        # Move the window's end back so that row i describes rows i+1 .. i+days_to_shift
        aligned = rolled.shift(-(days_to_shift - 1))
        return aligned.where(has_full_window)

    max_high_in_future = _future_window_extreme(df['High'], 'max')
    min_low_in_future = _future_window_extreme(df['Low'], 'min')

    # Get current day's close price
    current_close = df['Close']

    # Percentage distance from the current close; a zero close would divide by zero,
    # so those rows are reported as NaN
    max_high_percentage_change = ((max_high_in_future - current_close) / current_close) * 100
    max_high_percentage_change = max_high_percentage_change.mask(current_close == 0, np.nan)

    min_low_percentage_change = ((min_low_in_future - current_close) / current_close) * 100
    min_low_percentage_change = min_low_percentage_change.mask(current_close == 0, np.nan)

    return max_high_percentage_change, min_low_percentage_change

# Confirmation message so the cell output shows that the helper definition in this cell ran.
print("Helper function `calculate_future_high_low_reach` defined.")

Helper function `calculate_future_high_low_reach` defined.


###Calculate high_reach

In [None]:
for stock_ticker in stock_tickers:
    # Work on a date-indexed copy so the frame held in df_stocks is left untouched
    current_df_stock = df_stocks[stock_ticker].copy()
    current_df_stock['Date'] = pd.to_datetime(current_df_stock['Date'])
    current_df_stock.set_index('Date', inplace=True)

    for time_length in time_lengths:
        # Only the future-high half of the helper's result is needed for this target type
        max_high_percentage_change, _ = calculate_future_high_low_reach(current_df_stock, time_length)
        has_future_high = max_high_percentage_change.notna()

        for price_change_definition in price_change_definitions:
            threshold_percentage = parse_percentage_string(price_change_definition)

            # 'high_reach' label: 1.0 when the future high rises at least the threshold
            # above the current close, 0.0 when it does not, NaN when it is unknown.
            # The series is float on purpose: NaN cannot be stored in a plain int
            # series, and the previous `.astype(int, errors='ignore')` silently did
            # nothing whenever a NaN was present, so the dtype depended on the data.
            target_series_high_reach = (
                (max_high_percentage_change >= threshold_percentage)
                .astype(float)
                .where(has_future_high)
            )

            # Store the calculated target series in the dictionary
            stock_price_targets_data[stock_ticker]['high_reach'][time_length][price_change_definition] = target_series_high_reach

        print(f"Calculated high_reach targets for {stock_ticker} - {time_length}")

print("All 'high_reach' target variables calculated and populated.")

Calculated high_reach targets for AMD - 1d
Calculated high_reach targets for AMD - 5d
Calculated high_reach targets for AMD - 20d
Calculated high_reach targets for AMD - 100d
Calculated high_reach targets for AMD - 250d
Calculated high_reach targets for GLD - 1d
Calculated high_reach targets for GLD - 5d
Calculated high_reach targets for GLD - 20d
Calculated high_reach targets for GLD - 100d
Calculated high_reach targets for GLD - 250d
Calculated high_reach targets for GS - 1d
Calculated high_reach targets for GS - 5d
Calculated high_reach targets for GS - 20d
Calculated high_reach targets for GS - 100d
Calculated high_reach targets for GS - 250d
Calculated high_reach targets for INTC - 1d
Calculated high_reach targets for INTC - 5d
Calculated high_reach targets for INTC - 20d
Calculated high_reach targets for INTC - 100d
Calculated high_reach targets for INTC - 250d
Calculated high_reach targets for JPM - 1d
Calculated high_reach targets for JPM - 5d
Calculated high_reach targets for 

###Calculate low_reach

In [None]:
for stock_ticker in stock_tickers:
    # Work on a date-indexed copy so the frame held in df_stocks is left untouched
    current_df_stock = df_stocks[stock_ticker].copy()
    current_df_stock['Date'] = pd.to_datetime(current_df_stock['Date'])
    current_df_stock.set_index('Date', inplace=True)

    for time_length in time_lengths:
        # Only the future-low half of the helper's result is needed for this target type
        _, min_low_percentage_change = calculate_future_high_low_reach(current_df_stock, time_length)
        has_future_low = min_low_percentage_change.notna()

        for price_change_definition in price_change_definitions:
            threshold_percentage = parse_percentage_string(price_change_definition)

            # 'low_reach' label: 1.0 when the future low falls at least the threshold
            # below the current close, 0.0 when it does not, NaN when it is unknown.
            # The series is float on purpose: NaN cannot be stored in a plain int
            # series, and the previous `.astype(int, errors='ignore')` silently did
            # nothing whenever a NaN was present, so the dtype depended on the data.
            target_series_low_reach = (
                (min_low_percentage_change <= -threshold_percentage)
                .astype(float)
                .where(has_future_low)
            )

            # Store the calculated target series in the dictionary
            stock_price_targets_data[stock_ticker]['low_reach'][time_length][price_change_definition] = target_series_low_reach

        print(f"Calculated low_reach targets for {stock_ticker} - {time_length}")

print("All 'low_reach' target variables calculated and populated.")

Calculated low_reach targets for AMD - 1d
Calculated low_reach targets for AMD - 5d
Calculated low_reach targets for AMD - 20d
Calculated low_reach targets for AMD - 100d
Calculated low_reach targets for AMD - 250d
Calculated low_reach targets for GLD - 1d
Calculated low_reach targets for GLD - 5d
Calculated low_reach targets for GLD - 20d
Calculated low_reach targets for GLD - 100d
Calculated low_reach targets for GLD - 250d
Calculated low_reach targets for GS - 1d
Calculated low_reach targets for GS - 5d
Calculated low_reach targets for GS - 20d
Calculated low_reach targets for GS - 100d
Calculated low_reach targets for GS - 250d
Calculated low_reach targets for INTC - 1d
Calculated low_reach targets for INTC - 5d
Calculated low_reach targets for INTC - 20d
Calculated low_reach targets for INTC - 100d
Calculated low_reach targets for INTC - 250d
Calculated low_reach targets for JPM - 1d
Calculated low_reach targets for JPM - 5d
Calculated low_reach targets for JPM - 20d
Calculated lo

#Volatility Target Set

## Volatility Target Set: Initialize / Save / Load / Display

###Volatility Target Set Initialization (to 0)

In [None]:
# Filter out '1d' from time_lengths for volatility targets, as it's not meaningful.
# Built once, before the loop: the list does not depend on the ticker, and defining
# it here keeps the name available even when stock_tickers is empty.
filtered_time_lengths = [tl for tl in time_lengths if tl != '1d']

# Two-level layout: ticker -> time length -> Series.
stock_volatility_targets_data = {}

for stock_ticker in stock_tickers:
    stock_volatility_targets_data[stock_ticker] = {}
    # Extract the 'Date' column and convert to datetimes to use as the placeholder index
    stock_dates = pd.to_datetime(df_stocks[stock_ticker]['Date'])

    for time_length in filtered_time_lengths:
        # Create a Pandas Series filled with zeros, using the stock's dates as index
        zero_series = pd.Series(0, index=stock_dates, dtype=int)
        stock_volatility_targets_data[stock_ticker][time_length] = zero_series

print("stock_volatility_targets_data initialized successfully with two levels.")
# Verify the new structure by checking a sample for a valid time length (e.g., '5d')
print(f"Sample Series (AMD, 5d) head:\n{stock_volatility_targets_data['AMD']['5d'].head()}")
print(f"Sample Series (AMD, 5d) tail:\n{stock_volatility_targets_data['AMD']['5d'].tail()}")

stock_volatility_targets_data initialized successfully with two levels.
Sample Series (AMD, 5d) head:
Date
1980-03-17    0
1980-03-18    0
1980-03-19    0
1980-03-20    0
1980-03-21    0
dtype: int64
Sample Series (AMD, 5d) tail:
Date
2026-02-06    0
2026-02-09    0
2026-02-10    0
2026-02-11    0
NaT           0
dtype: int64


###Save Volatility Target Set

In [None]:
import pickle

# Persist the whole volatility dictionary to Drive as a single pickle file.
# NOTE(review): in the visible notebook order this cell sits before the
# "Calculate Volatility Set" section, so a plain top-to-bottom run would save the
# zero-initialized placeholders; re-run it after the volatility is computed.

# Define the path to save the volatility dictionary
save_path_volatility = '/content/drive/MyDrive/Quant Trading/Stock Price/stock_volatility_targets_data.pkl'

# Save the stock_volatility_targets_data dictionary using pickle
with open(save_path_volatility, 'wb') as f:
    pickle.dump(stock_volatility_targets_data, f)

print(f"stock_volatility_targets_data dictionary successfully saved to: {save_path_volatility}")

stock_volatility_targets_data dictionary successfully saved to: /content/drive/MyDrive/Quant Trading/Stock Price/stock_volatility_targets_data.pkl


###Load Volatility Target Set

In [None]:
import pickle

# Restore the volatility dictionary from Drive, replacing whatever
# `stock_volatility_targets_data` currently holds in the kernel.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were written by this notebook.

# Define the path to load the volatility dictionary
load_path_volatility = '/content/drive/MyDrive/Quant Trading/Stock Price/stock_volatility_targets_data.pkl'

# Load the stock_volatility_targets_data dictionary
with open(load_path_volatility, 'rb') as f:
    stock_volatility_targets_data = pickle.load(f)

print(f"stock_volatility_targets_data dictionary successfully loaded from: {load_path_volatility}")

stock_volatility_targets_data dictionary successfully loaded from: /content/drive/MyDrive/Quant Trading/Stock Price/stock_volatility_targets_data.pkl


###Display Volatility Target Set

In [None]:
import random
import pandas as pd

# Set the specific stock_ticker, time_length, and date for display
selected_stock_ticker = 'AMD'
selected_time_length = '5d' # Changed to a valid time length, excluding '1d'
selected_date = pd.to_datetime('2025-01-01') # You can change any of them

print(f"Displaying volatility sample for: ")
print(f"  Stock Ticker: {selected_stock_ticker}")
print(f"  Time Length: {selected_time_length}")
print(f"  Date: {selected_date}")

# Access the corresponding Pandas Series from stock_volatility_targets_data
sample_series = stock_volatility_targets_data[selected_stock_ticker][selected_time_length]

# Date arithmetic below needs a DatetimeIndex. Convert on a copy so that merely
# displaying a sample never rewrites the index of the series stored in the dictionary.
if not isinstance(sample_series.index, pd.DatetimeIndex):
    sample_series = sample_series.copy()
    sample_series.index = pd.to_datetime(sample_series.index)

# Position of the index entry closest to selected_date (smallest absolute time difference)
diffs = abs(sample_series.index - selected_date)
closest_date_idx = diffs.argmin()
closest_date = sample_series.index[closest_date_idx]

print(f"  Closest date found in series index: {closest_date}")

# Window of up to 5 rows before and 5 rows after the closest date, clipped to the series bounds
start_idx = max(0, closest_date_idx - 5)
end_idx = min(len(sample_series) - 1, closest_date_idx + 5)
volatility_window = sample_series.iloc[start_idx : end_idx + 1]

# Get the subset of dates being displayed
displayed_dates = volatility_window.index

# Date-indexed copy of the original stock DataFrame for the selected ticker
original_stock_df = df_stocks[selected_stock_ticker].copy()
original_stock_df['Date'] = pd.to_datetime(original_stock_df['Date'])
original_stock_df.set_index('Date', inplace=True)

# Select the price rows with a boolean membership filter instead of a strict
# label lookup: a date that exists in the volatility series but not in the price
# frame then simply shows NaN prices after the left merge, rather than raising
# KeyError before anything is displayed.
original_stock_data_subset = original_stock_df.loc[
    original_stock_df.index.isin(displayed_dates), ['Close', 'High', 'Low']
]

# Combine the volatility series and original stock data into a single DataFrame for display
combined_display_df = pd.DataFrame({
    'Volatility Value': volatility_window
}).merge(original_stock_data_subset, left_index=True, right_index=True, how='left')

print(f"\nCombined Sample Volatility Data around {selected_date.year}:")
display(combined_display_df)

Displaying volatility sample for: 
  Stock Ticker: AMD
  Time Length: 5d
  Date: 2025-01-01 00:00:00
  Closest date found in series index: 2024-12-31 00:00:00

Combined Sample Volatility Data around 2025:


Unnamed: 0_level_0,Volatility Value,Close,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-12-23,2.909462,124.599998,126.339996,120.470001
2024-12-24,2.926739,126.290001,127.510002,124.660004
2024-12-26,2.504274,125.059998,127.300003,125.050003
2024-12-27,2.065603,125.190002,126.18,122.260002
2024-12-30,2.541828,122.440002,124.099998,122.349998
2024-12-31,1.381048,120.790001,123.550003,120.139999
2025-01-02,0.942814,120.629997,123.139999,119.440002
2025-01-03,2.326199,125.370003,125.559998,121.419998
2025-01-06,2.741308,129.550003,130.729996,127.360001
2025-01-07,2.616774,127.330002,131.710007,126.849998


##Calculate Volatility Set

In [None]:
import numpy as np

def calculate_volatility_target(df, time_length_str):
    """
    Calculates the rolling volatility (sample standard deviation of daily log returns,
    in percent) over a window of `N` rows, where `N` is the integer in `time_length_str`.

    The window is pandas' default trailing window: the value on a row is computed from
    that row's return and the returns before it.
    NOTE(review): the price targets in this notebook look forward while this "target"
    looks backward - confirm that a trailing window is what is intended.

    Args:
        df (pd.DataFrame): DataFrame containing stock data with a 'Close' column.
        time_length_str (str): String indicating the time window for rolling calculation (e.g., '5d', '20d').

    Returns:
        pd.Series: Series of rolling volatility values indexed like `df`. The result is
                   not annualized. NaN wherever the window holds fewer than two usable
                   returns.

    Raises:
        ValueError: If `time_length_str` without its 'd' is not an integer.
    """
    # Extract number of days from time_length_str (e.g., '5d' -> 5)
    days_to_window = int(time_length_str.replace('d', ''))

    close = df['Close']
    previous_close = close.shift(1)

    # A log return is only defined when both prices are strictly positive. Such rows
    # are masked to NaN (which the rolling window skips) instead of adding an epsilon
    # to the ratio: the epsilon shifted every return slightly and turned a zero close
    # into a log return of about -20.7 that dominated the standard deviation.
    usable = (close > 0) & (previous_close > 0)
    log_returns = np.log(close.where(usable) / previous_close.where(usable))

    # Rolling sample standard deviation, expressed in percent. min_periods=1 lets the
    # window tolerate missing returns; a single return still yields NaN.
    rolling_volatility = log_returns.rolling(window=days_to_window, min_periods=1).std() * 100

    return rolling_volatility

# Confirmation message so the cell output shows that the helper definition in this cell ran.
print("Helper function `calculate_volatility_target` defined.")

Helper function `calculate_volatility_target` defined.


In [None]:
# Volatility is not computed for the '1d' horizon. The list is rebuilt here rather
# than read from a variable left behind by the initialization cell, so this cell
# also works in a session where the dictionary was loaded from the pickle instead.
filtered_time_lengths = [tl for tl in time_lengths if tl != '1d']

for stock_ticker in stock_tickers:
    # Work on a date-indexed copy so the frame held in df_stocks is left untouched
    current_df_stock = df_stocks[stock_ticker].copy()
    current_df_stock['Date'] = pd.to_datetime(current_df_stock['Date'])
    current_df_stock.set_index('Date', inplace=True)

    for time_length in filtered_time_lengths:
        # Calculate volatility for the current stock and time length
        rolling_volatility = calculate_volatility_target(current_df_stock, time_length)

        # Store the calculated continuous volatility series in the dictionary,
        # replacing the placeholder (or previously loaded) series for this horizon
        stock_volatility_targets_data[stock_ticker][time_length] = rolling_volatility

        print(f"Calculated volatility targets for {stock_ticker} - {time_length}")

print("All 'volatility' target variables calculated and populated.")

Calculated volatility targets for AMD - 5d
Calculated volatility targets for AMD - 20d
Calculated volatility targets for AMD - 100d
Calculated volatility targets for AMD - 250d
Calculated volatility targets for GLD - 5d
Calculated volatility targets for GLD - 20d
Calculated volatility targets for GLD - 100d
Calculated volatility targets for GLD - 250d
Calculated volatility targets for GS - 5d
Calculated volatility targets for GS - 20d
Calculated volatility targets for GS - 100d
Calculated volatility targets for GS - 250d
Calculated volatility targets for INTC - 5d
Calculated volatility targets for INTC - 20d
Calculated volatility targets for INTC - 100d
Calculated volatility targets for INTC - 250d
Calculated volatility targets for JPM - 5d
Calculated volatility targets for JPM - 20d
Calculated volatility targets for JPM - 100d
Calculated volatility targets for JPM - 250d
Calculated volatility targets for META - 5d
Calculated volatility targets for META - 20d
Calculated volatility targ