# Prepare

### Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the data paths used below live under it.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Import Original Stock Price

The following stock prices are up to February 10, 2026

In [None]:
import pandas as pd

# Tickers to load; each one is looked up as '<TICKER>.csv' inside base_path
stock_symbols = ['AMD', 'GLD', 'GS', 'INTC', 'JPM', 'META', 'MSFT', 'MU', 'NVDA', 'RXRX', 'TSLA']

# Drive folder holding the per-ticker CSV files. The trailing slash matters:
# file names are appended to this string directly.
base_path = '/content/drive/MyDrive/Quant Trading/Stock Price/'

# symbol -> DataFrame, filled only for the CSV files that load successfully
stock_data = {}

for symbol in stock_symbols:
    file_path = f'{base_path}{symbol}.csv'
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        # A missing file is reported and skipped so the remaining tickers still load
        print(f"Error: {symbol}.csv not found at {file_path}")
    except Exception as e:
        print(f"Error loading {symbol}.csv: {e}")
    else:
        stock_data[symbol] = df
        print(f"Successfully loaded {symbol}.csv")

# Show which tickers actually made it into the dictionary
print("\nLoaded stock symbols:")
print(stock_data.keys())

Successfully loaded AMD.csv
Successfully loaded GLD.csv
Successfully loaded GS.csv
Successfully loaded INTC.csv
Successfully loaded JPM.csv
Successfully loaded META.csv
Successfully loaded MSFT.csv
Successfully loaded MU.csv
Successfully loaded NVDA.csv
Successfully loaded RXRX.csv
Successfully loaded TSLA.csv

Loaded stock symbols:
dict_keys(['AMD', 'GLD', 'GS', 'INTC', 'JPM', 'META', 'MSFT', 'MU', 'NVDA', 'RXRX', 'TSLA'])


# Save/ Load/ Display 'stock_data'

### Save 'stock_data'

In [None]:
import pickle
import os

# The pickle is written into the same folder as the CSV files
save_path = os.path.join(base_path, 'all_stock_data.pkl')

# Serialise the whole stock_data dictionary into one file.
# Any failure is reported with a message instead of being raised.
try:
    with open(save_path, 'wb') as pkl_file:
        pickle.dump(stock_data, pkl_file)
except Exception as e:
    print(f"Error saving 'stock_data' dictionary: {e}")
else:
    print(f"'stock_data' dictionary successfully saved to: {save_path}")

'stock_data' dictionary successfully saved to: /content/drive/MyDrive/Quant Trading/Stock Price/all_stock_data.pkl


### Load 'stock_data'

In [None]:
import pickle
import os

# base_path is redefined here, so this cell does not rely on the CSV-loading cell
base_path = '/content/drive/MyDrive/Quant Trading/Stock Price/'

# Location of the pickled dictionary
load_path = os.path.join(base_path, 'all_stock_data.pkl')

# Restore the dictionary and preview one DataFrame.
# NOTE: pickle.load can execute arbitrary code, so only open pickle files you trust.
try:
    with open(load_path, 'rb') as pkl_file:
        loaded_stock_data = pickle.load(pkl_file)

    # From here on the restored dictionary is the working stock_data
    stock_data = loaded_stock_data

    print(f"'stock_data' dictionary successfully loaded from: {load_path}")
    print(f"Loaded stock symbols: {loaded_stock_data.keys()}")

    print("\n--- Sample DataFrame (AMD from loaded data) ---")
    display(stock_data['AMD'].head(10))
except FileNotFoundError:
    print(f"Error: File not found at {load_path}")
except Exception as e:
    # Also reached when the preview above fails (e.g. no 'AMD' entry)
    print(f"Error loading 'stock_data' dictionary: {e}")

'stock_data' dictionary successfully loaded from: /content/drive/MyDrive/Quant Trading/Stock Price/all_stock_data.pkl
Loaded stock symbols: dict_keys(['AMD', 'GLD', 'GS', 'INTC', 'JPM', 'META', 'MSFT', 'MU', 'NVDA', 'RXRX', 'TSLA'])

--- Sample DataFrame (AMD from loaded data) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,K_5,D_5,K_14,D_14,MFI_5,MFI_14,MFI_21,BB_Middle_20,BB_Upper_20,BB_Lower_20
0,1980-03-17,3.145833,3.302083,3.125,3.125,219600.0,AMD,3.145833,3.145833,3.145833,...,,,,,0.0,0.0,0.0,,,
1,1980-03-18,3.03125,3.125,2.9375,3.125,727200.0,AMD,3.107639,3.125,3.13492,...,,,,,0.0,0.0,0.0,,,
2,1980-03-19,3.041667,3.083333,3.020833,3.03125,295200.0,AMD,3.085648,3.109848,3.126039,...,,,,,28.990665,28.990665,28.990665,,,
3,1980-03-20,3.010417,3.0625,3.010417,3.041666984558105,159600.0,AMD,3.060571,3.09177,3.115027,...,,,,,25.085655,25.085655,25.085655,,,
4,1980-03-21,2.916667,3.020833,2.90625,3.010416984558105,130800.0,AMD,3.012603,3.059933,3.096136,...,2.631661,,,,22.651101,22.651101,22.651101,,,
5,1980-03-24,2.666667,2.916667,2.635417,2.916666984558105,436800.0,AMD,2.897291,2.98843,3.055234,...,6.382983,,,,17.408011,17.408011,17.408011,,,
6,1980-03-25,2.604167,2.75,2.552083,2.666666984558105,645600.0,AMD,2.799583,2.918564,3.012275,...,9.804041,6.272895,,,19.283901,13.097476,13.097476,,,
7,1980-03-26,2.447917,2.604167,2.427083,2.604166984558105,466800.0,AMD,2.682361,2.832992,2.958527,...,3.278787,6.488604,,,0.0,11.200471,11.200471,,,
8,1980-03-27,2.375,2.375,2.239583,2.375,1129200.0,AMD,2.579907,2.749721,2.902953,...,17.333374,10.138734,,,0.0,8.43771,8.43771,,,
9,1980-03-28,2.541667,2.583333,2.458333,2.4583330154418945,666000.0,AMD,2.567161,2.711893,2.868545,...,44.615437,21.742533,,,20.098582,20.919794,20.919794,,,


### Display All Feature Names

In [None]:
import pandas as pd

# Inspect one DataFrame (AMD); only its column names are used below
sample_df = stock_data['AMD'].copy()

# Raw price/volume columns that are not engineered features
original_cols = ['Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'Adj Close']

# Identifier columns: not raw market data, but not computed indicators either.
# Without this, 'symbol' is listed and counted as an engineered feature.
identifier_cols = ['symbol']

# Everything that is neither a raw column nor an identifier is an engineered feature
non_feature_cols = set(original_cols) | set(identifier_cols)
engineered_features = [col for col in sample_df.columns if col not in non_feature_cols]

print("Current Engineered Feature Names:")
for feature in engineered_features:
    print(feature)

print(f"\nTotal Engineered Features: {len(engineered_features)}")
print(f"Total Columns in a DataFrame: {len(sample_df.columns)}")


Current Engineered Feature Names:
symbol
EMA_5
EMA_10
EMA_20
EMA_50
SMA_50
SMA_100
SMA_200
RSI_5
RSI_14
RSI_21
MACD
Signal_Line
MACD_Histogram
ATR_5
ATR_14
ATR_21
Volume_SMA_5
Volume_SMA_20
Volume_SMA_60
OBV
OBV_Z_Score_20
OBV_Z_Score_50
K_5
D_5
K_14
D_14
MFI_5
MFI_14
MFI_21
BB_Middle_20
BB_Upper_20
BB_Lower_20

Total Engineered Features: 33
Total Columns in a DataFrame: 39


### Display Sample Data for Selected Stocks for Convenience of Checking

In [None]:
import pandas as pd

# Tickers whose data is previewed below
sample_symbols = ['AMD', 'GLD', 'MSFT']

print("--- Sample Data for Selected Stocks (First 5 rows, after initial NaNs) ---\n")

for symbol in sample_symbols:
    # Report tickers that were never loaded and move on to the next one
    if symbol not in stock_data:
        print(f"Error: {symbol} not found in stock_data dictionary.")
        continue

    print(f"Displaying data for {symbol}:")
    df = stock_data[symbol].copy()

    # Keep only the rows in which every EMA_*/SMA_* column has a value, so the
    # preview starts where all moving averages are available
    ma_cols = [col for col in df.columns if col.startswith(('EMA_', 'SMA_'))]
    df_clean = df.dropna(subset=ma_cols)

    # First 5 remaining rows, followed by a separator line for readability
    display(df_clean.head(5))
    print("\n" + "-" * 50 + "\n")


--- Sample Data for Selected Stocks (First 5 rows, after initial NaNs) ---

Displaying data for AMD:


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,K_5,D_5,K_14,D_14,MFI_5,MFI_14,MFI_21,BB_Middle_20,BB_Upper_20,BB_Lower_20
199,1980-12-30,5.6875,5.729167,5.5,5.5,259800.0,AMD,5.555735,5.560325,5.704131,...,87.179448,82.94454,58.730135,52.910045,89.287229,42.114079,42.044766,5.764583,6.753013,4.776154
200,1980-12-31,5.75,5.8125,5.666667,5.6875,187200.0,AMD,5.62049,5.594812,5.708499,...,85.714286,79.059816,76.92307,59.280238,82.645809,46.315076,40.553128,5.717708,6.605664,4.829753
201,1981-01-02,5.875,6.041667,5.75,5.75,558000.0,AMD,5.705326,5.645755,5.724357,...,73.333282,82.075672,85.185157,73.612787,82.877576,59.459611,46.414021,5.684375,6.488158,4.880592
202,1981-01-05,5.4375,5.979167,5.4375,5.875,477000.0,AMD,5.616051,5.60789,5.697037,...,3.333282,54.12695,46.296268,69.468165,58.803912,52.034514,45.598409,5.622917,6.286134,4.959699
203,1981-01-06,5.3125,5.458333,5.0625,5.4375,1014000.0,AMD,5.514867,5.554183,5.660414,...,25.531907,34.066157,35.185157,55.555527,42.042628,44.050379,40.842918,5.576042,6.182759,4.969324



--------------------------------------------------

Displaying data for GLD:


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,K_5,D_5,K_14,D_14,MFI_5,MFI_14,MFI_21,BB_Middle_20,BB_Upper_20,BB_Lower_20
199,2005-09-02,44.25,44.360001,44.09,44.2400016784668,1414900.0,GLD,43.876449,43.772679,43.658626,...,90.565943,76.849878,81.818182,64.273983,78.587479,61.103524,58.009915,43.807,44.649456,42.964544
200,2005-09-06,44.27,44.419998,44.189999,44.41999816894531,819700.0,GLD,44.007633,43.863101,43.716852,...,90.683353,89.976174,90.683353,84.20508,76.859192,59.019161,61.149954,43.853,44.690845,43.015155
201,2005-09-07,44.330002,44.490002,44.290001,44.40999984741211,1300700.0,GLD,44.115089,43.947992,43.775247,...,89.041128,90.096808,90.476201,87.659246,0.0,67.856791,64.899481,43.9025,44.729643,43.075358
202,2005-09-08,44.599998,44.779999,44.470001,44.720001220703125,1061200.0,GLD,44.276726,44.066539,43.853795,...,80.645135,86.789872,90.862917,90.674157,0.0,73.472616,68.781096,43.95,44.823879,43.076121
203,2005-09-09,44.84,44.869999,44.66,44.66999816894531,1393900.0,GLD,44.464484,44.207168,43.947719,...,96.153997,88.61342,98.543747,93.294288,0.0,78.322238,69.494592,43.9695,44.90555,43.03345



--------------------------------------------------

Displaying data for MSFT:


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,K_5,D_5,K_14,D_14,MFI_5,MFI_14,MFI_21,BB_Middle_20,BB_Upper_20,BB_Lower_20
199,1986-12-24,0.104726,0.104991,0.104461,0.1049909023289533,7027200.0,MSFT,0.103572,0.102678,0.101122,...,95.455059,93.357282,97.059058,91.613091,0.0,76.443803,66.402279,0.102843,0.107648,0.098039
200,1986-12-26,0.104461,0.105521,0.104461,0.1047257136127911,3715200.0,MSFT,0.103868,0.103002,0.10144,...,77.777896,91.077652,88.88964,93.464722,0.0,69.758797,61.839043,0.102711,0.107152,0.09827
201,1986-12-29,0.100219,0.105521,0.100219,0.1044606696872755,41702400.0,MSFT,0.102652,0.102496,0.101323,...,0.0,57.744318,44.448167,76.798955,56.473729,66.419151,61.773826,0.102446,0.106813,0.098078
202,1986-12-30,0.101279,0.10181,0.099158,0.1002188500865282,25401600.0,MSFT,0.102194,0.102275,0.101319,...,33.339675,37.03919,55.558187,62.965331,34.66583,64.945586,63.344583,0.10234,0.106712,0.097967
203,1986-12-31,0.10234,0.10393,0.101279,0.1012792912721912,23356800.0,MSFT,0.102243,0.102286,0.101416,...,50.004574,27.781417,66.668519,55.558291,34.066737,64.568044,68.420749,0.10226,0.106568,0.097952



--------------------------------------------------



# Feature Calculation

### Calculate 5,10,20,50-day EMA and 50,100,200-day SMA

In [None]:
import pandas as pd

# Spans of the exponential moving averages and windows of the simple moving averages
ema_periods = [5, 10, 20, 50]
sma_periods = [50, 100, 200]

# Add EMA_<n> and SMA_<n> columns to every stock's DataFrame
for symbol, df in stock_data.items():
    # Coerce Close to numbers; values that cannot be parsed become NaN
    df['Close'] = pd.to_numeric(df['Close'], errors='coerce')

    # Moving averages depend on row order, so put the rows in chronological order first
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    close = df['Close']

    # Exponential moving averages (recursive form, adjust=False)
    for period in ema_periods:
        df[f'EMA_{period}'] = close.ewm(span=period, adjust=False).mean()

    # Simple moving averages; rows before a full window is available stay NaN
    for period in sma_periods:
        df[f'SMA_{period}'] = close.rolling(window=period).mean()

    # Store the (possibly re-sorted) DataFrame back under its ticker
    stock_data[symbol] = df
    print(f"Successfully calculated EMAs and SMAs for {symbol}.")

# Preview: the first 10 AMD rows with the new moving-average columns
print("\n--- Sample DataFrame (AMD with new MA columns) ---")
display(stock_data['AMD'].head(10))


Successfully calculated EMAs and SMAs for AMD.
Successfully calculated EMAs and SMAs for GLD.
Successfully calculated EMAs and SMAs for GS.
Successfully calculated EMAs and SMAs for INTC.
Successfully calculated EMAs and SMAs for JPM.
Successfully calculated EMAs and SMAs for META.
Successfully calculated EMAs and SMAs for MSFT.
Successfully calculated EMAs and SMAs for MU.
Successfully calculated EMAs and SMAs for NVDA.
Successfully calculated EMAs and SMAs for RXRX.
Successfully calculated EMAs and SMAs for TSLA.

--- Sample DataFrame (AMD with new MA columns) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,EMA_50,SMA_50,SMA_100,SMA_200
0,1980-03-17,3.145833,3.3020830154418945,3.125,3.125,219600,AMD,3.145833,3.145833,3.145833,3.145833,,,
1,1980-03-18,3.03125,3.125,2.9375,3.125,727200,AMD,3.107639,3.125,3.13492,3.14134,,,
2,1980-03-19,3.041667,3.0833330154418945,3.0208330154418945,3.03125,295200,AMD,3.085648,3.109848,3.126039,3.137431,,,
3,1980-03-20,3.010417,3.0625,3.010416984558105,3.041666984558105,159600,AMD,3.060571,3.09177,3.115027,3.13245,,,
4,1980-03-21,2.916667,3.0208330154418945,2.90625,3.010416984558105,130800,AMD,3.012603,3.059933,3.096136,3.123988,,,
5,1980-03-24,2.666667,2.916666984558105,2.635416984558105,2.916666984558105,436800,AMD,2.897291,2.98843,3.055234,3.106054,,,
6,1980-03-25,2.604167,2.75,2.5520830154418945,2.666666984558105,645600,AMD,2.799583,2.918564,3.012275,3.086372,,,
7,1980-03-26,2.447917,2.604166984558105,2.4270830154418945,2.604166984558105,466800,AMD,2.682361,2.832992,2.958527,3.061334,,,
8,1980-03-27,2.375,2.375,2.2395830154418945,2.375,1129200,AMD,2.579907,2.749721,2.902953,3.034419,,,
9,1980-03-28,2.541667,2.5833330154418945,2.4583330154418945,2.4583330154418945,666000,AMD,2.567161,2.711893,2.868545,3.015096,,,


### Calculate 5-day, 14-day, and 21-day RSI

In [None]:
import pandas as pd

# Look-back lengths for which an RSI_<n> column is produced
rsi_periods = [5, 14, 21]

# Add RSI_<n> columns to every stock's DataFrame
for symbol, df in stock_data.items():
    # Coerce Close to numbers; values that cannot be parsed become NaN
    df['Close'] = pd.to_numeric(df['Close'], errors='coerce')

    # The calculation works on consecutive rows, so order them chronologically first
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    # Day-over-day change of the close, split into an upward and a downward part.
    # Rows where the condition is not met (including a NaN change) are set to 0.
    delta = df['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    for period in rsi_periods:
        # Same exponential smoothing for both series; com = period - 1
        # corresponds to a smoothing factor of 1 / period
        smoothing = dict(com=period-1, adjust=False)
        avg_gain = gain.ewm(**smoothing).mean()
        avg_loss = loss.ewm(**smoothing).mean()

        # Relative strength and the resulting index on a 0-100 scale
        rs = avg_gain / avg_loss
        df[f'RSI_{period}'] = 100 - (100 / (1 + rs))

    # Store the (possibly re-sorted) DataFrame back under its ticker
    stock_data[symbol] = df
    print(f"Successfully calculated RSIs for {symbol}.")

print("\n--- Sample DataFrame (AMD with new RSI columns, after initial NaNs) ---")
# Preview only rows in which every EMA_*/SMA_*/RSI_* column has a value
sample_df = stock_data['AMD'].copy()
ma_rsi_cols = [col for col in sample_df.columns if col.startswith(('EMA_', 'SMA_', 'RSI_'))]
sample_df_clean = sample_df.dropna(subset=ma_rsi_cols)
display(sample_df_clean.head(10))


Successfully calculated RSIs for AMD.
Successfully calculated RSIs for GLD.
Successfully calculated RSIs for GS.
Successfully calculated RSIs for INTC.
Successfully calculated RSIs for JPM.
Successfully calculated RSIs for META.
Successfully calculated RSIs for MSFT.
Successfully calculated RSIs for MU.
Successfully calculated RSIs for NVDA.
Successfully calculated RSIs for RXRX.
Successfully calculated RSIs for TSLA.

--- Sample DataFrame (AMD with new RSI columns, after initial NaNs) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,EMA_50,SMA_50,SMA_100,SMA_200,RSI_5,RSI_14,RSI_21
199,1980-12-30,5.6875,5.7291669845581055,5.5,5.5,259800,AMD,5.555735,5.560325,5.704131,5.680604,5.782708,5.26125,4.139375,58.629825,48.851518,49.449498
200,1980-12-31,5.75,5.8125,5.6666669845581055,5.6875,187200,AMD,5.62049,5.594812,5.708499,5.683326,5.795833,5.276563,4.152396,61.617229,49.979903,50.190847
201,1981-01-02,5.875,6.0416669845581055,5.75,5.75,558000,AMD,5.705326,5.645755,5.724357,5.690842,5.81125,5.292604,4.166615,67.486799,52.248843,51.679015
202,1981-01-05,5.4375,5.9791669845581055,5.4375,5.875,477000,AMD,5.616051,5.60789,5.697037,5.680907,5.817708,5.302813,4.178594,40.434651,44.619969,46.566084
203,1981-01-06,5.3125,5.458333015441895,5.0625,5.4375,1014000,AMD,5.514867,5.554183,5.660414,5.66646,5.820208,5.312396,4.190104,35.370914,42.701547,45.223801
204,1981-01-07,5.1875,5.208333015441895,4.708333015441895,5.208333015441895,1091400,AMD,5.405745,5.487513,5.615375,5.647677,5.822917,5.321563,4.201458,30.583369,40.811877,43.895241
205,1981-01-08,4.9375,5.1666669845581055,4.8541669845581055,5.1666669845581055,621000,AMD,5.249663,5.387511,5.550815,5.619827,5.816667,5.327083,4.212813,22.851011,37.260433,41.344585
206,1981-01-09,4.541667,4.9791669845581055,4.5416669845581055,4.9375,1116000,AMD,5.013664,5.233721,5.454706,5.577546,5.809167,5.329167,4.2225,15.230047,32.446086,37.702378
207,1981-01-12,4.666667,4.7916669845581055,4.6041669845581055,4.6041669845581055,1473000,AMD,4.897999,5.13062,5.379655,5.541825,5.8025,5.334792,4.233594,25.091554,35.289543,39.470453
208,1981-01-13,4.583333,4.6666669845581055,4.583333015441895,4.6666669845581055,506400,AMD,4.79311,5.031114,5.303814,5.504238,5.793333,5.341146,4.244635,22.87404,34.254379,38.701572


### Calculate MACD, Signal Line, and MACD Histogram


MACD line is based on 12-day EMA - 26-day EMA here.\
Signal Line is based on 9-day EMA of MACD.\
MACD Histogram = MACD - Signal.

In [None]:
import pandas as pd

# EMA spans for the MACD line (fast, slow) and for its signal line
ema_fast_period = 12
ema_slow_period = 26
signal_period = 9

# Add MACD, Signal_Line and MACD_Histogram columns to every stock's DataFrame
for symbol, df in stock_data.items():
    # Coerce Close to numbers; values that cannot be parsed become NaN
    df['Close'] = pd.to_numeric(df['Close'], errors='coerce')

    # EMAs depend on row order, so put the rows in chronological order first
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    # Fast and slow EMAs of the close, kept as local Series rather than columns
    close = df['Close']
    ema_fast = close.ewm(span=ema_fast_period, adjust=False).mean()
    ema_slow = close.ewm(span=ema_slow_period, adjust=False).mean()

    # MACD line, its EMA (the signal line) and the difference between the two
    macd_line = ema_fast - ema_slow
    signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()

    df['MACD'] = macd_line
    df['Signal_Line'] = signal_line
    df['MACD_Histogram'] = macd_line - signal_line

    # The result must not carry 'EMA_Fast'/'EMA_Slow' helper columns; drop them
    # if they are already present in the incoming data
    df = df.drop(columns=['EMA_Fast', 'EMA_Slow'], errors='ignore')

    # Store the updated DataFrame back under its ticker
    stock_data[symbol] = df
    print(f"Successfully calculated MACD indicators for {symbol}.")

print("\n--- Sample DataFrame (AMD with new MACD columns, after initial NaNs) ---")
# Preview only rows in which every MACD*/Signal_Line column has a value
sample_df = stock_data['AMD'].copy()
macd_cols = [col for col in sample_df.columns if col.startswith(('MACD', 'Signal_Line'))]
sample_df_clean = sample_df.dropna(subset=macd_cols)
display(sample_df_clean.head(10))


Successfully calculated MACD indicators for AMD.
Successfully calculated MACD indicators for GLD.
Successfully calculated MACD indicators for GS.
Successfully calculated MACD indicators for INTC.
Successfully calculated MACD indicators for JPM.
Successfully calculated MACD indicators for META.
Successfully calculated MACD indicators for MSFT.
Successfully calculated MACD indicators for MU.
Successfully calculated MACD indicators for NVDA.
Successfully calculated MACD indicators for RXRX.
Successfully calculated MACD indicators for TSLA.

--- Sample DataFrame (AMD with new MACD columns, after initial NaNs) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,EMA_50,SMA_50,SMA_100,SMA_200,RSI_5,RSI_14,RSI_21,MACD,Signal_Line,MACD_Histogram
0,1980-03-17,3.145833,3.3020830154418945,3.125,3.125,219600,AMD,3.145833,3.145833,3.145833,3.145833,,,,,,,0.0,0.0,0.0
1,1980-03-18,3.03125,3.125,2.9375,3.125,727200,AMD,3.107639,3.125,3.13492,3.14134,,,,0.0,0.0,0.0,-0.009141,-0.001828,-0.007312
2,1980-03-19,3.041667,3.0833330154418945,3.0208330154418945,3.03125,295200,AMD,3.085648,3.109848,3.126039,3.137431,,,,10.204387,8.917468,8.713958,-0.015367,-0.004536,-0.010831
3,1980-03-20,3.010417,3.0625,3.010416984558105,3.041666984558105,159600,AMD,3.060571,3.09177,3.115027,3.13245,,,,7.380296,6.922995,6.837258,-0.022563,-0.008141,-0.014421
4,1980-03-21,2.916667,3.0208330154418945,2.90625,3.010416984558105,130800,AMD,3.012603,3.059933,3.096136,3.123988,,,,3.621658,4.018942,4.073661,-0.035422,-0.013597,-0.021825
5,1980-03-24,2.666667,2.916666984558105,2.635416984558105,2.916666984558105,436800,AMD,2.897291,2.98843,3.055234,3.106054,,,,1.342548,1.822932,1.910946,-0.065036,-0.023885,-0.041151
6,1980-03-25,2.604167,2.75,2.5520830154418945,2.666666984558105,645600,AMD,2.799583,2.918564,3.012275,3.086372,,,,1.121916,1.58915,1.677208,-0.092483,-0.037605,-0.054878
7,1980-03-26,2.447917,2.604166984558105,2.4270830154418945,2.604166984558105,466800,AMD,2.682361,2.832992,2.958527,3.061334,,,,0.741245,1.181282,1.269574,-0.125397,-0.055163,-0.070234
8,1980-03-27,2.375,2.375,2.2395830154418945,2.375,1129200,AMD,2.579907,2.749721,2.902953,3.034419,,,,0.618772,1.046319,1.134469,-0.155572,-0.075245,-0.080327
9,1980-03-28,2.541667,2.5833330154418945,2.4583330154418945,2.4583330154418945,666000,AMD,2.567161,2.711893,2.868545,3.015096,,,,32.488889,22.766769,21.247945,-0.164146,-0.093025,-0.071121


### Calculate 5-day, 14-day, and 21-day Average True Range (ATR)



Using EMA for smoothing here.

In [None]:
import pandas as pd
import numpy as np

# EMA spans for which an ATR_<n> column is produced
atr_periods = [5, 14, 21]

# Add ATR_<n> columns to every stock's DataFrame
for symbol, df in stock_data.items():
    # Coerce the price columns to numbers; values that cannot be parsed become NaN
    for price_col in ('Close', 'High', 'Low'):
        df[price_col] = pd.to_numeric(df[price_col], errors='coerce')

    # The previous close is needed, so put the rows in chronological order first
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    # The three candidate ranges of the True Range
    prev_close = df['Close'].shift(1)
    high_low = df['High'] - df['Low']
    high_prev_close = (df['High'] - prev_close).abs()
    low_prev_close = (df['Low'] - prev_close).abs()

    # True Range = largest candidate per row. A missing previous close (e.g. the
    # first row) is ignored, so the value falls back to High - Low; a row without
    # a High - Low value stays NaN.
    true_range = (
        pd.concat([high_low, high_prev_close, low_prev_close], axis=1)
        .max(axis=1)
        .where(high_low.notna())
    )

    # ATR = EMA of the True Range for each configured span
    for period in atr_periods:
        df[f'ATR_{period}'] = true_range.ewm(span=period, adjust=False).mean()

    # The result must not carry a 'TR' helper column; drop it if it is already
    # present in the incoming data
    df = df.drop(columns=['TR'], errors='ignore')

    # Store the updated DataFrame back under its ticker
    stock_data[symbol] = df
    print(f"Successfully calculated ATR indicators for {symbol}.")

print("\n--- Sample DataFrame (AMD with new ATR columns, after initial NaNs) ---")
# Preview only rows in which every ATR_* column has a value
sample_df = stock_data['AMD'].copy()
atr_cols = [col for col in sample_df.columns if col.startswith('ATR_')]
sample_df_clean = sample_df.dropna(subset=atr_cols)
display(sample_df_clean.head(10))


Successfully calculated ATR indicators for AMD.
Successfully calculated ATR indicators for GLD.
Successfully calculated ATR indicators for GS.
Successfully calculated ATR indicators for INTC.
Successfully calculated ATR indicators for JPM.
Successfully calculated ATR indicators for META.
Successfully calculated ATR indicators for MSFT.
Successfully calculated ATR indicators for MU.
Successfully calculated ATR indicators for NVDA.
Successfully calculated ATR indicators for RXRX.
Successfully calculated ATR indicators for TSLA.

--- Sample DataFrame (AMD with new ATR columns, after initial NaNs) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,SMA_200,RSI_5,RSI_14,RSI_21,MACD,Signal_Line,MACD_Histogram,ATR_5,ATR_14,ATR_21
0,1980-03-17,3.145833,3.302083,3.125,3.125,219600,AMD,3.145833,3.145833,3.145833,...,,,,,0.0,0.0,0.0,0.177083,0.177083,0.177083
1,1980-03-18,3.03125,3.125,2.9375,3.125,727200,AMD,3.107639,3.125,3.13492,...,,0.0,0.0,0.0,-0.009141,-0.001828,-0.007312,0.1875,0.18125,0.179924
2,1980-03-19,3.041667,3.083333,3.020833,3.03125,295200,AMD,3.085648,3.109848,3.126039,...,,10.204387,8.917468,8.713958,-0.015367,-0.004536,-0.010831,0.145833,0.165416,0.169249
3,1980-03-20,3.010417,3.0625,3.010417,3.041666984558105,159600,AMD,3.060571,3.09177,3.115027,...,,7.380296,6.922995,6.837258,-0.022563,-0.008141,-0.014421,0.114583,0.150305,0.158598
4,1980-03-21,2.916667,3.020833,2.90625,3.010416984558105,130800,AMD,3.012603,3.059933,3.096136,...,,3.621658,4.018942,4.073661,-0.035422,-0.013597,-0.021825,0.114583,0.145542,0.154596
5,1980-03-24,2.666667,2.916667,2.635417,2.916666984558105,436800,AMD,2.897291,2.98843,3.055234,...,,1.342548,1.822932,1.910946,-0.065036,-0.023885,-0.041151,0.170139,0.163637,0.16611
6,1980-03-25,2.604167,2.75,2.552083,2.666666984558105,645600,AMD,2.799583,2.918564,3.012275,...,,1.121916,1.58915,1.677208,-0.092483,-0.037605,-0.054878,0.179398,0.168207,0.169002
7,1980-03-26,2.447917,2.604167,2.427083,2.604166984558105,466800,AMD,2.682361,2.832992,2.958527,...,,0.741245,1.181282,1.269574,-0.125397,-0.055163,-0.070234,0.178627,0.169391,0.169736
8,1980-03-27,2.375,2.375,2.239583,2.375,1129200,AMD,2.579907,2.749721,2.902953,...,,0.618772,1.046319,1.134469,-0.155572,-0.075245,-0.080327,0.188529,0.174583,0.173245
9,1980-03-28,2.541667,2.583333,2.458333,2.4583330154418945,666000,AMD,2.567161,2.711893,2.868545,...,,32.488889,22.766769,21.247945,-0.164146,-0.093025,-0.071121,0.19513,0.179083,0.176435


### Calculate 5-day, 20-day, and 60-day Volume SMA


In [None]:
import pandas as pd

# Rolling windows for which a Volume_SMA_<n> column is produced
volume_sma_periods = [5, 20, 60]

# Add Volume_SMA_<n> columns to every stock's DataFrame
for symbol, df in stock_data.items():
    # Coerce Volume to numbers; values that cannot be parsed become NaN
    df['Volume'] = pd.to_numeric(df['Volume'], errors='coerce')

    # Rolling means depend on row order, so put the rows in chronological order first
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    # Simple moving averages of volume; rows before a full window stay NaN
    volume = df['Volume']
    for period in volume_sma_periods:
        df[f'Volume_SMA_{period}'] = volume.rolling(window=period).mean()

    # Store the (possibly re-sorted) DataFrame back under its ticker
    stock_data[symbol] = df
    print(f"Successfully calculated Volume SMAs for {symbol}.")

print("\n--- Sample DataFrame (AMD with new Volume SMA columns, after initial NaNs) ---")
# Preview only rows in which every Volume_SMA_* column has a value
sample_df = stock_data['AMD'].copy()
volume_sma_cols = [col for col in sample_df.columns if col.startswith('Volume_SMA_')]
sample_df_clean = sample_df.dropna(subset=volume_sma_cols)
display(sample_df_clean.head(10))


Successfully calculated Volume SMAs for AMD.
Successfully calculated Volume SMAs for GLD.
Successfully calculated Volume SMAs for GS.
Successfully calculated Volume SMAs for INTC.
Successfully calculated Volume SMAs for JPM.
Successfully calculated Volume SMAs for META.
Successfully calculated Volume SMAs for MSFT.
Successfully calculated Volume SMAs for MU.
Successfully calculated Volume SMAs for NVDA.
Successfully calculated Volume SMAs for RXRX.
Successfully calculated Volume SMAs for TSLA.

--- Sample DataFrame (AMD with new Volume SMA columns, after initial NaNs) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,RSI_21,MACD,Signal_Line,MACD_Histogram,ATR_5,ATR_14,ATR_21,Volume_SMA_5,Volume_SMA_20,Volume_SMA_60
59,1980-06-10,2.96875,2.96875,2.864583,2.8645830154418945,464400.0,AMD,2.974341,2.980616,2.922139,...,54.196593,0.081706,0.094584,-0.012878,0.12077,0.108962,0.106797,336960.0,336480.0,380900.0
60,1980-06-11,3.052083,3.052083,2.958333,2.96875,182400.0,AMD,3.000255,2.99361,2.934514,...,56.713038,0.082063,0.09208,-0.010017,0.111763,0.106934,0.105611,329760.0,334920.0,380280.0
61,1980-06-12,3.09375,3.104167,3.020833,3.0520830154418945,404400.0,AMD,3.03142,3.011817,2.94968,...,57.926599,0.084732,0.09061,-0.005878,0.102287,0.103787,0.103586,381120.0,337860.0,374900.0
62,1980-06-13,3.09375,3.104167,3.072917,3.09375,67200.0,AMD,3.052197,3.026714,2.963401,...,57.926599,0.085857,0.089659,-0.003802,0.078608,0.094116,0.09701,308880.0,326580.0,371100.0
63,1980-06-16,3.03125,3.09375,3.03125,3.09375,242400.0,AMD,3.045214,3.027539,2.969863,...,55.359949,0.080774,0.087882,-0.007108,0.073239,0.0899,0.093872,272160.0,331920.0,372480.0
64,1980-06-17,3.0625,3.072917,3.052083,3.0520830154418945,214800.0,AMD,3.050976,3.033895,2.978685,...,56.374761,0.078365,0.085979,-0.007614,0.062715,0.083469,0.089126,222240.0,339360.0,373880.0
65,1980-06-18,3.020833,3.052083,3.0,3.0520830154418945,606000.0,AMD,3.040929,3.03152,2.982699,...,54.635878,0.07226,0.083235,-0.010975,0.062643,0.080673,0.086706,306960.0,346980.0,376700.0
66,1980-06-19,2.96875,3.052083,2.96875,3.0208330154418945,220800.0,AMD,3.016869,3.020108,2.981371,...,52.510082,0.062499,0.079088,-0.016589,0.06954,0.081028,0.086399,270240.0,332400.0,369620.0
67,1980-06-20,2.927083,2.9375,2.927083,2.9375,99600.0,AMD,2.98694,3.003194,2.9762,...,50.848183,0.050815,0.073433,-0.022618,0.060249,0.07578,0.082333,276720.0,312420.0,363500.0
68,1980-06-23,2.9375,2.958333,2.916667,2.9270830154418945,198000.0,AMD,2.97046,2.99125,2.972515,...,51.253176,0.041913,0.067129,-0.025216,0.054055,0.071231,0.078636,267840.0,301740.0,347980.0


### Calculate On-Balance Volume (OBV) and its Rolling Z-Scores (20-day and 50-day)


In [None]:
import pandas as pd
import numpy as np

# Rolling window lengths (in rows) used for the OBV z-score features.
obv_zscore_periods = [20, 50]


def _on_balance_volume(close, volume):
    """Return the cumulative On-Balance Volume for aligned close/volume Series.

    Row 0 is seeded with that row's volume (0 if that volume is missing, so a
    single gap cannot turn the whole column into NaN).  Each later row adds its
    volume when the close is above the previous close, subtracts it when the
    close is below, and carries the running total forward unchanged when the
    close is flat or when the close, the previous close, or the volume is
    missing.

    Args:
        close: numeric Series of closing prices in chronological order.
        volume: numeric Series of volumes sharing ``close``'s index.

    Returns:
        A float Series of OBV values on ``close``'s index (empty for empty
        input).
    """
    prev_close = close.shift(1)
    # Comparisons against NaN are False, so rows with a missing close (or a
    # missing previous close, including row 0) fall through to 0.0.
    signed_volume = pd.Series(
        np.where(close > prev_close, volume,
                 np.where(close < prev_close, -volume, 0.0)),
        index=close.index,
        dtype='float64',
    ).fillna(0.0)  # a missing volume contributes nothing -> previous OBV is kept

    if not signed_volume.empty:
        first_volume = volume.iloc[0]
        signed_volume.iloc[0] = 0.0 if pd.isna(first_volume) else first_volume

    # A running sum replaces the former per-row Python loop over df.loc[...];
    # it also no longer depends on the frame having a 0..n-1 label index.
    return signed_volume.cumsum()


# Add OBV and its rolling z-scores to every stock's DataFrame.
for symbol, df in stock_data.items():
    # Coerce the inputs to numbers; unparseable entries become NaN.
    df['Close'] = pd.to_numeric(df['Close'], errors='coerce')
    df['Volume'] = pd.to_numeric(df['Volume'], errors='coerce')

    # Time-series features need chronological order.
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    df['OBV'] = _on_balance_volume(df['Close'], df['Volume'])

    for period in obv_zscore_periods:
        # min_periods=1 lets the statistics start on partial windows, so the
        # earliest rows are z-scores over fewer than `period` observations
        # (and the very first row is NaN because a 1-point std is undefined).
        obv_window = df['OBV'].rolling(window=period, min_periods=1)
        z_score = (df['OBV'] - obv_window.mean()) / obv_window.std()

        # A zero rolling std gives +/-inf (or NaN for 0/0); store NaN instead.
        df[f'OBV_Z_Score_{period}'] = z_score.replace([np.inf, -np.inf], np.nan)

    # Store the (possibly re-sorted) frame back under its symbol.
    stock_data[symbol] = df
    print(f"Successfully calculated OBV and Z-scores for {symbol}.")

print("\n--- Sample DataFrame (AMD with new OBV column, after initial NaNs) ---")
# Show the first rows of AMD that have a valid OBV value.
sample_df = stock_data['AMD'].copy()
obv_cols = ['OBV']
sample_df_clean = sample_df.dropna(subset=obv_cols)
display(sample_df_clean.head(10))

Successfully calculated OBV and Z-scores for AMD.
Successfully calculated OBV and Z-scores for GLD.
Successfully calculated OBV and Z-scores for GS.
Successfully calculated OBV and Z-scores for INTC.
Successfully calculated OBV and Z-scores for JPM.
Successfully calculated OBV and Z-scores for META.
Successfully calculated OBV and Z-scores for MSFT.
Successfully calculated OBV and Z-scores for MU.
Successfully calculated OBV and Z-scores for NVDA.
Successfully calculated OBV and Z-scores for RXRX.
Successfully calculated OBV and Z-scores for TSLA.

--- Sample DataFrame (AMD with new OBV column, after initial NaNs) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,K_14,D_14,MFI_5,MFI_14,MFI_21,BB_Middle_20,BB_Upper_20,BB_Lower_20,OBV_Z_Score_20,OBV_Z_Score_50
0,1980-03-17,3.145833,3.302083,3.125,3.125,219600.0,AMD,3.145833,3.145833,3.145833,...,,,0.0,0.0,0.0,,,,,
1,1980-03-18,3.03125,3.125,2.9375,3.125,727200.0,AMD,3.107639,3.125,3.13492,...,,,0.0,0.0,0.0,,,,-0.707107,-0.707107
2,1980-03-19,3.041667,3.083333,3.020833,3.03125,295200.0,AMD,3.085648,3.109848,3.126039,...,,,28.990665,28.990665,28.990665,,,,-0.124679,-0.124679
3,1980-03-20,3.010417,3.0625,3.010417,3.041666984558105,159600.0,AMD,3.060571,3.09177,3.115027,...,,,25.085655,25.085655,25.085655,,,,-0.487399,-0.487399
4,1980-03-21,2.916667,3.020833,2.90625,3.010416984558105,130800.0,AMD,3.012603,3.059933,3.096136,...,,,22.651101,22.651101,22.651101,,,,-0.755066,-0.755066
5,1980-03-24,2.666667,2.916667,2.635417,2.916666984558105,436800.0,AMD,2.897291,2.98843,3.055234,...,,,17.408011,17.408011,17.408011,,,,-1.447393,-1.447393
6,1980-03-25,2.604167,2.75,2.552083,2.666666984558105,645600.0,AMD,2.799583,2.918564,3.012275,...,,,19.283901,13.097476,13.097476,,,,-1.796418,-1.796418
7,1980-03-26,2.447917,2.604167,2.427083,2.604166984558105,466800.0,AMD,2.682361,2.832992,2.958527,...,,,0.0,11.200471,11.200471,,,,-1.747819,-1.747819
8,1980-03-27,2.375,2.375,2.239583,2.375,1129200.0,AMD,2.579907,2.749721,2.902953,...,,,0.0,8.43771,8.43771,,,,-2.02015,-2.02015
9,1980-03-28,2.541667,2.583333,2.458333,2.4583330154418945,666000.0,AMD,2.567161,2.711893,2.868545,...,,,20.098582,20.919794,20.919794,,,,-1.209071,-1.209071


### Calculate 5-day and 14-day Stochastic Oscillator



In [None]:
# Show a fresh copy of the AMD frame again, limited to rows that have an OBV value
import pandas as pd
obv_cols = ['OBV']  # columns that must be non-null for a row to be shown
sample_df = stock_data['AMD'].copy()
has_obv = sample_df[obv_cols].notna().all(axis=1)
sample_df_clean = sample_df[has_obv]
display(sample_df_clean.head(10))

Compute the %K and %D lines for look-back periods of 5 and 14 days.\
%D is the 3-day simple moving average (SMA) of %K.

In [None]:
import pandas as pd
import numpy as np

# Look-back windows (in rows) for the %K line
stochastic_periods = [5, 14]

# Window of the simple moving average that turns %K into %D
d_period = 3

# Add K_<period> / D_<period> columns to every stock's DataFrame
for symbol, df in stock_data.items():
    # Coerce the price columns to numbers; unparseable entries become NaN
    for price_col in ('Close', 'High', 'Low'):
        df[price_col] = pd.to_numeric(df[price_col], errors='coerce')

    # Rolling windows only make sense on chronologically ordered rows
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    for period in stochastic_periods:
        # Extremes of the trailing window (NaN until `period` rows are available)
        highest_high = df['High'].rolling(window=period).max()
        lowest_low = df['Low'].rolling(window=period).min()

        # A flat window has zero range; use NaN there so %K is NaN rather than inf
        price_range = (highest_high - lowest_low).replace(0, np.nan)

        # %K: position of the close inside the window's range, scaled to 0-100
        k_col = f'K_{period}'
        df[k_col] = ((df['Close'] - lowest_low) / price_range) * 100

        # %D: d_period-row simple moving average of %K
        df[f'D_{period}'] = df[k_col].rolling(window=d_period).mean()

    # Store the (possibly re-sorted) frame back under its symbol
    stock_data[symbol] = df
    print(f"Successfully calculated Stochastic Oscillators for {symbol}.")

print("\n--- Sample DataFrame (AMD with new Stochastic columns, after initial NaNs) ---")
# Preview AMD, keeping only rows where every %K / %D column is populated
sample_df = stock_data['AMD'].copy()
stochastic_cols = [col for col in sample_df.columns if col.startswith(('K_', 'D_'))]
sample_df_clean = sample_df.dropna(subset=stochastic_cols)
display(sample_df_clean.head(10))


Successfully calculated Stochastic Oscillators for AMD.
Successfully calculated Stochastic Oscillators for GLD.
Successfully calculated Stochastic Oscillators for GS.
Successfully calculated Stochastic Oscillators for INTC.
Successfully calculated Stochastic Oscillators for JPM.
Successfully calculated Stochastic Oscillators for META.
Successfully calculated Stochastic Oscillators for MSFT.
Successfully calculated Stochastic Oscillators for MU.
Successfully calculated Stochastic Oscillators for NVDA.
Successfully calculated Stochastic Oscillators for RXRX.
Successfully calculated Stochastic Oscillators for TSLA.

--- Sample DataFrame (AMD with new Stochastic columns, after initial NaNs) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,Volume_SMA_5,Volume_SMA_20,Volume_SMA_60,OBV,OBV_Z_Score_20,OBV_Z_Score_50,K_5,D_5,K_14,D_14
15,1980-04-08,2.59375,2.625,2.5,2.541666984558105,765600.0,AMD,2.596417,2.638037,2.751749,...,424560.0,,,-1669200.0,-0.12478,-0.12478,34.615425,33.205294,41.975346,37.782672
16,1980-04-09,2.666667,2.666667,2.583333,2.59375,134400.0,AMD,2.619834,2.643242,2.743646,...,353040.0,,,-1534800.0,0.007168,0.007168,69.565442,37.504818,51.898791,42.663948
17,1980-04-10,2.739583,2.739583,2.65625,2.666666984558105,174000.0,AMD,2.65975,2.660759,2.743259,...,331200.0,,,-1360800.0,0.178823,0.178823,100.0,68.060289,64.0,52.624713
18,1980-04-11,2.635417,2.770833,2.635417,2.7395830154418945,109200.0,AMD,2.651639,2.656151,2.732989,...,307680.0,,,-1470000.0,0.062994,0.062994,50.000176,73.188539,58.461577,58.120123
19,1980-04-14,2.5625,2.645833,2.5,2.635416984558105,267600.0,AMD,2.621926,2.639124,2.716752,...,290160.0,405780.0,,-1737600.0,-0.219245,-0.219245,23.07695,57.692375,60.784374,61.081984
20,1980-04-15,2.5625,2.5625,2.520833,2.5625,505200.0,AMD,2.602117,2.625192,2.702061,...,238080.0,420060.0,,-1737600.0,-0.124975,-0.213971,23.07695,32.051359,60.784374,60.010108
21,1980-04-16,2.354167,2.572917,2.34375,2.5625,358800.0,AMD,2.519467,2.575915,2.668928,...,282960.0,401640.0,,-2096400.0,-0.496062,-0.600823,2.439101,16.197667,21.568747,47.712498
22,1980-04-17,2.427083,2.427083,2.270833,2.354166984558105,696000.0,AMD,2.488672,2.548854,2.645895,...,387360.0,421680.0,,-1400400.0,0.547654,0.194346,31.25,18.922017,31.25,37.867707
23,1980-04-18,2.322917,2.46875,2.3125,2.4270830154418945,342000.0,AMD,2.433421,2.507775,2.615136,...,433920.0,430800.0,,-1742400.0,0.169458,-0.203036,13.889058,15.859386,10.416794,21.078514
24,1980-04-21,2.291667,2.364583,2.28125,2.322916984558105,498000.0,AMD,2.386169,2.468483,2.584329,...,480000.0,449160.0,,-2240400.0,-0.598987,-0.774832,6.896748,17.345269,4.166794,15.277863


### Calculate 5-day, 14-day, and 21-day Money Flow Index (MFI)

In [None]:
import pandas as pd
import numpy as np

# Look-back windows (in rows) for the Money Flow Index features.
mfi_periods = [5, 14, 21]

# Add MFI_<period> columns to every stock's DataFrame.
for symbol, df in stock_data.items():
    # Coerce the inputs to numbers; unparseable entries become NaN.
    for numeric_col in ('Close', 'High', 'Low', 'Volume'):
        df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

    # Time-series features need chronological order.
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    # Typical price and raw money flow (typical price x volume).
    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    raw_money_flow = typical_price * df['Volume']

    # A row's flow counts as positive when its typical price rose versus the
    # previous row and as negative when it fell; otherwise it counts as 0.
    # Row 0 has no previous row, so it contributes to neither side.
    prev_typical_price = typical_price.shift(1)
    positive_flow = pd.Series(
        np.where(typical_price > prev_typical_price, raw_money_flow, 0),
        index=df.index,
    )
    negative_flow = pd.Series(
        np.where(typical_price < prev_typical_price, raw_money_flow, 0),
        index=df.index,
    )

    for period in mfi_periods:
        # min_periods=1 lets the sums start on partial windows, so the earliest
        # rows are based on fewer than `period` observations.
        positive_sum = positive_flow.rolling(window=period, min_periods=1).sum()
        negative_sum = negative_flow.rolling(window=period, min_periods=1).sum()

        # MFI = 100 - 100 / (1 + pos/neg), rewritten as 100 * pos / (pos + neg)
        # so that a window with no negative flow yields 100.  The previous code
        # turned the resulting inf ratio into 0, which reported the most
        # bullish windows as MFI = 0.  Windows with no money flow at all have
        # no defined MFI and are left as NaN instead of being reported as 0.
        total_flow = (positive_sum + negative_sum).replace(0, np.nan)
        df[f'MFI_{period}'] = 100 * positive_sum / total_flow

    # Store the (possibly re-sorted) frame back under its symbol.
    stock_data[symbol] = df
    print(f"Successfully calculated MFI indicators for {symbol}.")

print("\n--- Sample DataFrame (AMD with new MFI columns, after initial NaNs) ---")
# Show the first rows of AMD where every MFI column has a value.
sample_df = stock_data['AMD'].copy()
mfi_cols = [col for col in sample_df.columns if col.startswith('MFI_')]
sample_df_clean = sample_df.dropna(subset=mfi_cols)
display(sample_df_clean.head(10))


Successfully calculated MFI indicators for AMD.
Successfully calculated MFI indicators for GLD.
Successfully calculated MFI indicators for GS.
Successfully calculated MFI indicators for INTC.
Successfully calculated MFI indicators for JPM.
Successfully calculated MFI indicators for META.
Successfully calculated MFI indicators for MSFT.
Successfully calculated MFI indicators for MU.
Successfully calculated MFI indicators for NVDA.
Successfully calculated MFI indicators for RXRX.
Successfully calculated MFI indicators for TSLA.

--- Sample DataFrame (AMD with new MFI columns, after initial NaNs) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,OBV,OBV_Z_Score_20,OBV_Z_Score_50,K_5,D_5,K_14,D_14,MFI_5,MFI_14,MFI_21
0,1980-03-17,3.145833,3.302083,3.125,3.125,219600.0,AMD,3.145833,3.145833,3.145833,...,219600.0,,,,,,,0.0,0.0,0.0
1,1980-03-18,3.03125,3.125,2.9375,3.125,727200.0,AMD,3.107639,3.125,3.13492,...,-507600.0,-0.707107,-0.707107,,,,,0.0,0.0,0.0
2,1980-03-19,3.041667,3.083333,3.020833,3.03125,295200.0,AMD,3.085648,3.109848,3.126039,...,-212400.0,-0.124679,-0.124679,,,,,28.990665,28.990665,28.990665
3,1980-03-20,3.010417,3.0625,3.010417,3.041666984558105,159600.0,AMD,3.060571,3.09177,3.115027,...,-372000.0,-0.487399,-0.487399,,,,,25.085655,25.085655,25.085655
4,1980-03-21,2.916667,3.020833,2.90625,3.010416984558105,130800.0,AMD,3.012603,3.059933,3.096136,...,-502800.0,-0.755066,-0.755066,2.631661,,,,22.651101,22.651101,22.651101
5,1980-03-24,2.666667,2.916667,2.635417,2.916666984558105,436800.0,AMD,2.897291,2.98843,3.055234,...,-939600.0,-1.447393,-1.447393,6.382983,,,,17.408011,17.408011,17.408011
6,1980-03-25,2.604167,2.75,2.552083,2.666666984558105,645600.0,AMD,2.799583,2.918564,3.012275,...,-1585200.0,-1.796418,-1.796418,9.804041,6.272895,,,19.283901,13.097476,13.097476
7,1980-03-26,2.447917,2.604167,2.427083,2.604166984558105,466800.0,AMD,2.682361,2.832992,2.958527,...,-2052000.0,-1.747819,-1.747819,3.278787,6.488604,,,0.0,11.200471,11.200471
8,1980-03-27,2.375,2.375,2.239583,2.375,1129200.0,AMD,2.579907,2.749721,2.902953,...,-3181200.0,-2.02015,-2.02015,17.333374,10.138734,,,0.0,8.43771,8.43771
9,1980-03-28,2.541667,2.583333,2.458333,2.4583330154418945,666000.0,AMD,2.567161,2.711893,2.868545,...,-2515200.0,-1.209071,-1.209071,44.615437,21.742533,,,20.098582,20.919794,20.919794


### Calculate Bollinger Bands (Middle, Upper, and Lower Bands)


A standard 20-day period for the Middle Band (SMA) and 2 standard deviations for the Upper and Lower Bands are used.

In [None]:
import pandas as pd

# Bollinger Band settings: rolling window length and band width in standard deviations
bb_period = 20
bb_std_dev_multiplier = 2

# Add BB_Middle / BB_Upper / BB_Lower columns to every stock's DataFrame
for symbol, df in stock_data.items():
    # Coerce closes to numbers; unparseable entries become NaN
    df['Close'] = pd.to_numeric(df['Close'], errors='coerce')

    # Rolling windows only make sense on chronologically ordered rows
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    # Both statistics share the same trailing window (NaN until it is full)
    close_window = df['Close'].rolling(window=bb_period)
    middle_band = close_window.mean()
    # pandas' rolling std uses its default degrees of freedom here
    band_offset = close_window.std() * bb_std_dev_multiplier

    # Middle band is the moving average; the outer bands sit band_offset away from it
    df[f'BB_Middle_{bb_period}'] = middle_band
    df[f'BB_Upper_{bb_period}'] = middle_band + band_offset
    df[f'BB_Lower_{bb_period}'] = middle_band - band_offset

    # Store the (possibly re-sorted) frame back under its symbol
    stock_data[symbol] = df
    print(f"Successfully calculated Bollinger Bands for {symbol}.")

print("\n--- Sample DataFrame (AMD with new Bollinger Band columns, after initial NaNs) ---")
# Preview AMD, keeping only rows where every band column is populated
sample_df = stock_data['AMD'].copy()
bb_cols = [name for name in sample_df.columns if name.startswith('BB_')]
sample_df_clean = sample_df.dropna(subset=bb_cols)
display(sample_df_clean.head(10))


Successfully calculated Bollinger Bands for AMD.
Successfully calculated Bollinger Bands for GLD.
Successfully calculated Bollinger Bands for GS.
Successfully calculated Bollinger Bands for INTC.
Successfully calculated Bollinger Bands for JPM.
Successfully calculated Bollinger Bands for META.
Successfully calculated Bollinger Bands for MSFT.
Successfully calculated Bollinger Bands for MU.
Successfully calculated Bollinger Bands for NVDA.
Successfully calculated Bollinger Bands for RXRX.
Successfully calculated Bollinger Bands for TSLA.

--- Sample DataFrame (AMD with new Bollinger Band columns, after initial NaNs) ---


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,K_5,D_5,K_14,D_14,MFI_5,MFI_14,MFI_21,BB_Middle_20,BB_Upper_20,BB_Lower_20
19,1980-04-14,2.5625,2.645833,2.5,2.635416984558105,267600.0,AMD,2.621926,2.639124,2.716752,...,23.07695,57.692375,60.784374,61.081984,74.043105,44.149784,37.506449,2.703125,3.131443,2.274807
20,1980-04-15,2.5625,2.5625,2.520833,2.5625,505200.0,AMD,2.602117,2.625192,2.702061,...,23.07695,32.051359,60.784374,60.010108,26.710393,45.34981,35.321066,2.673958,3.051817,2.2961
21,1980-04-16,2.354167,2.572917,2.34375,2.5625,358800.0,AMD,2.519467,2.575915,2.668928,...,2.439101,16.197667,21.568747,47.712498,13.073272,46.243204,33.983728,2.640104,3.004255,2.275954
22,1980-04-17,2.427083,2.427083,2.270833,2.354166984558105,696000.0,AMD,2.488672,2.548854,2.645895,...,31.25,18.922017,31.25,37.867707,0.0,49.483779,34.819568,2.609375,2.93223,2.28652
23,1980-04-18,2.322917,2.46875,2.3125,2.4270830154418945,342000.0,AMD,2.433421,2.507775,2.615136,...,13.889058,15.859386,10.416794,21.078514,0.0,39.918614,30.929064,2.575,2.862533,2.287467
24,1980-04-21,2.291667,2.364583,2.28125,2.322916984558105,498000.0,AMD,2.386169,2.468483,2.584329,...,6.896748,17.345269,4.166794,15.277863,0.0,31.39621,30.030023,2.54375,2.809998,2.277502
25,1980-04-22,2.5625,2.604167,2.375,2.375,880800.0,AMD,2.444946,2.485577,2.58225,...,87.499928,36.095245,58.333397,24.305662,33.056524,35.735451,36.73792,2.538542,2.798671,2.278413
26,1980-04-23,2.614583,2.729167,2.59375,2.59375,588000.0,AMD,2.501492,2.509032,2.585329,...,74.999896,56.465524,68.75,43.750064,51.052772,44.29577,42.392068,2.539063,2.799786,2.278339
27,1980-04-24,2.78125,2.8125,2.614583,2.6145830154418945,733200.0,AMD,2.594745,2.558526,2.603988,...,94.117647,85.539157,94.230773,73.77139,74.649762,52.803568,49.756523,2.555729,2.833949,2.277509
28,1980-04-25,2.708333,2.760417,2.6875,2.760416984558105,343200.0,AMD,2.632607,2.585764,2.613926,...,80.392097,83.16988,80.769183,81.249985,73.479799,52.729271,50.21099,2.572396,2.844909,2.299883


### Calculate Past Returns

Past returns are calculated over look-back periods of 1, 5, 10, 20, 50, 100, and 250 days.

In [None]:
import pandas as pd

# Look-back horizons (in rows) for the trailing simple returns
return_periods = [1, 5, 10, 20, 50, 100, 250]

# Add a '<period>d_return' column per horizon to every stock's DataFrame
for symbol, df in stock_data.items():
    # Coerce closes to numbers; unparseable entries become NaN
    df['Close'] = pd.to_numeric(df['Close'], errors='coerce')

    # Shifting by rows only means "days ago" on chronologically ordered data
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values(by='Date').reset_index(drop=True)

    close = df['Close']
    for period in return_periods:
        # Simple return versus the close `period` rows earlier (NaN for the first rows)
        df[f'{period}d_return'] = close.div(close.shift(period)) - 1

    # Store the (possibly re-sorted) frame back under its symbol
    stock_data[symbol] = df
    print(f"Successfully calculated past returns for {symbol} for periods: {return_periods}.")

print("\n--- Sample DataFrame (AMD with new return columns, after initial NaNs) ---")
# Preview AMD, keeping only rows where every return column is populated
sample_df = stock_data['AMD'].copy()
return_cols = [name for name in sample_df.columns if 'return' in name]
sample_df_clean = sample_df.dropna(subset=return_cols)
display(sample_df_clean.head(10))

Successfully calculated past returns for AMD for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for GLD for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for GS for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for INTC for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for JPM for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for META for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for MSFT for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for MU for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for NVDA for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for RXRX for periods: [1, 5, 10, 20, 50, 100, 250].
Successfully calculated past returns for TSLA for periods: [1, 5, 10, 20, 50, 100, 250].

--- Sample DataFrame (AMD w

Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,EMA_5,EMA_10,EMA_20,...,BB_Middle_20,BB_Upper_20,BB_Lower_20,1d_return,5d_return,10d_return,20d_return,50d_return,100d_return,250d_return
250,1981-03-13,3.958333,4.125,3.916667,4.083333015441895,273600.0,AMD,3.927523,3.902431,3.928566,...,3.861458,4.090186,3.632731,-0.030612,0.032609,0.021505,0.038251,-0.311594,-0.222904,0.258278
251,1981-03-16,4.229167,4.25,3.9375,3.9583330154418954,405000.0,AMD,4.028071,3.961838,3.957195,...,3.884375,4.161577,3.607173,0.068421,0.097297,0.035714,0.121547,-0.280142,-0.171429,0.395189
252,1981-03-17,4.1875,4.4375,4.1875,4.2291669845581055,543000.0,AMD,4.081214,4.002867,3.979129,...,3.9,4.208446,3.591554,-0.009852,0.098361,0.046875,0.080645,-0.229885,-0.181263,0.376712
253,1981-03-18,4.166667,4.291667,4.125,4.1875,263400.0,AMD,4.109698,4.032649,3.99699,...,3.913542,4.244198,3.582885,-0.004975,0.098901,0.052632,0.069519,-0.215686,-0.196787,0.384083
254,1981-03-19,4.104167,4.166667,3.958333,4.1666669845581055,153600.0,AMD,4.107855,4.045652,4.007197,...,3.929167,4.265067,3.593266,-0.015,0.005102,0.094445,0.082418,-0.208835,-0.187629,0.407143
255,1981-03-20,4.166667,4.208333,4.104167,4.1041669845581055,301200.0,AMD,4.127459,4.067655,4.022385,...,3.952083,4.287085,3.617082,0.015228,0.052632,0.086957,0.123596,-0.156118,-0.206349,0.5625
256,1981-03-23,4.333333,4.354167,4.166667,4.1666669845581055,539400.0,AMD,4.196083,4.11596,4.051999,...,3.976042,4.348051,3.604032,0.04,0.02463,0.124324,0.124324,-0.045872,-0.118644,0.664
257,1981-03-24,4.395833,4.520833,4.333333,4.333333015441895,438600.0,AMD,4.262667,4.166846,4.084745,...,4.0,4.415129,3.584871,0.014423,0.049751,0.153005,0.12234,-0.058036,-0.120833,0.795744
258,1981-03-25,4.520833,4.520833,4.354167,4.395833015441895,446400.0,AMD,4.348722,4.231207,4.126277,...,4.038542,4.49683,3.580253,0.028436,0.085,0.192308,0.205555,-0.013636,-0.103306,0.903509
259,1981-03-26,4.458333,4.541667,4.416667,4.520833015441895,361200.0,AMD,4.385259,4.272503,4.157901,...,4.078125,4.537926,3.618324,-0.013825,0.086294,0.091837,0.215909,-0.036036,-0.013825,0.754098
