In [27]:
# Load the raw GLD price table (the filename suggests 2004-11-18 .. 2025-01-19).
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
import numpy as np
import pandas as pd

df = pd.read_csv(
    "/Users/hoyinchui/Downloads/GLD-20041118-20250119.csv"
)


In [28]:
# Parse the Date strings into timezone-aware (UTC) timestamps.
df["Date"] = pd.to_datetime(
    df["Date"],
    utc=True,
)

In [29]:
# Sanity check: confirm Date is now a datetime column and the rest are numeric.
print(df.dtypes)

Date             datetime64[ns, UTC]
Open                         float64
High                         float64
Low                          float64
Close                        float64
Volume                         int64
Dividends                    float64
Stock Splits                 float64
Capital Gains                float64
dtype: object


In [30]:
# Keep only the numeric columns; the timestamp column cannot be min-max scaled.
df_date_removed = df.drop('Date', axis='columns')


In [31]:
# Min-max scale every remaining column independently (default range [0, 1]).
# The result is a plain NumPy array, so columns are addressed by position from here on.
# NOTE(review): the scaler is fit on the full history; if a train/test split happens
# later, fit on the training rows only to avoid look-ahead leakage. Also keep `scaler`
# around, since it is needed to map predictions back to price units.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(df_date_removed)
df_normalized = scaler.transform(df_date_removed)


In [35]:
#####################################################################
#               individual-prediction-model design data             #
#####################################################################
import pickle

# Number of future rows each sample must predict.
target_days = 7
# Look-back lengths (in rows) to generate one dataset for each.
window_sizes = [7, 14, 30, 60]

def create_sliding_window_data(data, window_size, target_days, target_cols=slice(1, 3)):
    """Build supervised (X, y) samples by sliding a fixed-length window over ``data``.

    Sample ``k`` pairs rows ``[k, k + window_size)`` (all columns) with the
    ``target_days`` rows that immediately follow, restricted to ``target_cols``.

    Args:
        data: Array-like whose first axis is time and second axis is features.
        window_size: Number of consecutive rows in each input window.
        target_days: Number of rows after the window to use as the target.
        target_cols: Column selector applied to the target rows. The default
            ``slice(1, 3)`` keeps columns 1 and 2, which for the frame built in
            this notebook (Date dropped) are High and Low, in that order.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with shapes
        ``(n, window_size, n_features)`` and ``(n, target_days, n_targets)``,
        where ``n = max(len(data) - window_size - target_days + 1, 0)``.
        When no window fits, both arrays are empty but keep their trailing
        dimensions.

    Raises:
        ValueError: If ``window_size`` or ``target_days`` is negative.
    """
    if window_size < 0 or target_days < 0:
        raise ValueError("window_size and target_days must be non-negative")

    data = np.asarray(data)

    # Number of start positions for which both the window and its target fit.
    n_samples = max(len(data) - window_size - target_days + 1, 0)

    # Column vector of start rows; adding a row vector of offsets broadcasts to
    # an (n_samples, length) index grid, so every window is gathered in one
    # indexing operation instead of a Python loop.
    starts = np.arange(n_samples)[:, None]
    window_rows = starts + np.arange(window_size)
    target_rows = starts + window_size + np.arange(target_days)

    X = data[window_rows]
    # Select the target columns first so only those columns are copied.
    y = data[:, target_cols][target_rows]

    return X, y

# Build one (X, y) dataset per look-back length and pickle each array separately.
for window_size in window_sizes:
    X, y = create_sliding_window_data(df_normalized, window_size, target_days)
    print()
    print(f"Window size: {window_size} - X: {X.shape}, y: {y.shape}")
    # Same file-name pattern for both arrays; only the X/y prefix differs.
    for label, array in (("X", X), ("y", y)):
        with open(f'/Users/hoyinchui/Downloads/{label}_{window_size}days_i.pkl', 'wb') as file:
            pickle.dump(array, file)
        print(f"Done Saving {label}")



Window size: 7 - X: (5061, 7, 8), y: (5061, 7, 2)
Done Saving X
Done Saving y

Window size: 14 - X: (5054, 14, 8), y: (5054, 7, 2)
Done Saving X
Done Saving y

Window size: 30 - X: (5038, 30, 8), y: (5038, 7, 2)
Done Saving X
Done Saving y

Window size: 60 - X: (5008, 60, 8), y: (5008, 7, 2)
Done Saving X
Done Saving y


In [36]:
########################################################
#               All-in-1-model design data             #
########################################################
# Number of future rows each sample must predict.
target_days = 7
# Look-back lengths (in rows) fed to the combined model.
window_sizes = [7, 14, 30, 60]
# The longest look-back is the first usable anchor row, so every window size
# yields the same number of samples.
max_window_size = max(window_sizes)

def create_sliding_window_data_a(i_from_window_size_loop, data, window_size, max_window_size, target_days, target_cols=slice(1, 3)):
    """Build window samples that are aligned across several window sizes.

    Every anchor row ``i`` in ``[max_window_size, len(data) - target_days]``
    produces one sample: the input is rows ``[i - window_size, i)`` and the
    target is rows ``[i, i + target_days)`` restricted to ``target_cols``.
    Because anchors start at ``max_window_size`` for every ``window_size``,
    all window sizes yield the same number of samples and share one target
    array.

    Args:
        i_from_window_size_loop: Position of this call in the caller's loop
            over window sizes. Targets are built only when it equals 0.
        data: Array-like whose first axis is time and second axis is features.
        window_size: Number of rows in each input window; must not exceed
            ``max_window_size``.
        max_window_size: Largest window size in use; sets the first anchor row.
        target_days: Number of rows, starting at the anchor, used as target.
        target_cols: Column selector applied to the target rows. The default
            ``slice(1, 3)`` keeps columns 1 and 2, which for the frame built in
            this notebook (Date dropped) are High and Low, in that order.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape ``(n, window_size, n_features)``.
        ``y`` has shape ``(n, target_days, n_targets)`` when
        ``i_from_window_size_loop == 0``; otherwise it is an empty array of
        shape ``(0,)`` used as a placeholder.

    Raises:
        ValueError: If ``window_size`` or ``target_days`` is negative, or if
            ``window_size`` exceeds ``max_window_size`` (the window would
            start before row 0 and silently wrap or come back empty).
    """
    if window_size < 0 or target_days < 0:
        raise ValueError("window_size and target_days must be non-negative")
    if window_size > max_window_size:
        raise ValueError("window_size must not exceed max_window_size")

    data = np.asarray(data)

    # Anchor rows as a column vector; an empty range gives zero samples while
    # the index grids below still carry the trailing dimensions.
    anchors = np.arange(max_window_size, len(data) - target_days + 1)[:, None]

    # Input window covers rows [anchor - window_size, anchor - 1].
    X = data[anchors - window_size + np.arange(window_size)]

    if i_from_window_size_loop == 0:
        # Targets cover rows [anchor, anchor + target_days - 1].
        y = data[:, target_cols][anchors + np.arange(target_days)]
    else:
        # Targets are identical for every window size, so later calls skip them.
        y = np.array([])

    return X, y

# Build the aligned inputs for each look-back length; the shared targets are
# produced and saved only on the first pass.
for position, window_size in enumerate(window_sizes):
    X, y = create_sliding_window_data_a(position, df_normalized, window_size, max_window_size, target_days)
    print()
    print(f"Window size: {window_size} - X: {X.shape}, y: {y.shape}")
    # Pickle is used instead of CSV: it stores multi-dimensional arrays as-is,
    # whereas CSV would turn them into strings.
    with open(f'/Users/hoyinchui/Downloads/X_{window_size}days_a.pkl', 'wb') as file:
        pickle.dump(X, file)
    print("Done Saving X")
    if position != 0:
        continue
    with open('/Users/hoyinchui/Downloads/y_a.pkl', 'wb') as file:
        pickle.dump(y, file)
        print("Done Saving y")



Window size: 7 - X: (5008, 7, 8), y: (5008, 7, 2)
Done Saving X
Done Saving y

Window size: 14 - X: (5008, 14, 8), y: (0,)
Done Saving X

Window size: 30 - X: (5008, 30, 8), y: (0,)
Done Saving X

Window size: 60 - X: (5008, 60, 8), y: (0,)
Done Saving X
