In [None]:
# Impute missing interest-rate values with a linear regression on time.
# Interest rates are only available for 2000-2024, but gold prices run from 1979 to 2025.

from datetime import datetime
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import numpy as np
#import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.linear_model import LinearRegression

#interest rates factored in

# Load the interest-rate and gold-price tables.
ir = pd.read_csv('./gold_historical/real-long-term-rates-2000-2024.csv')
gp = pd.read_csv('./gold_historical/Prices_cleaned.csv')

# Parse the 'Date' columns; errors='coerce' turns unparseable dates into NaT
# instead of raising.
ir['Date'] = pd.to_datetime(ir['Date'], errors='coerce')
gp['Date'] = pd.to_datetime(gp['Date'], errors='coerce')

# Index both frames by date so the assignment below aligns on it.
ir = ir.set_index('Date')
gp = gp.set_index('Date')

# Index-aligned copy: price dates with no matching rate row end up as NaN.
gp['IR'] = ir['LT-Real-Average']

# Row position is the only regressor used for the imputation.
# NOTE(review): this equals elapsed time only if gp is in chronological order
# with evenly spaced rows - confirm against Prices_cleaned.csv.
gp['Time'] = np.arange(len(gp))

# Compute the missing-value mask once and reuse it for the split and the fill,
# so the predicted values always line up with the rows they are written to.
missing_ir = gp['IR'].isnull()
train_data = gp[~missing_ir]
predict_data = gp[missing_ir]
print(train_data.head())
print(predict_data.head())

# Fit IR ~ Time on the rows where the rate is known.
X_train = train_data[['Time']]
y_train = train_data['IR']
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and fill only when something is actually missing; calling
# model.predict on a zero-row frame raises instead of returning an empty array.
X_predict = predict_data[['Time']]
if len(X_predict) > 0:
    predicted_values = model.predict(X_predict)
    gp.loc[missing_ir, 'IR'] = predicted_values
else:
    predicted_values = np.empty(0)

print(gp.head())
print(gp.shape)
print(gp.iloc[::-1].head())

# The helper 'Time' column is written out together with the other columns.
gp.to_csv('./gold_historical/gInterest.csv')

In [None]:
# using backfill / forwardfill to deal with missing interest rate values

import pandas as pd

# Load the interest-rate and gold-price tables.
ir = pd.read_csv('./gold_historical/real-long-term-rates-2000-2024.csv')
gp = pd.read_csv('./gold_historical/Prices_cleaned.csv')

# Parse the 'Date' columns; errors='coerce' turns unparseable dates into NaT
# instead of raising.
ir['Date'] = pd.to_datetime(ir['Date'], errors='coerce')
gp['Date'] = pd.to_datetime(gp['Date'], errors='coerce')

# Index both frames by date so the assignment below aligns on it.
ir.set_index('Date', inplace=True)
gp.set_index('Date', inplace=True)

# Index-aligned copy: price dates with no matching rate row end up as NaN.
gp['IR'] = ir['LT-Real-Average']

# Back-fill first (each gap takes the next known rate), then forward-fill
# whatever is still missing after the last known rate.
# Series.bfill()/ffill() replace fillna(method=...), which is deprecated in
# recent pandas releases.
gp['IR'] = gp['IR'].bfill().ffill()

print(gp.head())
print(gp.iloc[::-1].head())
print(gp.shape)

gp.to_csv('./gold_historical/gInterestFill.csv')

In [None]:
# looking at the relationship between interest rate and prices

import pandas as pd

def _keep_column(column_name):
    """Column filter for read_csv: keep everything except a column named 'index'."""
    return column_name != 'index'


# Read the regression-imputed table and index it by the parsed date.
data = pd.read_csv('./gold_historical/gInterest.csv', usecols=_keep_column)
data = data.assign(Date=pd.to_datetime(data['Date'], errors='coerce')).set_index('Date')

# Prices are stored as text with ',' separators; strip them before casting.
price_text = data['Price'].str.replace(',', '')
data['Price'] = price_text.astype(float)
data['IR'] = data['IR'].astype(float)

# Correlation between the gold price and the interest rate (Series.corr default method).
correlation = data['Price'].corr(data['IR'])
print(correlation)

In [None]:
# actual model

from datetime import datetime
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import numpy as np
#import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

#predict price then calc return

# Read the regression-imputed table, skipping any column literally named 'index'.
data = pd.read_csv('./gold_historical/gInterest.csv', usecols=lambda name: name != 'index')

# Parse the dates (unparseable values become NaT) and use them as the index.
data = data.assign(Date=pd.to_datetime(data['Date'], errors='coerce')).set_index('Date')

# Prices are stored as text with ',' separators; strip them before casting.
price_text = data['Price'].str.replace(',', '')
data['Price'] = price_text.astype(float)
data['IR'] = data['IR'].astype(float)

# Log return of each row relative to the row before it.
previous_price = data['Price'].shift(1)
data['Return'] = np.log(data['Price'] / previous_price)

# Rows without a return (at least the first one, which has no predecessor) are removed.
data.dropna(subset=['Return'], inplace=True)

print(data.head())
print(data.size)


# Number of consecutive past rows fed to the model for each prediction.
look_back = 14

# Chronological split: the first 95% of rows are used for training,
# the remainder is held out for testing.
train_size = int(len(data) * 0.95)

# Fit the scaler on the training rows only, then apply it to every row.
# Fitting on the full series would let the held-out rows' mean and variance
# leak into the training inputs and make the test metrics optimistic.
price_ir = data[['Price', 'IR']]
scaler = StandardScaler()
scaler.fit(price_ir.iloc[:train_size])
scaled_data = scaler.transform(price_ir)

train_data = scaled_data[:train_size, :]

# Each training sample is a (look_back, 2) window of scaled [Price, IR] rows;
# its target is the scaled Price (column 0) of the row right after the window.
X_train = np.array([train_data[i - look_back: i] for i in range(look_back, len(train_data))])
Y_train = train_data[look_back:, 0].copy()

# Prepend the last look_back training rows so the first held-out row
# already has a complete input window.
testing = scaled_data[train_size - look_back:, :]
X_test = np.array([testing[i - look_back: i] for i in range(look_back, len(testing))])

# Targets for the held-out rows: their scaled Price.
Y_test = np.array(scaled_data[train_size:, 0])

# Build, compile and train the LSTM price model.
# Every Keras name comes from tensorflow.keras, matching the Sequential / LSTM /
# Dense imports at the top of this cell; mixing in the standalone `keras`
# package can pair objects from two different Keras installations.
import random

from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

# Seed the Python, NumPy and TensorFlow generators so weight initialisation,
# dropout and shuffling are repeatable between runs (as far as the backend allows).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Two stacked LSTM layers over (look_back, 2) windows, then a regularised dense
# layer with dropout and a single-unit output for the scaled price.
# NOTE(review): Dense(128) has no activation, so it is a purely linear layer -
# confirm that this is intended.
model = Sequential()
model.add(LSTM(units=64, return_sequences=True, input_shape=(look_back, 2)))
model.add(LSTM(units=64))
model.add(Dense(128, kernel_regularizer=regularizers.L2(0.002)))
model.add(keras.layers.Dropout(0.5))
model.add(Dense(1))

learning_rate = 0.0008
optimizer = Adam(learning_rate=learning_rate)

# Stop once val_loss has not improved for 10 epochs (restoring the best weights),
# and multiply the learning rate by 0.2 after 5 stagnant epochs, down to 1e-5.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)

# The loss is MAE on the scaled price; RMSE is tracked as an additional metric.
model.compile(optimizer=optimizer, loss='mae', metrics=[RootMeanSquaredError()])

# validation_split=0.2 holds out the last 20% of the training samples for validation.
history = model.fit(X_train, Y_train, epochs=40, callbacks=[early_stopping, reduce_lr], validation_split=0.2)


from sklearn.metrics import mean_absolute_error


def _scaled_price_to_units(scaled_price):
    """Map scaled prices back to price units with the two-column scaler.

    The scaler was fitted on [Price, IR], so a zero placeholder column is
    appended for IR; only the Price column (index 0) is read back.
    """
    price_column = np.asarray(scaled_price).reshape(-1, 1)
    ir_placeholder = np.zeros((price_column.shape[0], 1))
    padded = np.concatenate((price_column, ir_placeholder), axis=1)
    return scaler.inverse_transform(padded)[:, 0]


# Model output for the held-out windows (scaled price).
pred = model.predict(X_test)

# Split the original frame at the same boundary used for the scaled arrays.
train = data[:train_size]
test = data[train_size:].copy(deep=True)
test['pred'] = _scaled_price_to_units(pred)

# Loss and metric on the scaled targets, as configured in model.compile.
Y_test = Y_test.reshape(-1)
test_loss, test_rmse = model.evaluate(X_test, Y_test)

print(f"Test Loss (MAE): {test_loss}")
print(f"Test RMSE: {test_rmse}")

# MAE again, this time in price units.
y_test_inv = _scaled_price_to_units(Y_test)
mae = mean_absolute_error(y_test_inv, test['pred'])

print(f"Mean Absolute Error (MAE): {mae:.2f}")

# Training prices, followed by held-out actual and predicted prices.
plt.figure(figsize=(10, 8))
plt.plot(train["Price"], c="b")
plt.plot(test[["Price", "pred"]])
plt.ylabel("price")
plt.legend(['train', 'test', 'predic'])
plt.show()

from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

# Log returns implied by consecutive predicted prices.
test['pred_log_return'] = np.log(test['pred'] / test['pred'].shift(1))

# Log returns of the actual prices over the same rows.
test['actual_log_return'] = np.log(test['Price'] / test['Price'].shift(1))

# Drop only the rows that lack a log return (the first row has no predecessor).
# A bare dropna() would also discard rows that merely have a NaN in some
# unrelated column of the frame.
test = test.dropna(subset=['pred_log_return', 'actual_log_return'])

# Error of the predicted returns against the actual returns.
# NOTE(review): MAPE divides by the actual return, which can be zero or very
# close to it, so this figure can become extremely large.
mae_log_return = mean_absolute_error(test['actual_log_return'], test['pred_log_return'])
mape_log_return = mean_absolute_percentage_error(test['actual_log_return'], test['pred_log_return'])

print(f"Mean Absolute Error (MAE) in log returns: {mae_log_return:.6f}")
print(f"Mean Absolute Percentage Error (MAPE) in log returns: {mape_log_return:.6f}")

# Plot the actual and predicted log returns
plt.figure(figsize=(10, 8))
plt.plot(test.index, test['actual_log_return'], label='Actual Log Return', color='blue')
plt.plot(test.index, test['pred_log_return'], label='Predicted Log Return', color='red')
plt.xlabel('Date')
plt.ylabel('Log Return')
plt.legend()
plt.show()

# Forecast the next price from the most recent look_back rows, then derive its log return.
# The last window is shaped as a single-sample batch: (1, look_back, 2).
last_sequence = scaled_data[-look_back:]
last_sequence = last_sequence.reshape(1, look_back, 2)

print(X_train.shape)
print(last_sequence.shape)

# Scaled next-step price from the model.
predicted_price_scaled = model.predict(last_sequence)

# The scaler expects two columns, so pad a zero placeholder for IR and
# read back only the Price column.
ir_placeholder = np.zeros((predicted_price_scaled.shape[0], 1))
padded_prediction = np.concatenate((predicted_price_scaled, ir_placeholder), axis=1)
predicted_price = scaler.inverse_transform(padded_prediction)[:, 0]

# Log return of the forecast relative to the last observed price.
last_price = data['Price'].iloc[-1]
price_ratio = predicted_price[0] / last_price
predicted_return = np.log(price_ratio)

print(f"Predicted Price for Tomorrow: {predicted_price[0]:.2f}")
print(f"Predicted Return for Tomorrow: {predicted_return:.6f}")


In [None]:
# if i use forward fill / backward fill for interest rates