In [None]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Step 1: Configure TensorFlow for memory management
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# Set mixed precision policy for reduced memory usage and faster computation
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Step 2: Load and preprocess data with error handling and optimized date parsing
file_path = 'gold_dec24-GC-F-_1wk.csv'
try:
    # Load the CSV file and parse dates using ISO8601 format
    df = pd.read_csv(file_path, skiprows=2,
                     names=['Date', 'Price', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'],
                     parse_dates=['Date'], 
                     date_parser=lambda x: pd.to_datetime(x.strip(), format='%Y-%m-%d %H:%M:%S%z'))
except FileNotFoundError:
    raise Exception(f"File not found: {file_path}")
except ValueError as e:
    print("Date parsing error:", e)

# Filter to keep only essential columns and drop rows with NaN values in 'Price'
df = df[['Date', 'Price']].dropna()

# Convert 'Price' to a numeric type to ensure proper data handling
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

# Check the shape of the DataFrame after filtering
print(f"Data shape after filtering: {df.shape}")

# Step 3: Define Black-Scholes model (remains unchanged)
def black_scholes(S, K, T, r, sigma):
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    call_price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    return call_price

# Set up parameters for Black-Scholes
S = df['Price'].values  
K = np.mean(S)                 
T = 1                          
r = 0.01                       

# Define objective function to estimate volatility (remains unchanged)
def objective_function(sigma):
    call_prices = black_scholes(S, K, T, r, sigma)
    return np.mean((call_prices - S) ** 2)

# Estimate volatility using optimization (remains unchanged)
initial_sigma = 0.2
result = minimize(objective_function, initial_sigma, bounds=[(0.01, 1)])
estimated_sigma = result.x[0]   

# Step 4: Prepare data for LSTM model (Deep Learning)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[['Price']])

train_size = int(len(scaled_data) * 0.8)
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]

def create_sequences(data, look_back=60):
    X, y = [], []
    for i in range(look_back, len(data)):
        X.append(data[i-look_back:i])
        y.append(data[i])
    return np.array(X), np.array(y)

look_back = 60
X_train, y_train = create_sequences(train_data, look_back)
X_test, y_test = create_sequences(test_data, look_back)

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Step 5: Build LSTM Model with improved architecture (remains unchanged)
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=25))  # Reduced units to save memory
model.add(Dropout(0.2))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Step 6: Create efficient data pipelines using tf.data API (remains unchanged)
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32).prefetch(tf.data.AUTOTUNE)

# Step 7: Train the LSTM Model with early stopping (remains unchanged)
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='loss', patience=5)
model.fit(train_dataset, epochs=50, callbacks=[early_stopping])

# Step 8: Predict future prices using LSTM model on test set (remains unchanged)
predicted_prices_lstm = model.predict(X_test)
predicted_prices_lstm_scaled_back = scaler.inverse_transform(predicted_prices_lstm)

# Evaluate model performance on test set (remains unchanged)
mse_test = mean_squared_error(df['Price'][train_size + look_back:], predicted_prices_lstm_scaled_back.flatten())
print(f'Mean Squared Error on Test Set: {mse_test:.2f}')

# Step 9: Predict next four weeks (28 days) of prices based on last known data point (remains unchanged)
last_sequence = test_data[-look_back:]
forecasted_prices_lstm_scaled_back = []
for _ in range(28):  
    last_sequence_reshaped = last_sequence.reshape((1, look_back, 1))
    next_price_scaled = model.predict(last_sequence_reshaped)[0][0]
    next_price_actual = scaler.inverse_transform([[next_price_scaled]])[0][0]
    forecasted_prices_lstm_scaled_back.append(next_price_actual)
    
    last_sequence = np.append(last_sequence[1:], [[next_price_scaled]], axis=0)

# Step 10: Plot actual vs predicted prices and forecasted future prices with enhancements (remains unchanged)
plt.figure(figsize=(14,7))
plt.plot(df['Date'][train_size + look_back:], df['Price'][train_size + look_back:], label='Actual Prices', color='blue')
plt.plot(df['Date'][train_size + look_back:], predicted_prices_lstm_scaled_back.flatten(), label='Predicted Prices (LSTM)', color='orange')
plt.axvline(x=df['Date'].iloc[-28], color='red', linestyle='--', label='Prediction Start')
plt.plot(pd.date_range(df['Date'].iloc[-28], periods=28), forecasted_prices_lstm_scaled_back, label='Forecasted Prices (Next 4 weeks)', color='green')
plt.title('LSTM Predicted Prices vs Actual Prices with Forecast')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid()
plt.tight_layout()  
plt.show()


Date parsing error: time data "Date" doesn't match format "%Y-%m-%d %H:%M:%S%z", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.


  df = pd.read_csv(file_path, skiprows=2,


NameError: name 'df' is not defined

In [None]:
import tensorflow as tf

# Check if GPU is available
physical_devices = tf.config.list_physical_devices('GPU')
print("Available GPUs:", physical_devices)

# Example model training (using CIFAR-100 dataset)
cifar = tf.keras.datasets.cifar100
(x_train, y_train), (x_test, y_test) = cifar.load_data()

model = tf.keras.applications.ResNet50(
    include_top=True,
    weights=None,
    input_shape=(32, 32, 3),
    classes=100,
)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])

model.fit(x_train, y_train, epochs=5, batch_size=64)


Available GPUs: []
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
[1m169001437/169001437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 0us/step
Epoch 1/5


In [1]:
import tensorflow as tf

# Check if GPU is available
physical_devices = tf.config.list_physical_devices('GPU')
print("Available GPUs:", physical_devices)

Available GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Step 1: Configure TensorFlow for memory management
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# Set mixed precision policy for reduced memory usage and faster computation
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Step 2: Load and preprocess data with error handling and optimized date parsing
file_path = 'gold_dec24(GC=F)_1wk.csv'

def load_and_preprocess_data(file_path):
    df = None  # Initialize df to None
    try:
        # Load the CSV file and parse dates
        df = pd.read_csv(file_path, skiprows=2,
                         names=['Date', 'Price', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'],
                         parse_dates=['Date'], 
                         date_parser=lambda x: pd.to_datetime(x.strip(), errors='coerce'))

        # Filter to keep only essential columns and drop rows with NaN values in 'Price'
        df = df[['Date', 'Price']].dropna()
        df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

        # Display DataFrame shape
        print(f"Data shape after filtering: {df.shape}")
    except FileNotFoundError:
        raise Exception(f"File not found: {file_path}")
    except ValueError as e:
        print("Date parsing error:", e)

    return df  # Return df, which may be None if an error occurred

# Load data
df = load_and_preprocess_data(file_path)

# Step 3: Define Black-Scholes model
def black_scholes(S, K, T, r, sigma):
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    call_price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    return call_price

# Set up parameters for Black-Scholes
S = df['Price'].values  
K = np.mean(S)                  
T = 1                           
r = 0.01                        

# Define objective function to estimate volatility
def objective_function(sigma):
    call_prices = black_scholes(S, K, T, r, sigma)
    return np.mean((call_prices - S) ** 2)

# Estimate volatility using optimization
initial_sigma = 0.2
result = minimize(objective_function, initial_sigma, bounds=[(0.01, 1)])
estimated_sigma = result.x[0]

print(f'Estimated Volatility: {estimated_sigma:.4f}')

# Step 4: Prepare data for LSTM model (Deep Learning)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[['Price']])

train_size = int(len(scaled_data) * 0.8)
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]

def create_sequences(data, look_back=60):
    X, y = [], []
    for i in range(look_back, len(data)):
        X.append(data[i-look_back:i])
        y.append(data[i])
    return np.array(X), np.array(y)

look_back = 60
X_train, y_train = create_sequences(train_data, look_back)
X_test, y_test = create_sequences(test_data, look_back)

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Step 5: Build LSTM Model with improved architecture
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=25))  
model.add(Dropout(0.2))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Step 6: Create efficient data pipelines using tf.data API
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32).prefetch(tf.data.AUTOTUNE)

# Step 7: Train the LSTM Model with early stopping
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='loss', patience=5)
model.fit(train_dataset, epochs=50, callbacks=[early_stopping])

# Step 8: Predict future prices using LSTM model on test set
predicted_prices_lstm = model.predict(X_test)
predicted_prices_lstm_scaled_back = scaler.inverse_transform(predicted_prices_lstm)

# Evaluate model performance on test set
mse_test = mean_squared_error(df['Price'][train_size + look_back:], predicted_prices_lstm_scaled_back.flatten())
print(f'Mean Squared Error on Test Set: {mse_test:.2f}')

# Step 9: Predict next four weeks (28 days) of prices based on last known data point
last_sequence = test_data[-look_back:]
forecasted_prices_lstm_scaled_back = []
for _ in range(28):  
    last_sequence_reshaped = last_sequence.reshape((1, look_back, 1))
    next_price_scaled = model.predict(last_sequence_reshaped)[0][0]
    next_price_actual = scaler.inverse_transform([[next_price_scaled]])[0][0]
    forecasted_prices_lstm_scaled_back.append(next_price_actual)
    
    last_sequence = np.append(last_sequence[1:], [[next_price_scaled]], axis=0)

# Step 10: Plot actual vs predicted prices and forecasted future prices with enhancements
plt.figure(figsize=(14,7))
plt.plot(df['Date'][train_size + look_back:], df['Price'][train_size + look_back:], label='Actual Prices', color='blue')
plt.plot(df['Date'][train_size + look_back:], predicted_prices_lstm_scaled_back.flatten(), label='Predicted Prices (LSTM)', color='orange')
plt.axvline(x=df['Date'].iloc[-28], color='red', linestyle='--', label='Prediction Start')
plt.plot(pd.date_range(df['Date'].iloc[-28], periods=28), forecasted_prices_lstm_scaled_back, label='Forecasted Prices (Next 4 weeks)', color='green')
plt.title('LSTM Predicted Prices vs Actual Prices with Forecast')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid()
plt.tight_layout()  
plt.show()


  df = pd.read_csv(file_path, skiprows=2,
2024-11-17 21:49:33.390129: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Max
2024-11-17 21:49:33.390160: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-11-17 21:49:33.390164: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-11-17 21:49:33.390180: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-11-17 21:49:33.390192: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super().__init__(**kwargs)


Data shape after filtering: (1044, 2)
Estimated Volatility: 1.0000
Epoch 1/50


: 