# Build Prediction Models with LSTM

**Add a brief introduction of LSTM**

Finish this later

In [1]:
import pandas as pd
from datetime import datetime
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [4]:
# Read training and testing datasets
df_train = pd.read_csv("../data/train_dataset.csv", parse_dates=['date'])
df_test = pd.read_csv("../data/test_dataset.csv", parse_dates=['date'])

df_all = pd.concat([df_train, df_test], ignore_index=True)


In [5]:
# Normalize spy_return
scaler = StandardScaler()
df_all['spy_return_scaled'] = scaler.fit_transform(df_all[['spy_return']])

# Create sequences
def create_sequences(data, labels, window_size):
    X, y = [], []
    for i in range(window_size, len(data)):
        X.append(data[i - window_size:i])
        y.append(labels[i])
    return np.array(X), np.array(y)

window_size = 10
X_all, y_all = create_sequences(
    df_all['spy_return_scaled'].values,
    df_all['spy_direction'].values,
    window_size
)

# Split back into train/test
train_size = len(df_train) - window_size
X_train, X_test = X_all[:train_size], X_all[train_size:]
y_train, y_test = y_all[:train_size], y_all[train_size:]

In [6]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Convert to PyTorch tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32).unsqueeze(-1)
y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(-1)
X_test_t = torch.tensor(X_test, dtype=torch.float32).unsqueeze(-1)
y_test_t = torch.tensor(y_test, dtype=torch.float32).unsqueeze(-1)

# Dataset and DataLoader
train_ds = TensorDataset(X_train_t, y_train_t)
train_dl = DataLoader(train_ds, batch_size=16, shuffle=True)

# Define the model
class LSTMModel(nn.Module):
    def __init__(self, input_size=1, hidden_size=64):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # take the last output
        out = self.dropout(out)
        out = self.fc(out)
        return self.sigmoid(out)

model = LSTMModel()
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [8]:
epochs = 200
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_dl:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

Epoch 1/200, Loss: 9.4694
Epoch 2/200, Loss: 9.4852
Epoch 3/200, Loss: 9.5239
Epoch 4/200, Loss: 9.4589
Epoch 5/200, Loss: 9.4741
Epoch 6/200, Loss: 9.3292
Epoch 7/200, Loss: 9.3844
Epoch 8/200, Loss: 9.4170
Epoch 9/200, Loss: 9.4405
Epoch 10/200, Loss: 9.3306
Epoch 11/200, Loss: 9.3918
Epoch 12/200, Loss: 9.2471
Epoch 13/200, Loss: 9.2748
Epoch 14/200, Loss: 9.3068
Epoch 15/200, Loss: 9.2315
Epoch 16/200, Loss: 9.2199
Epoch 17/200, Loss: 9.2513
Epoch 18/200, Loss: 9.1846
Epoch 19/200, Loss: 9.2298
Epoch 20/200, Loss: 9.0184
Epoch 21/200, Loss: 9.1986
Epoch 22/200, Loss: 9.0961
Epoch 23/200, Loss: 9.0614
Epoch 24/200, Loss: 9.1773
Epoch 25/200, Loss: 9.0254
Epoch 26/200, Loss: 8.9755
Epoch 27/200, Loss: 9.0829
Epoch 28/200, Loss: 8.9700
Epoch 29/200, Loss: 8.8920
Epoch 30/200, Loss: 8.8928
Epoch 31/200, Loss: 8.8474
Epoch 32/200, Loss: 8.8355
Epoch 33/200, Loss: 8.7372
Epoch 34/200, Loss: 8.8225
Epoch 35/200, Loss: 8.5790
Epoch 36/200, Loss: 8.9062
Epoch 37/200, Loss: 8.7964
Epoch 38/2

In [9]:
model.eval()
with torch.no_grad():
    y_pred_prob = model(X_test_t).squeeze()
    y_pred = (y_pred_prob > 0.5).int()

from sklearn.metrics import classification_report, confusion_matrix

print(classification_report(y_test_t.int().numpy(), y_pred.numpy()))
print(confusion_matrix(y_test_t.int().numpy(), y_pred.numpy()))

              precision    recall  f1-score   support

           0       0.39      0.57      0.46        21
           1       0.64      0.46      0.53        35

    accuracy                           0.50        56
   macro avg       0.51      0.51      0.50        56
weighted avg       0.55      0.50      0.51        56

[[12  9]
 [19 16]]


**Comment on LSTM performance**

* No strong sequential patterns: Daily market direction is noisy and often not autocorrelated enough for LSTM to detect patterns
* Too little data: LSTMs need hundreds to thousands of samples to generalize well. You only have ~200 rows for training
* Simple input feature: Using only spy_return might not be enough â€” youâ€™re feeding it a nearly random walk
* Shallow architecture: A basic LSTM with 1 layer and 64 units may not have enough capacity (but adding depth can overfit too)
* Output shape or loss mismatch: For classification, sigmoid + BCE is correct, but it's sensitive to class imbalance and label quality
* No regularization: If your training set is small, even small noise can cause instability
