In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torchinfo import summary

In [15]:
# Load data
df = pd.read_csv("../data/processed_stock_data_aapl.csv")
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,sentiment
0,2013-12-31,79.167145,80.182854,79.142860,80.145714,74.571281,55771100,0.0
1,2014-01-02,79.382858,79.575714,78.860001,79.018570,73.522530,58671200,1.0
2,2014-01-03,78.980003,79.099998,77.204285,77.282860,71.907555,98116900,-1.0
3,2014-01-06,76.778572,78.114288,76.228569,77.704285,72.299644,103152700,1.0
4,2014-01-07,77.760002,77.994286,76.845711,77.148575,71.782608,79302300,1.0
...,...,...,...,...,...,...,...,...
561,2016-03-24,105.470001,106.250000,104.889999,105.669998,102.653854,26133000,0.0
562,2016-03-28,106.000000,106.190002,105.059998,105.190002,102.187561,19411400,-1.0
563,2016-03-29,104.889999,107.790001,104.879997,107.680000,104.606491,31190100,-1.0
564,2016-03-30,108.650002,110.419998,108.599998,109.559998,106.432831,45601100,1.0


In [16]:
# Prepare stock data
def prepare_stock_data(df, ma_periods=[5, 10, 20, 50]):
    data = df.copy()
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)
    for period in ma_periods:
        data[f'MA_{period}'] = data['Adj Close'].rolling(window=period).mean()
    data['Price_Change'] = data['Adj Close'].pct_change()
    data['Volume_Change'] = data['Volume'].pct_change()
    selected_features = ['Adj Close', 'Volume', 'Price_Change', 'Volume_Change', 'sentiment'] + \
                        [f'MA_{period}' for period in ma_periods]
    processed_data = data[selected_features]
    processed_data.dropna(inplace=True)
    return processed_data

df = prepare_stock_data(df)
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  processed_data.dropna(inplace=True)


Unnamed: 0_level_0,Adj Close,Volume,Price_Change,Volume_Change,sentiment,MA_5,MA_10,MA_20,MA_50
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-03-13,70.956673,64435700,-0.011107,0.293069,0.0,71.263152,71.044394,71.109581,71.029325
2014-03-14,70.159729,59299800,-0.011231,-0.079706,1.0,71.109377,71.023669,70.977604,70.941094
2014-03-17,70.433861,49886200,0.003907,-0.158746,-1.0,70.997594,71.010031,70.862274,70.879320
2014-03-18,71.056946,52411800,0.008846,0.050627,1.0,70.872167,71.012169,70.764726,70.862308
2014-03-19,71.038239,56189000,-0.000263,0.072068,-1.0,70.729090,70.997459,70.723875,70.837080
...,...,...,...,...,...,...,...,...,...
2016-03-24,102.653854,26133000,-0.004334,0.016710,0.0,103.042441,102.146759,99.868692,95.960872
2016-03-28,102.187561,19411400,-0.004542,-0.257207,-1.0,102.900607,102.431397,100.270875,96.122631
2016-03-29,104.606491,31190100,0.023671,0.606793,-1.0,103.244504,102.932668,100.804691,96.291609
2016-03-30,106.432831,45601100,0.017459,0.462038,1.0,103.796294,103.416454,101.243304,96.543298


In [18]:
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:(i + sequence_length), :])
        y.append(data[i + sequence_length, 0])
    return np.array(X), np.array(y)

In [19]:
class StockDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [20]:
class StockLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=3, dropout=0.3):
        super(StockLSTM, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # Bidirectional LSTM layers
        self.lstm1 = nn.LSTM(input_size, hidden_size, 
                           batch_first=True, bidirectional=True)
        self.bn1 = nn.BatchNorm1d(hidden_size * 2)
        self.dropout1 = nn.Dropout(dropout)
        
        self.lstm2 = nn.LSTM(hidden_size * 2, hidden_size // 2,
                           batch_first=True, bidirectional=True)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(dropout)
        
        self.lstm3 = nn.LSTM(hidden_size, hidden_size // 4,
                           batch_first=True)
        self.bn3 = nn.BatchNorm1d(hidden_size // 4)
        self.dropout3 = nn.Dropout(0.2)
        
        # Dense layers
        self.fc1 = nn.Linear(hidden_size // 4, 16)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(16, 1)
        
    def forward(self, x):
        # First LSTM layer
        x, _ = self.lstm1(x)
        x = self.bn1(x.permute(0, 2, 1)).permute(0, 2, 1)
        x = self.dropout1(x)
        
        # Second LSTM layer
        x, _ = self.lstm2(x)
        x = self.bn2(x.permute(0, 2, 1)).permute(0, 2, 1)
        x = self.dropout2(x)
        
        # Third LSTM layer
        x, _ = self.lstm3(x)
        x = self.bn3(x.permute(0, 2, 1)).permute(0, 2, 1)
        x = self.dropout3(x)
        
        # Take the last time step
        x = x[:, -1, :]
        
        # Dense layers
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        
        return x

In [21]:
def count_parameters(model):
    """Count trainable parameters in the model"""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def analyze_model_complexity(model, input_shape):
    """Analyze model complexity and architecture"""
    # Create dummy input for visualization
    batch_size = 32
    dummy_input = torch.zeros(batch_size, *input_shape)
    
    # Get model summary
    model_summary = summary(model, input_size=(batch_size, *input_shape), 
                          verbose=2, col_names=['input_size', 'output_size', 
                                              'num_params', 'kernel_size', 
                                              'mult_adds'])
    
    # Calculate total parameters
    total_params = count_parameters(model)
    
    # Calculate model complexity metrics
    complexity_metrics = {
        'Total Parameters': total_params,
        'LSTM Layers': 3,
        'Hidden Units': [256, 128, 32],  # Bidirectional doubles first two
        'Dropout Rates': [0.3, 0.3, 0.2],
        'Dense Layers': 2
    }
    
    return complexity_metrics, model_summary


In [22]:
def train_model(model, train_loader, val_loader, epochs=100, learning_rate=0.001):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    
    criterion = nn.HuberLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
        train_loss /= len(train_loader)
        train_losses.append(train_loss)
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                y_pred = model(X_batch)
                val_loss += criterion(y_pred, y_batch).item()
        
        val_loss /= len(val_loader)
        val_losses.append(val_loss)
        
        # Learning rate scheduling
        scheduler.step(val_loss)
        
        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
        
        if epoch % 10 == 0:
            print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
    
    return train_losses, val_losses

In [23]:
def prepare_and_analyze_model(df, features, sequence_length=20, test_size=0.2):
    # Prepare data
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df[features])
    
    # Create sequences
    X, y = create_sequences(scaled_data, sequence_length)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=False)
    
    # Create datasets and dataloaders
    train_dataset = StockDataset(X_train, y_train)
    test_dataset = StockDataset(X_test, y_test)
    
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    # Initialize model
    input_size = len(features)
    model = StockLSTM(input_size=input_size)
    
    # Analyze model complexity
    complexity_metrics, model_summary = analyze_model_complexity(model, (sequence_length, input_size))
    
    print("\nModel Complexity Analysis:")
    for metric, value in complexity_metrics.items():
        print(f"{metric}: {value}")
    
    print("\nDetailed Layer Analysis:")
    print(model_summary)
    
    return model, train_loader, test_loader, scaler

In [24]:
features = ['Adj Close', 'Volume', 'Price_Change', 'Volume_Change', 'MA_5', 'MA_10', 'MA_20', 'MA_50']
model, train_loader, test_loader, scaler = prepare_and_analyze_model(df, features)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Kernel Shape              Mult-Adds
StockLSTM                                [32, 20, 8]               [32, 1]                   --                        --                        --
├─LSTM: 1-1                              [32, 20, 8]               [32, 20, 256]             141,312                   --                        90,439,680
│    └─weight_ih_l0                                                                          ├─4,096                   [512, 8]
│    └─weight_hh_l0                                                                          ├─65,536                  [512, 128]
│    └─bias_ih_l0                                                                            ├─512                     [512]
│    └─bias_hh_l0                                                                            ├─512                     [512]
│    └─weight_ih_l0_reverse      