In [None]:
# %% [markdown]
"""
This notebook implements a deep learning approach to predict the next day's closing price for Amazon (AMZN) stock using historical data.
"""
# %%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

def big_model_baba(filepath: str):
    # %% [markdown]
    """
    ## Data Loading and Exploration
    """
    # %%
    # Read the CSV with 'Date' parsed as the index, then order the rows by date
    df = pd.read_csv(filepath, parse_dates=['Date'], index_col='Date').sort_index()

    # Report the state of the data before any rows are removed
    print("Data summary before processing:")
    print(df.describe())
    print("\nNA values in each column:")
    print(df.isna().sum())

    # Treat +/-inf as missing, discard every row with a missing value, re-sort by date
    df = df.replace([np.inf, -np.inf], np.nan).dropna().sort_index()
    print(df.head())

    # Line chart of the 'Close' column against the date index
    plt.figure(figsize=(12, 6))
    plt.plot(df['Close'])
    plt.title('Closing Price History')
    plt.xlabel('Date')
    plt.ylabel('Price ($)')
    plt.grid()
    plt.show()

    # %% [markdown]
    """
    ## Feature Engineering and Preprocessing

    We'll create several features:
    - First-order differences (day-to-day changes)
    - Second-order differences (changes of changes)
    - Moving averages (5-day and 10-day)
    - Volatility (standard deviation of last 5 days)
    - Volume change (day-over-day percentage change in volume)
    """
    # %%
    # Create features
    def create_features(data, window_size=5):
        """Return a copy of ``data`` extended with derived price/volume columns.

        Added columns (the names are fixed regardless of ``window_size``):
        ``Diff_1`` and ``Diff_2`` are the first and second differences of 'Close';
        ``MA_5`` and ``MA_10`` are rolling means of 'Close' over ``window_size`` and
        ``2 * window_size`` rows; ``Volatility`` is the rolling standard deviation of
        'Close' over ``window_size`` rows; ``Volume_Change`` is the percentage change
        of 'Volume'.

        Rows containing NaN or +/-inf in any column are removed from the result.
        The input frame is not modified.
        """
        close = data['Close']
        close_step = close.diff()
        short_window = close.rolling(window=window_size)
        long_window = close.rolling(window=window_size * 2)

        enriched = data.assign(
            Diff_1=close_step,
            Diff_2=close_step.diff(),
            MA_5=short_window.mean(),
            MA_10=long_window.mean(),
            Volatility=short_window.std(),
            Volume_Change=data['Volume'].pct_change(),
        )

        # Infinite values (e.g. a percentage change from a zero volume) are turned
        # into NaN so that a single dropna removes both kinds of unusable rows.
        return enriched.replace([np.inf, -np.inf], np.nan).dropna()

    # Build the engineered-feature frame from the cleaned data (default window_size=5)
    featured_df = create_features(df)
    # Preview the first rows that remain after create_features drops NaN/inf rows
    print(featured_df.head())

    # %% [markdown]
    """
    ## Train-Test-Validation Split

    For time series data, we must split sequentially to avoid lookahead bias.
    We'll use:
    - 70% for training
    - 15% for validation
    - 15% for testing
    """
    # %%
    def train_test_val_split(data, train_ratio=0.7, val_ratio=0.15):
        """Cut ``data`` into consecutive train, validation and test slices.

        The first ``int(n * train_ratio)`` rows form the training slice, the next
        ``int(n * val_ratio)`` rows the validation slice, and whatever is left the
        test slice. Row order is preserved; nothing is shuffled.

        Returns:
            tuple: ``(train, val, test)`` positional slices of ``data``.
        """
        total_rows = len(data)
        first_cut = int(total_rows * train_ratio)
        second_cut = first_cut + int(total_rows * val_ratio)

        bounds = ((None, first_cut), (first_cut, second_cut), (second_cut, None))
        train, val, test = (data.iloc[start:stop] for start, stop in bounds)
        return train, val, test

    # Sequential split using the function's default ratios (0.7 train, 0.15 validation, rest test)
    train_data, val_data, test_data = train_test_val_split(featured_df)
    print(f"Train size: {len(train_data)}, Validation size: {len(val_data)}, Test size: {len(test_data)}")

    # %% [markdown]
    """
    ## Data Scaling

    We'll scale our features using MinMaxScaler to [0,1] range, being careful to:
    1. Fit the scaler only on training data
    2. Transform all sets with the same scaler
    """
    # %%
    # Input columns for the model; 'Close' is scaled on its own as the target
    feature_cols = ['Open', 'High', 'Low', 'Volume', 'Diff_1', 'Diff_2', 'MA_5', 'MA_10', 'Volatility', 'Volume_Change']

    # Both scalers map to [0, 1] and are fitted on the training rows only
    unit_range = (0, 1)
    target_scaler = MinMaxScaler(feature_range=unit_range).fit(train_data[['Close']])
    feature_scaler = MinMaxScaler(feature_range=unit_range).fit(train_data[feature_cols])

    # Transform all datasets
    def scale_dataset(data, feature_scaler, target_scaler, feature_cols):
        """Scale the feature columns and the 'Close' target of ``data``.

        Rows in which any selected feature or the target is NaN or +/-inf are
        left out before scaling. The scalers are only asked to ``transform``;
        they are never refitted here.

        Returns:
            tuple: ``(X_scaled, y_scaled)`` as returned by the two scalers.
        """
        non_finite = [np.inf, -np.inf]
        features = data[feature_cols].replace(non_finite, np.nan)
        target = data[['Close']].replace(non_finite, np.nan)

        # A row is usable only if neither its features nor its target have a gap
        has_gap = features.isna().any(axis=1) | target.isna().any(axis=1)
        usable = ~has_gap

        scaled_features = feature_scaler.transform(features[usable])
        scaled_target = target_scaler.transform(target[usable])
        return scaled_features, scaled_target

    # Scale the three splits, in train / validation / test order, with the same pair of scalers
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = (
        scale_dataset(split, feature_scaler, target_scaler, feature_cols)
        for split in (train_data, val_data, test_data)
    )

    # %% [markdown]
    """
    ## Sliding Window Dataset Creation

    We'll create sequences of historical data points to predict the next value.
    A window size of 10 is commonly used for daily stock data.
    """
    # %%
    class StockDataset(Dataset):
        """Sliding-window dataset over aligned feature and target arrays.

        Item ``i`` pairs the ``window_size`` consecutive feature rows
        ``X[i : i + window_size]`` with the target row that follows them,
        ``y[i + window_size]``.

        Args:
            X: Feature rows, indexable and sliceable along the first axis.
            y: Target rows aligned with ``X`` along the first axis.
            window_size: Number of consecutive feature rows per sample.
        """

        def __init__(self, X, y, window_size=10):
            self.X = X
            self.y = y
            self.window_size = window_size

        def __len__(self):
            # Clamp at zero: when there are fewer rows than one window plus its
            # target, a negative value would make len() raise ValueError.
            return max(0, len(self.X) - self.window_size)

        def __getitem__(self, idx):
            """Return ``(features, target)`` float32 tensors for sample ``idx``.

            Negative indices count from the end, like a sequence.

            Raises:
                IndexError: If ``idx`` is outside the dataset.
            """
            n_samples = len(self)
            if idx < 0:
                # Without this, a negative start would slice an empty or
                # misaligned window instead of the intended sample.
                idx += n_samples
            if not 0 <= idx < n_samples:
                raise IndexError(f"index out of range for StockDataset of length {n_samples}")

            window_end = idx + self.window_size
            features = torch.tensor(self.X[idx:window_end], dtype=torch.float32)
            target = torch.tensor(self.y[window_end], dtype=torch.float32)
            return features, target

    window_size = 10
    batch_size = 16

    # One sliding-window dataset per split, all sharing the same window length
    train_dataset, val_dataset, test_dataset = (
        StockDataset(X_part, y_part, window_size)
        for X_part, y_part in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
    )

    # Only the training loader shuffles; validation and test keep their order
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # %% [markdown]
    """
    ## Model Architecture

    We'll use an LSTM-based model, which is well-suited for time series forecasting.
    The architecture includes:
    - LSTM layers to capture temporal patterns
    - Dropout for regularization
    - Fully connected layers for final prediction
    """
    # %%
    class LSTMModel(nn.Module):
        """Stacked LSTM followed by dropout and a linear output layer.

        The input is batch-first, ``(batch, sequence, input_size)``. Only the LSTM
        output at the final sequence position is passed on to the dropout and
        linear layers, so the result has shape ``(batch, output_size)``.
        """

        def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob=0.2):
            super().__init__()
            self.hidden_size = hidden_size
            self.num_layers = num_layers

            # Sub-modules are created in lstm -> dropout -> fc order
            self.lstm = nn.LSTM(
                input_size,
                hidden_size,
                num_layers,
                batch_first=True,
                dropout=dropout_prob,
            )
            self.dropout = nn.Dropout(dropout_prob)
            self.fc = nn.Linear(hidden_size, output_size)

        def forward(self, x):
            # Zero-filled hidden and cell states, one slice per layer, on x's device
            state_shape = (self.num_layers, x.size(0), self.hidden_size)
            hidden_init = torch.zeros(*state_shape).to(x.device)
            cell_init = torch.zeros(*state_shape).to(x.device)

            sequence_out, _ = self.lstm(x, (hidden_init, cell_init))

            # Keep the last time step only, regularise it, then project to the output size
            final_step = sequence_out[:, -1, :]
            return self.fc(self.dropout(final_step))

    # Model parameters
    input_size = X_train.shape[1]  # Number of feature columns in the scaled training matrix
    hidden_size = 64  # Passed to LSTMModel as the LSTM hidden size
    num_layers = 2  # Passed to LSTMModel as the number of stacked LSTM layers
    output_size = 1  # One predicted value per window (the scaled 'Close')
    dropout_prob = 0.2  # Used by LSTMModel for both the LSTM dropout and the Dropout layer

    # Build the network used for the train/validation/test experiment and show its layout
    model = LSTMModel(input_size, hidden_size, num_layers, output_size, dropout_prob)
    print(model)

    # %% [markdown]
    """
    ## Training Setup

    We'll use:
    - MSE Loss (appropriate for regression)
    - Adam optimizer
    - Learning rate scheduler
    - Early stopping
    """
    # %%
    # Training parameters
    learning_rate = 0.001
    num_epochs = 200  # Upper bound on epochs; early stopping may end training sooner
    patience = 20  # For early stopping: epochs without a new best validation loss

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Multiplies the learning rate by 0.1 once the monitored value (the validation
    # loss passed to scheduler.step in train_model) has not improved for 10 epochs
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

    # Device configuration: use CUDA when available, otherwise the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # %% [markdown]
    """
    ## Training Loop
    """
    # %%
    def train_model(model, train_loader, val_loader, num_epochs, patience):
        """Fit ``model`` with early stopping and return it with its best weights loaded.

        ``criterion``, ``optimizer``, ``scheduler`` and ``device`` are taken from the
        enclosing scope. After every epoch the mean validation loss is passed to the
        learning-rate scheduler and compared with the best value seen so far.

        Args:
            model: Network to train; it is updated in place.
            train_loader: Batches used for the gradient updates.
            val_loader: Batches used to compute the validation loss.
            num_epochs: Maximum number of epochs to run.
            patience: Number of consecutive epochs without a new best validation
                loss after which training stops.

        Returns:
            The same ``model`` object, holding the weights of the epoch with the
            lowest validation loss. If no epoch produced a usable validation loss,
            the weights are left as they were when training ended.

        Raises:
            ValueError: If either loader yields no batches.
        """
        if len(train_loader) == 0 or len(val_loader) == 0:
            # Averaging the loss over zero batches would otherwise fail with an
            # uninformative ZeroDivisionError part-way through the first epoch.
            raise ValueError("train_loader and val_loader must each contain at least one batch")

        best_val_loss = float('inf')
        best_state = None
        epochs_no_improve = 0

        train_losses = []
        val_losses = []

        for epoch in range(num_epochs):
            # Training pass
            model.train()
            train_loss = 0
            for batch_X, batch_y in train_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)

                # Forward pass
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_loss += loss.item()

            avg_train_loss = train_loss / len(train_loader)
            train_losses.append(avg_train_loss)

            # Validation pass (no gradients, dropout disabled via eval mode)
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for batch_X, batch_y in val_loader:
                    batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                    outputs = model(batch_X)
                    val_loss += criterion(outputs, batch_y).item()

            avg_val_loss = val_loss / len(val_loader)
            val_losses.append(avg_val_loss)
            scheduler.step(avg_val_loss)

            # Early stopping check
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                # state_dict() hands back references to the live parameter tensors,
                # which later optimizer steps keep overwriting. Clone each tensor so
                # the snapshot really is the state of this epoch.
                best_state = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= patience:
                    print(f'Early stopping at epoch {epoch+1}')
                    break

            if (epoch+1) % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.6f}, Val Loss: {avg_val_loss:.6f}')

        # Restore the best snapshot; there is none if no epoch ran or every
        # validation loss was NaN, in which case the current weights are kept.
        if best_state is not None:
            model.load_state_dict(best_state)

        # Plot training history
        plt.figure(figsize=(10, 5))
        plt.plot(train_losses, label='Train Loss')
        plt.plot(val_losses, label='Validation Loss')
        plt.title('Training History')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid()
        plt.show()

        return model

    # Train the model; train_model returns the same model object it was given
    trained_model = train_model(model, train_loader, val_loader, num_epochs, patience)

    # %% [markdown]
    """
    ## Evaluation on Test Set

    We'll evaluate using:
    - Mean Squared Error (MSE)
    - Root Mean Squared Error (RMSE)
    - Mean Absolute Error (MAE)
    - Mean Absolute Percentage Error (MAPE)
    """
    # %%
    def evaluate_model(model, test_loader):
        """Score ``model`` on ``test_loader`` and plot predicted against actual prices.

        ``device`` and ``target_scaler`` are taken from the enclosing scope. Model
        outputs and targets are mapped back to price units with
        ``target_scaler.inverse_transform`` before MSE, RMSE, MAE, MAPE and R2 are
        computed and printed.

        Args:
            model: Trained network to evaluate.
            test_loader: Batches of ``(features, target)`` in scaled units.

        Returns:
            tuple: ``(predictions, actuals)`` as flat arrays in price units.
        """
        model.eval()
        predictions = []
        actuals = []

        with torch.no_grad():
            for batch_X, batch_y in test_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)

                predictions.extend(outputs.cpu().numpy())
                actuals.extend(batch_y.cpu().numpy())

        predictions = np.array(predictions).flatten()
        actuals = np.array(actuals).flatten()

        # Inverse transform the scaled values back to price units
        predictions = target_scaler.inverse_transform(predictions.reshape(-1, 1)).flatten()
        actuals = target_scaler.inverse_transform(actuals.reshape(-1, 1)).flatten()

        # Calculate metrics
        mse = mean_squared_error(actuals, predictions)
        rmse = math.sqrt(mse)
        mae = mean_absolute_error(actuals, predictions)

        # MAPE is undefined where the actual value is 0, so those points are left
        # out instead of turning the whole metric into inf/NaN.
        nonzero = actuals != 0
        if nonzero.any():
            mape = np.mean(np.abs((actuals[nonzero] - predictions[nonzero]) / actuals[nonzero])) * 100
        else:
            mape = float('nan')

        ssr = np.sum((actuals - predictions) ** 2)  # Sum of squared residuals
        sst = np.sum((actuals - np.mean(actuals)) ** 2)  # Total sum of squares
        r2 = 1 - (ssr / sst) if sst != 0 else 0.0  # Constant actuals would divide by zero

        print(f'MSE: {mse:.4f}')
        print(f'RMSE: {rmse:.4f}')
        print(f'MAE: {mae:.4f}')
        print(f'MAPE: {mape:.4f}%')
        print(f'R2: {r2:.4f}')

        # Plot predictions vs actual; prices are labelled in dollars, matching the
        # closing-price history plot.
        plt.figure(figsize=(12, 6))
        plt.plot(actuals, label='Actual Prices')
        plt.plot(predictions, label='Predicted Prices')
        plt.title('Actual vs Predicted Stock Prices')
        plt.xlabel('Time Step')
        plt.ylabel('Price ($)')
        plt.legend()
        plt.grid()
        plt.show()

        return predictions, actuals

    # Evaluate on the held-out test windows; both returned arrays are in price units
    test_predictions, test_actuals = evaluate_model(trained_model, test_loader)

    # %% [markdown]
    """
    ## Full Dataset Training for Next Timestep Prediction

    Now we'll train on the full dataset to predict the next timestep.
    """
    # %%
    # Prepare full dataset. The scalers are the ones fitted on the training split,
    # so inputs stay on the same scale that predict_next_timestep applies later.
    full_X, full_y = scale_dataset(featured_df, feature_scaler, target_scaler, feature_cols)
    full_dataset = StockDataset(full_X, full_y, window_size)
    # Shuffle the windows every epoch, as train_loader does. Each window is an
    # independent sample, so feeding them in date order only biases the updates.
    full_loader = DataLoader(full_dataset, batch_size=batch_size, shuffle=True)

    # Reinitialize model so the final fit starts from fresh weights
    full_model = LSTMModel(input_size, hidden_size, num_layers, output_size, dropout_prob).to(device)

    # Train on full dataset with its own optimizer and loss
    full_optimizer = torch.optim.Adam(full_model.parameters(), lr=learning_rate)
    full_criterion = nn.MSELoss()

    # We'll train for fewer epochs since we're not doing validation
    # (there is no held-out loss here to drive early stopping)
    for epoch in range(num_epochs // 2):
        full_model.train()
        train_loss = 0
        for batch_X, batch_y in full_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            outputs = full_model(batch_X)
            loss = full_criterion(outputs, batch_y)

            full_optimizer.zero_grad()
            loss.backward()
            full_optimizer.step()

            train_loss += loss.item()

        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs//2}], Loss: {train_loss/len(full_loader):.6f}')

    # %% [markdown]
    """
    ## Next Timestep Prediction

    Now we'll predict the next day's closing price using the most recent window.
    """
    # %%
    def predict_next_timestep(model, data, window_size, feature_scaler, target_scaler, feature_cols):
        """Predict the value that follows the last ``window_size`` rows of ``data``.

        The trailing rows are scaled with ``feature_scaler``, passed through
        ``model`` as a single-sample batch on ``device`` (taken from the enclosing
        scope), and the output is mapped back with
        ``target_scaler.inverse_transform``.

        Returns:
            The single predicted value in the target's original units.
        """
        recent_rows = data.tail(window_size)
        scaled_window = feature_scaler.transform(recent_rows[feature_cols])

        # unsqueeze(0) adds the batch dimension the model expects
        batch = torch.FloatTensor(scaled_window).unsqueeze(0).to(device)

        model.eval()
        with torch.no_grad():
            scaled_prediction = model(batch)

        price = target_scaler.inverse_transform(scaled_prediction.cpu().numpy())
        return price[0][0]

    # Forecast from the most recent window of engineered rows using the fully-trained model
    next_price_prediction = predict_next_timestep(full_model, featured_df, window_size,
                                                feature_scaler, target_scaler, feature_cols)

    # Prices are reported in dollars, matching the 'Price ($)' axis of the history plot
    print(f"Predicted next day closing price: ${next_price_prediction:.2f}")

    # # %% [markdown]
    # """
    # ## Save the Model
    # """
    # # %%
    # # Save the trained model
    # torch.save(full_model.state_dict(), 'amzn_stock_predictor.pth')

    # # Also save the scalers for future use
    # import joblib
    # joblib.dump(feature_scaler, 'feature_scaler.pkl')
    # joblib.dump(target_scaler, 'target_scaler.pkl')

    # print("Model and scalers saved successfully.")