## LSTM Stock Price Predictor 

Model will predict stock prices 15 days into the future

In [4]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input

### Functions

In [5]:
# Fetch stock data
def get_stock_data(ticker, start_date, end_date):
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    return stock_data['Close']

# Process data
def preprocess_data(data, look_back, prediction_steps):
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.reshape(-1, 1))
    
    X, y = [], []
    for i in range(look_back, len(scaled_data) - prediction_steps + 1):
        X.append(scaled_data[i - look_back:i, 0])  # Last `look_back` days
        y.append(scaled_data[i:i + prediction_steps, 0])  # Next `prediction_steps` days
    
    return np.array(X), np.array(y), scaler

    
# Build the model
def build_lstm_model(look_back, output_steps):
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense, Input

    model = Sequential([
        Input(shape=(look_back, 1)),  # Explicitly define the input shape
        LSTM(units=50, activation='tanh', return_sequences=True),
        LSTM(units=50, activation='sigmoid', return_sequences=True),
        LSTM(units=50, activation='relu', return_sequences=True),
        Dense(units=output_steps, activation='relu')  # Output layer with 'output_steps' timesteps
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')  # Compile the model
    return model

# Training step
def train_model(model, X_train, y_train, epochs=50, batch_size=32):
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)

# Prediction step
def make_predictions(model, X_test, scaler):
    predicted_stock_price = model.predict(X_test)
    predicted_stock_price = scaler.inverse_transform(predicted_stock_price)  # Inverse scaling
    return predicted_stock_price

# Visualization step
def plot_predictions(real, predicted, title):
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 6))
    plt.plot(real.flatten(), color='blue', label='Real Stock Price')
    plt.plot(predicted.flatten(), color='red', label='Predicted Stock Price')
    plt.title(title)
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.show()

### Parameters

In [6]:
# Parameters
ticker = "AAPL"
start_date = "2000-01-01"
end_date = "2024-12-05"
look_back = 60  # Use the last 60 days to predict
output_steps = 15  # Predict 15 days ahead

### Main Process

In [7]:
# Get data
data = get_stock_data(ticker, start_date, end_date)
data_values = data.values

# Train-Test Split
train_size = int(len(data_values) * 0.8)
train_data, test_data = data_values[:train_size], data_values[train_size:]

# Preprocess data
X_train, y_train, scaler = preprocess_data(train_data, look_back, output_steps)
X_test, y_test, _ = preprocess_data(test_data, look_back, output_steps)

# Reshape data for LSTM
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

print("Shape of X_train:", X_train.shape)  # Should be (samples, look_back, 1)
print("Shape of y_train:", y_train.shape)  # Should be (samples, output_steps)

# Build the model
model = build_lstm_model(look_back, output_steps)

# Train the model
train_model(model, X_train, y_train, epochs=20, batch_size=32)

# Make predictions
predicted_stock_price = make_predictions(model, X_test, scaler)

# Inverse scale the actual prices for comparison
y_test_actual = scaler.inverse_transform(y_test)

# Visualize predictions
plot_predictions(y_test_actual, predicted_stock_price, "Stock Price Prediction")

[*********************100%***********************]  1 of 1 completed


Shape of X_train: (4942, 60, 1)
Shape of y_train: (4942, 15)
Epoch 1/20


InvalidArgumentError: Graph execution error:

Detected at node gradient_tape/compile_loss/mean_squared_error/sub/BroadcastGradientArgs defined at (most recent call last):
  File "/usr/lib/python3.12/runpy.py", line 198, in _run_module_as_main

  File "/usr/lib/python3.12/runpy.py", line 88, in _run_code

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/usr/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/tmp/ipykernel_67347/260683867.py", line 24, in <module>

  File "/tmp/ipykernel_67347/3095149304.py", line 37, in train_model

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 320, in fit

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/home/oem/Documents/github/magnimind_hw/magenv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 70, in train_step

Incompatible shapes: [32,15] vs. [32,60,15]
	 [[{{node gradient_tape/compile_loss/mean_squared_error/sub/BroadcastGradientArgs}}]] [Op:__inference_one_step_on_iterator_5191]

### Make price predictions

In [None]:
# Extend plot_predictions to include future prices
def plot_predictions_with_future(real, predicted, future, future_dates, title):
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 6))
    
    # Plot historical prices
    plt.plot(real.index, real, color='blue', label='Real Stock Price')
    
    # Ensure x-axis matches the predictions' length
    predicted_x = real.index[-len(predicted):]
    min_length = min(len(predicted_x), len(predicted))
    predicted_x = predicted_x[-min_length:]
    predicted = predicted[-min_length:]
    plt.plot(predicted_x, predicted, color='red', label='Predicted Stock Price (Test)')
    
    # Plot future prices
    plt.plot(future_dates, future, color='green', label='Predicted Future Prices (Next 15 days)', linestyle='--')
    
    # Format x-axis
    plt.title(title)
    plt.xlabel('Date')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.tight_layout()
    plt.show()


# Predict the next 15 days
def predict_future_prices(model, recent_data, scaler, prediction_steps):
    recent_data_scaled = scaler.transform(recent_data[-look_back:].reshape(-1, 1))
    future_predictions = []
    input_seq = recent_data_scaled.reshape(1, look_back, 1)  # Correct input shape for LSTM

    for _ in range(prediction_steps):
        # Predict the next value
        prediction = model.predict(input_seq, verbose=0)  # Shape: (1, 1)
        future_predictions.append(prediction[0, 0])  # Append the scalar value
        
        # Update the sequence: Remove the oldest value and append the new prediction
        input_seq = np.append(input_seq[:, 1:, :], [[[prediction[0, 0]]]], axis=1)

    # Inverse scale the future predictions
    future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1)).flatten()
    return future_predictions

# Predict future prices
future_predictions = predict_future_prices(model, data_values, scaler, output_steps)

# Generate future dates
last_date = data.index[-1]
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=output_steps, freq='B')

# Ensure correct columns from `data`
plot_predictions_with_future(
    real=data['AAPL'],  # Replace with the correct column
    predicted=predicted_stock_price.flatten(),
    future=future_predictions,
    future_dates=future_dates,
    title="Stock Price Prediction with Future Overlay"
)




In [None]:
future_predictions

In [None]:
future_dates

In [None]:
print(f"Length of real: {len(data['AAPL'])}")
print(f"Length of predicted_stock_price: {len(predicted_stock_price.flatten())}")
print(f"Length of predicted_x: {len(data['AAPL'].index[-len(predicted_stock_price.flatten()):])}")
