# In [None]:
# Install necessary libraries (run these in your terminal or a code cell with '!')
# !pip install numpy pandas matplotlib yfinance scikit-learn torch

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime, timedelta

# Date range for the download: a fixed start date up to today.
today = datetime.now().date()
end_date = today.strftime('%Y-%m-%d')
start_date = '2020-01-01'

# Check for CUDA availability and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 1. Data Retrieval
ticker = 'AAPL'  # You can change this to any stock ticker

# NOTE: yfinance documents `end` as exclusive, so the bar dated `end_date`
# (today) is not part of the request.
data = yf.download(ticker, start=start_date, end=end_date)

# Fail fast with an actionable message. A bad ticker or a network problem
# yields an empty frame, which would otherwise only surface further down as
# an unrelated-looking error when the scaler is fitted on zero samples.
if data.empty:
    raise ValueError(
        f"No price data returned for {ticker!r} between {start_date} and "
        f"{end_date}; check the ticker symbol and your network connection."
    )
print(data.head())

# Visualize the closing prices
plt.figure(figsize=(12, 6))
plt.plot(data['Close'], label='Close Price')
plt.title(f'{ticker} Stock Price')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True)
plt.show()

# 2. Data Preprocessing
# Closing prices as a single-feature column vector, the 2-D layout the scaler
# expects.
close_prices = data['Close'].values.reshape(-1, 1)

# Length of the look-back window fed to the model.
sequence_length = 30

# Scale the data.
# The scaler is fitted ONLY on the rows the training windows (and their
# targets) can see. Fitting on the full series would leak the min/max of the
# held-out period into training and make the test error look better than it
# is. The fraction below must mirror the 80/20 split applied to the sequences
# further down: the first `int(0.8 * n_windows)` windows plus their targets
# cover exactly the first `int(0.8 * n_windows) + sequence_length` rows.
train_fraction = 0.8
n_windows = max(len(close_prices) - sequence_length, 0)
n_train_rows = int(train_fraction * n_windows) + sequence_length

scaler = MinMaxScaler()
scaler.fit(close_prices[:n_train_rows])

# The whole series is transformed with the training-period statistics, so
# values from the held-out period may fall outside [0, 1].
scaled_prices = scaler.transform(close_prices)

def create_sequences(data, seq_length):
    """Slice a series into sliding windows paired with the value that follows.

    Args:
        data: Indexable, sliceable series (e.g. a NumPy array or a list).
        seq_length: Number of consecutive items in each input window.

    Returns:
        A ``(windows, next_values)`` tuple of NumPy arrays. ``windows[k]`` is
        ``data[k : k + seq_length]`` and ``next_values[k]`` is
        ``data[k + seq_length]``. Both arrays are empty when ``data`` holds
        ``seq_length`` items or fewer.
    """
    # One window per start position that still leaves a following value.
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    next_values = [data[start + seq_length] for start in range(n_windows)]
    return np.array(windows), np.array(next_values)

# Turn the scaled series into (window, next value) training pairs.
sequences, targets = create_sequences(scaled_prices, sequence_length)

# Split data into training and testing sets: the first 80% of the windows in
# their original order are used for training, the remainder is held out.
train_size = int(0.8 * len(sequences))


def _as_device_tensor(array):
    """Convert a NumPy array to a float32 tensor placed on `device`."""
    return torch.tensor(array, dtype=torch.float32).to(device)


train_sequences = _as_device_tensor(sequences[:train_size])
train_targets = _as_device_tensor(targets[:train_size])
test_sequences = _as_device_tensor(sequences[train_size:])
test_targets = _as_device_tensor(targets[train_size:])

# Report the tensor shapes as a quick sanity check of the split.
for caption, tensor in (
    ("Shape of training sequences:", train_sequences),
    ("Shape of training targets:", train_targets),
    ("Shape of testing sequences:", test_sequences),
    ("Shape of testing targets:", test_targets),
):
    print(caption, tensor.shape)

# 3. Model Definition
class PredictionModel(nn.Module):
    """LSTM followed by a linear head applied to the final time step."""

    def __init__(self, input_size, hidden_size, output_size):
        """Build the recurrent layer and the output projection.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the LSTM hidden state.
            output_size: Number of values produced per input sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs are laid out as (batch, time, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq):
        """Return one prediction per sequence in the batch.

        Args:
            input_seq: Tensor laid out as (batch, time, feature).

        Returns:
            Tensor of shape (batch, output_size).
        """
        hidden_states, _ = self.lstm(input_seq)
        # Only the hidden state of the last time step feeds the linear head.
        final_step = hidden_states[:, -1, :]
        return self.linear(final_step)

# Instantiate the model
input_size = 1  # Only predicting based on closing price
hidden_size = 50
output_size = 1
model = PredictionModel(input_size, hidden_size, output_size)
model = model.to(device)

# Loss function and optimizer: mean squared error minimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. Training Loop
# Full-batch training: every epoch is a single optimisation step over the
# entire training set.
epochs = 200
log_every = 25

# Nothing inside the loop leaves training mode, so switching once is enough.
model.train()
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(train_sequences)
    loss = criterion(outputs, train_targets)
    loss.backward()
    optimizer.step()

    # Report the loss only on every `log_every`-th epoch (1-based count).
    if (epoch + 1) % log_every != 0:
        continue
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# 5. Evaluation
# Run the held-out windows through the model with gradient tracking off.
model.eval()
with torch.no_grad():
    test_predictions = model(test_sequences)

# Inverse transform the predictions and actual values so the error is
# expressed in price units rather than in scaled units.
predicted_scaled = test_predictions.cpu().numpy()
actual_scaled = test_targets.cpu().numpy()
predicted_prices = scaler.inverse_transform(predicted_scaled)
actual_prices = scaler.inverse_transform(actual_scaled)

# Calculate RMSE
mse = mean_squared_error(actual_prices, predicted_prices)
rmse = np.sqrt(mse)
print(f'Test RMSE: {rmse:.2f}')

# 6. Future Predictions
# Seed window: the most recent `sequence_length` scaled closes, laid out as
# (batch=1, time, feature=1) to match what the model was trained on.
last_sequence_scaled = scaled_prices[-sequence_length:].reshape(1, sequence_length, 1)
last_sequence_tensor = torch.tensor(last_sequence_scaled, dtype=torch.float32).to(device)

n_future_days = [10, 20, 50]

# Every horizon starts from the same seed window and the model runs in eval
# mode, so a shorter forecast is just the leading part of a longer one. Roll
# the model forward once for the longest horizon and slice per horizon instead
# of repeating the rollout for each entry.
longest_horizon = max(n_future_days, default=0)
rollout_scaled = []

model.eval()
with torch.no_grad():
    future_sequence = last_sequence_tensor.clone()
    for _ in range(longest_horizon):
        output = model(future_sequence)
        rollout_scaled.append(output.item())

        # Slide the window one step: drop the oldest value and append the new
        # prediction, reshaped to (1, 1, 1) so it concatenates on the time axis.
        output_reshaped = output.view(1, 1, 1)
        future_sequence = torch.cat((future_sequence[:, 1:, :], output_reshaped), dim=1)

# Back to price units. The scaler rejects empty input, hence the guard.
if rollout_scaled:
    rollout_prices = scaler.inverse_transform(np.array(rollout_scaled).reshape(-1, 1))
else:
    rollout_prices = np.empty((0, 1))

# One (n_days, 1) array per horizon; copies keep the entries independent.
all_future_predictions = {
    n_days: rollout_prices[:n_days].copy() for n_days in n_future_days
}

# Create future dates
# Each model step predicts the next trading-day close, so forecasts are
# labelled with business days rather than consecutive calendar days (which
# would place predictions on weekends). Exchange holidays are not excluded.
last_date = data.index[-1]
future_trading_days = list(
    pd.bdate_range(start=last_date + pd.Timedelta(days=1), periods=longest_horizon)
)
future_dates = {n_days: future_trading_days[:n_days] for n_days in n_future_days}

# 7. Visualization of Results and Future Predictions
plt.figure(figsize=(16, 10))

# Plot actual vs predicted prices.
# The target of window k sits at row k + sequence_length, so the held-out
# targets start `len(train_sequences) + sequence_length` rows into the data.
test_dates = data.index[len(train_sequences) + sequence_length:]
plt.plot(test_dates, actual_prices, label='Actual Price', color='blue')
plt.plot(test_dates, predicted_prices, label='Predicted Price', color='red')

# Plot future predictions, one dashed line per forecast horizon.
for n_days, predictions in all_future_predictions.items():
    plt.plot(
        future_dates[n_days],
        predictions,
        label=f'Next {n_days} Days Prediction',
        linestyle='--',
    )

plt.title(f'{ticker} Stock Price Prediction and Future Forecast')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Print the future predictions as "date: price" lines, grouped by horizon.
for n_days, predictions in all_future_predictions.items():
    print(f"\nPredictions for the next {n_days} days:")
    for forecast_day, forecast_price in zip(future_dates[n_days], predictions[:, 0]):
        print(f"{forecast_day.strftime('%Y-%m-%d')}: {forecast_price:.2f}")