In [None]:
import yfinance as yf
import pandas as pd

# Ticker symbol of the stock to model
ticker_symbol = 'AAPL' # Example: Apple Inc.

# Date range of the historical data (yfinance treats `end` as exclusive)
start_date = '2020-01-01'
end_date = '2024-01-01'

# Download the historical stock data
df_stock = yf.download(ticker_symbol, start=start_date, end=end_date)

# yfinance reports a failed download (unknown ticker, network problem, empty
# date range) by logging and returning an empty frame instead of raising.
# Stop here so later cells do not fail with an unrelated-looking error.
if df_stock.empty:
    raise RuntimeError(
        f"No data returned for {ticker_symbol} between {start_date} and {end_date}."
    )

# Confirm the download and show the first few rows of the DataFrame
print(f"Downloaded historical data for {ticker_symbol} from {start_date} to {end_date}.")
df_stock.head()

In [None]:
# Overall dimensions first, then per-column dtypes and non-null counts.
frame_shape = df_stock.shape
print("Shape of the DataFrame:", frame_shape)
print("Columns and their types:")
df_stock.info()

# Count of missing entries per column.
missing_per_column = df_stock.isna().sum()
print("\nChecking for missing values:")
print(missing_per_column)

: 

In [None]:
# Depending on the yfinance version, a single-ticker download comes back with
# either flat columns or two-level (price field, ticker) columns. Drop the
# ticker level only when it is actually there: calling droplevel(1) on a flat
# Index raises, which would also break any re-run of this cell.
if isinstance(df_stock.columns, pd.MultiIndex):
    df_stock.columns = df_stock.columns.droplevel(1)
print("Cleaned column names:")
print(df_stock.columns)

# Lowercase the column names for consistency (safe to repeat on re-run)
df_stock.columns = df_stock.columns.str.lower()
print("\nLowercased column names:")
print(df_stock.columns)

# Display the first few rows with cleaned column names
df_stock.head()

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Select the 'close' price for prediction as a 2D column vector
# (MinMaxScaler expects input shaped [n_samples, n_features]).
data = df_stock['close'].values.reshape(-1, 1)

# Fit the scaler on the chronologically first 80% of rows only, then apply it
# to the whole series. Fitting on every row would leak the min/max of the
# later (test) period into training. With the 60-step window and 80/20 split
# used further down, these rows all fall inside the training portion.
# Consequence: values from the held-out period may fall slightly outside [0, 1].
scaler_fit_rows = int(len(data) * 0.8)
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data[:scaler_fit_rows])
scaled_data = scaler.transform(data)

print("Shape of original 'close' data:", data.shape)
print("Shape of scaled 'close' data:", scaled_data.shape)
print("First 5 original 'close' values:\n", data[:5].flatten())
print("First 5 scaled 'close' values:\n", scaled_data[:5].flatten())


In [None]:
def create_sequences(data, time_step):
    """Build sliding-window samples from the first column of a 2D series.

    Sample ``i`` is made of ``data[i:i + time_step, 0]`` and its target is the
    value that immediately follows the window, ``data[i + time_step, 0]``.

    Args:
        data: 2D array-like of shape ``(n_rows, n_columns)``; only column 0 is used.
        time_step: Look-back window length; must be a positive integer.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n_rows - time_step, time_step)``
        and ``Y`` has shape ``(n_rows - time_step,)``. When the series is not
        longer than ``time_step`` both are empty, with ``X`` keeping its second
        dimension (shape ``(0, time_step)``) so callers can still read ``X.shape[1]``.

    Raises:
        ValueError: If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError("time_step must be a positive integer")

    series = np.asarray(data)[:, 0]
    n_samples = len(series) - time_step
    if n_samples <= 0:
        # Too short for even one window: return correctly shaped empty arrays.
        return (
            np.empty((0, time_step), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    # Row i of the index grid is [i, i + 1, ..., i + time_step - 1].
    window_index = np.arange(n_samples)[:, None] + np.arange(time_step)[None, :]
    return series[window_index], series[time_step:].copy()

# Look-back window length: each sample is made of the 60 preceding values.
time_step = 60

# Turn the scaled series into (window, next value) pairs.
X, y = create_sequences(data=scaled_data, time_step=time_step)

# Report the resulting shapes, then show the first window and its target.
print(f"Shape of X (features): {X.shape}")
print(f"Shape of y (target): {y.shape}")
first_window, first_target = X[0], y[0]
print("First sample of X:\n", first_window)
print("First sample of y:\n", first_target)

In [None]:
from sklearn.model_selection import train_test_split

# Chronological 80/20 cut: earliest windows train the model, latest ones test it
# (no shuffling, so the test windows always come after the training windows).
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# LSTM input must be [samples, time_steps, features]; add a single feature axis.
n_steps = X.shape[1]
X_train = X_train.reshape(len(X_train), n_steps, 1)
X_test = X_test.reshape(len(X_test), n_steps, 1)

print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of y_train: {y_train.shape}")
print(f"Shape of X_test: {X_test.shape}")
print(f"Shape of y_test: {y_test.shape}")

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling repeat from run to run.
# (Some GPU kernels may still introduce small non-deterministic differences.)
set_random_seed(42)

# Stacked LSTM regressor: two 50-unit LSTM layers, each followed by dropout,
# and a single-unit dense output predicting the next scaled value.
model = Sequential([
    # Declare the input shape with an explicit Input layer; newer Keras
    # versions warn when `input_shape=` is passed to the first layer instead.
    Input(shape=(X_train.shape[1], 1)),
    # First LSTM layer returns the full sequence so the next LSTM can consume it
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    # Last LSTM layer returns only its final output
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    # Dense output layer
    Dense(units=1),
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Print the model summary
print("LSTM Model Summary:")
model.summary()


In [None]:
# Fit on the training windows only; the test windows stay unseen until evaluation.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=100,
    verbose=1,
)
print("Model training complete.")

In [None]:
from sklearn.metrics import mean_squared_error
import math

# Run the trained network over both splits (outputs are still in scaled units).
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Training split: map predictions and targets back to price units, then score.
train_predict = scaler.inverse_transform(train_predict)
y_train_inverse = scaler.inverse_transform(y_train[:, np.newaxis])
rmse_train = math.sqrt(mean_squared_error(y_train_inverse, train_predict))

# Test split: same treatment.
test_predict = scaler.inverse_transform(test_predict)
y_test_inverse = scaler.inverse_transform(y_test[:, np.newaxis])
rmse_test = math.sqrt(mean_squared_error(y_test_inverse, test_predict))

print(f"Train RMSE: {rmse_train}")
print(f"Test RMSE: {rmse_test}")


In [None]:
import matplotlib.pyplot as plt

# Place the train predictions on the full-series axis. The first prediction
# belongs to row `time_step`, because each sample needs `time_step` prior rows.
train_plot = np.full_like(scaled_data, np.nan)
train_plot[time_step:len(train_predict) + time_step, :] = train_predict

# Test predictions start on the row right after the last train prediction.
# (An offset of `time_step * 2` leaves a slice `time_step` rows shorter than
# `test_predict`, and the assignment fails with a broadcast ValueError.)
test_start = len(train_predict) + time_step
test_plot = np.full_like(scaled_data, np.nan)
test_plot[test_start:test_start + len(test_predict), :] = test_predict

# Plot the original data, train predictions, and test predictions
plt.figure(figsize=(15, 7))
plt.plot(scaler.inverse_transform(scaled_data), label='Original Close Price')
plt.plot(train_plot, label='Train Predictions')
plt.plot(test_plot, label='Test Predictions')

plt.title(f'Stock Price Prediction for {ticker_symbol}')
plt.xlabel('Time (Days)')
plt.ylabel('Stock Price')
plt.legend()
plt.grid(True)
plt.show()
print("Visualization of stock price predictions complete.")

In [None]:
import matplotlib.pyplot as plt

# NaN-filled canvases the size of the full series; only the rows that have a
# prediction get filled in, so each curve is drawn over its own date range.
n_train_pred = len(train_predict)

train_plot = np.full(scaled_data.shape, np.nan, dtype=scaled_data.dtype)
train_plot[time_step:time_step + n_train_pred] = train_predict

# Test predictions begin on the row right after the last train prediction.
test_plot = np.full(scaled_data.shape, np.nan, dtype=scaled_data.dtype)
test_plot[time_step + n_train_pred:] = test_predict

# Draw the actual prices first, then overlay both prediction curves.
plt.figure(figsize=(15, 7))
for series, label in (
    (scaler.inverse_transform(scaled_data), 'Original Close Price'),
    (train_plot, 'Train Predictions'),
    (test_plot, 'Test Predictions'),
):
    plt.plot(series, label=label)

plt.title(f'Stock Price Prediction for {ticker_symbol}')
plt.xlabel('Time (Days)')
plt.ylabel('Stock Price')
plt.legend()
plt.grid(True)
plt.show()
print("Visualization of stock price predictions complete.")