# In [None]:
# !pip install scikit-learn
# !pip install keras
# !pip install pydot
# !pip install matplotlib
# !pip install tensorflow


# Stock price prediction using a Keras LSTM model trained on historical stock data.

## Step 1 - Import the Libraries

import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.dates as mdates

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import linear_model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import plot_model
from datetime import datetime, timedelta

## Step 2 – Reading our training data and getting our training data in shape

def load_stock_data(ticker, start_date, end_date):
    """Download price history for ``ticker`` with yfinance.

    Args:
        ticker: Symbol forwarded to ``yf.download``.
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded DataFrame with the 'Adj Close' column removed
        (if it was present).
    """
    data = yf.download(ticker, start=start_date, end=end_date)
    # data.reset_index(inplace=True)
    # The download does not always contain 'Adj Close' (presumably depends on
    # the yfinance version / auto-adjust setting -- TODO confirm), so a missing
    # column is ignored instead of aborting with KeyError.
    data.drop(['Adj Close'], axis=1, inplace=True, errors='ignore')
    return data

def feature_engineering(data):
    """Add 10- and 50-row simple moving averages of 'Close' to ``data``.

    The frame is modified in place: the two SMA columns are added and every
    row containing a NaN (including the moving-average warm-up rows) is
    dropped. The same frame object is returned.
    """
    closes = data['Close']
    for column, span in (('SMA_10', 10), ('SMA_50', 50)):
        data[column] = closes.rolling(window=span).mean()
    data.dropna(inplace=True)
    return data

# Training configuration: ticker symbol and the date window to download.
stock_ticker = "AAPL"
start_date = "2010-01-01"  # early start so a long history is available
end_date = "2023-01-01"

# Download the prices, then add the moving-average feature columns.
dataset_train = feature_engineering(
    load_stock_data(stock_ticker, start_date, end_date)
)

# Report the frame's dimensions and whether any null values remain.
has_nulls = dataset_train.isnull().values.any()
print("Dataframe Shape: ", dataset_train.shape)
print("Null Value Present: ", has_nulls)

## Step 3 - Setting the Target Variable, Selecting the Features, and Scaling

output_var = pd.DataFrame(dataset_train['Close'])
features = ['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50']

# One scaler per matrix. Re-fitting a single scaler on the target would
# overwrite the feature min/max, so the feature scaling could no longer be
# applied to new rows or inverted.
feature_scaler = MinMaxScaler()
feature_transform = feature_scaler.fit_transform(dataset_train[features])

# `scaler` remains the target ('Close') scaler: later steps call
# scaler.inverse_transform() to turn scaled predictions back into prices.
scaler = MinMaxScaler()
output_var_scaled = scaler.fit_transform(output_var)

# NOTE(review): both scalers are fitted on the full history, i.e. including
# the rows that later become the test set -- confirm this is acceptable.

# Convert back to DataFrames that share the original date index
feature_transform = pd.DataFrame(columns=features, data=feature_transform, index=dataset_train.index)
output_var_scaled = pd.DataFrame(columns=['Close'], data=output_var_scaled, index=dataset_train.index)


## Step 4 - Splitting to Training set and Test set

# Chronological 80/20 split: the first rows train, the remaining rows test.
train_size = int(len(feature_transform) * 0.8)

# Targets as flat 1-D arrays.
y_train = output_var_scaled.iloc[:train_size].values.ravel()
y_test = output_var_scaled.iloc[train_size:].values.ravel()

# Features as (rows, 1, n_features): a length-1 middle axis is inserted so
# every row is fed to the LSTM as a single-timestep sequence.
X_train = np.expand_dims(np.array(feature_transform.iloc[:train_size]), axis=1)
X_test = np.expand_dims(np.array(feature_transform.iloc[train_size:]), axis=1)

## Step 5 - Building and Training the LSTM Model

# A single 32-unit LSTM layer over one-timestep sequences, followed by a
# one-unit dense output layer.
lstm = Sequential()
lstm.add(LSTM(32, input_shape=(1, X_train.shape[2]), activation='relu', return_sequences=False))
lstm.add(Dense(1))
lstm.compile(loss='mean_squared_error', optimizer='adam')

# Stop when validation loss has not improved for 10 epochs and restore the
# best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# The callback only has an effect when it is handed to fit() AND validation
# data exists for 'val_loss'. validation_split holds out the trailing 10% of
# the (unshuffled, chronological) training rows for that purpose.
history = lstm.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=8,
    verbose=1,
    shuffle=False,
    validation_split=0.1,
    callbacks=[early_stopping],
)

## Step 6 - Making the Prediction

# Scaled 'Close' predictions for the held-out test rows.
y_pred = lstm.predict(X_test)

def predict_future_prices(model, last_known_data, days_to_predict, scaler, feature_columns):
    """Roll a one-step model forward to produce a multi-step forecast.

    After every prediction the input window is rotated by one position along
    its timestep axis and the predicted value is written into feature slot 0
    of the newest timestep; the updated window is the next model input.

    Args:
        model: Object exposing ``predict(array)`` whose result is indexed
            with ``[0, 0]`` to obtain the forecast value.
        last_known_data: Array indexed as ``[sample, timestep, feature]``
            used as the first model input. It is not modified.
        days_to_predict: Number of forecast steps to generate.
        scaler: Unused; kept so existing callers keep working.
        feature_columns: Unused; kept so existing callers keep working.

    Returns:
        list: One forecast value per step, in the model's output scale
        (no inverse scaling is applied here).
    """
    predictions = []
    current_data = last_known_data

    for _ in range(days_to_predict):
        # Predict the next step and keep only the scalar forecast value.
        next_value = model.predict(current_data)[0, 0]
        predictions.append(next_value)

        # np.roll returns a new array, so the caller's input is never touched.
        # NOTE(review): the rotation wraps the oldest timestep around to the
        # newest position; with a single-timestep window it is a no-op. For
        # longer windows the other features of the newest step come from the
        # oldest step -- confirm that is intended.
        current_data = np.roll(current_data, shift=-1, axis=1)
        # Assign the scalar rather than the (1, 1) prediction array: NumPy
        # deprecates implicit conversion of an ndim > 0 array to a scalar.
        current_data[0, -1, 0] = next_value

    return predictions

# Number of steps to roll the model forward.
days_to_predict = 10

# Seed the forecast with the most recent scaled feature row, shaped as a
# (1, 1, n_features) batch.
last_known_data = np.array(feature_transform[-1:])[np.newaxis, :, :]

# Forecast in scaled space, then map back to prices as a flat 1-D array.
scaled_forecast = predict_future_prices(lstm, last_known_data, days_to_predict, scaler, features)
future_predictions = scaler.inverse_transform(
    np.array(scaled_forecast).reshape(-1, 1)
).flatten()
future_predictions

## Step 7- Evaluate Model

# Both metrics compare the scaled test targets with the scaled predictions.
mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))
print(f'MSE on Test Set: {mse:.4f}')
print(f'R2 Score on Test Set: {r2:.4f}')

## Step 8 - Plot Predicted vs True Close Value – LSTM


import matplotlib.pyplot as plt
import pandas as pd

# Dates for the forecast: business days following the last historical row.
# Plain calendar days would place forecast points on weekends, which never
# occur in the daily price history.
# NOTE(review): exchange holidays are still included -- confirm acceptable.
last_date = dataset_train.index[-1]
future_dates = pd.bdate_range(start=last_date + pd.Timedelta(days=1), periods=days_to_predict)

# The test rows are the trailing rows of the history, so their dates are the
# trailing dates of the index.
historical_dates = dataset_train.index
test_dates = historical_dates[-len(y_test):]
predicted_dates = historical_dates[-len(y_pred):]

# Plot historical and predicted data (scaled series are mapped back to prices)
plt.figure(figsize=(14, 7))
plt.plot(historical_dates, dataset_train['Close'], label='Historical Data', color='blue')
plt.plot(test_dates, scaler.inverse_transform(y_test.reshape(-1, 1)), label='True Values', color='green')
plt.plot(predicted_dates, scaler.inverse_transform(y_pred.reshape(-1, 1)), label='LSTM Predictions', color='red')
plt.plot(future_dates, future_predictions, linestyle='--', color='orange', label='Future Predictions')

# Add labels and legend
plt.title(f'Stock Price Prediction for {stock_ticker} with Extended Forecast')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()


# Zoomed-in view: the trailing 15 historical rows plus the forecast.
zoom_window = 15

# Date range and values for the trailing window.
zoom_dates = dataset_train.index[-zoom_window:]
zoom_start_date = dataset_train.index[-zoom_window]
zoom_end_date = dataset_train.index[-1]

# Scaled test targets / predictions mapped back to prices.
zoom_true_values = scaler.inverse_transform(y_test[-zoom_window:].reshape(-1, 1))
zoom_lstm_predictions = scaler.inverse_transform(y_pred[-zoom_window:].reshape(-1, 1))

# At most the first 15 forecast steps.
zoom_future_predictions = future_predictions[:zoom_window]

# Actual closing prices for the window, recovered from the scaled target frame.
zoom_close_prices = scaler.inverse_transform(output_var_scaled.loc[zoom_dates].values)

plt.figure(figsize=(14, 7))
plt.plot(zoom_dates, zoom_close_prices, label='True Values', color='green')
plt.plot(zoom_dates, zoom_lstm_predictions, label='LSTM Predictions', color='red')
plt.plot(future_dates[:zoom_window], zoom_future_predictions, linestyle='--', color='orange', label='Future Predictions')

# Titles, axis labels, legend and grid.
plt.title(f'Zoomed-In View: Last 15 Days with LSTM Predictions for {stock_ticker}')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()

# Most recent in-sample prediction (last entry of the zoomed predictions).
last_lstm_prediction = zoom_lstm_predictions[-1]

# Mean of the first three forecast values.
average_future_predictions = np.mean(zoom_future_predictions[:3])

# Default to the neutral message; switch to buy when the forecast average is
# above the latest prediction, or to sell when it is below.
message = "According to the LSTM model, the market seems to not change significantly"
if last_lstm_prediction < average_future_predictions:
    message = f"According to the LSTM model, you should buy the {stock_ticker}"
elif last_lstm_prediction > average_future_predictions:
    message = f"According to the LSTM model, you should sell the {stock_ticker}"

print(message)

import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping

def load_stock_data(ticker, start_date, end_date):
    """Download price history for ``ticker`` with yfinance.

    Args:
        ticker: Symbol forwarded to ``yf.download``.
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded DataFrame with the 'Adj Close' column removed
        (if it was present).
    """
    data = yf.download(ticker, start=start_date, end=end_date)
    # The download does not always contain 'Adj Close' (presumably depends on
    # the yfinance version / auto-adjust setting -- TODO confirm), so a missing
    # column is ignored instead of aborting with KeyError.
    data.drop(['Adj Close'], axis=1, inplace=True, errors='ignore')
    return data

def feature_engineering(data):
    """Add 10- and 50-row simple moving averages of 'Close' to ``data``.

    The frame is modified in place: the two SMA columns are added and every
    row containing a NaN (including the moving-average warm-up rows) is
    dropped. The same frame object is returned.
    """
    closes = data['Close']
    for column, span in (('SMA_10', 10), ('SMA_50', 50)):
        data[column] = closes.rolling(window=span).mean()
    data.dropna(inplace=True)
    return data

def predict_future_prices(model, last_known_data, days_to_predict, scaler, feature_columns):
    """Roll a one-step model forward to produce a multi-step forecast.

    After every prediction the input window is rotated by one position along
    its timestep axis and the predicted value is written into feature slot 0
    of the newest timestep; the updated window is the next model input.

    Args:
        model: Object exposing ``predict(array)`` whose result is indexed
            with ``[0, 0]`` to obtain the forecast value.
        last_known_data: Array indexed as ``[sample, timestep, feature]``
            used as the first model input. It is not modified.
        days_to_predict: Number of forecast steps to generate.
        scaler: Unused; kept so existing callers keep working.
        feature_columns: Unused; kept so existing callers keep working.

    Returns:
        list: One forecast value per step, in the model's output scale
        (no inverse scaling is applied here).
    """
    predictions = []
    current_data = last_known_data

    for _ in range(days_to_predict):
        # Predict the next step and keep only the scalar forecast value.
        next_value = model.predict(current_data)[0, 0]
        predictions.append(next_value)

        # np.roll returns a new array, so the caller's input is never touched.
        # NOTE(review): the rotation wraps the oldest timestep around to the
        # newest position; with a single-timestep window it is a no-op. For
        # longer windows the other features of the newest step come from the
        # oldest step -- confirm that is intended.
        current_data = np.roll(current_data, shift=-1, axis=1)
        # Assign the scalar rather than the (1, 1) prediction array: NumPy
        # deprecates implicit conversion of an ndim > 0 array to a scalar.
        current_data[0, -1, 0] = next_value

    return predictions

# Ticker symbol and the initial download window.
stock_ticker = "AAPL"
start_date = "2010-01-01"
end_date = "2023-01-01"

# Download the prices, then add the moving-average feature columns.
dataset_train = feature_engineering(
    load_stock_data(stock_ticker, start_date, end_date)
)

# Empty results table: one row per end date will be appended to it.
result_columns = ["End Date", "Close", "Last LSTM Prediction", "Avg Future Predictions"]
end_dates_df = pd.DataFrame(columns=result_columns)

# Walk the end date forward one calendar day at a time up to 2023-01-31.
# For every end date the data is re-downloaded, a fresh model is trained and
# one summary row is recorded.
final_end_date_dt = datetime.strptime("2023-01-31", "%Y-%m-%d")
end_date_dt = datetime.strptime(end_date, "%Y-%m-%d")

features = ['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50']
days_to_predict = 10

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of concatenating onto an (initially empty) frame every iteration.
result_rows = []

# Dates are compared as datetimes: a string inequality test would never
# terminate if the starting end date were already past the final date.
while end_date_dt < final_end_date_dt:
    # Increment the date by one day and keep the string form used for the
    # download and for the result row.
    end_date_dt += timedelta(days=1)
    end_date = end_date_dt.strftime("%Y-%m-%d")

    # Load and prepare the data for the new end date
    dataset_train = load_stock_data(stock_ticker, start_date, end_date)
    dataset_train = feature_engineering(dataset_train)

    output_var = pd.DataFrame(dataset_train['Close'])

    # Separate scalers: re-fitting one scaler on the target would discard the
    # feature scaling. `scaler` is the target ('Close') scaler used below to
    # map predictions back to prices.
    feature_scaler = MinMaxScaler()
    feature_transform = feature_scaler.fit_transform(dataset_train[features])
    scaler = MinMaxScaler()
    output_var_scaled = scaler.fit_transform(output_var)

    # Convert back to DataFrames that share the original date index
    feature_transform = pd.DataFrame(columns=features, data=feature_transform, index=dataset_train.index)
    output_var_scaled = pd.DataFrame(columns=['Close'], data=output_var_scaled, index=dataset_train.index)

    # Chronological 80/20 split.
    train_size = int(len(feature_transform) * 0.8)
    X_train, X_test = feature_transform[:train_size], feature_transform[train_size:]
    y_train, y_test = output_var_scaled[:train_size].values.ravel(), output_var_scaled[train_size:].values.ravel()

    # Each row becomes a single-timestep sequence: (rows, 1, n_features).
    X_train = np.array(X_train).reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = np.array(X_test).reshape((X_test.shape[0], 1, X_test.shape[1]))

    lstm = Sequential()
    lstm.add(LSTM(32, input_shape=(1, X_train.shape[2]), activation='relu', return_sequences=False))
    lstm.add(Dense(1))
    lstm.compile(loss='mean_squared_error', optimizer='adam')

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # The callback needs to be passed to fit() together with validation data
    # for 'val_loss'; the trailing 10% of the training rows are held out.
    history = lstm.fit(
        X_train,
        y_train,
        epochs=100,
        batch_size=8,
        verbose=1,
        shuffle=False,
        validation_split=0.1,
        callbacks=[early_stopping],
    )
    y_pred = lstm.predict(X_test)

    # Predict future prices, seeded with the latest scaled feature row, and
    # map the scaled forecast back to prices.
    last_known_data = np.array(feature_transform[-1:]).reshape((1, 1, len(features)))
    future_predictions = predict_future_prices(lstm, last_known_data, days_to_predict, scaler, features)
    future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))
    future_predictions = future_predictions.flatten()

    # Latest test-set prediction in price units, stored as a plain float so
    # the results table holds numbers rather than one-element arrays.
    last_lstm_prediction = float(scaler.inverse_transform(y_pred[-1:].reshape(-1, 1))[0, 0])

    # Average of the first three forecast values.
    average_future_predictions = float(np.mean(future_predictions[:3]))

    # Record the 'Close' value, last_lstm_prediction, and average_future_predictions
    result_rows.append({
        "End Date": end_date,
        "Close": dataset_train['Close'].iloc[-1],  # Last close value in the current DataFrame
        "Last LSTM Prediction": last_lstm_prediction,
        "Avg Future Predictions": average_future_predictions,
    })

# Append the collected rows to the results table in one step.
new_rows = pd.DataFrame(
    result_rows,
    columns=["End Date", "Close", "Last LSTM Prediction", "Avg Future Predictions"],
)
if end_dates_df.empty:
    end_dates_df = new_rows
else:
    end_dates_df = pd.concat([end_dates_df, new_rows], ignore_index=True)

# Print the DataFrame with end dates and predictions
print(end_dates_df)


# Convert End Date column to datetime format in end_dates_df
end_dates_df['End Date'] = pd.to_datetime(end_dates_df['End Date'], format='%Y-%m-%d')

# Convert the index of dataset_train to datetime format
dataset_train.index = pd.to_datetime(dataset_train.index, format='%Y-%m-%d')

# The simulation below reads an 'action' column that nothing in this file
# creates. When it is absent, derive it with the same rule as the buy/sell
# message: buy (1) when the latest prediction is below the forecast average,
# sell (-1) when it is above, otherwise hold (0).
if 'action' not in end_dates_df.columns:
    # Cells may hold one-element arrays rather than floats; take the first
    # element either way.
    latest_prediction = end_dates_df['Last LSTM Prediction'].map(
        lambda value: float(np.ravel(value)[0])
    ).astype(float)
    forecast_average = end_dates_df['Avg Future Predictions'].map(
        lambda value: float(np.ravel(value)[0])
    ).astype(float)
    end_dates_df['action'] = np.sign(forecast_average - latest_prediction).astype(int)

# Initialize variables for shares and money
initial_shares = 100
shares_on_hand = initial_shares
money_spent = 0
money_earned = 0

# Get the closing price of the first day in dataset_train
first_date = dataset_train.index[0]
first_close_price = dataset_train.loc[first_date, 'Close']

# Value of the initial 100 shares at that first closing price
initial_investment = initial_shares * first_close_price

# Running totals after each row of end_dates_df
shares_on_hand_list = []
money_spent_list = []
money_earned_list = []

# Trade at most one share per row, at that row's closing price.
for close_price, action in zip(end_dates_df['Close'], end_dates_df['action']):
    if action == 1:
        # Buy one share
        shares_on_hand += 1
        money_spent += close_price
    elif action == -1:
        # Sell one share
        shares_on_hand -= 1
        money_earned += close_price
    # If action == 0, do nothing

    # Record the shares and money spent/earned for the day
    shares_on_hand_list.append(shares_on_hand)
    money_spent_list.append(money_spent)
    money_earned_list.append(money_earned)

# Add the new columns to the end_dates_df DataFrame
end_dates_df['Shares on Hand'] = shares_on_hand_list
end_dates_df['Money Spent'] = money_spent_list
end_dates_df['Money Earned'] = money_earned_list

# Print the DataFrame with the new columns
print(end_dates_df)

# Get the closing price of the last day in dataset_train
last_date = dataset_train.index[-1]
last_close_price = dataset_train.loc[last_date, 'Close']

# Net change in position relative to the initial 100 shares
# (buys minus sells; negative when more shares were sold than bought).
shares_bought = shares_on_hand - initial_shares

# Final running totals of cash paid for buys and cash received from sells.
last_money_spent = end_dates_df['Money Spent'].iloc[-1]
last_money_earned = end_dates_df['Money Earned'].iloc[-1]

# Value of the net position change at the last day's closing price
total_value_from_bought_shares = shares_bought * last_close_price

# 'Money Spent' starts at 0 and only accumulates the simulated purchases; the
# initial investment was never part of it, so it must not be subtracted here.
money_spent_on_bought_shares = last_money_spent

# Net result of the simulated trading: value of the net position change plus
# cash received from sells, minus cash paid for buys. Leaving out the sale
# proceeds would count every sold share purely as a loss.
money_earned_from_selling_bought_shares = (
    total_value_from_bought_shares + last_money_earned - money_spent_on_bought_shares
)

# Print the results
print(f"Closing Price on the Last Day: {last_close_price}")
print(f"Total Value of Bought Shares at Last Day's Closing Price: {total_value_from_bought_shares}")
print(f"Money Spent on Bought Shares: {money_spent_on_bought_shares}")
print(f"Net Gain/Loss from Selling Bought Shares: {money_earned_from_selling_bought_shares}")


