### LSTM Stock Predictor Using Fear and Greed Index

#### Data Preparation

In [8]:
# Initial Imports 
import pandas as pd 
import numpy as np
import hvplot.pandas 

%matplotlib inline

In [9]:
# Set the random seed for reproducibility
# Note: This is for the homework solution, but it is good practice to comment this out and run multiple experiments to evaluate your model
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(2)

In [10]:
# Load the fear and greed sentiment data for Bitcoin
df = pd.read_csv('btc_sentiment.csv', index_col = 'date', infer_datetime_format=True, 
                 parse_dates = True
                )
df.drop('fng_classification', axis = 1, inplace = True)
df.head()

Unnamed: 0_level_0,fng_value
date,Unnamed: 1_level_1
2019-07-29,19
2019-07-28,16
2019-07-27,47
2019-07-26,24
2019-07-25,42


In [11]:
# Load the historical closing prices for Bitcoin
df2 = pd.read_csv('btc_historic.csv', index_col = 'Date', infer_datetime_format=True, parse_dates = True)['Close']
df2.sort_index(inplace=True)
df2.tail()

Date
2019-07-25    9882.429688
2019-07-26    9847.450195
2019-07-27    9478.320313
2019-07-28    9531.769531
2019-07-29    9529.889648
Name: Close, dtype: float64

In [12]:

# Join the data into a single DataFrame
df = df.join(df2, how='inner')
df.tail()

Unnamed: 0,fng_value,Close
2019-07-25,42,9882.429688
2019-07-26,24,9847.450195
2019-07-27,47,9478.320313
2019-07-28,16,9531.769531
2019-07-29,19,9529.889648


In [13]:
df.head()

Unnamed: 0,fng_value,Close
2018-01-02,30,14754.12988
2018-01-03,38,15156.62012
2018-01-04,16,15180.08008
2018-01-05,56,16954.7793
2018-01-06,24,17172.30078


In [14]:
df.shape

(508, 2)

In [15]:
# This function accepts the column number for the features (X) and the target (y)
# It chunks the data up with a rolling window of Xt-n to predict Xt
# It returns a numpy array of X any y
def window_data(df, window, feature_col_number, target_col_number):
    X = []
    y = []
    for i in range(len(df) - window - 1):
        features = df.iloc[i:(i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [16]:
# Predict Closing Prices using a 10 day window of fear and greed index values and a target of the 11th day closing price
# Try a window size anywhere from 1 to 10 and see how the model performance changes
window_size = 1

# Column index 1 is the `Close` column
feature_column = 0
target_column = 1
X, y = window_data(df, window_size, feature_column, target_column)

In [18]:
# Print a few sample values from X and y
print("***Feature Data***")
print (f"X sample values:\n{X[:3]} \n")
print("***Target Data")
print (f"y sample values:\n{y[:3]}")

***Feature Data***
X sample values:
[[30]
 [38]
 [16]] 

***Target Data
y sample values:
[[15156.62012]
 [15180.08008]
 [16954.7793 ]]


In [24]:
# Use 70% of the data for training and the remainder for testing
# YOUR CODE HERE!
split = int(0.70*len(X))
X_train = X[:split-1]
X_test = X[split:]
y_train = y[:split-1]
y_test = y[split:]

In [25]:
# Use MinMaxScaler to scale the data between 0 and 1. 
# YOUR CODE HERE!
# Importing MixMaxScaler from sklearn
from sklearn.preprocessing import MinMaxScaler

In [26]:
# # Scale the features training and testing sets
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
scaler = MinMaxScaler().fit(y_train)
y_train = scaler.transform(y_train)
y_test = scaler.transform(y_test)

In [27]:
display(X_train.shape)
display(y_train.shape)

(353, 1)

(353, 1)

In [28]:
# Reshape the features for the model
# YOUR CODE HERE!
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1],1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1],1))
# Print some sample data after reshaping the datasets
#print (f"X_train sample values:\n{X_train[:3]} \n")
#print (f"X_test sample values:\n{X_test[:3]}")

### Build and Train the LSTM RNN

In [29]:

# Importing the required keras modules
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, LSTM, Dropout

In [30]:
display(X_train.shape)

(353, 1, 1)

In [31]:
# Build the LSTM model. 
# The return sequences need to be set to True if you are adding additional LSTM layers, but 
# You don't have to do this for the final layer. 
# YOUR CODE HERE!
number_units = 30
drop_out_franction = 0.1 # 20% of neurons will be randomly droped during each epoch


model = Sequential()

# add 1st layer
model.add(LSTM(units = number_units, 
               return_sequences = True,  
               input_shape = (X_train.shape[1],1)
          ))
model.add(Dropout(drop_out_franction))
# add 2nd layer
model.add(LSTM(units = number_units, return_sequences=True))
model.add(Dropout(drop_out_franction))
# add 3rd layer 
model.add(LSTM(units = number_units))
model.add(Dropout(drop_out_franction))

# add output layer
model.add(Dense(units=1))

In [32]:

# Compile the model
# YOUR CODE HERE!
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

In [33]:
# Summarize the model
# YOUR CODE HERE!
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 1, 30)             3840      
_________________________________________________________________
dropout (Dropout)            (None, 1, 30)             0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 1, 30)             7320      
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 30)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 30)                7320      
_________________________________________________________________
dropout_2 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 3

In [34]:

# Train the model
# Use at least 10 epochs
# Do not shuffle the data
# Experiement with the batch size, but a smaller batch size is recommended
# YOUR CODE HERE!
model.fit(X_train,y_train, epochs =50, batch_size=10, verbose = 0, shuffle=False)

<tensorflow.python.keras.callbacks.History at 0x23f1ca936d0>

#### Model Performance

In [35]:
# Evaluate the model
# YOUR CODE HERE!
model.evaluate(X_test, y_test)



[0.04047460854053497, 0.04047460854053497]

In [36]:
# Make some predictions
# YOUR CODE HERE!
predictions = model.predict(X_test)

In [37]:

# Recover the original prices instead of the scaled version
predicted_prices = scaler.inverse_transform(predictions.reshape(-1,1))
real_prices = scaler.inverse_transform(y_test.reshape(-1,1))

In [38]:
# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    'Real': real_prices.ravel(), 
    'Predicted': predicted_prices.ravel()
})
stocks.head()

Unnamed: 0,Real,Predicted
0,3467.209961,6378.393555
1,3467.209961,6199.685547
2,3434.129883,6408.851074
3,3461.629883,6258.481445
4,3508.679932,6659.326172


In [39]:
# Plot the real vs predicted values as a line chart
# YOUR CODE HERE!
stocks.hvplot(title = 'Actual vs Predicted Prices')