In [24]:
# Import Libraries

import numpy as np
import pandas as pd
import hvplot.pandas

%matplotlib inline

In [2]:
# Set the random seed for reproducibility
# Note: This is for the homework solution, but it is good practice to comment this out and run multiple experiments to evaluate your model

from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(2)

In [3]:
# Load 'btc_sentiment.csv'

sentiment = pd.read_csv('btc_sentiment.csv', index_col="date", infer_datetime_format=True, parse_dates=True)
sentiment = sentiment.drop(columns="fng_classification")
sentiment.head()

Unnamed: 0_level_0,fng_value
date,Unnamed: 1_level_1
2019-07-29,19
2019-07-28,16
2019-07-27,47
2019-07-26,24
2019-07-25,42


In [4]:
# Load 'btc_historic.csv'

historic = pd.read_csv('btc_historic.csv', index_col="Date", infer_datetime_format=True, parse_dates=True)['Close']
historic = historic.sort_index()
historic.tail()

Date
2019-07-25    9882.429688
2019-07-26    9847.450195
2019-07-27    9478.320313
2019-07-28    9531.769531
2019-07-29    9529.889648
Name: Close, dtype: float64

In [5]:
# Join the Data As One DF

btc = sentiment.join(historic, how="inner")
btc.tail()

Unnamed: 0,fng_value,Close
2019-07-25,42,9882.429688
2019-07-26,24,9847.450195
2019-07-27,47,9478.320313
2019-07-28,16,9531.769531
2019-07-29,19,9529.889648


In [6]:
btc.head

<bound method NDFrame.head of              fng_value         Close
2018-02-01          30   9114.719727
2018-02-02          15   8870.820313
2018-02-03          40   9251.269531
2018-02-04          24   8218.049805
2018-02-05          11   6937.080078
2018-02-06           8   7701.250000
2018-02-07          36   7592.720215
2018-02-08          30   8260.690430
2018-02-09          44   8696.830078
2018-02-10          54   8569.290039
2018-02-11          31   8084.609863
2018-02-12          42   8911.269531
2018-02-13          35   8544.690430
2018-02-14          55   9485.639648
2018-02-15          71  10033.750000
2018-02-16          67  10188.730469
2018-02-17          74  11097.209961
2018-02-18          63  10417.230469
2018-02-19          67  11182.280273
2018-02-20          74  11256.429688
2018-02-21          54  10481.660156
2018-02-22          44   9847.959961
2018-02-23          39  10175.509766
2018-02-24          31   9705.730469
2018-02-25          33   9610.110352
2018-02-

In [7]:
# This function accepts the column number for the features (X) and the target (y)
# It chunks the data up with a rolling window of Xt-n to predict Xt
# It returns a numpy array of X any y
def window_data(btc, window, feature_col_number, target_col_number):
    X = []
    y = []
    for i in range(len(btc) - window - 1):
        features = btc.iloc[i:(i + window), feature_col_number]
        target = btc.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [8]:
# Predict Closing Prices using a 10 day window of fear and greed index values and a target of the 11th day closing price
# Try a window size anywhere from 1 to 10 and see how the model performance changes
window_size = 1

# Column index 1 is the `Close` column
feature_column = 0
target_column = 1
X, y = window_data(btc, window_size, feature_column, target_column)

In [9]:
# Use 70% of the data for training and the remainder for testing

split = int(.7 * len(X))
X_train = X[:split - 1]
X_test = X[split:]
y_train = y[:split - 1]
y_test = y[split:]

In [10]:
from sklearn.preprocessing import MinMaxScaler

# Use the MinMaxScaler to scale data between 0 and 1.

x_train_scaler = MinMaxScaler()
x_test_scaler = MinMaxScaler()
y_train_scaler = MinMaxScaler()
y_test_scaler = MinMaxScaler()

# Fit the scaler training data

x_train_scaler.fit(X_train)
y_train_scaler.fit(y_train)

# Scale the training data

X_train = x_train_scaler.transform(X_train)
y_train = y_train_scaler.transform(y_train)

In [11]:
# fit the scaler for the testing the data

x_test_scaler.fit(X_test)
y_test_scaler.fit(y_test)

# Scale the test data

X_test = x_test_scaler.transform(X_test)
y_test = y_test_scaler.transform(y_test)

In [12]:
# Reshape the features for the model

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

## Build and Train the Model

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [14]:
# Build the LSTM model. 
# The return sequences need to be set to True if you are adding additional LSTM layers, but 
# You don't have to do this for the final layer. 

model = Sequential()

number_units = 30
dropout_fraction = 0.2

# Layer 1

model.add(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))
# Layer 2

model.add(LSTM(units=number_units, return_sequences=True))
model.add(Dropout(dropout_fraction))
# Layer 3

model.add(LSTM(units=number_units))
model.add(Dropout(dropout_fraction))
# Output layer

model.add(Dense(1))

In [15]:
# Compile the model

model.compile(optimizer="adam", loss = "mean_squared_error")

In [16]:
# Summarize the model

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 1, 30)             3840      
_________________________________________________________________
dropout (Dropout)            (None, 1, 30)             0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 1, 30)             7320      
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 30)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 30)                7320      
_________________________________________________________________
dropout_2 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 3

In [17]:
# Train the model
# Use at least 10 epochs
# Do not shuffle the data
# Experiement with the batch size, but a smaller batch size is recommended

model.fit(X_train, y_train, epochs=10, shuffle=False, batch_size=1, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x208c6ca0708>

## Model Performance

In [18]:
# Evaluate the model

model.evaluate(X_test, y_test)



0.11466586589813232

In [19]:
# Make some predictions

predicted = model.predict(X_test)
predicted

array([[0.11753437],
       [0.10923523],
       [0.09666777],
       [0.19444564],
       [0.20129962],
       [0.18020514],
       [0.18020514],
       [0.1874114 ],
       [0.19444564],
       [0.21443716],
       [0.1338903 ],
       [0.10506046],
       [0.10087083],
       [0.10087083],
       [0.11339369],
       [0.10923523],
       [0.12165583],
       [0.0882272 ],
       [0.08399255],
       [0.11339369],
       [0.1690929 ],
       [0.16149683],
       [0.16531283],
       [0.16531283],
       [0.1690929 ],
       [0.1690929 ],
       [0.16149683],
       [0.16531283],
       [0.16531283],
       [0.16149683],
       [0.17654029],
       [0.1690929 ],
       [0.1690929 ],
       [0.16531283],
       [0.19095056],
       [0.1690929 ],
       [0.14589979],
       [0.12165583],
       [0.12983525],
       [0.11753437],
       [0.12165583],
       [0.141924  ],
       [0.14589979],
       [0.1728358 ],
       [0.1690929 ],
       [0.19095056],
       [0.1838292 ],
       [0.220

In [20]:
# Recover the original prices instead of the scaled version

predicted_prices = y_test_scaler.inverse_transform(predicted)
real_prices = y_test_scaler.inverse_transform(y_test.reshape(-1, 1))

In [21]:
# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
})
stocks.head()

Unnamed: 0,Real,Predicted
0,3670.919922,4757.214844
1,3670.919922,4680.51123
2,3912.570068,4564.358398
3,3924.23999,5468.056152
4,3974.050049,5531.40332


In [22]:
# Plot the real vs predicted values as a line chart

stocks.hvplot()

In [23]:
# FIN