In [1]:
import numpy as np
import pandas as pd
import hvplot.pandas

In [2]:
# Set the random seed for reproducibility
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(2)

In [6]:
# Load the fear and greed sentiment data for Bitcoin
df = pd.read_csv('btc_sentiment.csv', index_col="date", parse_dates=True, dayfirst=True)
df = df.drop(columns="fng_classification")
df.head()

Unnamed: 0_level_0,fng_value
date,Unnamed: 1_level_1
2019-07-29,19
2019-07-28,16
2019-07-27,47
2019-07-26,24
2019-07-25,42


In [7]:
# Load the historical closing prices for Bitcoin
df2 = pd.read_csv('btc_historic.csv', index_col="Date", parse_dates=True)['Close']
df2 = df2.sort_index()
df2.tail()

Date
2019-07-25    9882.429688
2019-07-26    9847.450195
2019-07-27    9478.320313
2019-07-28    9531.769531
2019-07-29    9529.889648
Name: Close, dtype: float64

In [5]:
# Join the data into a single DataFrame
df = df.join(df2, how="inner")
df.tail()

Unnamed: 0_level_0,fng_value,Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-07-25,42,9882.429688
2019-07-26,24,9847.450195
2019-07-27,47,9478.320313
2019-07-28,16,9531.769531
2019-07-29,19,9529.889648


In [8]:
df.head()

Unnamed: 0_level_0,fng_value
date,Unnamed: 1_level_1
2019-07-29,19
2019-07-28,16
2019-07-27,47
2019-07-26,24
2019-07-25,42


This function accepts the column number for the features (X) and the target (y) It chunks the data up with a rolling window of Xt-n to predict Xt and returns a numpy array of X any y

In [9]:
def window_data(df, window, feature_col_number, target_col_number):
    X = []
    y = []
    for i in range(len(df) - window - 1):
        features = df.iloc[i:(i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [12]:
# Predict Closing Prices using a 10 day window of previous fng values
window_size = 10

# Column index 0 is the 'fng_value' column
# Using 'fng_value' column as both feature and target
feature_column = 0
target_column = 0

# Call window_data with the modified indices
X, y = window_data(df, window_size, feature_column, target_column)


In [13]:
# Use 70% of the data for training and the remaineder for testing
split = int(0.7 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

In [14]:
from sklearn.preprocessing import MinMaxScaler
# Use the MinMaxScaler to scale data between 0 and 1.

scaler = MinMaxScaler()
scaler.fit(X)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
scaler.fit(y)
y_train = scaler.transform(y_train)
y_test = scaler.transform(y_test)

In [15]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
print (f"X_train sample values:\n{X_train[:5]} \n")
print (f"X_test sample values:\n{X_test[:5]}")

X_train sample values:
[[[0.11627907]
  [0.08139535]
  [0.44186047]
  [0.1744186 ]
  [0.38372093]
  [0.12790698]
  [0.36781609]
  [0.3908046 ]
  [0.3908046 ]
  [0.29885057]]

 [[0.08139535]
  [0.44186047]
  [0.1744186 ]
  [0.38372093]
  [0.12790698]
  [0.36046512]
  [0.3908046 ]
  [0.3908046 ]
  [0.29885057]
  [0.3908046 ]]

 [[0.44186047]
  [0.1744186 ]
  [0.38372093]
  [0.12790698]
  [0.36046512]
  [0.38372093]
  [0.3908046 ]
  [0.29885057]
  [0.3908046 ]
  [0.36781609]]

 [[0.1744186 ]
  [0.38372093]
  [0.12790698]
  [0.36046512]
  [0.38372093]
  [0.38372093]
  [0.29885057]
  [0.3908046 ]
  [0.36781609]
  [0.12643678]]

 [[0.38372093]
  [0.12790698]
  [0.36046512]
  [0.38372093]
  [0.38372093]
  [0.29069767]
  [0.3908046 ]
  [0.36781609]
  [0.12643678]
  [0.29885057]]] 

X_test sample values:
[[[0.46511628]
  [0.40697674]
  [0.43023256]
  [0.39534884]
  [0.44186047]
  [0.40697674]
  [0.3908046 ]
  [0.35632184]
  [0.32183908]
  [0.27586207]]

 [[0.40697674]
  [0.43023256]
  [0.395348

### Build and Train the LSTM RNN

In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

Build the LSTM model

In [17]:
model = Sequential()

number_units = 5
dropout_fraction = 0.2

# Layer 1
model.add(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))
# Layer 2
model.add(LSTM(units=number_units, return_sequences=True))
model.add(Dropout(dropout_fraction))
# Layer 3
model.add(LSTM(units=number_units))
model.add(Dropout(dropout_fraction))
# Output layer
model.add(Dense(1))

In [18]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error")

In [19]:
# Summarize the model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 5)             140       
                                                                 
 dropout (Dropout)           (None, 10, 5)             0         
                                                                 
 lstm_1 (LSTM)               (None, 10, 5)             220       
                                                                 
 dropout_1 (Dropout)         (None, 10, 5)             0         
                                                                 
 lstm_2 (LSTM)               (None, 5)                 220       
                                                                 
 dropout_2 (Dropout)         (None, 5)                 0         
                                                                 
 dense (Dense)               (None, 1)                 6

In [20]:
# Train the model
model.fit(X_train, y_train, epochs=10, shuffle=False, batch_size=1, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x28acdab50>

### Model Performance

In [21]:
# Evaluate the model
model.evaluate(X_test, y_test)



0.020336052402853966

In [22]:
# Make some predictions
predicted = model.predict(X_test)



In [23]:
# Recover the original prices instead of the scaled version
predicted_prices = scaler.inverse_transform(predicted)
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

In [24]:
# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
}, index = df.index[-len(real_prices): ]) 
stocks.head()

Unnamed: 0_level_0,Real,Predicted
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-07-13,29.0,36.524612
2018-07-12,29.0,35.862003
2018-07-11,33.0,35.209938
2018-07-10,29.0,34.558704
2018-07-09,37.0,33.98555


In [25]:
stocks.hvplot(title='')